/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in save_size */
  int vrsave_size;              /* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;     /* size of altivec alignment padding if
                                   not in save_size */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; calling to it
   lets us get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV       = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV       = 0x002,
  RECIP_V4SF_DIV     = 0x004,
  RECIP_V2DF_DIV     = 0x008,

  RECIP_SF_RSQRT     = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT     = 0x020,
  RECIP_V4SF_RSQRT   = 0x040,
  RECIP_V2DF_RSQRT   = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE         = 0,
  RECIP_ALL          = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                        | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                        | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */

/* Per-mode table of reload insns and, for each register type, the mask of
   valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;   /* INSN to reload for loading.  */
  enum insn_code reload_store;  /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;         /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
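
/* Illustrative sketch, not part of the original file: the same pattern
   extends to any of the RELOAD_REG_* bits defined above.  For instance, a
   hypothetical helper testing whether a mode supports reg+offset
   addressing would mirror the two helpers above.  */
static inline bool
mode_supports_offset_addressing_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_OFFSET)
          != 0);
}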
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  6,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
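
/* Editorial note (not in the original file): the pattern above is a
   classic X-macro.  rs6000-builtin.def invokes RS6000_BUILTIN_<kind> once
   per builtin, so with the definitions above each invocation expands to
   one initializer row of rs6000_builtin_info.  Schematically, with
   hypothetical arguments for illustration only:

     RS6000_BUILTIN_2 (ENUM_NAME, "__builtin_name", MASK, ATTR, CODE_FOR_insn)

   expands to

     { "__builtin_name", CODE_FOR_insn, MASK, ATTR },  */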
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];         /* return value + 3 arguments.  */
  unsigned char uns_p[4];       /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",   "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",   "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16",  "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24",  "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",   "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",   "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16",  "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24",  "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
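
/* Worked example (editorial, for illustration only):
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000, the bit for %v0,
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2) is 0x20000000, the bit
   for %v2, so higher-numbered vector registers map to less significant
   bits of the VRSAVE mask.  */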
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
1626 #ifdef HAVE_AS_TLS
1627 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1628 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1629 #endif
1631 /* Use a 32-bit anchor range. This leads to sequences like:
1633 addis tmp,anchor,high
1634 add dest,tmp,low
1636 where tmp itself acts as an anchor, and can be shared between
1637 accesses to the same 64k page. */
1638 #undef TARGET_MIN_ANCHOR_OFFSET
1639 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1640 #undef TARGET_MAX_ANCHOR_OFFSET
1641 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
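/* Note: the minimum is spelled -0x7fffffff - 1 rather than -0x80000000
   because the literal 0x80000000 does not fit in a 32-bit signed int; it
   would be treated as unsigned and the negation would not yield INT_MIN.  */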
1642 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1643 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1644 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1645 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1647 #undef TARGET_BUILTIN_RECIPROCAL
1648 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1650 #undef TARGET_EXPAND_TO_RTL_HOOK
1651 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1653 #undef TARGET_INSTANTIATE_DECLS
1654 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1656 #undef TARGET_SECONDARY_RELOAD
1657 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1659 #undef TARGET_LEGITIMATE_ADDRESS_P
1660 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1662 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1663 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1665 #undef TARGET_LRA_P
1666 #define TARGET_LRA_P rs6000_lra_p
1668 #undef TARGET_CAN_ELIMINATE
1669 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1671 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1672 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1674 #undef TARGET_TRAMPOLINE_INIT
1675 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1677 #undef TARGET_FUNCTION_VALUE
1678 #define TARGET_FUNCTION_VALUE rs6000_function_value
1680 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1681 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1683 #undef TARGET_OPTION_SAVE
1684 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1686 #undef TARGET_OPTION_RESTORE
1687 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1689 #undef TARGET_OPTION_PRINT
1690 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1692 #undef TARGET_CAN_INLINE_P
1693 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1695 #undef TARGET_SET_CURRENT_FUNCTION
1696 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1698 #undef TARGET_LEGITIMATE_CONSTANT_P
1699 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1701 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1702 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1704 #undef TARGET_CAN_USE_DOLOOP_P
1705 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1707 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1708 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1710 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1711 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1712 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1713 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1714 #undef TARGET_UNWIND_WORD_MODE
1715 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1718 /* Processor table. */
1719 struct rs6000_ptt
1721 const char *const name; /* Canonical processor name. */
1722 const enum processor_type processor; /* Processor type enum value. */
1723 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1726 static struct rs6000_ptt const processor_target_table[] =
1728 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1729 #include "rs6000-cpus.def"
1730 #undef RS6000_CPU
1733 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1734 name is invalid. */
1736 static int
1737 rs6000_cpu_name_lookup (const char *name)
1739 size_t i;
1741 if (name != NULL)
1743 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1744 if (! strcmp (name, processor_target_table[i].name))
1745 return (int)i;
1748 return -1;
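/* For example, rs6000_cpu_name_lookup ("power7") returns the index of the
   "power7" entry generated from rs6000-cpus.def, while an unrecognized or
   NULL name yields -1.  */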
1752 /* Return number of consecutive hard regs needed starting at reg REGNO
1753 to hold something of mode MODE.
1754 This is ordinarily the length in words of a value of mode MODE
1755 but can be less for certain modes in special long registers.
1757 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1758 scalar instructions. The upper 32 bits are only available to the
1759 SIMD instructions.
1761 POWER and PowerPC GPRs hold 32 bits worth;
1762 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1764 static int
1765 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1767 unsigned HOST_WIDE_INT reg_size;
1769 /* TF/TD modes are special in that they always take 2 registers. */
1770 if (FP_REGNO_P (regno))
1771 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1772 ? UNITS_PER_VSX_WORD
1773 : UNITS_PER_FP_WORD);
1775 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1776 reg_size = UNITS_PER_SPE_WORD;
1778 else if (ALTIVEC_REGNO_P (regno))
1779 reg_size = UNITS_PER_ALTIVEC_WORD;
1781 /* The value returned for SCmode in the E500 double case is 2 for
1782 ABI compatibility; storing an SCmode value in a single register
1783 would require function_arg and rs6000_spe_function_arg to handle
1784 SCmode so as to pass the value correctly in a pair of
1785 registers. */
1786 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1787 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1788 reg_size = UNITS_PER_FP_WORD;
1790 else
1791 reg_size = UNITS_PER_WORD;
1793 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
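/* The return expression is a ceiling division.  For example, DFmode
   (8 bytes) in a 32-bit GPR needs (8 + 4 - 1) / 4 = 2 registers, while
   V2DFmode (16 bytes) in a VSX register needs (16 + 16 - 1) / 16 = 1.  */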
1796 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1797 MODE. */
1798 static int
1799 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1801 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1803 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1804 register pairs, and PTImode is used where we need to deal with them.
1805 Don't allow quad words in the argument or frame
1806 pointer registers, just registers 0..31. */
1807 if (mode == PTImode)
1808 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1809 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1810 && ((regno & 1) == 0));
1812 /* VSX registers that overlap the FPR registers are larger than the FPRs on
1813 non-VSX implementations. Don't allow an item to be split between an FP
1814 register and an Altivec register. Allow TImode in all VSX registers if the
1815 user asked for it. */
1816 if (TARGET_VSX && VSX_REGNO_P (regno)
1817 && (VECTOR_MEM_VSX_P (mode)
1818 || reg_addr[mode].scalar_in_vmx_p
1819 || (TARGET_VSX_TIMODE && mode == TImode)
1820 || (TARGET_VADDUQM && mode == V1TImode)))
1822 if (FP_REGNO_P (regno))
1823 return FP_REGNO_P (last_regno);
1825 if (ALTIVEC_REGNO_P (regno))
1827 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1828 return 0;
1830 return ALTIVEC_REGNO_P (last_regno);
1834 /* The GPRs can hold any mode, but values bigger than one register
1835 cannot go past R31. */
1836 if (INT_REGNO_P (regno))
1837 return INT_REGNO_P (last_regno);
1839 /* The float registers (except for VSX vector modes) can only hold floating
1840 modes and DImode. */
1841 if (FP_REGNO_P (regno))
1843 if (SCALAR_FLOAT_MODE_P (mode)
1844 && (mode != TDmode || (regno % 2) == 0)
1845 && FP_REGNO_P (last_regno))
1846 return 1;
1848 if (GET_MODE_CLASS (mode) == MODE_INT
1849 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1850 return 1;
1852 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1853 && PAIRED_VECTOR_MODE (mode))
1854 return 1;
1856 return 0;
1859 /* The CR register can only hold CC modes. */
1860 if (CR_REGNO_P (regno))
1861 return GET_MODE_CLASS (mode) == MODE_CC;
1863 if (CA_REGNO_P (regno))
1864 return mode == Pmode || mode == SImode;
1866 /* AltiVec modes only in AltiVec registers. */
1867 if (ALTIVEC_REGNO_P (regno))
1868 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1869 || mode == V1TImode);
1871 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1872 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1873 return 1;
1875 /* We cannot put non-VSX TImode or PTImode anywhere except in a general
1876 register, and the value must fit within the available register set. */
1878 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
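/* Illustrating the checks above: SImode is accepted in any GPR, TDmode is
   accepted only in an even-numbered FPR (so the register pair is aligned),
   and CCmode is accepted only in the condition-register fields.  */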
1881 /* Print interesting facts about registers. */
1882 static void
1883 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1885 int r, m;
1887 for (r = first_regno; r <= last_regno; ++r)
1889 const char *comma = "";
1890 int len;
1892 if (first_regno == last_regno)
1893 fprintf (stderr, "%s:\t", reg_name);
1894 else
1895 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1897 len = 8;
1898 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1899 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1901 if (len > 70)
1903 fprintf (stderr, ",\n\t");
1904 len = 8;
1905 comma = "";
1908 if (rs6000_hard_regno_nregs[m][r] > 1)
1909 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1910 rs6000_hard_regno_nregs[m][r]);
1911 else
1912 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1914 comma = ", ";
1917 if (call_used_regs[r])
1919 if (len > 70)
1921 fprintf (stderr, ",\n\t");
1922 len = 8;
1923 comma = "";
1926 len += fprintf (stderr, "%s%s", comma, "call-used");
1927 comma = ", ";
1930 if (fixed_regs[r])
1932 if (len > 70)
1934 fprintf (stderr, ",\n\t");
1935 len = 8;
1936 comma = "";
1939 len += fprintf (stderr, "%s%s", comma, "fixed");
1940 comma = ", ";
1943 if (len > 70)
1945 fprintf (stderr, ",\n\t");
1946 comma = "";
1949 len += fprintf (stderr, "%sreg-class = %s", comma,
1950 reg_class_names[(int)rs6000_regno_regclass[r]]);
1951 comma = ", ";
1953 if (len > 70)
1955 fprintf (stderr, ",\n\t");
1956 comma = "";
1959 fprintf (stderr, "%sregno = %d\n", comma, r);
1963 static const char *
1964 rs6000_debug_vector_unit (enum rs6000_vector v)
1966 const char *ret;
1968 switch (v)
1970 case VECTOR_NONE: ret = "none"; break;
1971 case VECTOR_ALTIVEC: ret = "altivec"; break;
1972 case VECTOR_VSX: ret = "vsx"; break;
1973 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1974 case VECTOR_PAIRED: ret = "paired"; break;
1975 case VECTOR_SPE: ret = "spe"; break;
1976 case VECTOR_OTHER: ret = "other"; break;
1977 default: ret = "unknown"; break;
1980 return ret;
1983 /* Inner function printing just the address mask for a particular reload
1984 register class. */
1985 DEBUG_FUNCTION char *
1986 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1988 static char ret[8];
1989 char *p = ret;
1991 if ((mask & RELOAD_REG_VALID) != 0)
1992 *p++ = 'v';
1993 else if (keep_spaces)
1994 *p++ = ' ';
1996 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1997 *p++ = 'm';
1998 else if (keep_spaces)
1999 *p++ = ' ';
2001 if ((mask & RELOAD_REG_INDEXED) != 0)
2002 *p++ = 'i';
2003 else if (keep_spaces)
2004 *p++ = ' ';
2006 if ((mask & RELOAD_REG_OFFSET) != 0)
2007 *p++ = 'o';
2008 else if (keep_spaces)
2009 *p++ = ' ';
2011 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2012 *p++ = '+';
2013 else if (keep_spaces)
2014 *p++ = ' ';
2016 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2017 *p++ = '+';
2018 else if (keep_spaces)
2019 *p++ = ' ';
2021 if ((mask & RELOAD_REG_AND_M16) != 0)
2022 *p++ = '&';
2023 else if (keep_spaces)
2024 *p++ = ' ';
2026 *p = '\0';
2028 return ret;
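/* Note that the result is built in a static buffer, so each call
   overwrites the previous string.  With every capability bit set the
   string would read "vmio++&"; with KEEP_SPACES, missing capabilities
   print as blanks so the columns line up in the -mdebug=reg output.  */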
2031 /* Print the address masks in a human readable fashion. */
2032 DEBUG_FUNCTION void
2033 rs6000_debug_print_mode (ssize_t m)
2035 ssize_t rc;
2037 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2038 for (rc = 0; rc < N_RELOAD_REG; rc++)
2039 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2040 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2042 if (rs6000_vector_unit[m] != VECTOR_NONE
2043 || rs6000_vector_mem[m] != VECTOR_NONE
2044 || (reg_addr[m].reload_store != CODE_FOR_nothing)
2045 || (reg_addr[m].reload_load != CODE_FOR_nothing)
2046 || reg_addr[m].scalar_in_vmx_p)
2048 fprintf (stderr,
2049 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
2050 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2051 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2052 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2053 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2054 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2057 fputs ("\n", stderr);
2060 #define DEBUG_FMT_ID "%-32s= "
2061 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2062 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2063 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2065 /* Print various interesting information with -mdebug=reg. */
2066 static void
2067 rs6000_debug_reg_global (void)
2069 static const char *const tf[2] = { "false", "true" };
2070 const char *nl = (const char *)0;
2071 int m;
2072 size_t m1, m2, v;
2073 char costly_num[20];
2074 char nop_num[20];
2075 char flags_buffer[40];
2076 const char *costly_str;
2077 const char *nop_str;
2078 const char *trace_str;
2079 const char *abi_str;
2080 const char *cmodel_str;
2081 struct cl_target_option cl_opts;
2083 /* Modes we want tieable information on. */
2084 static const machine_mode print_tieable_modes[] = {
2085 QImode,
2086 HImode,
2087 SImode,
2088 DImode,
2089 TImode,
2090 PTImode,
2091 SFmode,
2092 DFmode,
2093 TFmode,
2094 SDmode,
2095 DDmode,
2096 TDmode,
2097 V8QImode,
2098 V4HImode,
2099 V2SImode,
2100 V16QImode,
2101 V8HImode,
2102 V4SImode,
2103 V2DImode,
2104 V1TImode,
2105 V32QImode,
2106 V16HImode,
2107 V8SImode,
2108 V4DImode,
2109 V2TImode,
2110 V2SFmode,
2111 V4SFmode,
2112 V2DFmode,
2113 V8SFmode,
2114 V4DFmode,
2115 CCmode,
2116 CCUNSmode,
2117 CCEQmode,
2120 /* Virtual regs we are interested in. */
2121 static const struct {
2122 int regno; /* register number. */
2123 const char *name; /* register name. */
2124 } virtual_regs[] = {
2125 { STACK_POINTER_REGNUM, "stack pointer:" },
2126 { TOC_REGNUM, "toc: " },
2127 { STATIC_CHAIN_REGNUM, "static chain: " },
2128 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2129 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2130 { ARG_POINTER_REGNUM, "arg pointer: " },
2131 { FRAME_POINTER_REGNUM, "frame pointer:" },
2132 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2133 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2134 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2135 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2136 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2137 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2138 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2139 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2140 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2143 fputs ("\nHard register information:\n", stderr);
2144 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2145 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2146 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2147 LAST_ALTIVEC_REGNO,
2148 "vs");
2149 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2150 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2151 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2152 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2153 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2154 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2155 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2156 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2158 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2159 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2160 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2162 fprintf (stderr,
2163 "\n"
2164 "d reg_class = %s\n"
2165 "f reg_class = %s\n"
2166 "v reg_class = %s\n"
2167 "wa reg_class = %s\n"
2168 "wd reg_class = %s\n"
2169 "wf reg_class = %s\n"
2170 "wg reg_class = %s\n"
2171 "wh reg_class = %s\n"
2172 "wi reg_class = %s\n"
2173 "wj reg_class = %s\n"
2174 "wk reg_class = %s\n"
2175 "wl reg_class = %s\n"
2176 "wm reg_class = %s\n"
2177 "wr reg_class = %s\n"
2178 "ws reg_class = %s\n"
2179 "wt reg_class = %s\n"
2180 "wu reg_class = %s\n"
2181 "wv reg_class = %s\n"
2182 "ww reg_class = %s\n"
2183 "wx reg_class = %s\n"
2184 "wy reg_class = %s\n"
2185 "wz reg_class = %s\n"
2186 "\n",
2187 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2188 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2189 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2190 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2191 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2192 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2193 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2194 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2195 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2196 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2197 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2198 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2199 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2200 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2201 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2202 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2203 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2204 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2205 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2206 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2207 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2208 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2210 nl = "\n";
2211 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2212 rs6000_debug_print_mode (m);
2214 fputs ("\n", stderr);
2216 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2218 machine_mode mode1 = print_tieable_modes[m1];
2219 bool first_time = true;
2221 nl = (const char *)0;
2222 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2224 machine_mode mode2 = print_tieable_modes[m2];
2225 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2227 if (first_time)
2229 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2230 nl = "\n";
2231 first_time = false;
2234 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2238 if (!first_time)
2239 fputs ("\n", stderr);
2242 if (nl)
2243 fputs (nl, stderr);
2245 if (rs6000_recip_control)
2247 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2249 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2250 if (rs6000_recip_bits[m])
2252 fprintf (stderr,
2253 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2254 GET_MODE_NAME (m),
2255 (RS6000_RECIP_AUTO_RE_P (m)
2256 ? "auto"
2257 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2258 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2259 ? "auto"
2260 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2263 fputs ("\n", stderr);
2266 if (rs6000_cpu_index >= 0)
2268 const char *name = processor_target_table[rs6000_cpu_index].name;
2269 HOST_WIDE_INT flags
2270 = processor_target_table[rs6000_cpu_index].target_enable;
2272 sprintf (flags_buffer, "-mcpu=%s flags", name);
2273 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2275 else
2276 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2278 if (rs6000_tune_index >= 0)
2280 const char *name = processor_target_table[rs6000_tune_index].name;
2281 HOST_WIDE_INT flags
2282 = processor_target_table[rs6000_tune_index].target_enable;
2284 sprintf (flags_buffer, "-mtune=%s flags", name);
2285 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2287 else
2288 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2290 cl_target_option_save (&cl_opts, &global_options);
2291 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2292 rs6000_isa_flags);
2294 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2295 rs6000_isa_flags_explicit);
2297 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2298 rs6000_builtin_mask);
2300 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2302 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2303 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2305 switch (rs6000_sched_costly_dep)
2307 case max_dep_latency:
2308 costly_str = "max_dep_latency";
2309 break;
2311 case no_dep_costly:
2312 costly_str = "no_dep_costly";
2313 break;
2315 case all_deps_costly:
2316 costly_str = "all_deps_costly";
2317 break;
2319 case true_store_to_load_dep_costly:
2320 costly_str = "true_store_to_load_dep_costly";
2321 break;
2323 case store_to_load_dep_costly:
2324 costly_str = "store_to_load_dep_costly";
2325 break;
2327 default:
2328 costly_str = costly_num;
2329 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2330 break;
2333 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2335 switch (rs6000_sched_insert_nops)
2337 case sched_finish_regroup_exact:
2338 nop_str = "sched_finish_regroup_exact";
2339 break;
2341 case sched_finish_pad_groups:
2342 nop_str = "sched_finish_pad_groups";
2343 break;
2345 case sched_finish_none:
2346 nop_str = "sched_finish_none";
2347 break;
2349 default:
2350 nop_str = nop_num;
2351 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2352 break;
2355 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2357 switch (rs6000_sdata)
2359 default:
2360 case SDATA_NONE:
2361 break;
2363 case SDATA_DATA:
2364 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2365 break;
2367 case SDATA_SYSV:
2368 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2369 break;
2371 case SDATA_EABI:
2372 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2373 break;
2377 switch (rs6000_traceback)
2379 case traceback_default: trace_str = "default"; break;
2380 case traceback_none: trace_str = "none"; break;
2381 case traceback_part: trace_str = "part"; break;
2382 case traceback_full: trace_str = "full"; break;
2383 default: trace_str = "unknown"; break;
2386 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2388 switch (rs6000_current_cmodel)
2390 case CMODEL_SMALL: cmodel_str = "small"; break;
2391 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2392 case CMODEL_LARGE: cmodel_str = "large"; break;
2393 default: cmodel_str = "unknown"; break;
2396 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2398 switch (rs6000_current_abi)
2400 case ABI_NONE: abi_str = "none"; break;
2401 case ABI_AIX: abi_str = "aix"; break;
2402 case ABI_ELFv2: abi_str = "ELFv2"; break;
2403 case ABI_V4: abi_str = "V4"; break;
2404 case ABI_DARWIN: abi_str = "darwin"; break;
2405 default: abi_str = "unknown"; break;
2408 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2410 if (rs6000_altivec_abi)
2411 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2413 if (rs6000_spe_abi)
2414 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2416 if (rs6000_darwin64_abi)
2417 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2419 if (rs6000_float_gprs)
2420 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2422 fprintf (stderr, DEBUG_FMT_S, "fprs",
2423 (TARGET_FPRS ? "true" : "false"));
2425 fprintf (stderr, DEBUG_FMT_S, "single_float",
2426 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2428 fprintf (stderr, DEBUG_FMT_S, "double_float",
2429 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2431 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2432 (TARGET_SOFT_FLOAT ? "true" : "false"));
2434 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2435 (TARGET_E500_SINGLE ? "true" : "false"));
2437 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2438 (TARGET_E500_DOUBLE ? "true" : "false"));
2440 if (TARGET_LINK_STACK)
2441 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2443 if (targetm.lra_p ())
2444 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2446 if (TARGET_P8_FUSION)
2447 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2448 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2450 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2451 TARGET_SECURE_PLT ? "secure" : "bss");
2452 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2453 aix_struct_return ? "aix" : "sysv");
2454 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2455 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2456 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2457 tf[!!rs6000_align_branch_targets]);
2458 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2459 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2460 rs6000_long_double_type_size);
2461 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2462 (int)rs6000_sched_restricted_insns_priority);
2463 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2464 (int)END_BUILTINS);
2465 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2466 (int)RS6000_BUILTIN_COUNT);
2468 if (TARGET_VSX)
2469 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2470 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2474 /* Update the addr mask bits in reg_addr to help secondary reload and the
2475 legitimate address support figure out the appropriate addressing to
2476 use. */
2478 static void
2479 rs6000_setup_reg_addr_masks (void)
2481 ssize_t rc, reg, m, nregs;
2482 addr_mask_type any_addr_mask, addr_mask;
2484 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2486 machine_mode m2 = (machine_mode)m;
2488 /* SDmode is special in that we want to access it only via REG+REG
2489 addressing on power7 and above, since we want to use the LFIWZX and
2490 STFIWX instructions to load and store it. */
2491 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2493 any_addr_mask = 0;
2494 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2496 addr_mask = 0;
2497 reg = reload_reg_map[rc].reg;
2499 /* Can mode values go in the GPR/FPR/Altivec registers? */
2500 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2502 nregs = rs6000_hard_regno_nregs[m][reg];
2503 addr_mask |= RELOAD_REG_VALID;
2505 /* Indicate if the mode takes more than 1 physical register. If
2506 it takes a single register, indicate it can do REG+REG
2507 addressing. */
2508 if (nregs > 1 || m == BLKmode)
2509 addr_mask |= RELOAD_REG_MULTIPLE;
2510 else
2511 addr_mask |= RELOAD_REG_INDEXED;
2513 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2514 addressing. Restrict addressing on SPE for 64-bit types
2515 because of the SUBREG hackery used to address 64-bit floats in
2516 '32-bit' GPRs. */
2518 if (TARGET_UPDATE
2519 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2520 && GET_MODE_SIZE (m2) <= 8
2521 && !VECTOR_MODE_P (m2)
2522 && !COMPLEX_MODE_P (m2)
2523 && !indexed_only_p
2524 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2526 addr_mask |= RELOAD_REG_PRE_INCDEC;
2528 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2529 we don't allow PRE_MODIFY for some multi-register
2530 operations. */
2531 switch (m)
2533 default:
2534 addr_mask |= RELOAD_REG_PRE_MODIFY;
2535 break;
2537 case DImode:
2538 if (TARGET_POWERPC64)
2539 addr_mask |= RELOAD_REG_PRE_MODIFY;
2540 break;
2542 case DFmode:
2543 case DDmode:
2544 if (TARGET_DF_INSN)
2545 addr_mask |= RELOAD_REG_PRE_MODIFY;
2546 break;
2551 /* GPR and FPR registers can do REG+OFFSET addressing, except
2552 possibly for SDmode. */
2553 if ((addr_mask != 0) && !indexed_only_p
2554 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2555 addr_mask |= RELOAD_REG_OFFSET;
2557 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2558 addressing on 128-bit types. */
2559 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2560 && (addr_mask & RELOAD_REG_VALID) != 0)
2561 addr_mask |= RELOAD_REG_AND_M16;
2563 reg_addr[m].addr_mask[rc] = addr_mask;
2564 any_addr_mask |= addr_mask;
2567 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
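/* RELOAD_REG_ANY is the union of the GPR/FPR/VMX masks computed above, so
   a capability bit is set for a mode if any reload register class
   supports that form of addressing.  */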
2572 /* Initialize the various global tables that are based on register size. */
2573 static void
2574 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2576 ssize_t r, m, c;
2577 int align64;
2578 int align32;
2580 /* Precalculate REGNO_REG_CLASS. */
2581 rs6000_regno_regclass[0] = GENERAL_REGS;
2582 for (r = 1; r < 32; ++r)
2583 rs6000_regno_regclass[r] = BASE_REGS;
2585 for (r = 32; r < 64; ++r)
2586 rs6000_regno_regclass[r] = FLOAT_REGS;
2588 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2589 rs6000_regno_regclass[r] = NO_REGS;
2591 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2592 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2594 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2595 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2596 rs6000_regno_regclass[r] = CR_REGS;
2598 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2599 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2600 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2601 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2602 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2603 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2604 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2605 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2606 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2607 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2608 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2609 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2611 /* Precalculate the mapping from each register class to its simpler reload
2612 register class. We don't need all of the register classes that are
2613 combinations of different classes, just the simple ones that have constraint letters. */
2614 for (c = 0; c < N_REG_CLASSES; c++)
2615 reg_class_to_reg_type[c] = NO_REG_TYPE;
2617 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2618 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2619 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2620 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2621 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2622 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2623 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2624 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2625 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2626 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2627 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2628 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2630 if (TARGET_VSX)
2632 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2633 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2635 else
2637 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2638 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2641 /* Precalculate the valid memory formats as well as the vector information;
2642 this must be set up before the rs6000_hard_regno_nregs_internal calls
2643 below. */
2644 gcc_assert ((int)VECTOR_NONE == 0);
2645 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2646 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2648 gcc_assert ((int)CODE_FOR_nothing == 0);
2649 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2651 gcc_assert ((int)NO_REGS == 0);
2652 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2654 /* The VSX hardware allows native alignment for vectors, but we control whether
2655 the compiler believes it can use native alignment or still uses 128-bit alignment. */
2656 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2658 align64 = 64;
2659 align32 = 32;
2661 else
2663 align64 = 128;
2664 align32 = 128;
2667 /* V2DF mode, VSX only. */
2668 if (TARGET_VSX)
2670 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2671 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2672 rs6000_vector_align[V2DFmode] = align64;
2675 /* V4SF mode, either VSX or Altivec. */
2676 if (TARGET_VSX)
2678 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2679 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2680 rs6000_vector_align[V4SFmode] = align32;
2682 else if (TARGET_ALTIVEC)
2684 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2685 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2686 rs6000_vector_align[V4SFmode] = align32;
2689 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2690 and stores. */
2691 if (TARGET_ALTIVEC)
2693 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2694 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2695 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2696 rs6000_vector_align[V4SImode] = align32;
2697 rs6000_vector_align[V8HImode] = align32;
2698 rs6000_vector_align[V16QImode] = align32;
2700 if (TARGET_VSX)
2702 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2703 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2704 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2706 else
2708 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2709 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2710 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2714 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit. Allow it
2715 under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2716 if (TARGET_VSX)
2718 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2719 rs6000_vector_unit[V2DImode]
2720 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2721 rs6000_vector_align[V2DImode] = align64;
2723 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2724 rs6000_vector_unit[V1TImode]
2725 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2726 rs6000_vector_align[V1TImode] = 128;
2729 /* DFmode, see if we want to use the VSX unit. Memory is handled
2730 differently, so don't set rs6000_vector_mem. */
2731 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2733 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2734 rs6000_vector_align[DFmode] = 64;
2737 /* SFmode, see if we want to use the VSX unit. */
2738 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2740 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2741 rs6000_vector_align[SFmode] = 32;
2744 /* Allow TImode in VSX register and set the VSX memory macros. */
2745 if (TARGET_VSX && TARGET_VSX_TIMODE)
2747 rs6000_vector_mem[TImode] = VECTOR_VSX;
2748 rs6000_vector_align[TImode] = align64;
2751 /* TODO add SPE and paired floating point vector support. */
2753 /* Register class constraints for constraint letters that depend on compile
2754 switches. When the VSX code was added, different constraints were added
2755 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2756 of the VSX registers are used. The register classes for scalar floating
2757 point types are set based on whether we allow that type into the upper
2758 (Altivec) registers. GCC has register classes that target the Altivec
2759 registers for load/store operations, so that a VSX memory operation can be
2760 selected instead of the traditional floating point operation. The
2761 constraints are:
2763 d - Register class to use with traditional DFmode instructions.
2764 f - Register class to use with traditional SFmode instructions.
2765 v - Altivec register.
2766 wa - Any VSX register.
2767 wc - Reserved to represent individual CR bits (used in LLVM).
2768 wd - Preferred register class for V2DFmode.
2769 wf - Preferred register class for V4SFmode.
2770 wg - Float register for power6x move insns.
2771 wh - FP register for direct move instructions.
2772 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2773 wj - FP or VSX register to hold 64-bit integers for direct moves.
2774 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2775 wl - Float register if we can do 32-bit signed int loads.
2776 wm - VSX register for ISA 2.07 direct move operations.
2777 wn - always NO_REGS.
2778 wr - GPR if 64-bit mode is permitted.
2779 ws - Register class to do ISA 2.06 DF operations.
2780 wt - VSX register for TImode in VSX registers.
2781 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2782 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2783 ww - Register class to do SF conversions in with VSX operations.
2784 wx - Float register if we can do 32-bit int stores.
2785 wy - Register class to do ISA 2.07 SF operations.
2786 wz - Float register if we can do 32-bit unsigned int loads. */
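/* As a hypothetical illustration, a machine-description operand such as
     (match_operand:DF 0 "vsx_register_operand" "=ws")
   would allocate its register from whatever class RS6000_CONSTRAINT_ws
   resolves to below (VSX_REGS with -mupper-regs-df, otherwise FLOAT_REGS).  */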
2788 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2789 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2791 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2792 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2794 if (TARGET_VSX)
2796 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2797 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2798 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2799 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2801 if (TARGET_VSX_TIMODE)
2802 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2804 if (TARGET_UPPER_REGS_DF) /* DFmode */
2806 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2807 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2809 else
2810 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2813 /* Add conditional constraints based on various options, to allow us to
2814 collapse multiple insn patterns. */
2815 if (TARGET_ALTIVEC)
2816 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2818 if (TARGET_MFPGPR) /* DFmode */
2819 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2821 if (TARGET_LFIWAX)
2822 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2824 if (TARGET_DIRECT_MOVE)
2826 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2827 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2828 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2829 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2830 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2831 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2834 if (TARGET_POWERPC64)
2835 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2837 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2839 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2840 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2841 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2843 else if (TARGET_P8_VECTOR)
2845 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2846 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2848 else if (TARGET_VSX)
2849 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2851 if (TARGET_STFIWX)
2852 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2854 if (TARGET_LFIWZX)
2855 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2857 /* Set up the reload helper and direct move functions. */
2858 if (TARGET_VSX || TARGET_ALTIVEC)
2860 if (TARGET_64BIT)
2862 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2863 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2864 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2865 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2866 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2867 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2868 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2869 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2870 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2871 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2872 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2873 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2874 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2875 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2876 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2877 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2878 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2879 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2880 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2881 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2883 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2884 available. */
2885 if (TARGET_NO_SDMODE_STACK)
2887 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2888 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2891 if (TARGET_VSX_TIMODE)
2893 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2894 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2897 if (TARGET_DIRECT_MOVE)
2899 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2900 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2901 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2902 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2903 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2904 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2905 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2906 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2907 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2909 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2910 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2911 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2912 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2913 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2914 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2915 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2916 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2917 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2920 else
2922 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2923 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2924 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2925 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2926 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2927 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2928 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2929 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2930 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2931 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2932 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2933 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2934 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2935 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2936 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2937 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2938 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2939 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2940 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2941 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2943 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2944 available. */
2945 if (TARGET_NO_SDMODE_STACK)
2947 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2948 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2951 if (TARGET_VSX_TIMODE)
2953 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2954 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2957 if (TARGET_DIRECT_MOVE)
2959 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2960 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2961 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2965 if (TARGET_UPPER_REGS_DF)
2966 reg_addr[DFmode].scalar_in_vmx_p = true;
2968 if (TARGET_UPPER_REGS_SF)
2969 reg_addr[SFmode].scalar_in_vmx_p = true;
2972 /* Precalculate HARD_REGNO_NREGS. */
2973 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2974 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2975 rs6000_hard_regno_nregs[m][r]
2976 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2978 /* Precalculate HARD_REGNO_MODE_OK. */
2979 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2980 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2981 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2982 rs6000_hard_regno_mode_ok_p[m][r] = true;
2984 /* Precalculate CLASS_MAX_NREGS sizes. */
2985 for (c = 0; c < LIM_REG_CLASSES; ++c)
2987 int reg_size;
2989 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2990 reg_size = UNITS_PER_VSX_WORD;
2992 else if (c == ALTIVEC_REGS)
2993 reg_size = UNITS_PER_ALTIVEC_WORD;
2995 else if (c == FLOAT_REGS)
2996 reg_size = UNITS_PER_FP_WORD;
2998 else
2999 reg_size = UNITS_PER_WORD;
3001 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3003 machine_mode m2 = (machine_mode)m;
3004 int reg_size2 = reg_size;
3006 /* TFmode/TDmode always takes 2 registers, even in VSX. */
3007 if (TARGET_VSX && VSX_REG_CLASS_P (c)
3008 && (m == TDmode || m == TFmode))
3009 reg_size2 = UNITS_PER_FP_WORD;
3011 rs6000_class_max_nregs[m][c]
3012 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
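/* Again a ceiling division; e.g. TFmode (16 bytes) in FLOAT_REGS uses the
   UNITS_PER_FP_WORD override above, giving (16 + 8 - 1) / 8 = 2 registers
   even when VSX is enabled.  */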
3016 if (TARGET_E500_DOUBLE)
3017 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3019 /* Calculate the modes for which to automatically generate code using the
3020 reciprocal divide and square root instructions. In the future, possibly
3021 automatically generate the instructions even if the user did not specify
3022 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3023 not accurate enough. */
3024 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3025 if (TARGET_FRES)
3026 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3027 if (TARGET_FRE)
3028 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3029 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3030 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3031 if (VECTOR_UNIT_VSX_P (V2DFmode))
3032 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3034 if (TARGET_FRSQRTES)
3035 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3036 if (TARGET_FRSQRTE)
3037 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3038 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3039 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3040 if (VECTOR_UNIT_VSX_P (V2DFmode))
3041 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3043 if (rs6000_recip_control)
3045 if (!flag_finite_math_only)
3046 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3047 if (flag_trapping_math)
3048 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3049 if (!flag_reciprocal_math)
3050 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3051 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3053 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3054 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3055 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3057 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3058 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3059 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3061 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3062 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3063 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3065 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3066 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3067 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3069 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3070 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3071 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3073 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3074 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3075 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3077 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3078 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3079 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3081 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3082 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3083 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3087 /* Update the addr mask bits in reg_addr to help secondary reload and the
3088 legitimate address support figure out the appropriate addressing to
3089 use. */
3090 rs6000_setup_reg_addr_masks ();
3092 if (global_init_p || TARGET_DEBUG_TARGET)
3094 if (TARGET_DEBUG_REG)
3095 rs6000_debug_reg_global ();
3097 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3098 fprintf (stderr,
3099 "SImode variable mult cost = %d\n"
3100 "SImode constant mult cost = %d\n"
3101 "SImode short constant mult cost = %d\n"
3102 "DImode multipliciation cost = %d\n"
3103 "SImode division cost = %d\n"
3104 "DImode division cost = %d\n"
3105 "Simple fp operation cost = %d\n"
3106 "DFmode multiplication cost = %d\n"
3107 "SFmode division cost = %d\n"
3108 "DFmode division cost = %d\n"
3109 "cache line size = %d\n"
3110 "l1 cache size = %d\n"
3111 "l2 cache size = %d\n"
3112 "simultaneous prefetches = %d\n"
3113 "\n",
3114 rs6000_cost->mulsi,
3115 rs6000_cost->mulsi_const,
3116 rs6000_cost->mulsi_const9,
3117 rs6000_cost->muldi,
3118 rs6000_cost->divsi,
3119 rs6000_cost->divdi,
3120 rs6000_cost->fp,
3121 rs6000_cost->dmul,
3122 rs6000_cost->sdiv,
3123 rs6000_cost->ddiv,
3124 rs6000_cost->cache_line_size,
3125 rs6000_cost->l1_cache_size,
3126 rs6000_cost->l2_cache_size,
3127 rs6000_cost->simultaneous_prefetches);
3131 #if TARGET_MACHO
3132 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3134 static void
3135 darwin_rs6000_override_options (void)
3137 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3138 off. */
3139 rs6000_altivec_abi = 1;
3140 TARGET_ALTIVEC_VRSAVE = 1;
3141 rs6000_current_abi = ABI_DARWIN;
3143 if (DEFAULT_ABI == ABI_DARWIN
3144 && TARGET_64BIT)
3145 darwin_one_byte_bool = 1;
3147 if (TARGET_64BIT && ! TARGET_POWERPC64)
3149 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3150 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3152 if (flag_mkernel)
3154 rs6000_default_long_calls = 1;
3155 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3158 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3159 Altivec. */
3160 if (!flag_mkernel && !flag_apple_kext
3161 && TARGET_64BIT
3162 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3163 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3165 /* Unless the user (not the configurer) has explicitly overridden
3166 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3167 G4 unless targeting the kernel. */
3168 if (!flag_mkernel
3169 && !flag_apple_kext
3170 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3171 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3172 && ! global_options_set.x_rs6000_cpu_index)
3174 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3177 #endif
3179 /* If not otherwise specified by a target, make 'long double' equivalent to
3180 'double'. */
3182 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3183 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3184 #endif
3186 /* Return the builtin mask of the various options that could affect which
3187 builtins are available. In the past we used target_flags, but we've run out of
3188 bits, and some options like SPE and PAIRED are no longer in
3189 target_flags. */
3191 HOST_WIDE_INT
3192 rs6000_builtin_mask_calculate (void)
3194 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3195 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3196 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3197 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3198 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3199 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3200 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3201 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3202 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3203 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3204 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3205 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3206 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3207 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3208 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3209 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
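/* For example, a typical -mcpu=power8 compilation (AltiVec, VSX, and
   power8-vector enabled) would return a mask containing at least
   RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR, enabling
   the builtins gated on those bits.  */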
3212 /* Implement TARGET_MD_ASM_CLOBBERS. All asm statements are considered
3213 to clobber the XER[CA] bit because clobbering that bit without telling
3214 the compiler worked just fine with versions of GCC before GCC 5, and
3215 breaking a lot of older code in ways that are hard to track down is
3216 not such a great idea. */
3218 static tree
3219 rs6000_md_asm_clobbers (tree, tree, tree clobbers)
3221 tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
3222 return tree_cons (NULL_TREE, s, clobbers);
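/* In effect, every asm statement behaves as if the user had listed the
   carry register in its clobbers, e.g.
     asm ("addic 3,3,1" : : : "xer");
   assuming "xer" is the print name of CA_REGNO on this port.  */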
3225 /* Override command line options. Mostly we process the processor type and
3226 sometimes adjust other TARGET_ options. */
3228 static bool
3229 rs6000_option_override_internal (bool global_init_p)
3231 bool ret = true;
3232 bool have_cpu = false;
3234 /* The default cpu requested at configure time, if any. */
3235 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3237 HOST_WIDE_INT set_masks;
3238 int cpu_index;
3239 int tune_index;
3240 struct cl_target_option *main_target_opt
3241 = ((global_init_p || target_option_default_node == NULL)
3242 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3244 /* Print defaults. */
3245 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3246 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3248 /* Remember the explicit arguments. */
3249 if (global_init_p)
3250 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3252 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3253 library functions, so warn about it. The flag may be useful for
3254 performance studies from time to time though, so don't disable it
3255 entirely. */
3256 if (global_options_set.x_rs6000_alignment_flags
3257 && rs6000_alignment_flags == MASK_ALIGN_POWER
3258 && DEFAULT_ABI == ABI_DARWIN
3259 && TARGET_64BIT)
3260 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3261 " it is incompatible with the installed C and C++ libraries");
3263 /* Numerous experiments show that IRA-based loop pressure
3264 calculation works better for RTL loop invariant motion on targets
3265 with enough (>= 32) registers. It is an expensive optimization,
3266 so it is enabled only for peak performance. */
3267 if (optimize >= 3 && global_init_p
3268 && !global_options_set.x_flag_ira_loop_pressure)
3269 flag_ira_loop_pressure = 1;
3271 /* Set the pointer size. */
3272 if (TARGET_64BIT)
3274 rs6000_pmode = (int)DImode;
3275 rs6000_pointer_size = 64;
3277 else
3279 rs6000_pmode = (int)SImode;
3280 rs6000_pointer_size = 32;
3283 /* Some OSs don't support saving the high part of 64-bit registers on context
3284 switch. Other OSs don't support saving Altivec registers. On those OSs,
3285 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3286 if the user wants either, the user must explicitly specify them and we
3287 won't interfere with the user's specification. */
3289 set_masks = POWERPC_MASKS;
3290 #ifdef OS_MISSING_POWERPC64
3291 if (OS_MISSING_POWERPC64)
3292 set_masks &= ~OPTION_MASK_POWERPC64;
3293 #endif
3294 #ifdef OS_MISSING_ALTIVEC
3295 if (OS_MISSING_ALTIVEC)
3296 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3297 #endif
3299 /* Don't let the processor default override masks that were set explicitly. */
3300 set_masks &= ~rs6000_isa_flags_explicit;
3302 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3303 the cpu in a target attribute or pragma, but did not specify a tuning
3304 option, use the cpu for the tuning option rather than the option specified
3305 with -mtune on the command line. Process a '--with-cpu' configuration
3306 request as an implicit -mcpu. */
3307 if (rs6000_cpu_index >= 0)
3309 cpu_index = rs6000_cpu_index;
3310 have_cpu = true;
3312 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3314 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3315 have_cpu = true;
3317 else if (implicit_cpu)
3319 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3320 have_cpu = true;
3322 else
3324 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3325 const char *default_cpu = ((!TARGET_POWERPC64)
3326 ? "powerpc"
3327 : ((BYTES_BIG_ENDIAN)
3328 ? "powerpc64"
3329 : "powerpc64le"));
3331 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3332 have_cpu = false;
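/* Illustration of the fallback (assumed scenario): a 64-bit little-endian
   compiler invoked with no -mcpu and configured without --with-cpu ends up
   with cpu_index naming "powerpc64le", but have_cpu stays false, so the
   TARGET_DEFAULT bits are left alone below. */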
3335 gcc_assert (cpu_index >= 0);
3337 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3338 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3339 with those from the cpu, except for options that were explicitly set. If
3340 we don't have a cpu, do not override the target bits set in
3341 TARGET_DEFAULT. */
3342 if (have_cpu)
3344 rs6000_isa_flags &= ~set_masks;
3345 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3346 & set_masks);
3348 else
3350 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3351 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3352 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Now that we
3353 use rs6000_isa_flags instead, we need to do the initialization here.
3355 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3356 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3357 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3358 : processor_target_table[cpu_index].target_enable);
3359 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3362 if (rs6000_tune_index >= 0)
3363 tune_index = rs6000_tune_index;
3364 else if (have_cpu)
3365 rs6000_tune_index = tune_index = cpu_index;
3366 else
3368 size_t i;
3369 enum processor_type tune_proc
3370 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3372 tune_index = -1;
3373 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3374 if (processor_target_table[i].processor == tune_proc)
3376 rs6000_tune_index = tune_index = i;
3377 break;
3381 gcc_assert (tune_index >= 0);
3382 rs6000_cpu = processor_target_table[tune_index].processor;
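/* For example: -mcpu=power8 with no -mtune tunes for power8 as well, while
   -mcpu=power8 -mtune=power7 keeps the power8 ISA bits but schedules for
   power7. */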
3384 /* Pick defaults for SPE related control flags. Do this early to make sure
3385 that the TARGET_ macros are representative ASAP. */
3387 int spe_capable_cpu =
3388 (rs6000_cpu == PROCESSOR_PPC8540
3389 || rs6000_cpu == PROCESSOR_PPC8548);
3391 if (!global_options_set.x_rs6000_spe_abi)
3392 rs6000_spe_abi = spe_capable_cpu;
3394 if (!global_options_set.x_rs6000_spe)
3395 rs6000_spe = spe_capable_cpu;
3397 if (!global_options_set.x_rs6000_float_gprs)
3398 rs6000_float_gprs =
3399 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3400 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3401 : 0);
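/* I.e., by default the 8540 keeps single-precision floats in GPRs (1), the
   8548 single and double precision (2), and all other cpus none (0). */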
3404 if (global_options_set.x_rs6000_spe_abi
3405 && rs6000_spe_abi
3406 && !TARGET_SPE_ABI)
3407 error ("not configured for SPE ABI");
3409 if (global_options_set.x_rs6000_spe
3410 && rs6000_spe
3411 && !TARGET_SPE)
3412 error ("not configured for SPE instruction set");
3414 if (main_target_opt != NULL
3415 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3416 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3417 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3418 error ("target attribute or pragma changes SPE ABI");
3420 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3421 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3422 || rs6000_cpu == PROCESSOR_PPCE5500)
3424 if (TARGET_ALTIVEC)
3425 error ("AltiVec not supported in this target");
3426 if (TARGET_SPE)
3427 error ("SPE not supported in this target");
3429 if (rs6000_cpu == PROCESSOR_PPCE6500)
3431 if (TARGET_SPE)
3432 error ("SPE not supported in this target");
3435 /* Disable Cell microcode if we are optimizing for the Cell
3436 and not optimizing for size. */
3437 if (rs6000_gen_cell_microcode == -1)
3438 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3439 && !optimize_size);
3441 /* If we are optimizing big endian systems for space and it's OK to
3442 use instructions that would be microcoded on the Cell, use the
3443 load/store multiple and string instructions. */
3444 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3445 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3446 | OPTION_MASK_STRING);
3448 /* Don't allow -mmultiple or -mstring on little endian systems
3449 unless the cpu is a 750, because the hardware doesn't support the
3450 instructions used in little endian mode, and they cause an alignment
3451 trap. The 750 does not cause an alignment trap (except when the
3452 target is unaligned). */
3454 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3456 if (TARGET_MULTIPLE)
3458 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3459 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3460 warning (0, "-mmultiple is not supported on little endian systems");
3463 if (TARGET_STRING)
3465 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3466 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3467 warning (0, "-mstring is not supported on little endian systems");
3471 /* If little-endian, default to -mstrict-align on older processors.
3472 Testing for htm matches power8 and later. */
3473 if (!BYTES_BIG_ENDIAN
3474 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3475 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
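/* E.g. a little-endian power7 (whose mask lacks OPTION_MASK_HTM) defaults
   to -mstrict-align here, while power8 and later are left alone. */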
3477 /* -maltivec={le,be} implies -maltivec. */
3478 if (rs6000_altivec_element_order != 0)
3479 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3481 /* Disallow -maltivec=le in big endian mode for now. This is not
3482 known to be useful for anyone. */
3483 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3485 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3486 rs6000_altivec_element_order = 0;
3489 /* Add some warnings for VSX. */
3490 if (TARGET_VSX)
3492 const char *msg = NULL;
3493 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3494 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3496 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3497 msg = N_("-mvsx requires hardware floating point");
3498 else
3500 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3501 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3504 else if (TARGET_PAIRED_FLOAT)
3505 msg = N_("-mvsx and -mpaired are incompatible");
3506 else if (TARGET_AVOID_XFORM > 0)
3507 msg = N_("-mvsx needs indexed addressing");
3508 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3509 & OPTION_MASK_ALTIVEC))
3511 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3512 msg = N_("-mvsx and -mno-altivec are incompatible");
3513 else
3514 msg = N_("-mno-altivec disables vsx");
3517 if (msg)
3519 warning (0, msg);
3520 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3521 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3525 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3526 the -mcpu setting to enable options that conflict. */
3527 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3528 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3529 | OPTION_MASK_ALTIVEC
3530 | OPTION_MASK_VSX)) != 0)
3531 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3532 | OPTION_MASK_DIRECT_MOVE)
3533 & ~rs6000_isa_flags_explicit);
3535 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3536 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3538 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3539 unless the user explicitly used -mno-<option> to disable the code. */
3540 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3541 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3542 else if (TARGET_VSX)
3543 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3544 else if (TARGET_POPCNTD)
3545 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3546 else if (TARGET_DFP)
3547 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3548 else if (TARGET_CMPB)
3549 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3550 else if (TARGET_FPRND)
3551 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3552 else if (TARGET_POPCNTB)
3553 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3554 else if (TARGET_ALTIVEC)
3555 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
3557 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3559 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3560 error ("-mcrypto requires -maltivec");
3561 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3564 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3566 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3567 error ("-mdirect-move requires -mvsx");
3568 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3571 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3573 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3574 error ("-mpower8-vector requires -maltivec");
3575 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3578 if (TARGET_P8_VECTOR && !TARGET_VSX)
3580 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3581 error ("-mpower8-vector requires -mvsx");
3582 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3585 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3587 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3588 error ("-mvsx-timode requires -mvsx");
3589 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3592 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3594 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3595 error ("-mhard-dfp requires -mhard-float");
3596 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3599 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3600 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3601 the individual option. */
3602 if (TARGET_UPPER_REGS > 0)
3604 if (TARGET_VSX
3605 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3607 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3608 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3610 if (TARGET_P8_VECTOR
3611 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3613 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3614 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3617 else if (TARGET_UPPER_REGS == 0)
3619 if (TARGET_VSX
3620 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3622 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3623 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3625 if (TARGET_P8_VECTOR
3626 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3628 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3629 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3633 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3635 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3636 error ("-mupper-regs-df requires -mvsx");
3637 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3640 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3642 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3643 error ("-mupper-regs-sf requires -mpower8-vector");
3644 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3647 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3648 silently turn off quad memory mode. */
3649 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3651 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3652 warning (0, N_("-mquad-memory requires 64-bit mode"));
3654 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3655 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3657 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3658 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3661 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3662 the words are reversed, but atomic operations can still be done by
3663 swapping the words. */
3664 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3666 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3667 warning (0, N_("-mquad-memory is not available in little endian mode"));
3669 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3672 /* Assume that if the user asked for normal quad memory instructions, they
3673 want the atomic versions as well, unless they explicitly told us not to
3674 use quad-word atomic instructions. */
3675 if (TARGET_QUAD_MEMORY
3676 && !TARGET_QUAD_MEMORY_ATOMIC
3677 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3678 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3680 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3681 generating power8 instructions. */
3682 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3683 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3684 & OPTION_MASK_P8_FUSION);
3686 /* Power8 does not fuse sign-extended loads with the addis instruction. If
3687 we are optimizing at high levels for speed, convert a sign-extended load
3688 into a zero-extending load followed by an explicit sign extension. */
3689 if (TARGET_P8_FUSION
3690 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3691 && optimize_function_for_speed_p (cfun)
3692 && optimize >= 3)
3693 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
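/* Sketch of the rewrite this enables (illustrative instructions): a
   sign-extending  lha 3,0(4)  becomes  lhz 3,0(4); extsh 3,3  so that the
   load itself can still fuse with a preceding addis. */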
3695 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3696 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3698 /* E500mc does "better" if we inline more aggressively. Respect the
3699 user's opinion, though. */
3700 if (rs6000_block_move_inline_limit == 0
3701 && (rs6000_cpu == PROCESSOR_PPCE500MC
3702 || rs6000_cpu == PROCESSOR_PPCE500MC64
3703 || rs6000_cpu == PROCESSOR_PPCE5500
3704 || rs6000_cpu == PROCESSOR_PPCE6500))
3705 rs6000_block_move_inline_limit = 128;
3707 /* store_one_arg depends on expand_block_move to handle at least the
3708 size of reg_parm_stack_space. */
3709 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3710 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3712 if (global_init_p)
3714 /* If the appropriate debug option is enabled, replace the target hooks
3715 with debug versions that call the real version and then print
3716 debugging information. */
3717 if (TARGET_DEBUG_COST)
3719 targetm.rtx_costs = rs6000_debug_rtx_costs;
3720 targetm.address_cost = rs6000_debug_address_cost;
3721 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3724 if (TARGET_DEBUG_ADDR)
3726 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3727 targetm.legitimize_address = rs6000_debug_legitimize_address;
3728 rs6000_secondary_reload_class_ptr
3729 = rs6000_debug_secondary_reload_class;
3730 rs6000_secondary_memory_needed_ptr
3731 = rs6000_debug_secondary_memory_needed;
3732 rs6000_cannot_change_mode_class_ptr
3733 = rs6000_debug_cannot_change_mode_class;
3734 rs6000_preferred_reload_class_ptr
3735 = rs6000_debug_preferred_reload_class;
3736 rs6000_legitimize_reload_address_ptr
3737 = rs6000_debug_legitimize_reload_address;
3738 rs6000_mode_dependent_address_ptr
3739 = rs6000_debug_mode_dependent_address;
3742 if (rs6000_veclibabi_name)
3744 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3745 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3746 else
3748 error ("unknown vectorization library ABI type (%s) for "
3749 "-mveclibabi= switch", rs6000_veclibabi_name);
3750 ret = false;
3755 if (!global_options_set.x_rs6000_long_double_type_size)
3757 if (main_target_opt != NULL
3758 && (main_target_opt->x_rs6000_long_double_type_size
3759 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3760 error ("target attribute or pragma changes long double size");
3761 else
3762 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3765 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3766 if (!global_options_set.x_rs6000_ieeequad)
3767 rs6000_ieeequad = 1;
3768 #endif
3770 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3771 target attribute or pragma which automatically enables both options,
3772 unless the altivec ABI was set. This is set by default for 64-bit, but
3773 not for 32-bit. */
3774 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3775 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3776 & ~rs6000_isa_flags_explicit);
3778 /* Enable Altivec ABI for AIX -maltivec. */
3779 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3781 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3782 error ("target attribute or pragma changes AltiVec ABI");
3783 else
3784 rs6000_altivec_abi = 1;
3787 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3788 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3789 be explicitly overridden in either case. */
3790 if (TARGET_ELF)
3792 if (!global_options_set.x_rs6000_altivec_abi
3793 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3795 if (main_target_opt != NULL &&
3796 !main_target_opt->x_rs6000_altivec_abi)
3797 error ("target attribute or pragma changes AltiVec ABI");
3798 else
3799 rs6000_altivec_abi = 1;
3803 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3804 So far, the only darwin64 targets are also Mach-O. */
3805 if (TARGET_MACHO
3806 && DEFAULT_ABI == ABI_DARWIN
3807 && TARGET_64BIT)
3809 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3810 error ("target attribute or pragma changes darwin64 ABI");
3811 else
3813 rs6000_darwin64_abi = 1;
3814 /* Default to natural alignment, for better performance. */
3815 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3819 /* Place FP constants in the constant pool instead of the TOC
3820 if section anchors are enabled. */
3821 if (flag_section_anchors
3822 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3823 TARGET_NO_FP_IN_TOC = 1;
3825 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3826 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3828 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3829 SUBTARGET_OVERRIDE_OPTIONS;
3830 #endif
3831 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3832 SUBSUBTARGET_OVERRIDE_OPTIONS;
3833 #endif
3834 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3835 SUB3TARGET_OVERRIDE_OPTIONS;
3836 #endif
3838 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3839 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3841 /* For the E500 family of cores, reset the single/double FP flags to let us
3842 check that they remain constant across attributes or pragmas. Also,
3843 clear a possible request for string instructions, which are not supported
3844 and which we might have silently enabled above for -Os.
3846 For other families, clear ISEL in case it was set implicitly. */
3849 switch (rs6000_cpu)
3851 case PROCESSOR_PPC8540:
3852 case PROCESSOR_PPC8548:
3853 case PROCESSOR_PPCE500MC:
3854 case PROCESSOR_PPCE500MC64:
3855 case PROCESSOR_PPCE5500:
3856 case PROCESSOR_PPCE6500:
3858 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3859 rs6000_double_float = TARGET_E500_DOUBLE;
3861 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3863 break;
3865 default:
3867 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3868 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3870 break;
3873 if (main_target_opt)
3875 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3876 error ("target attribute or pragma changes single precision floating "
3877 "point");
3878 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3879 error ("target attribute or pragma changes double precision floating "
3880 "point");
3883 /* Detect invalid option combinations with E500. */
3884 CHECK_E500_OPTIONS;
3886 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3887 && rs6000_cpu != PROCESSOR_POWER5
3888 && rs6000_cpu != PROCESSOR_POWER6
3889 && rs6000_cpu != PROCESSOR_POWER7
3890 && rs6000_cpu != PROCESSOR_POWER8
3891 && rs6000_cpu != PROCESSOR_PPCA2
3892 && rs6000_cpu != PROCESSOR_CELL
3893 && rs6000_cpu != PROCESSOR_PPC476);
3894 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3895 || rs6000_cpu == PROCESSOR_POWER5
3896 || rs6000_cpu == PROCESSOR_POWER7
3897 || rs6000_cpu == PROCESSOR_POWER8);
3898 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3899 || rs6000_cpu == PROCESSOR_POWER5
3900 || rs6000_cpu == PROCESSOR_POWER6
3901 || rs6000_cpu == PROCESSOR_POWER7
3902 || rs6000_cpu == PROCESSOR_POWER8
3903 || rs6000_cpu == PROCESSOR_PPCE500MC
3904 || rs6000_cpu == PROCESSOR_PPCE500MC64
3905 || rs6000_cpu == PROCESSOR_PPCE5500
3906 || rs6000_cpu == PROCESSOR_PPCE6500);
3908 /* Allow debug switches to override the above settings. These are set to -1
3909 in rs6000.opt to indicate the user hasn't directly set the switch. */
3910 if (TARGET_ALWAYS_HINT >= 0)
3911 rs6000_always_hint = TARGET_ALWAYS_HINT;
3913 if (TARGET_SCHED_GROUPS >= 0)
3914 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3916 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3917 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3919 rs6000_sched_restricted_insns_priority
3920 = (rs6000_sched_groups ? 1 : 0);
3922 /* Handle -msched-costly-dep option. */
3923 rs6000_sched_costly_dep
3924 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3926 if (rs6000_sched_costly_dep_str)
3928 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3929 rs6000_sched_costly_dep = no_dep_costly;
3930 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3931 rs6000_sched_costly_dep = all_deps_costly;
3932 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3933 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3934 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3935 rs6000_sched_costly_dep = store_to_load_dep_costly;
3936 else
3937 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3938 atoi (rs6000_sched_costly_dep_str));
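/* So -msched-costly-dep accepts the keywords above or a bare number, which
   is converted with atoi and used directly as the dependence-cost
   setting. */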
3941 /* Handle -minsert-sched-nops option. */
3942 rs6000_sched_insert_nops
3943 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3945 if (rs6000_sched_insert_nops_str)
3947 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3948 rs6000_sched_insert_nops = sched_finish_none;
3949 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3950 rs6000_sched_insert_nops = sched_finish_pad_groups;
3951 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3952 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3953 else
3954 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3955 atoi (rs6000_sched_insert_nops_str));
3958 if (global_init_p)
3960 #ifdef TARGET_REGNAMES
3961 /* If the user desires alternate register names, copy in the
3962 alternate names now. */
3963 if (TARGET_REGNAMES)
3964 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3965 #endif
3967 /* Set aix_struct_return last, after the ABI is determined.
3968 If -maix-struct-return or -msvr4-struct-return was explicitly
3969 used, don't override with the ABI default. */
3970 if (!global_options_set.x_aix_struct_return)
3971 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3973 #if 0
3974 /* IBM XL compiler defaults to unsigned bitfields. */
3975 if (TARGET_XL_COMPAT)
3976 flag_signed_bitfields = 0;
3977 #endif
3979 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3980 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3982 if (TARGET_TOC)
3983 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3985 /* We can only guarantee the availability of DI pseudo-ops when
3986 assembling for 64-bit targets. */
3987 if (!TARGET_64BIT)
3989 targetm.asm_out.aligned_op.di = NULL;
3990 targetm.asm_out.unaligned_op.di = NULL;
3994 /* Set branch target alignment, if not optimizing for size. */
3995 if (!optimize_size)
3997 /* Cell wants to be aligned to 8 bytes for dual issue; Titan wants to be
3998 aligned to 8 bytes to avoid misprediction by the branch predictor. */
3999 if (rs6000_cpu == PROCESSOR_TITAN
4000 || rs6000_cpu == PROCESSOR_CELL)
4002 if (align_functions <= 0)
4003 align_functions = 8;
4004 if (align_jumps <= 0)
4005 align_jumps = 8;
4006 if (align_loops <= 0)
4007 align_loops = 8;
4009 if (rs6000_align_branch_targets)
4011 if (align_functions <= 0)
4012 align_functions = 16;
4013 if (align_jumps <= 0)
4014 align_jumps = 16;
4015 if (align_loops <= 0)
4017 can_override_loop_align = 1;
4018 align_loops = 16;
4021 if (align_jumps_max_skip <= 0)
4022 align_jumps_max_skip = 15;
4023 if (align_loops_max_skip <= 0)
4024 align_loops_max_skip = 15;
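/* Illustrative outcome: -O2 -mcpu=power7 with no explicit -falign-* options
   ends up with functions, jumps, and loops aligned to 16 bytes, skipping at
   most 15 bytes of padding. */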
4027 /* Arrange to save and restore machine status around nested functions. */
4028 init_machine_status = rs6000_init_machine_status;
4030 /* We should always be splitting complex arguments, but we can't break
4031 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4032 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4033 targetm.calls.split_complex_arg = NULL;
4036 /* Initialize rs6000_cost with the appropriate target costs. */
4037 if (optimize_size)
4038 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4039 else
4040 switch (rs6000_cpu)
4042 case PROCESSOR_RS64A:
4043 rs6000_cost = &rs64a_cost;
4044 break;
4046 case PROCESSOR_MPCCORE:
4047 rs6000_cost = &mpccore_cost;
4048 break;
4050 case PROCESSOR_PPC403:
4051 rs6000_cost = &ppc403_cost;
4052 break;
4054 case PROCESSOR_PPC405:
4055 rs6000_cost = &ppc405_cost;
4056 break;
4058 case PROCESSOR_PPC440:
4059 rs6000_cost = &ppc440_cost;
4060 break;
4062 case PROCESSOR_PPC476:
4063 rs6000_cost = &ppc476_cost;
4064 break;
4066 case PROCESSOR_PPC601:
4067 rs6000_cost = &ppc601_cost;
4068 break;
4070 case PROCESSOR_PPC603:
4071 rs6000_cost = &ppc603_cost;
4072 break;
4074 case PROCESSOR_PPC604:
4075 rs6000_cost = &ppc604_cost;
4076 break;
4078 case PROCESSOR_PPC604e:
4079 rs6000_cost = &ppc604e_cost;
4080 break;
4082 case PROCESSOR_PPC620:
4083 rs6000_cost = &ppc620_cost;
4084 break;
4086 case PROCESSOR_PPC630:
4087 rs6000_cost = &ppc630_cost;
4088 break;
4090 case PROCESSOR_CELL:
4091 rs6000_cost = &ppccell_cost;
4092 break;
4094 case PROCESSOR_PPC750:
4095 case PROCESSOR_PPC7400:
4096 rs6000_cost = &ppc750_cost;
4097 break;
4099 case PROCESSOR_PPC7450:
4100 rs6000_cost = &ppc7450_cost;
4101 break;
4103 case PROCESSOR_PPC8540:
4104 case PROCESSOR_PPC8548:
4105 rs6000_cost = &ppc8540_cost;
4106 break;
4108 case PROCESSOR_PPCE300C2:
4109 case PROCESSOR_PPCE300C3:
4110 rs6000_cost = &ppce300c2c3_cost;
4111 break;
4113 case PROCESSOR_PPCE500MC:
4114 rs6000_cost = &ppce500mc_cost;
4115 break;
4117 case PROCESSOR_PPCE500MC64:
4118 rs6000_cost = &ppce500mc64_cost;
4119 break;
4121 case PROCESSOR_PPCE5500:
4122 rs6000_cost = &ppce5500_cost;
4123 break;
4125 case PROCESSOR_PPCE6500:
4126 rs6000_cost = &ppce6500_cost;
4127 break;
4129 case PROCESSOR_TITAN:
4130 rs6000_cost = &titan_cost;
4131 break;
4133 case PROCESSOR_POWER4:
4134 case PROCESSOR_POWER5:
4135 rs6000_cost = &power4_cost;
4136 break;
4138 case PROCESSOR_POWER6:
4139 rs6000_cost = &power6_cost;
4140 break;
4142 case PROCESSOR_POWER7:
4143 rs6000_cost = &power7_cost;
4144 break;
4146 case PROCESSOR_POWER8:
4147 rs6000_cost = &power8_cost;
4148 break;
4150 case PROCESSOR_PPCA2:
4151 rs6000_cost = &ppca2_cost;
4152 break;
4154 default:
4155 gcc_unreachable ();
4158 if (global_init_p)
4160 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4161 rs6000_cost->simultaneous_prefetches,
4162 global_options.x_param_values,
4163 global_options_set.x_param_values);
4164 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4165 global_options.x_param_values,
4166 global_options_set.x_param_values);
4167 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4168 rs6000_cost->cache_line_size,
4169 global_options.x_param_values,
4170 global_options_set.x_param_values);
4171 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4172 global_options.x_param_values,
4173 global_options_set.x_param_values);
4175 /* Increase loop peeling limits based on performance analysis. */
4176 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4177 global_options.x_param_values,
4178 global_options_set.x_param_values);
4179 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4180 global_options.x_param_values,
4181 global_options_set.x_param_values);
4183 /* If using typedef char *va_list, signal that
4184 __builtin_va_start (&ap, 0) can be optimized to
4185 ap = __builtin_next_arg (0). */
4186 if (DEFAULT_ABI != ABI_V4)
4187 targetm.expand_builtin_va_start = NULL;
4190 /* Set up single/double float flags.
4191 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4192 then set both flags. */
4193 if (TARGET_HARD_FLOAT && TARGET_FPRS
4194 && rs6000_single_float == 0 && rs6000_double_float == 0)
4195 rs6000_single_float = rs6000_double_float = 1;
4197 /* If not explicitly specified via option, decide whether to generate indexed
4198 load/store instructions. */
4199 if (TARGET_AVOID_XFORM == -1)
4200 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4201 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4202 need indexed accesses and the type used is the scalar type of the element
4203 being loaded or stored. */
4204 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4205 && !TARGET_ALTIVEC);
4207 /* Set the -mrecip options. */
4208 if (rs6000_recip_name)
4210 char *p = ASTRDUP (rs6000_recip_name);
4211 char *q;
4212 unsigned int mask, i;
4213 bool invert;
4215 while ((q = strtok (p, ",")) != NULL)
4217 p = NULL;
4218 if (*q == '!')
4220 invert = true;
4221 q++;
4223 else
4224 invert = false;
4226 if (!strcmp (q, "default"))
4227 mask = ((TARGET_RECIP_PRECISION)
4228 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4229 else
4231 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4232 if (!strcmp (q, recip_options[i].string))
4234 mask = recip_options[i].mask;
4235 break;
4238 if (i == ARRAY_SIZE (recip_options))
4240 error ("unknown option for -mrecip=%s", q);
4241 invert = false;
4242 mask = 0;
4243 ret = false;
4247 if (invert)
4248 rs6000_recip_control &= ~mask;
4249 else
4250 rs6000_recip_control |= mask;
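/* For instance, -mrecip=default,!rsqrtd would first set the default mask
   (low or high precision depending on TARGET_RECIP_PRECISION) and then
   clear the rsqrtd bits -- assuming "rsqrtd" is one of the entries in
   recip_options, as in GCC's documented -mrecip set. */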
4254 /* Determine when unaligned vector accesses are permitted, and when
4255 they are preferred over masked Altivec loads. Note that if
4256 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4257 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4258 not true. */
4259 if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) {
4260 if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
4261 && TARGET_ALLOW_MOVMISALIGN != 0)
4262 TARGET_EFFICIENT_UNALIGNED_VSX = 1;
4263 else
4264 TARGET_EFFICIENT_UNALIGNED_VSX = 0;
4267 if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
4268 TARGET_ALLOW_MOVMISALIGN = 1;
4270 /* Set the builtin mask of the various options used that could affect which
4271 builtins were used. In the past we used target_flags, but we've run out
4272 of bits, and some options like SPE and PAIRED are no longer in
4273 target_flags. */
4274 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4275 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4276 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4277 rs6000_builtin_mask);
4279 /* Initialize all of the registers. */
4280 rs6000_init_hard_regno_mode_ok (global_init_p);
4282 /* Save the initial options in case the user uses function-specific options. */
4283 if (global_init_p)
4284 target_option_default_node = target_option_current_node
4285 = build_target_option_node (&global_options);
4287 /* If not explicitly specified via option, decide whether to generate the
4288 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4289 if (TARGET_LINK_STACK == -1)
4290 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4292 return ret;
4295 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4296 define the target cpu type. */
4298 static void
4299 rs6000_option_override (void)
4301 (void) rs6000_option_override_internal (true);
4303 /* Register machine-specific passes. This needs to be done at start-up.
4304 It's convenient to do it here (like i386 does). */
4305 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4307 struct register_pass_info analyze_swaps_info
4308 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4310 register_pass (&analyze_swaps_info);
4314 /* Implement targetm.vectorize.builtin_mask_for_load. */
4315 static tree
4316 rs6000_builtin_mask_for_load (void)
4318 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4319 if ((TARGET_ALTIVEC && !TARGET_VSX)
4320 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4321 return altivec_builtin_mask_for_load;
4322 else
4323 return 0;
4326 /* Implement LOOP_ALIGN. */
4327 int
4328 rs6000_loop_align (rtx label)
4330 basic_block bb;
4331 int ninsns;
4333 /* Don't override loop alignment if -falign-loops was specified. */
4334 if (!can_override_loop_align)
4335 return align_loops_log;
4337 bb = BLOCK_FOR_INSN (label);
4338 ninsns = num_loop_insns(bb->loop_father);
4340 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4341 if (ninsns > 4 && ninsns <= 8
4342 && (rs6000_cpu == PROCESSOR_POWER4
4343 || rs6000_cpu == PROCESSOR_POWER5
4344 || rs6000_cpu == PROCESSOR_POWER6
4345 || rs6000_cpu == PROCESSOR_POWER7
4346 || rs6000_cpu == PROCESSOR_POWER8))
4347 return 5;
4348 else
4349 return align_loops_log;
4352 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4353 static int
4354 rs6000_loop_align_max_skip (rtx_insn *label)
4356 return (1 << rs6000_loop_align (label)) - 1;
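/* E.g. when rs6000_loop_align returns 5 (32-byte alignment), up to
   (1 << 5) - 1 = 31 bytes of padding may be skipped. */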
4359 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4360 after applying N iterations. This routine does not determine
4361 how many iterations are required to reach the desired alignment. */
4363 static bool
4364 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4366 if (is_packed)
4367 return false;
4369 if (TARGET_32BIT)
4371 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4372 return true;
4374 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4375 return true;
4377 return false;
4379 else
4381 if (TARGET_MACHO)
4382 return false;
4384 /* Assume that all other types are naturally aligned. CHECKME! */
4385 return true;
4389 /* Return true if the vector misalignment factor is supported by the
4390 target. */
4391 static bool
4392 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4393 const_tree type,
4394 int misalignment,
4395 bool is_packed)
4397 if (TARGET_VSX)
4399 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4400 return true;
4402 /* Return false if the movmisalign pattern is not supported for this mode. */
4403 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4404 return false;
4406 if (misalignment == -1)
4408 /* Misalignment factor is unknown at compile time but we know
4409 it's word aligned. */
4410 if (rs6000_vector_alignment_reachable (type, is_packed))
4412 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4414 if (element_size == 64 || element_size == 32)
4415 return true;
4418 return false;
4421 /* VSX supports word-aligned vectors. */
4422 if (misalignment % 4 == 0)
4423 return true;
4425 return false;
4428 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4429 static int
4430 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4431 tree vectype, int misalign)
4433 unsigned elements;
4434 tree elem_type;
4436 switch (type_of_cost)
4438 case scalar_stmt:
4439 case scalar_load:
4440 case scalar_store:
4441 case vector_stmt:
4442 case vector_load:
4443 case vector_store:
4444 case vec_to_scalar:
4445 case scalar_to_vec:
4446 case cond_branch_not_taken:
4447 return 1;
4449 case vec_perm:
4450 if (TARGET_VSX)
4451 return 3;
4452 else
4453 return 1;
4455 case vec_promote_demote:
4456 if (TARGET_VSX)
4457 return 4;
4458 else
4459 return 1;
4461 case cond_branch_taken:
4462 return 3;
4464 case unaligned_load:
4465 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4466 return 1;
4468 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4470 elements = TYPE_VECTOR_SUBPARTS (vectype);
4471 if (elements == 2)
4472 /* Double word aligned. */
4473 return 2;
4475 if (elements == 4)
4477 switch (misalign)
4479 case 8:
4480 /* Double word aligned. */
4481 return 2;
4483 case -1:
4484 /* Unknown misalignment. */
4485 case 4:
4486 case 12:
4487 /* Word aligned. */
4488 return 22;
4490 default:
4491 gcc_unreachable ();
4496 if (TARGET_ALTIVEC)
4497 /* Misaligned loads are not supported. */
4498 gcc_unreachable ();
4500 return 2;
4502 case unaligned_store:
4503 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4504 return 1;
4506 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4508 elements = TYPE_VECTOR_SUBPARTS (vectype);
4509 if (elements == 2)
4510 /* Double word aligned. */
4511 return 2;
4513 if (elements == 4)
4515 switch (misalign)
4517 case 8:
4518 /* Double word aligned. */
4519 return 2;
4521 case -1:
4522 /* Unknown misalignment. */
4523 case 4:
4524 case 12:
4525 /* Word aligned. */
4526 return 23;
4528 default:
4529 gcc_unreachable ();
4534 if (TARGET_ALTIVEC)
4535 /* Misaligned stores are not supported. */
4536 gcc_unreachable ();
4538 return 2;
4540 case vec_construct:
4541 elements = TYPE_VECTOR_SUBPARTS (vectype);
4542 elem_type = TREE_TYPE (vectype);
4543 /* 32-bit vectors loaded into registers are stored as double
4544 precision, so we need n/2 converts in addition to the usual
4545 n/2 merges to construct a vector of short floats from them. */
4546 if (SCALAR_FLOAT_TYPE_P (elem_type)
4547 && TYPE_PRECISION (elem_type) == 32)
4548 return elements + 1;
4549 else
4550 return elements / 2 + 1;
4552 default:
4553 gcc_unreachable ();
4557 /* Implement targetm.vectorize.preferred_simd_mode. */
4559 static machine_mode
4560 rs6000_preferred_simd_mode (machine_mode mode)
4562 if (TARGET_VSX)
4563 switch (mode)
4565 case DFmode:
4566 return V2DFmode;
4567 default:;
4569 if (TARGET_ALTIVEC || TARGET_VSX)
4570 switch (mode)
4572 case SFmode:
4573 return V4SFmode;
4574 case TImode:
4575 return V1TImode;
4576 case DImode:
4577 return V2DImode;
4578 case SImode:
4579 return V4SImode;
4580 case HImode:
4581 return V8HImode;
4582 case QImode:
4583 return V16QImode;
4584 default:;
4586 if (TARGET_SPE)
4587 switch (mode)
4589 case SFmode:
4590 return V2SFmode;
4591 case SImode:
4592 return V2SImode;
4593 default:;
4595 if (TARGET_PAIRED_FLOAT
4596 && mode == SFmode)
4597 return V2SFmode;
4598 return word_mode;
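/* E.g. with -mvsx, DFmode loops vectorize in V2DFmode; with plain
   -maltivec, SFmode vectorizes in V4SFmode; with no vector unit the
   word_mode fallback means there is no preferred SIMD mode. */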
4601 typedef struct _rs6000_cost_data
4603 struct loop *loop_info;
4604 unsigned cost[3];
4605 } rs6000_cost_data;
4607 /* Test for likely overcommitment of vector hardware resources. If a
4608 loop iteration is relatively large, and too large a percentage of
4609 instructions in the loop are vectorized, the cost model may not
4610 adequately reflect delays from unavailable vector resources.
4611 Penalize the loop body cost for this case. */
4613 static void
4614 rs6000_density_test (rs6000_cost_data *data)
4616 const int DENSITY_PCT_THRESHOLD = 85;
4617 const int DENSITY_SIZE_THRESHOLD = 70;
4618 const int DENSITY_PENALTY = 10;
4619 struct loop *loop = data->loop_info;
4620 basic_block *bbs = get_loop_body (loop);
4621 int nbbs = loop->num_nodes;
4622 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4623 int i, density_pct;
4625 for (i = 0; i < nbbs; i++)
4627 basic_block bb = bbs[i];
4628 gimple_stmt_iterator gsi;
4630 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4632 gimple stmt = gsi_stmt (gsi);
4633 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4635 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4636 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4637 not_vec_cost++;
4641 free (bbs);
4642 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4644 if (density_pct > DENSITY_PCT_THRESHOLD
4645 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4647 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_NOTE, vect_location,
4650 "density %d%%, cost %d exceeds threshold, penalizing "
4651 "loop body cost by %d%%", density_pct,
4652 vec_cost + not_vec_cost, DENSITY_PENALTY);
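/* Worked example (numbers invented for illustration): with vec_cost = 90
   and not_vec_cost = 10, density_pct = 90 > 85 and the total size 100 > 70,
   so the body cost is penalized to 90 * 110 / 100 = 99. */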
4656 /* Implement targetm.vectorize.init_cost. */
4658 static void *
4659 rs6000_init_cost (struct loop *loop_info)
4661 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4662 data->loop_info = loop_info;
4663 data->cost[vect_prologue] = 0;
4664 data->cost[vect_body] = 0;
4665 data->cost[vect_epilogue] = 0;
4666 return data;
4669 /* Implement targetm.vectorize.add_stmt_cost. */
4671 static unsigned
4672 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4673 struct _stmt_vec_info *stmt_info, int misalign,
4674 enum vect_cost_model_location where)
4676 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4677 unsigned retval = 0;
4679 if (flag_vect_cost_model)
4681 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4682 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4683 misalign);
4684 /* Statements in an inner loop relative to the loop being
4685 vectorized are weighted more heavily. The value here is
4686 arbitrary and could potentially be improved with analysis. */
4687 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4688 count *= 50; /* FIXME. */
4690 retval = (unsigned) (count * stmt_cost);
4691 cost_data->cost[where] += retval;
4694 return retval;
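/* For example, a vector_stmt (cost 1 above) occurring 4 times in the loop
   body adds 4 to cost[vect_body]; the same statement in an inner loop would
   be weighted 50 times as heavily per the FIXME above. */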
4697 /* Implement targetm.vectorize.finish_cost. */
4699 static void
4700 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4701 unsigned *body_cost, unsigned *epilogue_cost)
4703 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4705 if (cost_data->loop_info)
4706 rs6000_density_test (cost_data);
4708 *prologue_cost = cost_data->cost[vect_prologue];
4709 *body_cost = cost_data->cost[vect_body];
4710 *epilogue_cost = cost_data->cost[vect_epilogue];
4713 /* Implement targetm.vectorize.destroy_cost_data. */
4715 static void
4716 rs6000_destroy_cost_data (void *data)
4718 free (data);
4721 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4722 library with vectorized intrinsics. */
4724 static tree
4725 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4727 char name[32];
4728 const char *suffix = NULL;
4729 tree fntype, new_fndecl, bdecl = NULL_TREE;
4730 int n_args = 1;
4731 const char *bname;
4732 machine_mode el_mode, in_mode;
4733 int n, in_n;
4735 /* Libmass is suitable only for unsafe math, as it does not correctly
4736 support parts of IEEE (such as denormals) with the required precision.
4737 Only support it if we have VSX to use the simd d2 or f4 functions.
4738 XXX: Add variable length support. */
4739 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4740 return NULL_TREE;
4742 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4743 n = TYPE_VECTOR_SUBPARTS (type_out);
4744 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4745 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4746 if (el_mode != in_mode
4747 || n != in_n)
4748 return NULL_TREE;
4750 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4752 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4753 switch (fn)
4755 case BUILT_IN_ATAN2:
4756 case BUILT_IN_HYPOT:
4757 case BUILT_IN_POW:
4758 n_args = 2;
4759 /* fall through */
4761 case BUILT_IN_ACOS:
4762 case BUILT_IN_ACOSH:
4763 case BUILT_IN_ASIN:
4764 case BUILT_IN_ASINH:
4765 case BUILT_IN_ATAN:
4766 case BUILT_IN_ATANH:
4767 case BUILT_IN_CBRT:
4768 case BUILT_IN_COS:
4769 case BUILT_IN_COSH:
4770 case BUILT_IN_ERF:
4771 case BUILT_IN_ERFC:
4772 case BUILT_IN_EXP2:
4773 case BUILT_IN_EXP:
4774 case BUILT_IN_EXPM1:
4775 case BUILT_IN_LGAMMA:
4776 case BUILT_IN_LOG10:
4777 case BUILT_IN_LOG1P:
4778 case BUILT_IN_LOG2:
4779 case BUILT_IN_LOG:
4780 case BUILT_IN_SIN:
4781 case BUILT_IN_SINH:
4782 case BUILT_IN_SQRT:
4783 case BUILT_IN_TAN:
4784 case BUILT_IN_TANH:
4785 bdecl = builtin_decl_implicit (fn);
4786 suffix = "d2"; /* pow -> powd2 */
4787 if (el_mode != DFmode
4788 || n != 2
4789 || !bdecl)
4790 return NULL_TREE;
4791 break;
4793 case BUILT_IN_ATAN2F:
4794 case BUILT_IN_HYPOTF:
4795 case BUILT_IN_POWF:
4796 n_args = 2;
4797 /* fall through */
4799 case BUILT_IN_ACOSF:
4800 case BUILT_IN_ACOSHF:
4801 case BUILT_IN_ASINF:
4802 case BUILT_IN_ASINHF:
4803 case BUILT_IN_ATANF:
4804 case BUILT_IN_ATANHF:
4805 case BUILT_IN_CBRTF:
4806 case BUILT_IN_COSF:
4807 case BUILT_IN_COSHF:
4808 case BUILT_IN_ERFF:
4809 case BUILT_IN_ERFCF:
4810 case BUILT_IN_EXP2F:
4811 case BUILT_IN_EXPF:
4812 case BUILT_IN_EXPM1F:
4813 case BUILT_IN_LGAMMAF:
4814 case BUILT_IN_LOG10F:
4815 case BUILT_IN_LOG1PF:
4816 case BUILT_IN_LOG2F:
4817 case BUILT_IN_LOGF:
4818 case BUILT_IN_SINF:
4819 case BUILT_IN_SINHF:
4820 case BUILT_IN_SQRTF:
4821 case BUILT_IN_TANF:
4822 case BUILT_IN_TANHF:
4823 bdecl = builtin_decl_implicit (fn);
4824 suffix = "4"; /* powf -> powf4 */
4825 if (el_mode != SFmode
4826 || n != 4
4827 || !bdecl)
4828 return NULL_TREE;
4829 break;
4831 default:
4832 return NULL_TREE;
4835 else
4836 return NULL_TREE;
4838 gcc_assert (suffix != NULL);
4839 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4840 if (!bname)
4841 return NULL_TREE;
4843 strcpy (name, bname + sizeof ("__builtin_") - 1);
4844 strcat (name, suffix);
4846 if (n_args == 1)
4847 fntype = build_function_type_list (type_out, type_in, NULL);
4848 else if (n_args == 2)
4849 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4850 else
4851 gcc_unreachable ();
4853 /* Build a function declaration for the vectorized function. */
4854 new_fndecl = build_decl (BUILTINS_LOCATION,
4855 FUNCTION_DECL, get_identifier (name), fntype);
4856 TREE_PUBLIC (new_fndecl) = 1;
4857 DECL_EXTERNAL (new_fndecl) = 1;
4858 DECL_IS_NOVOPS (new_fndecl) = 1;
4859 TREE_READONLY (new_fndecl) = 1;
4861 return new_fndecl;
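/* So a vectorized call to the double-precision pow builtin becomes a call
   to an external "powd2" taking and returning V2DF, per the suffix handling
   above. */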
4864 /* Return a function decl for a vectorized version of the builtin function
4865 FNDECL with result vector type TYPE_OUT and input vector type TYPE_IN, or
4866 NULL_TREE if it is not available. */
4868 static tree
4869 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4870 tree type_in)
4872 machine_mode in_mode, out_mode;
4873 int in_n, out_n;
4875 if (TARGET_DEBUG_BUILTIN)
4876 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4877 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4878 GET_MODE_NAME (TYPE_MODE (type_out)),
4879 GET_MODE_NAME (TYPE_MODE (type_in)));
4881 if (TREE_CODE (type_out) != VECTOR_TYPE
4882 || TREE_CODE (type_in) != VECTOR_TYPE
4883 || !TARGET_VECTORIZE_BUILTINS)
4884 return NULL_TREE;
4886 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4887 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4888 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4889 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4891 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4893 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4894 switch (fn)
4896 case BUILT_IN_CLZIMAX:
4897 case BUILT_IN_CLZLL:
4898 case BUILT_IN_CLZL:
4899 case BUILT_IN_CLZ:
4900 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4902 if (out_mode == QImode && out_n == 16)
4903 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4904 else if (out_mode == HImode && out_n == 8)
4905 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4906 else if (out_mode == SImode && out_n == 4)
4907 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4908 else if (out_mode == DImode && out_n == 2)
4909 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4911 break;
4912 case BUILT_IN_COPYSIGN:
4913 if (VECTOR_UNIT_VSX_P (V2DFmode)
4914 && out_mode == DFmode && out_n == 2
4915 && in_mode == DFmode && in_n == 2)
4916 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4917 break;
4918 case BUILT_IN_COPYSIGNF:
4919 if (out_mode != SFmode || out_n != 4
4920 || in_mode != SFmode || in_n != 4)
4921 break;
4922 if (VECTOR_UNIT_VSX_P (V4SFmode))
4923 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4924 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4925 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4926 break;
4927 case BUILT_IN_POPCOUNTIMAX:
4928 case BUILT_IN_POPCOUNTLL:
4929 case BUILT_IN_POPCOUNTL:
4930 case BUILT_IN_POPCOUNT:
4931 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4933 if (out_mode == QImode && out_n == 16)
4934 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4935 else if (out_mode == HImode && out_n == 8)
4936 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4937 else if (out_mode == SImode && out_n == 4)
4938 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4939 else if (out_mode == DImode && out_n == 2)
4940 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4942 break;
4943 case BUILT_IN_SQRT:
4944 if (VECTOR_UNIT_VSX_P (V2DFmode)
4945 && out_mode == DFmode && out_n == 2
4946 && in_mode == DFmode && in_n == 2)
4947 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4948 break;
4949 case BUILT_IN_SQRTF:
4950 if (VECTOR_UNIT_VSX_P (V4SFmode)
4951 && out_mode == SFmode && out_n == 4
4952 && in_mode == SFmode && in_n == 4)
4953 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4954 break;
4955 case BUILT_IN_CEIL:
4956 if (VECTOR_UNIT_VSX_P (V2DFmode)
4957 && out_mode == DFmode && out_n == 2
4958 && in_mode == DFmode && in_n == 2)
4959 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4960 break;
4961 case BUILT_IN_CEILF:
4962 if (out_mode != SFmode || out_n != 4
4963 || in_mode != SFmode || in_n != 4)
4964 break;
4965 if (VECTOR_UNIT_VSX_P (V4SFmode))
4966 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4967 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4968 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4969 break;
4970 case BUILT_IN_FLOOR:
4971 if (VECTOR_UNIT_VSX_P (V2DFmode)
4972 && out_mode == DFmode && out_n == 2
4973 && in_mode == DFmode && in_n == 2)
4974 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4975 break;
4976 case BUILT_IN_FLOORF:
4977 if (out_mode != SFmode || out_n != 4
4978 || in_mode != SFmode || in_n != 4)
4979 break;
4980 if (VECTOR_UNIT_VSX_P (V4SFmode))
4981 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4982 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4983 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4984 break;
4985 case BUILT_IN_FMA:
4986 if (VECTOR_UNIT_VSX_P (V2DFmode)
4987 && out_mode == DFmode && out_n == 2
4988 && in_mode == DFmode && in_n == 2)
4989 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4990 break;
4991 case BUILT_IN_FMAF:
4992 if (VECTOR_UNIT_VSX_P (V4SFmode)
4993 && out_mode == SFmode && out_n == 4
4994 && in_mode == SFmode && in_n == 4)
4995 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4996 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4997 && out_mode == SFmode && out_n == 4
4998 && in_mode == SFmode && in_n == 4)
4999 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5000 break;
5001 case BUILT_IN_TRUNC:
5002 if (VECTOR_UNIT_VSX_P (V2DFmode)
5003 && out_mode == DFmode && out_n == 2
5004 && in_mode == DFmode && in_n == 2)
5005 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5006 break;
5007 case BUILT_IN_TRUNCF:
5008 if (out_mode != SFmode || out_n != 4
5009 || in_mode != SFmode || in_n != 4)
5010 break;
5011 if (VECTOR_UNIT_VSX_P (V4SFmode))
5012 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5013 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5014 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5015 break;
5016 case BUILT_IN_NEARBYINT:
5017 if (VECTOR_UNIT_VSX_P (V2DFmode)
5018 && flag_unsafe_math_optimizations
5019 && out_mode == DFmode && out_n == 2
5020 && in_mode == DFmode && in_n == 2)
5021 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5022 break;
5023 case BUILT_IN_NEARBYINTF:
5024 if (VECTOR_UNIT_VSX_P (V4SFmode)
5025 && flag_unsafe_math_optimizations
5026 && out_mode == SFmode && out_n == 4
5027 && in_mode == SFmode && in_n == 4)
5028 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5029 break;
5030 case BUILT_IN_RINT:
5031 if (VECTOR_UNIT_VSX_P (V2DFmode)
5032 && !flag_trapping_math
5033 && out_mode == DFmode && out_n == 2
5034 && in_mode == DFmode && in_n == 2)
5035 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5036 break;
5037 case BUILT_IN_RINTF:
5038 if (VECTOR_UNIT_VSX_P (V4SFmode)
5039 && !flag_trapping_math
5040 && out_mode == SFmode && out_n == 4
5041 && in_mode == SFmode && in_n == 4)
5042 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5043 break;
5044 default:
5045 break;
5049 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
5051 enum rs6000_builtins fn
5052 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
5053 switch (fn)
5055 case RS6000_BUILTIN_RSQRTF:
5056 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5057 && out_mode == SFmode && out_n == 4
5058 && in_mode == SFmode && in_n == 4)
5059 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5060 break;
5061 case RS6000_BUILTIN_RSQRT:
5062 if (VECTOR_UNIT_VSX_P (V2DFmode)
5063 && out_mode == DFmode && out_n == 2
5064 && in_mode == DFmode && in_n == 2)
5065 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5066 break;
5067 case RS6000_BUILTIN_RECIPF:
5068 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5069 && out_mode == SFmode && out_n == 4
5070 && in_mode == SFmode && in_n == 4)
5071 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5072 break;
5073 case RS6000_BUILTIN_RECIP:
5074 if (VECTOR_UNIT_VSX_P (V2DFmode)
5075 && out_mode == DFmode && out_n == 2
5076 && in_mode == DFmode && in_n == 2)
5077 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5078 break;
5079 default:
5080 break;
5084 /* Generate calls to libmass if appropriate. */
5085 if (rs6000_veclib_handler)
5086 return rs6000_veclib_handler (fndecl, type_out, type_in);
5088 return NULL_TREE;
5091 /* Default CPU string for rs6000*_file_start functions. */
5092 static const char *rs6000_default_cpu;
5094 /* Do anything needed at the start of the asm file. */
5096 static void
5097 rs6000_file_start (void)
5099 char buffer[80];
5100 const char *start = buffer;
5101 FILE *file = asm_out_file;
5103 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5105 default_file_start ();
5107 if (flag_verbose_asm)
5109 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5111 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5113 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5114 start = "";
5117 if (global_options_set.x_rs6000_cpu_index)
5119 fprintf (file, "%s -mcpu=%s", start,
5120 processor_target_table[rs6000_cpu_index].name);
5121 start = "";
5124 if (global_options_set.x_rs6000_tune_index)
5126 fprintf (file, "%s -mtune=%s", start,
5127 processor_target_table[rs6000_tune_index].name);
5128 start = "";
5131 if (PPC405_ERRATUM77)
5133 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5134 start = "";
5137 #ifdef USING_ELFOS_H
5138 switch (rs6000_sdata)
5140 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5141 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5142 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5143 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5146 if (rs6000_sdata && g_switch_value)
5148 fprintf (file, "%s -G %d", start,
5149 g_switch_value);
5150 start = "";
5152 #endif
5154 if (*start == '\0')
5155 putc ('\n', file);
5158 #ifdef USING_ELFOS_H
5159 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5160 || !global_options_set.x_rs6000_cpu_index)
5162 fputs ("\t.machine ", asm_out_file);
5163 if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5164 fputs ("power8\n", asm_out_file);
5165 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5166 fputs ("power7\n", asm_out_file);
5167 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5168 fputs ("power6\n", asm_out_file);
5169 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5170 fputs ("power5\n", asm_out_file);
5171 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5172 fputs ("power4\n", asm_out_file);
5173 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5174 fputs ("ppc64\n", asm_out_file);
5175 else
5176 fputs ("ppc\n", asm_out_file);
5178 #endif
5180 if (DEFAULT_ABI == ABI_ELFv2)
5181 fprintf (file, "\t.abiversion 2\n");
5183 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5184 || (TARGET_ELF && flag_pic == 2))
5186 switch_to_section (toc_section);
5187 switch_to_section (text_section);
5192 /* Return nonzero if this function is known to have a null epilogue. */
5194 int
5195 direct_return (void)
5197 if (reload_completed)
5199 rs6000_stack_t *info = rs6000_stack_info ();
5201 if (info->first_gp_reg_save == 32
5202 && info->first_fp_reg_save == 64
5203 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5204 && ! info->lr_save_p
5205 && ! info->cr_save_p
5206 && info->vrsave_mask == 0
5207 && ! info->push_p)
5208 return 1;
5211 return 0;
5214 /* Return the number of instructions it takes to form a constant in an
5215 integer register. */
5217 static int
5218 num_insns_constant_wide (HOST_WIDE_INT value)
5220 /* signed constant loadable with addi */
5221 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5222 return 1;
5224 /* constant loadable with addis */
5225 else if ((value & 0xffff) == 0
5226 && (value >> 31 == -1 || value >> 31 == 0))
5227 return 1;
5229 else if (TARGET_POWERPC64)
5231 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5232 HOST_WIDE_INT high = value >> 31;
5234 if (high == 0 || high == -1)
5235 return 2;
5237 high >>= 1;
5239 if (low == 0)
5240 return num_insns_constant_wide (high) + 1;
5241 else if (high == 0)
5242 return num_insns_constant_wide (low) + 1;
5243 else
5244 return (num_insns_constant_wide (high)
5245 + num_insns_constant_wide (low) + 1);
5248 else
5249 return 2;
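/* Illustration only, not part of rs6000.c: a standalone, host-side
   sketch of the counting logic above, assuming a 64-bit HOST_WIDE_INT,
   arithmetic right shift (as GCC's own code assumes) and a 64-bit
   target.  The function name is local to this sketch.  Kept under
   #if 0 so it cannot affect this file; compile it separately to
   experiment.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static int
insns_for_constant (int64_t value)
{
  /* One addi: VALUE fits in a sign-extended 16-bit immediate.  */
  if ((uint64_t) value + 0x8000 < 0x10000)
    return 1;

  /* One addis: low 16 bits clear, rest sign-extends from bit 31.  */
  if ((value & 0xffff) == 0 && (value >> 31 == 0 || value >> 31 == -1))
    return 1;

  /* Split into a sign-adjusted low 32 bits and the remaining high
     part, exactly as num_insns_constant_wide does.  */
  int64_t low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
  int64_t high = value >> 31;

  if (high == 0 || high == -1)
    return 2;				/* fits in 32 bits: lis + ori pair */

  high >>= 1;				/* now the true upper 32 bits */

  if (low == 0)
    return insns_for_constant (high) + 1;	/* high part + shift */
  if (high == 0)
    return insns_for_constant (low) + 1;
  return insns_for_constant (high) + insns_for_constant (low) + 1;
}

int
main (void)
{
  printf ("%d\n", insns_for_constant (0x1234));			/* 1 */
  printf ("%d\n", insns_for_constant (0x12340000));		/* 1 */
  printf ("%d\n", insns_for_constant (0x123456789abcdef0));	/* 5 */
  return 0;
}
#endif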
5252 int
5253 num_insns_constant (rtx op, machine_mode mode)
5255 HOST_WIDE_INT low, high;
5257 switch (GET_CODE (op))
5259 case CONST_INT:
5260 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5261 && mask64_operand (op, mode))
5262 return 2;
5263 else
5264 return num_insns_constant_wide (INTVAL (op));
5266 case CONST_WIDE_INT:
5268 int i;
5269 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5270 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5271 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5272 return ins;
5275 case CONST_DOUBLE:
5276 if (mode == SFmode || mode == SDmode)
5278 long l;
5279 REAL_VALUE_TYPE rv;
5281 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5282 if (DECIMAL_FLOAT_MODE_P (mode))
5283 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5284 else
5285 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5286 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5289 long l[2];
5290 REAL_VALUE_TYPE rv;
5292 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5293 if (DECIMAL_FLOAT_MODE_P (mode))
5294 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5295 else
5296 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5297 high = l[WORDS_BIG_ENDIAN == 0];
5298 low = l[WORDS_BIG_ENDIAN != 0];
5300 if (TARGET_32BIT)
5301 return (num_insns_constant_wide (low)
5302 + num_insns_constant_wide (high));
5303 else
5305 if ((high == 0 && low >= 0)
5306 || (high == -1 && low < 0))
5307 return num_insns_constant_wide (low);
5309 else if (mask64_operand (op, mode))
5310 return 2;
5312 else if (low == 0)
5313 return num_insns_constant_wide (high) + 1;
5315 else
5316 return (num_insns_constant_wide (high)
5317 + num_insns_constant_wide (low) + 1);
5320 default:
5321 gcc_unreachable ();
5325 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5326 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5327 corresponding element of the vector, but for V4SFmode and V2SFmode,
5328 the corresponding "float" is interpreted as an SImode integer. */
5330 HOST_WIDE_INT
5331 const_vector_elt_as_int (rtx op, unsigned int elt)
5333 rtx tmp;
5335 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5336 gcc_assert (GET_MODE (op) != V2DImode
5337 && GET_MODE (op) != V2DFmode);
5339 tmp = CONST_VECTOR_ELT (op, elt);
5340 if (GET_MODE (op) == V4SFmode
5341 || GET_MODE (op) == V2SFmode)
5342 tmp = gen_lowpart (SImode, tmp);
5343 return INTVAL (tmp);
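/* Illustration only, not part of rs6000.c: what gen_lowpart (SImode, ...)
   amounts to for a V4SF element above -- reinterpreting the float's bit
   pattern as a 32-bit integer.  memcpy is the strict-aliasing-safe way
   to do that on the host.  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static int32_t
float_bits_as_int (float f)
{
  int32_t i;
  memcpy (&i, &f, sizeof i);	/* copy the raw bits, no conversion */
  return i;
}

int
main (void)
{
  printf ("0x%08x\n", (unsigned) float_bits_as_int (1.0f)); /* 0x3f800000 */
  return 0;
}
#endif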
5346 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5347 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5348 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5349 all items are set to the same value and contain COPIES replicas of the
5350 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5351 operand and the others are set to the value of the operand's msb. */
5353 static bool
5354 vspltis_constant (rtx op, unsigned step, unsigned copies)
5356 machine_mode mode = GET_MODE (op);
5357 machine_mode inner = GET_MODE_INNER (mode);
5359 unsigned i;
5360 unsigned nunits;
5361 unsigned bitsize;
5362 unsigned mask;
5364 HOST_WIDE_INT val;
5365 HOST_WIDE_INT splat_val;
5366 HOST_WIDE_INT msb_val;
5368 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5369 return false;
5371 nunits = GET_MODE_NUNITS (mode);
5372 bitsize = GET_MODE_BITSIZE (inner);
5373 mask = GET_MODE_MASK (inner);
5375 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5376 splat_val = val;
5377 msb_val = val >= 0 ? 0 : -1;
5379 /* Construct the value to be splatted, if possible. If not, return 0. */
5380 for (i = 2; i <= copies; i *= 2)
5382 HOST_WIDE_INT small_val;
5383 bitsize /= 2;
5384 small_val = splat_val >> bitsize;
5385 mask >>= bitsize;
5386 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5387 return false;
5388 splat_val = small_val;
5391 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5392 if (EASY_VECTOR_15 (splat_val))
5395 /* Also check if we can splat, and then add the result to itself. Do so if
5396 the value is positive, or if the splat instruction is using OP's mode;
5397 for splat_val < 0, the splat and the add should use the same mode. */
5398 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5399 && (splat_val >= 0 || (step == 1 && copies == 1)))
5402 /* Also check if we are loading up the most significant bit, which can be done by
5403 loading up -1 and shifting the value left by -1. */
5404 else if (EASY_VECTOR_MSB (splat_val, inner))
5407 else
5408 return false;
5410 /* Check if VAL is present in every STEP-th element, and the
5411 other elements are filled with its most significant bit. */
5412 for (i = 1; i < nunits; ++i)
5414 HOST_WIDE_INT desired_val;
5415 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5416 if ((i & (step - 1)) == 0)
5417 desired_val = val;
5418 else
5419 desired_val = msb_val;
5421 if (desired_val != const_vector_elt_as_int (op, elt))
5422 return false;
5425 return true;
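/* Illustration only, not part of rs6000.c: a minimal sketch of the
   COPIES halving loop above for one concrete case -- is a 32-bit
   element four copies of a single byte whose value fits the 5-bit
   vspltisb immediate (the EASY_VECTOR_15 range, -16..15)?  Names are
   local to this sketch.  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static int
splattable_by_vspltisb (uint32_t elem)
{
  /* Halve repeatedly, checking that both halves agree, as the loop
     over COPIES does with splat_val and bitsize.  */
  if ((elem >> 16) != (elem & 0xffff))
    return 0;
  uint32_t half = elem & 0xffff;
  if ((half >> 8) != (half & 0xff))
    return 0;
  int8_t byte = (int8_t) (half & 0xff);
  return byte >= -16 && byte <= 15;	/* EASY_VECTOR_15 */
}

int
main (void)
{
  printf ("%d\n", splattable_by_vspltisb (0x05050505)); /* 1 */
  printf ("%d\n", splattable_by_vspltisb (0xfefefefe)); /* 1: byte is -2 */
  printf ("%d\n", splattable_by_vspltisb (0x12345678)); /* 0 */
  return 0;
}
#endif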
5429 /* Return true if OP is of the given MODE and can be synthesized
5430 with a vspltisb, vspltish or vspltisw. */
5432 bool
5433 easy_altivec_constant (rtx op, machine_mode mode)
5435 unsigned step, copies;
5437 if (mode == VOIDmode)
5438 mode = GET_MODE (op);
5439 else if (mode != GET_MODE (op))
5440 return false;
5442 /* V2DI/V2DF were added with VSX. Only allow 0 and all 1's as easy
5443 constants. */
5444 if (mode == V2DFmode)
5445 return zero_constant (op, mode);
5447 else if (mode == V2DImode)
5449 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5450 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5451 return false;
5453 if (zero_constant (op, mode))
5454 return true;
5456 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5457 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5458 return true;
5460 return false;
5463 /* V1TImode is a special container for TImode. Ignore for now. */
5464 else if (mode == V1TImode)
5465 return false;
5467 /* Start with a vspltisw. */
5468 step = GET_MODE_NUNITS (mode) / 4;
5469 copies = 1;
5471 if (vspltis_constant (op, step, copies))
5472 return true;
5474 /* Then try with a vspltish. */
5475 if (step == 1)
5476 copies <<= 1;
5477 else
5478 step >>= 1;
5480 if (vspltis_constant (op, step, copies))
5481 return true;
5483 /* And finally a vspltisb. */
5484 if (step == 1)
5485 copies <<= 1;
5486 else
5487 step >>= 1;
5489 if (vspltis_constant (op, step, copies))
5490 return true;
5492 return false;
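/* Illustration only, not part of rs6000.c: the STEP/COPIES progression
   the function above walks through -- vspltisw first, then vspltish,
   then vspltisb -- halving STEP until it reaches 1, then doubling
   COPIES.  NUNITS here is picked for V8HImode as an example.  Kept
   under #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const char *insn[3] = { "vspltisw", "vspltish", "vspltisb" };
  unsigned nunits = 8;			/* e.g. V8HImode */
  unsigned step = nunits / 4, copies = 1;

  for (int i = 0; i < 3; i++)
    {
      printf ("%s: step=%u copies=%u\n", insn[i], step, copies);
      if (step == 1)
	copies <<= 1;
      else
	step >>= 1;
    }
  return 0;
}
#endif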
5495 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5496 result is OP. Abort if it is not possible. */
5498 rtx
5499 gen_easy_altivec_constant (rtx op)
5501 machine_mode mode = GET_MODE (op);
5502 int nunits = GET_MODE_NUNITS (mode);
5503 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5504 unsigned step = nunits / 4;
5505 unsigned copies = 1;
5507 /* Start with a vspltisw. */
5508 if (vspltis_constant (op, step, copies))
5509 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5511 /* Then try with a vspltish. */
5512 if (step == 1)
5513 copies <<= 1;
5514 else
5515 step >>= 1;
5517 if (vspltis_constant (op, step, copies))
5518 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5520 /* And finally a vspltisb. */
5521 if (step == 1)
5522 copies <<= 1;
5523 else
5524 step >>= 1;
5526 if (vspltis_constant (op, step, copies))
5527 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5529 gcc_unreachable ();
5532 const char *
5533 output_vec_const_move (rtx *operands)
5535 int cst, cst2;
5536 machine_mode mode;
5537 rtx dest, vec;
5539 dest = operands[0];
5540 vec = operands[1];
5541 mode = GET_MODE (dest);
5543 if (TARGET_VSX)
5545 if (zero_constant (vec, mode))
5546 return "xxlxor %x0,%x0,%x0";
5548 if ((mode == V2DImode || mode == V1TImode)
5549 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5550 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5551 return "vspltisw %0,-1";
5554 if (TARGET_ALTIVEC)
5556 rtx splat_vec;
5557 if (zero_constant (vec, mode))
5558 return "vxor %0,%0,%0";
5560 splat_vec = gen_easy_altivec_constant (vec);
5561 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5562 operands[1] = XEXP (splat_vec, 0);
5563 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5564 return "#";
5566 switch (GET_MODE (splat_vec))
5568 case V4SImode:
5569 return "vspltisw %0,%1";
5571 case V8HImode:
5572 return "vspltish %0,%1";
5574 case V16QImode:
5575 return "vspltisb %0,%1";
5577 default:
5578 gcc_unreachable ();
5582 gcc_assert (TARGET_SPE);
5584 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5585 pattern of V1DI, V4HI, and V2SF.
5587 FIXME: We should probably return # and add post reload
5588 splitters for these, but this way is so easy ;-). */
5589 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5590 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5591 operands[1] = CONST_VECTOR_ELT (vec, 0);
5592 operands[2] = CONST_VECTOR_ELT (vec, 1);
5593 if (cst == cst2)
5594 return "li %0,%1\n\tevmergelo %0,%0,%0";
5595 else if (WORDS_BIG_ENDIAN)
5596 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5597 else
5598 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5601 /* Initialize TARGET of vector PAIRED to VALS. */
5603 void
5604 paired_expand_vector_init (rtx target, rtx vals)
5606 machine_mode mode = GET_MODE (target);
5607 int n_elts = GET_MODE_NUNITS (mode);
5608 int n_var = 0;
5609 rtx x, new_rtx, tmp, constant_op, op1, op2;
5610 int i;
5612 for (i = 0; i < n_elts; ++i)
5614 x = XVECEXP (vals, 0, i);
5615 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5616 ++n_var;
5618 if (n_var == 0)
5620 /* Load from constant pool. */
5621 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5622 return;
5625 if (n_var == 2)
5627 /* The vector is initialized only with non-constants. */
5628 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5629 XVECEXP (vals, 0, 1));
5631 emit_move_insn (target, new_rtx);
5632 return;
5635 /* One field is non-constant and the other one is a constant. Load the
5636 constant from the constant pool and use the ps_merge instruction to
5637 construct the whole vector. */
5638 op1 = XVECEXP (vals, 0, 0);
5639 op2 = XVECEXP (vals, 0, 1);
5641 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5643 tmp = gen_reg_rtx (GET_MODE (constant_op));
5644 emit_move_insn (tmp, constant_op);
5646 if (CONSTANT_P (op1))
5647 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5648 else
5649 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5651 emit_move_insn (target, new_rtx);
5654 void
5655 paired_expand_vector_move (rtx operands[])
5657 rtx op0 = operands[0], op1 = operands[1];
5659 emit_move_insn (op0, op1);
5662 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5663 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5664 operands for the relation operation COND. This is a recursive
5665 function. */
5667 static void
5668 paired_emit_vector_compare (enum rtx_code rcode,
5669 rtx dest, rtx op0, rtx op1,
5670 rtx cc_op0, rtx cc_op1)
5672 rtx tmp = gen_reg_rtx (V2SFmode);
5673 rtx tmp1, max, min;
5675 gcc_assert (TARGET_PAIRED_FLOAT);
5676 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5678 switch (rcode)
5680 case LT:
5681 case LTU:
5682 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5683 return;
5684 case GE:
5685 case GEU:
5686 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5687 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5688 return;
5689 case LE:
5690 case LEU:
5691 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5692 return;
5693 case GT:
5694 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5695 return;
5696 case EQ:
5697 tmp1 = gen_reg_rtx (V2SFmode);
5698 max = gen_reg_rtx (V2SFmode);
5699 min = gen_reg_rtx (V2SFmode);
5702 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5703 emit_insn (gen_selv2sf4
5704 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5705 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5706 emit_insn (gen_selv2sf4
5707 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5708 emit_insn (gen_subv2sf3 (tmp1, min, max));
5709 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5710 return;
5711 case NE:
5712 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5713 return;
5714 case UNLE:
5715 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5716 return;
5717 case UNLT:
5718 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5719 return;
5720 case UNGE:
5721 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5722 return;
5723 case UNGT:
5724 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5725 return;
5726 default:
5727 gcc_unreachable ();
5730 return;
5733 /* Emit vector conditional expression.
5734 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5735 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5737 int
5738 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5739 rtx cond, rtx cc_op0, rtx cc_op1)
5741 enum rtx_code rcode = GET_CODE (cond);
5743 if (!TARGET_PAIRED_FLOAT)
5744 return 0;
5746 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5748 return 1;
5751 /* Initialize vector TARGET to VALS. */
5753 void
5754 rs6000_expand_vector_init (rtx target, rtx vals)
5756 machine_mode mode = GET_MODE (target);
5757 machine_mode inner_mode = GET_MODE_INNER (mode);
5758 int n_elts = GET_MODE_NUNITS (mode);
5759 int n_var = 0, one_var = -1;
5760 bool all_same = true, all_const_zero = true;
5761 rtx x, mem;
5762 int i;
5764 for (i = 0; i < n_elts; ++i)
5766 x = XVECEXP (vals, 0, i);
5767 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5768 ++n_var, one_var = i;
5769 else if (x != CONST0_RTX (inner_mode))
5770 all_const_zero = false;
5772 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5773 all_same = false;
5776 if (n_var == 0)
5778 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5779 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5780 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5782 /* Zero register. */
5783 emit_insn (gen_rtx_SET (VOIDmode, target,
5784 gen_rtx_XOR (mode, target, target)));
5785 return;
5787 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5789 /* Splat immediate. */
5790 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5791 return;
5793 else
5795 /* Load from constant pool. */
5796 emit_move_insn (target, const_vec);
5797 return;
5801 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5802 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5804 rtx op0 = XVECEXP (vals, 0, 0);
5805 rtx op1 = XVECEXP (vals, 0, 1);
5806 if (all_same)
5808 if (!MEM_P (op0) && !REG_P (op0))
5809 op0 = force_reg (inner_mode, op0);
5810 if (mode == V2DFmode)
5811 emit_insn (gen_vsx_splat_v2df (target, op0));
5812 else
5813 emit_insn (gen_vsx_splat_v2di (target, op0));
5815 else
5817 op0 = force_reg (inner_mode, op0);
5818 op1 = force_reg (inner_mode, op1);
5819 if (mode == V2DFmode)
5820 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5821 else
5822 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5824 return;
5827 /* With single-precision floating point on VSX, we know that internally
5828 single precision is actually represented as a double, so either make 2 V2DF
5829 vectors and convert those to single precision, or do one conversion and
5830 splat the result to the other elements. */
5831 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5833 if (all_same)
5835 rtx freg = gen_reg_rtx (V4SFmode);
5836 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5837 rtx cvt = ((TARGET_XSCVDPSPN)
5838 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5839 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5841 emit_insn (cvt);
5842 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5844 else
5846 rtx dbl_even = gen_reg_rtx (V2DFmode);
5847 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5848 rtx flt_even = gen_reg_rtx (V4SFmode);
5849 rtx flt_odd = gen_reg_rtx (V4SFmode);
5850 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5851 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5852 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5853 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5855 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5856 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5857 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5858 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5859 rs6000_expand_extract_even (target, flt_even, flt_odd);
5861 return;
5864 /* Store value to stack temp. Load vector element. Splat. However, splat
5865 of 64-bit items is not supported on Altivec. */
5866 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5868 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5869 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5870 XVECEXP (vals, 0, 0));
5871 x = gen_rtx_UNSPEC (VOIDmode,
5872 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5873 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5874 gen_rtvec (2,
5875 gen_rtx_SET (VOIDmode,
5876 target, mem),
5877 x)));
5878 x = gen_rtx_VEC_SELECT (inner_mode, target,
5879 gen_rtx_PARALLEL (VOIDmode,
5880 gen_rtvec (1, const0_rtx)));
5881 emit_insn (gen_rtx_SET (VOIDmode, target,
5882 gen_rtx_VEC_DUPLICATE (mode, x)));
5883 return;
5886 /* One field is non-constant. Load constant then overwrite
5887 varying field. */
5888 if (n_var == 1)
5890 rtx copy = copy_rtx (vals);
5892 /* Load constant part of vector, substitute neighboring value for
5893 varying element. */
5894 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5895 rs6000_expand_vector_init (target, copy);
5897 /* Insert variable. */
5898 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5899 return;
5902 /* Construct the vector in memory one field at a time
5903 and load the whole vector. */
5904 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5905 for (i = 0; i < n_elts; i++)
5906 emit_move_insn (adjust_address_nv (mem, inner_mode,
5907 i * GET_MODE_SIZE (inner_mode)),
5908 XVECEXP (vals, 0, i));
5909 emit_move_insn (target, mem);
5912 /* Set field ELT of TARGET to VAL. */
5914 void
5915 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5917 machine_mode mode = GET_MODE (target);
5918 machine_mode inner_mode = GET_MODE_INNER (mode);
5919 rtx reg = gen_reg_rtx (mode);
5920 rtx mask, mem, x;
5921 int width = GET_MODE_SIZE (inner_mode);
5922 int i;
5924 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5926 rtx (*set_func) (rtx, rtx, rtx, rtx)
5927 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5928 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5929 return;
5932 /* Simplify setting single element vectors like V1TImode. */
5933 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5935 emit_move_insn (target, gen_lowpart (mode, val));
5936 return;
5939 /* Load single variable value. */
5940 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5941 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5942 x = gen_rtx_UNSPEC (VOIDmode,
5943 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5944 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5945 gen_rtvec (2,
5946 gen_rtx_SET (VOIDmode,
5947 reg, mem),
5948 x)));
5950 /* Linear sequence. */
5951 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5952 for (i = 0; i < 16; ++i)
5953 XVECEXP (mask, 0, i) = GEN_INT (i);
5955 /* Set permute mask to insert element into target. */
5956 for (i = 0; i < width; ++i)
5957 XVECEXP (mask, 0, elt*width + i)
5958 = GEN_INT (i + 0x10);
5959 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5961 if (BYTES_BIG_ENDIAN)
5962 x = gen_rtx_UNSPEC (mode,
5963 gen_rtvec (3, target, reg,
5964 force_reg (V16QImode, x)),
5965 UNSPEC_VPERM);
5966 else
5968 /* Invert selector. We prefer to generate VNAND on P8 so
5969 that future fusion opportunities can kick in, but must
5970 generate VNOR elsewhere. */
5971 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5972 rtx iorx = (TARGET_P8_VECTOR
5973 ? gen_rtx_IOR (V16QImode, notx, notx)
5974 : gen_rtx_AND (V16QImode, notx, notx));
5975 rtx tmp = gen_reg_rtx (V16QImode);
5976 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5978 /* Permute with operands reversed and adjusted selector. */
5979 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5980 UNSPEC_VPERM);
5983 emit_insn (gen_rtx_SET (VOIDmode, target, x));
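/* Illustration only, not part of rs6000.c: a host-side model of the
   vperm selector built above.  Selector bytes 0..15 pick from the
   first source (the old vector), 16..31 from the second (the vector
   holding the new element in its first bytes).  Big-endian byte
   numbering is assumed for simplicity; names are local to this
   sketch.  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  uint8_t target[16], reg[16], mask[16], out[16];
  int elt = 2, width = 4;		/* insert 4-byte element 2 */

  for (int i = 0; i < 16; i++)
    {
      target[i] = i;			/* old vector contents */
      reg[i] = 0xa0 + i;		/* new element in bytes 0..3 */
      mask[i] = i;			/* linear sequence, as above */
    }
  for (int i = 0; i < width; i++)
    mask[elt * width + i] = 0x10 + i;	/* redirect to second source */

  for (int i = 0; i < 16; i++)		/* what vperm computes */
    out[i] = mask[i] < 16 ? target[mask[i]] : reg[mask[i] - 16];

  for (int i = 0; i < 16; i++)
    printf ("%02x ", out[i]);		/* bytes 8..11 become a0..a3 */
  printf ("\n");
  return 0;
}
#endif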
5986 /* Extract field ELT from VEC into TARGET. */
5988 void
5989 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5991 machine_mode mode = GET_MODE (vec);
5992 machine_mode inner_mode = GET_MODE_INNER (mode);
5993 rtx mem;
5995 if (VECTOR_MEM_VSX_P (mode))
5997 switch (mode)
5999 default:
6000 break;
6001 case V1TImode:
6002 gcc_assert (elt == 0 && inner_mode == TImode);
6003 emit_move_insn (target, gen_lowpart (TImode, vec));
6004 break;
6005 case V2DFmode:
6006 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6007 return;
6008 case V2DImode:
6009 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6010 return;
6011 case V4SFmode:
6012 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6013 return;
6017 /* Allocate mode-sized buffer. */
6018 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6020 emit_move_insn (mem, vec);
6022 /* Add offset to field within buffer matching vector element. */
6023 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6025 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6028 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
6029 implement ANDing by the mask IN. */
6030 void
6031 build_mask64_2_operands (rtx in, rtx *out)
6033 unsigned HOST_WIDE_INT c, lsb, m1, m2;
6034 int shift;
6036 gcc_assert (GET_CODE (in) == CONST_INT);
6038 c = INTVAL (in);
6039 if (c & 1)
6041 /* Assume c initially something like 0x00fff000000fffff. The idea
6042 is to rotate the word so that the middle ^^^^^^ group of zeros
6043 is at the MS end and can be cleared with an rldicl mask. We then
6044 rotate back and clear off the MS ^^ group of zeros with a
6045 second rldicl. */
6046 c = ~c; /* c == 0xff000ffffff00000 */
6047 lsb = c & -c; /* lsb == 0x0000000000100000 */
6048 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
6049 c = ~c; /* c == 0x00fff000000fffff */
6050 c &= -lsb; /* c == 0x00fff00000000000 */
6051 lsb = c & -c; /* lsb == 0x0000100000000000 */
6052 c = ~c; /* c == 0xff000fffffffffff */
6053 c &= -lsb; /* c == 0xff00000000000000 */
6054 shift = 0;
6055 while ((lsb >>= 1) != 0)
6056 shift++; /* shift == 44 on exit from loop */
6057 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
6058 m1 = ~m1; /* m1 == 0x000000ffffffffff */
6059 m2 = ~c; /* m2 == 0x00ffffffffffffff */
6061 else
6063 /* Assume c initially something like 0xff000f0000000000. The idea
6064 is to rotate the word so that the ^^^ middle group of zeros
6065 is at the LS end and can be cleared with an rldicr mask. We then
6066 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
6067 a second rldicr. */
6068 lsb = c & -c; /* lsb == 0x0000010000000000 */
6069 m2 = -lsb; /* m2 == 0xffffff0000000000 */
6070 c = ~c; /* c == 0x00fff0ffffffffff */
6071 c &= -lsb; /* c == 0x00fff00000000000 */
6072 lsb = c & -c; /* lsb == 0x0000100000000000 */
6073 c = ~c; /* c == 0xff000fffffffffff */
6074 c &= -lsb; /* c == 0xff00000000000000 */
6075 shift = 0;
6076 while ((lsb >>= 1) != 0)
6077 shift++; /* shift == 44 on exit from loop */
6078 m1 = ~c; /* m1 == 0x00ffffffffffffff */
6079 m1 >>= shift; /* m1 == 0x0000000000000fff */
6080 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
6083 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
6084 masks will be all 1's. We are guaranteed more than one transition. */
6085 out[0] = GEN_INT (64 - shift);
6086 out[1] = GEN_INT (m1);
6087 out[2] = GEN_INT (shift);
6088 out[3] = GEN_INT (m2);
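/* Illustration only, not part of rs6000.c: a standalone re-derivation
   of the masks above, plus a check that the two rotate-and-mask steps
   really implement AND by the original mask.  Assumes a valid
   two-insn mask (more than one transition), as the caller guarantees;
   names are local to this sketch.  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static uint64_t
rotl64 (uint64_t x, unsigned n)
{
  n &= 63;
  return n ? (x << n) | (x >> (64 - n)) : x;
}

static void
mask64_2_operands (uint64_t c, unsigned *sh1, uint64_t *m1,
		   unsigned *sh2, uint64_t *m2)
{
  uint64_t lsb;
  unsigned shift = 0;

  if (c & 1)		/* mirror of the function's first branch */
    {
      c = ~c;
      lsb = c & -c;
      *m1 = -lsb;
      c = ~c;
      c &= -lsb;
      lsb = c & -c;
      c = ~c;
      c &= -lsb;
      while ((lsb >>= 1) != 0)
	shift++;
      *m1 <<= 64 - shift;
      *m1 = ~*m1;
      *m2 = ~c;
    }
  else			/* mirror of the second branch */
    {
      lsb = c & -c;
      *m2 = -lsb;
      c = ~c;
      c &= -lsb;
      lsb = c & -c;
      c = ~c;
      c &= -lsb;
      while ((lsb >>= 1) != 0)
	shift++;
      *m1 = ~c;
      *m1 >>= shift;
      *m1 = ~*m1;
    }
  *sh1 = 64 - shift;
  *sh2 = shift;
}

int
main (void)
{
  uint64_t mask = 0x00fff000000fffffULL;	/* the comment's example */
  uint64_t x = 0x123456789abcdef0ULL;
  unsigned sh1, sh2;
  uint64_t m1, m2;

  mask64_2_operands (mask, &sh1, &m1, &sh2, &m2);
  uint64_t r = rotl64 (rotl64 (x, sh1) & m1, sh2) & m2;
  printf ("%d\n", r == (x & mask));		/* prints 1 */
  return 0;
}
#endif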
6091 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6093 bool
6094 invalid_e500_subreg (rtx op, machine_mode mode)
6096 if (TARGET_E500_DOUBLE)
6098 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6099 subreg:TI and reg:TF. Decimal float modes are like integer
6100 modes (only low part of each register used) for this
6101 purpose. */
6102 if (GET_CODE (op) == SUBREG
6103 && (mode == SImode || mode == DImode || mode == TImode
6104 || mode == DDmode || mode == TDmode || mode == PTImode)
6105 && REG_P (SUBREG_REG (op))
6106 && (GET_MODE (SUBREG_REG (op)) == DFmode
6107 || GET_MODE (SUBREG_REG (op)) == TFmode))
6108 return true;
6110 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6111 reg:TI. */
6112 if (GET_CODE (op) == SUBREG
6113 && (mode == DFmode || mode == TFmode)
6114 && REG_P (SUBREG_REG (op))
6115 && (GET_MODE (SUBREG_REG (op)) == DImode
6116 || GET_MODE (SUBREG_REG (op)) == TImode
6117 || GET_MODE (SUBREG_REG (op)) == PTImode
6118 || GET_MODE (SUBREG_REG (op)) == DDmode
6119 || GET_MODE (SUBREG_REG (op)) == TDmode))
6120 return true;
6123 if (TARGET_SPE
6124 && GET_CODE (op) == SUBREG
6125 && mode == SImode
6126 && REG_P (SUBREG_REG (op))
6127 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6128 return true;
6130 return false;
6133 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6134 selects whether the alignment is ABI-mandated, optional, or
6135 both ABI-mandated and optional. */
6137 unsigned int
6138 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6140 if (how != align_opt)
6142 if (TREE_CODE (type) == VECTOR_TYPE)
6144 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6145 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6147 if (align < 64)
6148 align = 64;
6150 else if (align < 128)
6151 align = 128;
6153 else if (TARGET_E500_DOUBLE
6154 && TREE_CODE (type) == REAL_TYPE
6155 && TYPE_MODE (type) == DFmode)
6157 if (align < 64)
6158 align = 64;
6162 if (how != align_abi)
6164 if (TREE_CODE (type) == ARRAY_TYPE
6165 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6167 if (align < BITS_PER_WORD)
6168 align = BITS_PER_WORD;
6172 return align;
6175 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6177 bool
6178 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6180 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6182 if (computed != 128)
6184 static bool warned;
6185 if (!warned && warn_psabi)
6187 warned = true;
6188 inform (input_location,
6189 "the layout of aggregates containing vectors with"
6190 " %d-byte alignment has changed in GCC 5",
6191 computed / BITS_PER_UNIT);
6194 /* In current GCC there is no special case. */
6195 return false;
6198 return false;
6201 /* AIX increases natural record alignment to doubleword if the first
6202 field is an FP double while the FP fields remain word aligned. */
6204 unsigned int
6205 rs6000_special_round_type_align (tree type, unsigned int computed,
6206 unsigned int specified)
6208 unsigned int align = MAX (computed, specified);
6209 tree field = TYPE_FIELDS (type);
6211 /* Skip all non-field decls.  */
6212 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6213 field = DECL_CHAIN (field);
6215 if (field != NULL && field != type)
6217 type = TREE_TYPE (field);
6218 while (TREE_CODE (type) == ARRAY_TYPE)
6219 type = TREE_TYPE (type);
6221 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6222 align = MAX (align, 64);
6225 return align;
6228 /* Darwin increases record alignment to the natural alignment of
6229 the first field. */
6231 unsigned int
6232 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6233 unsigned int specified)
6235 unsigned int align = MAX (computed, specified);
6237 if (TYPE_PACKED (type))
6238 return align;
6240 /* Find the first field, looking down into aggregates. */
6241 do {
6242 tree field = TYPE_FIELDS (type);
6244 /* Skip all non-field decls.  */
6244 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6245 field = DECL_CHAIN (field);
6246 if (! field)
6247 break;
6248 /* A packed field does not contribute any extra alignment. */
6249 if (DECL_PACKED (field))
6250 return align;
6251 type = TREE_TYPE (field);
6252 while (TREE_CODE (type) == ARRAY_TYPE)
6253 type = TREE_TYPE (type);
6254 } while (AGGREGATE_TYPE_P (type));
6256 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6257 align = MAX (align, TYPE_ALIGN (type));
6259 return align;
6262 /* Return 1 for an operand in small memory on V.4/eabi. */
6264 int
6265 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6266 machine_mode mode ATTRIBUTE_UNUSED)
6268 #if TARGET_ELF
6269 rtx sym_ref;
6271 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6272 return 0;
6274 if (DEFAULT_ABI != ABI_V4)
6275 return 0;
6277 /* Vector and float memory instructions have a limited offset on the
6278 SPE, so using a vector or float variable directly as an operand is
6279 not useful. */
6280 if (TARGET_SPE
6281 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6282 return 0;
6284 if (GET_CODE (op) == SYMBOL_REF)
6285 sym_ref = op;
6287 else if (GET_CODE (op) != CONST
6288 || GET_CODE (XEXP (op, 0)) != PLUS
6289 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6290 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6291 return 0;
6293 else
6295 rtx sum = XEXP (op, 0);
6296 HOST_WIDE_INT summand;
6298 /* We have to be careful here, because it is the referenced address
6299 that must be 32k from _SDA_BASE_, not just the symbol. */
6300 summand = INTVAL (XEXP (sum, 1));
6301 if (summand < 0 || summand > g_switch_value)
6302 return 0;
6304 sym_ref = XEXP (sum, 0);
6307 return SYMBOL_REF_SMALL_P (sym_ref);
6308 #else
6309 return 0;
6310 #endif
6313 /* Return true if either operand is a general purpose register. */
6315 bool
6316 gpr_or_gpr_p (rtx op0, rtx op1)
6318 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6319 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6322 /* Return true if this is a move direct operation between GPR registers and
6323 floating point/VSX registers. */
6325 bool
6326 direct_move_p (rtx op0, rtx op1)
6328 int regno0, regno1;
6330 if (!REG_P (op0) || !REG_P (op1))
6331 return false;
6333 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6334 return false;
6336 regno0 = REGNO (op0);
6337 regno1 = REGNO (op1);
6338 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6339 return false;
6341 if (INT_REGNO_P (regno0))
6342 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6344 else if (INT_REGNO_P (regno1))
6346 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6347 return true;
6349 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6350 return true;
6353 return false;
6356 /* Return true if this is a load or store quad operation. This function does
6357 not handle the atomic quad memory instructions. */
6359 bool
6360 quad_load_store_p (rtx op0, rtx op1)
6362 bool ret;
6364 if (!TARGET_QUAD_MEMORY)
6365 ret = false;
6367 else if (REG_P (op0) && MEM_P (op1))
6368 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6369 && quad_memory_operand (op1, GET_MODE (op1))
6370 && !reg_overlap_mentioned_p (op0, op1));
6372 else if (MEM_P (op0) && REG_P (op1))
6373 ret = (quad_memory_operand (op0, GET_MODE (op0))
6374 && quad_int_reg_operand (op1, GET_MODE (op1)));
6376 else
6377 ret = false;
6379 if (TARGET_DEBUG_ADDR)
6381 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6382 ret ? "true" : "false");
6383 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6386 return ret;
6389 /* Given an address, return a constant offset term if one exists. */
6391 static rtx
6392 address_offset (rtx op)
6394 if (GET_CODE (op) == PRE_INC
6395 || GET_CODE (op) == PRE_DEC)
6396 op = XEXP (op, 0);
6397 else if (GET_CODE (op) == PRE_MODIFY
6398 || GET_CODE (op) == LO_SUM)
6399 op = XEXP (op, 1);
6401 if (GET_CODE (op) == CONST)
6402 op = XEXP (op, 0);
6404 if (GET_CODE (op) == PLUS)
6405 op = XEXP (op, 1);
6407 if (CONST_INT_P (op))
6408 return op;
6410 return NULL_RTX;
6413 /* Return true if the MEM operand is a memory operand suitable for use
6414 with a (full width, possibly multiple) gpr load/store. On
6415 powerpc64 this means the offset must be divisible by 4.
6416 Implements 'Y' constraint.
6418 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6419 a constraint function we know the operand has satisfied a suitable
6420 memory predicate. Also accept some odd rtl generated by reload
6421 (see rs6000_legitimize_reload_address for various forms). It is
6422 important that reload rtl be accepted by appropriate constraints
6423 but not by the operand predicate.
6425 Offsetting a lo_sum should not be allowed, except where we know by
6426 alignment that a 32k boundary is not crossed, but see the ???
6427 comment in rs6000_legitimize_reload_address. Note that by
6428 "offsetting" here we mean a further offset to access parts of the
6429 MEM. It's fine to have a lo_sum where the inner address is offset
6430 from a sym, since the same sym+offset will appear in the high part
6431 of the address calculation. */
6433 bool
6434 mem_operand_gpr (rtx op, machine_mode mode)
6436 unsigned HOST_WIDE_INT offset;
6437 int extra;
6438 rtx addr = XEXP (op, 0);
6440 op = address_offset (addr);
6441 if (op == NULL_RTX)
6442 return true;
6444 offset = INTVAL (op);
6445 if (TARGET_POWERPC64 && (offset & 3) != 0)
6446 return false;
6448 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6449 if (extra < 0)
6450 extra = 0;
6452 if (GET_CODE (addr) == LO_SUM)
6453 /* For lo_sum addresses, we must allow any offset except one that
6454 causes a wrap, so test only the low 16 bits. */
6455 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6457 return offset + 0x8000 < 0x10000u - extra;
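/* Illustration only, not part of rs6000.c: the displacement test
   above, restated standalone.  A D-form displacement is a signed
   16-bit field; a multi-word access of SIZE bytes must keep the last
   word's displacement in range too, hence EXTRA; and 64-bit (DS-form)
   accesses need a displacement divisible by 4.  Names are local to
   this sketch.  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static int
disp_ok_for_gpr (int64_t offset, int size, int is_64bit)
{
  int extra = size - (is_64bit ? 8 : 4);	/* bytes beyond one word */
  if (extra < 0)
    extra = 0;
  if (is_64bit && (offset & 3) != 0)		/* DS-form alignment */
    return 0;
  return (uint64_t) offset + 0x8000 < 0x10000u - extra;
}

int
main (void)
{
  printf ("%d\n", disp_ok_for_gpr (32760, 16, 1));   /* 0: last word wraps */
  printf ("%d\n", disp_ok_for_gpr (32752, 16, 1));   /* 1 */
  printf ("%d\n", disp_ok_for_gpr (-32768, 8, 1));   /* 1 */
  return 0;
}
#endif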
6460 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6462 static bool
6463 reg_offset_addressing_ok_p (machine_mode mode)
6465 switch (mode)
6467 case V16QImode:
6468 case V8HImode:
6469 case V4SFmode:
6470 case V4SImode:
6471 case V2DFmode:
6472 case V2DImode:
6473 case V1TImode:
6474 case TImode:
6475 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6476 TImode is not a vector mode, if we want to use the VSX registers to
6477 move it around, we need to restrict ourselves to reg+reg
6478 addressing. */
6479 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6480 return false;
6481 break;
6483 case V4HImode:
6484 case V2SImode:
6485 case V1DImode:
6486 case V2SFmode:
6487 /* Paired vector modes. Only reg+reg addressing is valid. */
6488 if (TARGET_PAIRED_FLOAT)
6489 return false;
6490 break;
6492 case SDmode:
6493 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6494 addressing for the LFIWZX and STFIWX instructions. */
6495 if (TARGET_NO_SDMODE_STACK)
6496 return false;
6497 break;
6499 default:
6500 break;
6503 return true;
6506 static bool
6507 virtual_stack_registers_memory_p (rtx op)
6509 int regnum;
6511 if (GET_CODE (op) == REG)
6512 regnum = REGNO (op);
6514 else if (GET_CODE (op) == PLUS
6515 && GET_CODE (XEXP (op, 0)) == REG
6516 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6517 regnum = REGNO (XEXP (op, 0));
6519 else
6520 return false;
6522 return (regnum >= FIRST_VIRTUAL_REGISTER
6523 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6526 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6527 is known to not straddle a 32k boundary. */
6529 static bool
6530 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6531 machine_mode mode)
6533 tree decl, type;
6534 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6536 if (GET_CODE (op) != SYMBOL_REF)
6537 return false;
6539 dsize = GET_MODE_SIZE (mode);
6540 decl = SYMBOL_REF_DECL (op);
6541 if (!decl)
6543 if (dsize == 0)
6544 return false;
6546 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6547 replacing memory addresses with an anchor plus offset. We
6548 could find the decl by rummaging around in the block->objects
6549 VEC for the given offset but that seems like too much work. */
6550 dalign = BITS_PER_UNIT;
6551 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6552 && SYMBOL_REF_ANCHOR_P (op)
6553 && SYMBOL_REF_BLOCK (op) != NULL)
6555 struct object_block *block = SYMBOL_REF_BLOCK (op);
6557 dalign = block->alignment;
6558 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6560 else if (CONSTANT_POOL_ADDRESS_P (op))
6562 /* It would be nice to have get_pool_align().. */
6563 machine_mode cmode = get_pool_mode (op);
6565 dalign = GET_MODE_ALIGNMENT (cmode);
6568 else if (DECL_P (decl))
6570 dalign = DECL_ALIGN (decl);
6572 if (dsize == 0)
6574 /* Allow BLKmode when the entire object is known to not
6575 cross a 32k boundary. */
6576 if (!DECL_SIZE_UNIT (decl))
6577 return false;
6579 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6580 return false;
6582 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6583 if (dsize > 32768)
6584 return false;
6586 return dalign / BITS_PER_UNIT >= dsize;
6589 else
6591 type = TREE_TYPE (decl);
6593 dalign = TYPE_ALIGN (type);
6594 if (CONSTANT_CLASS_P (decl))
6595 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6596 else
6597 dalign = DATA_ALIGNMENT (decl, dalign);
6599 if (dsize == 0)
6601 /* BLKmode, check the entire object. */
6602 if (TREE_CODE (decl) == STRING_CST)
6603 dsize = TREE_STRING_LENGTH (decl);
6604 else if (TYPE_SIZE_UNIT (type)
6605 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6606 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6607 else
6608 return false;
6609 if (dsize > 32768)
6610 return false;
6612 return dalign / BITS_PER_UNIT >= dsize;
6616 /* Find how many bits of the alignment we know for this access. */
6617 mask = dalign / BITS_PER_UNIT - 1;
6618 lsb = offset & -offset;
6619 mask &= lsb - 1;
6620 dalign = mask + 1;
6622 return dalign >= dsize;
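/* Illustration only, not part of rs6000.c: the closing arithmetic
   above, as a standalone helper.  Given the declared alignment in
   bytes and a byte offset from the symbol, it returns the alignment
   still guaranteed at SYMBOL + OFFSET.  Names are local to this
   sketch.  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>

static uint64_t
known_alignment (uint64_t dalign_bytes, uint64_t offset)
{
  uint64_t mask = dalign_bytes - 1;	/* low zero bits from the decl */
  uint64_t lsb = offset & -offset;	/* lowest set bit of the offset */
  mask &= lsb - 1;			/* bits at or above lsb may flip */
  return mask + 1;
}

int
main (void)
{
  printf ("%llu\n", (unsigned long long) known_alignment (16, 0));  /* 16 */
  printf ("%llu\n", (unsigned long long) known_alignment (16, 4));  /* 4 */
  printf ("%llu\n", (unsigned long long) known_alignment (16, 24)); /* 8 */
  return 0;
}
#endif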
6625 static bool
6626 constant_pool_expr_p (rtx op)
6628 rtx base, offset;
6630 split_const (op, &base, &offset);
6631 return (GET_CODE (base) == SYMBOL_REF
6632 && CONSTANT_POOL_ADDRESS_P (base)
6633 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6636 static const_rtx tocrel_base, tocrel_offset;
6638 /* Return true if OP is a toc pointer relative address (the output
6639 of create_TOC_reference). If STRICT, do not match high part or
6640 non-split -mcmodel=large/medium toc pointer relative addresses. */
6642 bool
6643 toc_relative_expr_p (const_rtx op, bool strict)
6645 if (!TARGET_TOC)
6646 return false;
6648 if (TARGET_CMODEL != CMODEL_SMALL)
6650 /* Only match the low part. */
6651 if (GET_CODE (op) == LO_SUM
6652 && REG_P (XEXP (op, 0))
6653 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6654 op = XEXP (op, 1);
6655 else if (strict)
6656 return false;
6659 tocrel_base = op;
6660 tocrel_offset = const0_rtx;
6661 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6663 tocrel_base = XEXP (op, 0);
6664 tocrel_offset = XEXP (op, 1);
6667 return (GET_CODE (tocrel_base) == UNSPEC
6668 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6671 /* Return true if X is a constant pool address, and also for cmodel=medium
6672 if X is a toc-relative address known to be offsettable within MODE. */
6674 bool
6675 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6676 bool strict)
6678 return (toc_relative_expr_p (x, strict)
6679 && (TARGET_CMODEL != CMODEL_MEDIUM
6680 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6681 || mode == QImode
6682 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6683 INTVAL (tocrel_offset), mode)));
6686 static bool
6687 legitimate_small_data_p (machine_mode mode, rtx x)
6689 return (DEFAULT_ABI == ABI_V4
6690 && !flag_pic && !TARGET_TOC
6691 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6692 && small_data_operand (x, mode));
6695 /* SPE offset addressing is limited to 5 bits' worth of double words. */
6696 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
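/* Illustration only, not part of rs6000.c: SPE_CONST_OFFSET_OK above
   accepts exactly the doubleword-scaled offsets expressible in the
   5-bit evldd-style field, i.e. 0, 8, ..., 248.  Kept under #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  for (int x = 0; x <= 256; x += 8)
    if ((x & ~0xf8) == 0)
      printf ("%d ", x);	/* prints 0 8 16 ... 248 */
  printf ("\n");
  return 0;
}
#endif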
6698 bool
6699 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6700 bool strict, bool worst_case)
6702 unsigned HOST_WIDE_INT offset;
6703 unsigned int extra;
6705 if (GET_CODE (x) != PLUS)
6706 return false;
6707 if (!REG_P (XEXP (x, 0)))
6708 return false;
6709 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6710 return false;
6711 if (!reg_offset_addressing_ok_p (mode))
6712 return virtual_stack_registers_memory_p (x);
6713 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6714 return true;
6715 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6716 return false;
6718 offset = INTVAL (XEXP (x, 1));
6719 extra = 0;
6720 switch (mode)
6722 case V4HImode:
6723 case V2SImode:
6724 case V1DImode:
6725 case V2SFmode:
6726 /* SPE vector modes. */
6727 return SPE_CONST_OFFSET_OK (offset);
6729 case DFmode:
6730 case DDmode:
6731 case DImode:
6732 /* On e500v2, we may have:
6734 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6736 Which gets addressed with evldd instructions. */
6737 if (TARGET_E500_DOUBLE)
6738 return SPE_CONST_OFFSET_OK (offset);
6740 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6741 addressing. */
6742 if (VECTOR_MEM_VSX_P (mode))
6743 return false;
6745 if (!worst_case)
6746 break;
6747 if (!TARGET_POWERPC64)
6748 extra = 4;
6749 else if (offset & 3)
6750 return false;
6751 break;
6753 case TFmode:
6754 if (TARGET_E500_DOUBLE)
6755 return (SPE_CONST_OFFSET_OK (offset)
6756 && SPE_CONST_OFFSET_OK (offset + 8));
6757 /* fall through */
6759 case TDmode:
6760 case TImode:
6761 case PTImode:
6762 extra = 8;
6763 if (!worst_case)
6764 break;
6765 if (!TARGET_POWERPC64)
6766 extra = 12;
6767 else if (offset & 3)
6768 return false;
6769 break;
6771 default:
6772 break;
6775 offset += 0x8000;
6776 return offset < 0x10000 - extra;
6779 bool
6780 legitimate_indexed_address_p (rtx x, int strict)
6782 rtx op0, op1;
6784 if (GET_CODE (x) != PLUS)
6785 return false;
6787 op0 = XEXP (x, 0);
6788 op1 = XEXP (x, 1);
6790 /* Recognize the rtl generated by reload which we know will later be
6791 replaced with proper base and index regs. */
6792 if (!strict
6793 && reload_in_progress
6794 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6795 && REG_P (op1))
6796 return true;
6798 return (REG_P (op0) && REG_P (op1)
6799 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6800 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6801 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6802 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6805 bool
6806 avoiding_indexed_address_p (machine_mode mode)
6808 /* Avoid indexed addressing for modes that have non-indexed
6809 load/store instruction forms. */
6810 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6813 bool
6814 legitimate_indirect_address_p (rtx x, int strict)
6816 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6819 bool
6820 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6822 if (!TARGET_MACHO || !flag_pic
6823 || mode != SImode || GET_CODE (x) != MEM)
6824 return false;
6825 x = XEXP (x, 0);
6827 if (GET_CODE (x) != LO_SUM)
6828 return false;
6829 if (GET_CODE (XEXP (x, 0)) != REG)
6830 return false;
6831 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6832 return false;
6833 x = XEXP (x, 1);
6835 return CONSTANT_P (x);
6838 static bool
6839 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6841 if (GET_CODE (x) != LO_SUM)
6842 return false;
6843 if (GET_CODE (XEXP (x, 0)) != REG)
6844 return false;
6845 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6846 return false;
6847 /* Restrict addressing for DI because of our SUBREG hackery. */
6848 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6849 return false;
6850 x = XEXP (x, 1);
6852 if (TARGET_ELF || TARGET_MACHO)
6854 bool large_toc_ok;
6856 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6857 return false;
6858 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
6859 push_reload from the reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
6860 recognizes some LO_SUM addresses as valid although this
6861 function says the opposite.  In most cases LRA can generate
6862 correct code for address reloads through its various
6863 transformations; only some LO_SUM cases are beyond it.  So we
6864 need code here, analogous to that in rs6000_legitimize_reload_address
6865 for LO_SUM, saying that some addresses are still valid. */
6866 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6867 && small_toc_ref (x, VOIDmode));
6868 if (TARGET_TOC && ! large_toc_ok)
6869 return false;
6870 if (GET_MODE_NUNITS (mode) != 1)
6871 return false;
6872 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6873 && !(/* ??? Assume floating point reg based on mode? */
6874 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6875 && (mode == DFmode || mode == DDmode)))
6876 return false;
6878 return CONSTANT_P (x) || large_toc_ok;
6881 return false;
6885 /* Try machine-dependent ways of modifying an illegitimate address
6886 to be legitimate. If we find one, return the new, valid address.
6887 This is used from only one place: `memory_address' in explow.c.
6889 OLDX is the address as it was before break_out_memory_refs was
6890 called. In some cases it is useful to look at this to decide what
6891 needs to be done.
6893 It is always safe for this function to do nothing. It exists to
6894 recognize opportunities to optimize the output.
6896 On RS/6000, first check for the sum of a register with a constant
6897 integer that is out of range. If so, generate code to add the
6898 constant with the low-order 16 bits masked to the register and force
6899 this result into another register (this can be done with `cau').
6900 Then generate an address of REG+(CONST&0xffff), allowing for the
6901 possibility of bit 16 being a one.
6903 Then check for the sum of a register and something not constant, try to
6904 load the other things into a register and return the sum. */
6906 static rtx
6907 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6908 machine_mode mode)
6910 unsigned int extra;
6912 if (!reg_offset_addressing_ok_p (mode))
6914 if (virtual_stack_registers_memory_p (x))
6915 return x;
6917 /* In theory we should not be seeing addresses of the form reg+0,
6918 but just in case it is generated, optimize it away. */
6919 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6920 return force_reg (Pmode, XEXP (x, 0));
6922 /* For TImode with load/store quad, restrict addresses to just a single
6923 pointer, so it works with both GPRs and VSX registers. */
6924 /* Make sure both operands are registers. */
6925 else if (GET_CODE (x) == PLUS
6926 && (mode != TImode || !TARGET_QUAD_MEMORY))
6927 return gen_rtx_PLUS (Pmode,
6928 force_reg (Pmode, XEXP (x, 0)),
6929 force_reg (Pmode, XEXP (x, 1)));
6930 else
6931 return force_reg (Pmode, x);
6933 if (GET_CODE (x) == SYMBOL_REF)
6935 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6936 if (model != 0)
6937 return rs6000_legitimize_tls_address (x, model);
6940 extra = 0;
6941 switch (mode)
6943 case TFmode:
6944 case TDmode:
6945 case TImode:
6946 case PTImode:
6947 /* As in legitimate_offset_address_p we do not assume
6948 worst-case. The mode here is just a hint as to the registers
6949 used. A TImode is usually in gprs, but may actually be in
6950 fprs. Leave worst-case scenario for reload to handle via
6951 insn constraints. PTImode is only GPRs. */
6952 extra = 8;
6953 break;
6954 default:
6955 break;
6958 if (GET_CODE (x) == PLUS
6959 && GET_CODE (XEXP (x, 0)) == REG
6960 && GET_CODE (XEXP (x, 1)) == CONST_INT
6961 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6962 >= 0x10000 - extra)
6963 && !(SPE_VECTOR_MODE (mode)
6964 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6966 HOST_WIDE_INT high_int, low_int;
6967 rtx sum;
6968 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6969 if (low_int >= 0x8000 - extra)
6970 low_int = 0;
6971 high_int = INTVAL (XEXP (x, 1)) - low_int;
6972 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6973 GEN_INT (high_int)), 0);
6974 return plus_constant (Pmode, sum, low_int);
6976 else if (GET_CODE (x) == PLUS
6977 && GET_CODE (XEXP (x, 0)) == REG
6978 && GET_CODE (XEXP (x, 1)) != CONST_INT
6979 && GET_MODE_NUNITS (mode) == 1
6980 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6981 || (/* ??? Assume floating point reg based on mode? */
6982 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6983 && (mode == DFmode || mode == DDmode)))
6984 && !avoiding_indexed_address_p (mode))
6986 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6987 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6989 else if (SPE_VECTOR_MODE (mode)
6990 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6992 if (mode == DImode)
6993 return x;
6994 /* We accept [reg + reg] and [reg + OFFSET]. */
6996 if (GET_CODE (x) == PLUS)
6998 rtx op1 = XEXP (x, 0);
6999 rtx op2 = XEXP (x, 1);
7000 rtx y;
7002 op1 = force_reg (Pmode, op1);
7004 if (GET_CODE (op2) != REG
7005 && (GET_CODE (op2) != CONST_INT
7006 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7007 || (GET_MODE_SIZE (mode) > 8
7008 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7009 op2 = force_reg (Pmode, op2);
7011 /* We can't always do [reg + reg] for these, because [reg +
7012 reg + offset] is not a legitimate addressing mode. */
7013 y = gen_rtx_PLUS (Pmode, op1, op2);
7015 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7016 return force_reg (Pmode, y);
7017 else
7018 return y;
7021 return force_reg (Pmode, x);
7023 else if ((TARGET_ELF
7024 #if TARGET_MACHO
7025 || !MACHO_DYNAMIC_NO_PIC_P
7026 #endif
7028 && TARGET_32BIT
7029 && TARGET_NO_TOC
7030 && ! flag_pic
7031 && GET_CODE (x) != CONST_INT
7032 && GET_CODE (x) != CONST_WIDE_INT
7033 && GET_CODE (x) != CONST_DOUBLE
7034 && CONSTANT_P (x)
7035 && GET_MODE_NUNITS (mode) == 1
7036 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7037 || (/* ??? Assume floating point reg based on mode? */
7038 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7039 && (mode == DFmode || mode == DDmode))))
7041 rtx reg = gen_reg_rtx (Pmode);
7042 if (TARGET_ELF)
7043 emit_insn (gen_elf_high (reg, x));
7044 else
7045 emit_insn (gen_macho_high (reg, x));
7046 return gen_rtx_LO_SUM (Pmode, reg, x);
7048 else if (TARGET_TOC
7049 && GET_CODE (x) == SYMBOL_REF
7050 && constant_pool_expr_p (x)
7051 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7052 return create_TOC_reference (x, NULL_RTX);
7053 else
7054 return x;
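/* Illustration only, not part of rs6000.c: the high/low split used
   above for reg + large-constant addresses, standalone.  LOW is the
   sign-extended bottom 16 bits, HIGH the remainder, so an addis of
   HIGH >> 16 followed by a D-form access with displacement LOW
   reconstructs the offset exactly.  (The real code additionally
   clamps LOW to 0 near the range boundary; that refinement is
   omitted here.)  Kept under #if 0.  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int64_t offset = 0x12345;		/* too big for one D-form */
  int64_t low = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  int64_t high = offset - low;

  printf ("high=0x%llx low=%lld\n",
	  (unsigned long long) high, (long long) low);
  printf ("%d\n", high + low == offset);	/* 1: split is exact */
  printf ("%d\n", (high & 0xffff) == 0);	/* 1: fits an addis */
  return 0;
}
#endif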
7057 /* Debug version of rs6000_legitimize_address. */
7058 static rtx
7059 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7061 rtx ret;
7062 rtx_insn *insns;
7064 start_sequence ();
7065 ret = rs6000_legitimize_address (x, oldx, mode);
7066 insns = get_insns ();
7067 end_sequence ();
7069 if (ret != x)
7071 fprintf (stderr,
7072 "\nrs6000_legitimize_address: mode %s, old code %s, "
7073 "new code %s, modified\n",
7074 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7075 GET_RTX_NAME (GET_CODE (ret)));
7077 fprintf (stderr, "Original address:\n");
7078 debug_rtx (x);
7080 fprintf (stderr, "oldx:\n");
7081 debug_rtx (oldx);
7083 fprintf (stderr, "New address:\n");
7084 debug_rtx (ret);
7086 if (insns)
7088 fprintf (stderr, "Insns added:\n");
7089 debug_rtx_list (insns, 20);
7092 else
7094 fprintf (stderr,
7095 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7096 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7098 debug_rtx (x);
7101 if (insns)
7102 emit_insn (insns);
7104 return ret;
7107 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7108 We need to emit DTP-relative relocations. */
7110 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7111 static void
7112 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7114 switch (size)
7116 case 4:
7117 fputs ("\t.long\t", file);
7118 break;
7119 case 8:
7120 fputs (DOUBLE_INT_ASM_OP, file);
7121 break;
7122 default:
7123 gcc_unreachable ();
7125 output_addr_const (file, x);
7126 fputs ("@dtprel+0x8000", file);
7129 /* Return true if X is a symbol that refers to real (rather than emulated)
7130 TLS. */
7132 static bool
7133 rs6000_real_tls_symbol_ref_p (rtx x)
7135 return (GET_CODE (x) == SYMBOL_REF
7136 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7139 /* In the name of slightly smaller debug output, and to cater to
7140 general assembler lossage, recognize various UNSPEC sequences
7141 and turn them back into a direct symbol reference. */
7143 static rtx
7144 rs6000_delegitimize_address (rtx orig_x)
7146 rtx x, y, offset;
7148 orig_x = delegitimize_mem_from_attrs (orig_x);
7149 x = orig_x;
7150 if (MEM_P (x))
7151 x = XEXP (x, 0);
7153 y = x;
7154 if (TARGET_CMODEL != CMODEL_SMALL
7155 && GET_CODE (y) == LO_SUM)
7156 y = XEXP (y, 1);
7158 offset = NULL_RTX;
7159 if (GET_CODE (y) == PLUS
7160 && GET_MODE (y) == Pmode
7161 && CONST_INT_P (XEXP (y, 1)))
7163 offset = XEXP (y, 1);
7164 y = XEXP (y, 0);
7167 if (GET_CODE (y) == UNSPEC
7168 && XINT (y, 1) == UNSPEC_TOCREL)
7170 y = XVECEXP (y, 0, 0);
7172 #ifdef HAVE_AS_TLS
7173 /* Do not associate thread-local symbols with the original
7174 constant pool symbol. */
7175 if (TARGET_XCOFF
7176 && GET_CODE (y) == SYMBOL_REF
7177 && CONSTANT_POOL_ADDRESS_P (y)
7178 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7179 return orig_x;
7180 #endif
7182 if (offset != NULL_RTX)
7183 y = gen_rtx_PLUS (Pmode, y, offset);
7184 if (!MEM_P (orig_x))
7185 return y;
7186 else
7187 return replace_equiv_address_nv (orig_x, y);
7190 if (TARGET_MACHO
7191 && GET_CODE (orig_x) == LO_SUM
7192 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7194 y = XEXP (XEXP (orig_x, 1), 0);
7195 if (GET_CODE (y) == UNSPEC
7196 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7197 return XVECEXP (y, 0, 0);
7200 return orig_x;
7203 /* Return true if X shouldn't be emitted into the debug info.
7204 The linker doesn't like .toc section references from
7205 .debug_* sections, so reject .toc section symbols. */
7207 static bool
7208 rs6000_const_not_ok_for_debug_p (rtx x)
7210 if (GET_CODE (x) == SYMBOL_REF
7211 && CONSTANT_POOL_ADDRESS_P (x))
7213 rtx c = get_pool_constant (x);
7214 machine_mode cmode = get_pool_mode (x);
7215 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7216 return true;
7219 return false;
7222 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7224 static GTY(()) rtx rs6000_tls_symbol;
7225 static rtx
7226 rs6000_tls_get_addr (void)
7228 if (!rs6000_tls_symbol)
7229 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7231 return rs6000_tls_symbol;
7234 /* Construct the SYMBOL_REF for TLS GOT references. */
7236 static GTY(()) rtx rs6000_got_symbol;
7237 static rtx
7238 rs6000_got_sym (void)
7240 if (!rs6000_got_symbol)
7242 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7243 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7244 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7247 return rs6000_got_symbol;
7250 /* AIX Thread-Local Address support. */
7252 static rtx
7253 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7255 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7256 const char *name;
7257 char *tlsname;
7259 name = XSTR (addr, 0);
7260 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
7261 or will be placed in the TLS private data section. */
7262 if (name[strlen (name) - 1] != ']'
7263 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7264 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7266 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7267 strcpy (tlsname, name);
7268 strcat (tlsname,
7269 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7270 tlsaddr = copy_rtx (addr);
7271 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7273 else
7274 tlsaddr = addr;
7276 /* Place addr into TOC constant pool. */
7277 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7279 /* Output the TOC entry and create the MEM referencing the value. */
7280 if (constant_pool_expr_p (XEXP (sym, 0))
7281 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7283 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7284 mem = gen_const_mem (Pmode, tocref);
7285 set_mem_alias_set (mem, get_TOC_alias_set ());
7287 else
7288 return sym;
7290 /* Use global-dynamic for local-dynamic. */
7291 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7292 || model == TLS_MODEL_LOCAL_DYNAMIC)
7294 /* Create new TOC reference for @m symbol. */
7295 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7296 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7297 strcpy (tlsname, "*LCM");
7298 strcat (tlsname, name + 3);
7299 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7300 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7301 tocref = create_TOC_reference (modaddr, NULL_RTX);
7302 rtx modmem = gen_const_mem (Pmode, tocref);
7303 set_mem_alias_set (modmem, get_TOC_alias_set ());
7305 rtx modreg = gen_reg_rtx (Pmode);
7306 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7308 tmpreg = gen_reg_rtx (Pmode);
7309 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7311 dest = gen_reg_rtx (Pmode);
7312 if (TARGET_32BIT)
7313 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7314 else
7315 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7316 return dest;
7318 /* Obtain the TLS pointer: via a call on 32-bit, from GPR 13 on 64-bit. */
7319 else if (TARGET_32BIT)
7321 tlsreg = gen_reg_rtx (SImode);
7322 emit_insn (gen_tls_get_tpointer (tlsreg));
7324 else
7325 tlsreg = gen_rtx_REG (DImode, 13);
7327 /* Load the TOC value into a temporary register. */
7328 tmpreg = gen_reg_rtx (Pmode);
7329 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7330 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7331 gen_rtx_MINUS (Pmode, addr, tlsreg));
7333 /* Add the TOC symbol value to the TLS pointer. */
7334 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7336 return dest;
7339 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7340 this (thread-local) address. */
7342 static rtx
7343 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7345 rtx dest, insn;
7347 if (TARGET_XCOFF)
7348 return rs6000_legitimize_tls_address_aix (addr, model);
7350 dest = gen_reg_rtx (Pmode);
7351 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7353 rtx tlsreg;
7355 if (TARGET_64BIT)
7357 tlsreg = gen_rtx_REG (Pmode, 13);
7358 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7360 else
7362 tlsreg = gen_rtx_REG (Pmode, 2);
7363 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7365 emit_insn (insn);
7367 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7369 rtx tlsreg, tmp;
7371 tmp = gen_reg_rtx (Pmode);
7372 if (TARGET_64BIT)
7374 tlsreg = gen_rtx_REG (Pmode, 13);
7375 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7377 else
7379 tlsreg = gen_rtx_REG (Pmode, 2);
7380 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7382 emit_insn (insn);
7383 if (TARGET_64BIT)
7384 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7385 else
7386 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7387 emit_insn (insn);
7389 else
7391 rtx r3, got, tga, tmp1, tmp2, call_insn;
7393 /* We currently use relocations like @got@tlsgd for tls, which
7394 means the linker will handle allocation of tls entries, placing
7395 them in the .got section. So use a pointer to the .got section,
7396 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7397 or to secondary GOT sections used by 32-bit -fPIC. */
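/* Rough sketch of the 64-bit global-dynamic sequence this expands to;
   r2/r3 are fixed by the ABI and "sym" is an invented name:

       addi 3,2,sym@got@tlsgd
       bl   __tls_get_addr(sym@tlsgd)
       nop  */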
7398 if (TARGET_64BIT)
7399 got = gen_rtx_REG (Pmode, 2);
7400 else
7402 if (flag_pic == 1)
7403 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7404 else
7406 rtx gsym = rs6000_got_sym ();
7407 got = gen_reg_rtx (Pmode);
7408 if (flag_pic == 0)
7409 rs6000_emit_move (got, gsym, Pmode);
7410 else
7412 rtx mem, lab, last;
7414 tmp1 = gen_reg_rtx (Pmode);
7415 tmp2 = gen_reg_rtx (Pmode);
7416 mem = gen_const_mem (Pmode, tmp1);
7417 lab = gen_label_rtx ();
7418 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7419 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7420 if (TARGET_LINK_STACK)
7421 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7422 emit_move_insn (tmp2, mem);
7423 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7424 set_unique_reg_note (last, REG_EQUAL, gsym);
7429 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7431 tga = rs6000_tls_get_addr ();
7432 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7433 1, const0_rtx, Pmode);
7435 r3 = gen_rtx_REG (Pmode, 3);
7436 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7438 if (TARGET_64BIT)
7439 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7440 else
7441 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7443 else if (DEFAULT_ABI == ABI_V4)
7444 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7445 else
7446 gcc_unreachable ();
7447 call_insn = last_call_insn ();
7448 PATTERN (call_insn) = insn;
7449 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7450 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7451 pic_offset_table_rtx);
7453 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7455 tga = rs6000_tls_get_addr ();
7456 tmp1 = gen_reg_rtx (Pmode);
7457 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7458 1, const0_rtx, Pmode);
7460 r3 = gen_rtx_REG (Pmode, 3);
7461 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7463 if (TARGET_64BIT)
7464 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7465 else
7466 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7468 else if (DEFAULT_ABI == ABI_V4)
7469 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7470 else
7471 gcc_unreachable ();
7472 call_insn = last_call_insn ();
7473 PATTERN (call_insn) = insn;
7474 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7475 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7476 pic_offset_table_rtx);
7478 if (rs6000_tls_size == 16)
7480 if (TARGET_64BIT)
7481 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7482 else
7483 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7485 else if (rs6000_tls_size == 32)
7487 tmp2 = gen_reg_rtx (Pmode);
7488 if (TARGET_64BIT)
7489 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7490 else
7491 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7492 emit_insn (insn);
7493 if (TARGET_64BIT)
7494 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7495 else
7496 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7498 else
7500 tmp2 = gen_reg_rtx (Pmode);
7501 if (TARGET_64BIT)
7502 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7503 else
7504 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7505 emit_insn (insn);
7506 insn = gen_rtx_SET (Pmode, dest,
7507 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7509 emit_insn (insn);
7511 else
7513 /* Initial-exec, or local-exec with a 64-bit offset. */
7514 tmp2 = gen_reg_rtx (Pmode);
7515 if (TARGET_64BIT)
7516 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7517 else
7518 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7519 emit_insn (insn);
7520 if (TARGET_64BIT)
7521 insn = gen_tls_tls_64 (dest, tmp2, addr);
7522 else
7523 insn = gen_tls_tls_32 (dest, tmp2, addr);
7524 emit_insn (insn);
7528 return dest;
7531 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7533 static bool
7534 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7536 if (GET_CODE (x) == HIGH
7537 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7538 return true;
7540 /* A TLS symbol in the TOC cannot contain a sum. */
7541 if (GET_CODE (x) == CONST
7542 && GET_CODE (XEXP (x, 0)) == PLUS
7543 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7544 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7545 return true;
7547 /* Do not place an ELF TLS symbol in the constant pool. */
7548 return TARGET_ELF && tls_referenced_p (x);
7551 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7552 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7553 can be addressed relative to the toc pointer. */
7555 static bool
7556 use_toc_relative_ref (rtx sym)
7558 return ((constant_pool_expr_p (sym)
7559 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7560 get_pool_mode (sym)))
7561 || (TARGET_CMODEL == CMODEL_MEDIUM
7562 && SYMBOL_REF_LOCAL_P (sym)));
7565 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7566 replace the input X, or the original X if no replacement is called for.
7567 The output parameter *WIN is 1 if the calling macro should goto WIN,
7568 0 if it should not.
7570 For RS/6000, we wish to handle large displacements off a base
7571 register by splitting the addend across an addi/addis pair and the mem insn.
7572 This cuts the number of extra insns needed from 3 to 1.
7574 On Darwin, we use this to generate code for floating point constants.
7575 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7576 The Darwin code is inside #if TARGET_MACHO because only then are the
7577 machopic_* functions defined. */
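/* Illustrative sketch (invented registers): an access at r9 + 0x12345
   does not fit a 16-bit displacement.  Using the split below,
   low = ((val & 0xffff) ^ 0x8000) - 0x8000 = 0x2345 and
   high = val - low = 0x10000, so reload can emit

       addis 11,9,0x1
       lwz   3,0x2345(11)

   instead of materializing the full constant separately.  */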
7578 static rtx
7579 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7580 int opnum, int type,
7581 int ind_levels ATTRIBUTE_UNUSED, int *win)
7583 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7585 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7586 DFmode/DImode MEM. */
7587 if (reg_offset_p
7588 && opnum == 1
7589 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7590 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7591 reg_offset_p = false;
7593 /* We must recognize output that we have already generated ourselves. */
7594 if (GET_CODE (x) == PLUS
7595 && GET_CODE (XEXP (x, 0)) == PLUS
7596 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7597 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7598 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7600 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7601 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7602 opnum, (enum reload_type) type);
7603 *win = 1;
7604 return x;
7607 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7608 if (GET_CODE (x) == LO_SUM
7609 && GET_CODE (XEXP (x, 0)) == HIGH)
7611 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7612 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7613 opnum, (enum reload_type) type);
7614 *win = 1;
7615 return x;
7618 #if TARGET_MACHO
7619 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7620 && GET_CODE (x) == LO_SUM
7621 && GET_CODE (XEXP (x, 0)) == PLUS
7622 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7623 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7624 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7625 && machopic_operand_p (XEXP (x, 1)))
7627 /* Result of a previous invocation of this function on a Darwin
7628 floating point constant. */
7629 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7630 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7631 opnum, (enum reload_type) type);
7632 *win = 1;
7633 return x;
7635 #endif
7637 if (TARGET_CMODEL != CMODEL_SMALL
7638 && reg_offset_p
7639 && small_toc_ref (x, VOIDmode))
7641 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7642 x = gen_rtx_LO_SUM (Pmode, hi, x);
7643 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7644 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7645 opnum, (enum reload_type) type);
7646 *win = 1;
7647 return x;
7650 if (GET_CODE (x) == PLUS
7651 && GET_CODE (XEXP (x, 0)) == REG
7652 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7653 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7654 && GET_CODE (XEXP (x, 1)) == CONST_INT
7655 && reg_offset_p
7656 && !SPE_VECTOR_MODE (mode)
7657 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7658 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7660 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7661 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7662 HOST_WIDE_INT high
7663 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7665 /* Check for 32-bit overflow. */
7666 if (high + low != val)
7668 *win = 0;
7669 return x;
7672 /* Reload the high part into a base reg; leave the low part
7673 in the mem directly. */
7675 x = gen_rtx_PLUS (GET_MODE (x),
7676 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7677 GEN_INT (high)),
7678 GEN_INT (low));
7680 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7681 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7682 opnum, (enum reload_type) type);
7683 *win = 1;
7684 return x;
7687 if (GET_CODE (x) == SYMBOL_REF
7688 && reg_offset_p
7689 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7690 && !SPE_VECTOR_MODE (mode)
7691 #if TARGET_MACHO
7692 && DEFAULT_ABI == ABI_DARWIN
7693 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7694 && machopic_symbol_defined_p (x)
7695 #else
7696 && DEFAULT_ABI == ABI_V4
7697 && !flag_pic
7698 #endif
7699 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7700 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7701 without fprs.
7702 ??? Assume floating point reg based on mode? This assumption is
7703 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7704 where reload ends up doing a DFmode load of a constant from
7705 mem using two gprs. Unfortunately, at this point reload
7706 hasn't yet selected regs so poking around in reload data
7707 won't help and even if we could figure out the regs reliably,
7708 we'd still want to allow this transformation when the mem is
7709 naturally aligned. Since we say the address is good here, we
7710 can't disable offsets from LO_SUMs in mem_operand_gpr.
7711 FIXME: Allow offset from lo_sum for other modes too, when
7712 mem is sufficiently aligned.
7714 Also disallow this if the type can go in VMX/Altivec registers, since
7715 those registers do not have d-form (reg+offset) address modes. */
7716 && !reg_addr[mode].scalar_in_vmx_p
7717 && mode != TFmode
7718 && mode != TDmode
7719 && (mode != TImode || !TARGET_VSX_TIMODE)
7720 && mode != PTImode
7721 && (mode != DImode || TARGET_POWERPC64)
7722 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7723 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7725 #if TARGET_MACHO
7726 if (flag_pic)
7728 rtx offset = machopic_gen_offset (x);
7729 x = gen_rtx_LO_SUM (GET_MODE (x),
7730 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7731 gen_rtx_HIGH (Pmode, offset)), offset);
7733 else
7734 #endif
7735 x = gen_rtx_LO_SUM (GET_MODE (x),
7736 gen_rtx_HIGH (Pmode, x), x);
7738 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7739 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7740 opnum, (enum reload_type) type);
7741 *win = 1;
7742 return x;
7745 /* Reload an offset address wrapped by an AND that represents the
7746 masking of the lower bits. Strip the outer AND and let reload
7747 convert the offset address into an indirect address. For VSX,
7748 force reload to create the address with an AND in a separate
7749 register, because we can't guarantee an altivec register will
7750 be used. */
7751 if (VECTOR_MEM_ALTIVEC_P (mode)
7752 && GET_CODE (x) == AND
7753 && GET_CODE (XEXP (x, 0)) == PLUS
7754 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7755 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7756 && GET_CODE (XEXP (x, 1)) == CONST_INT
7757 && INTVAL (XEXP (x, 1)) == -16)
7759 x = XEXP (x, 0);
7760 *win = 1;
7761 return x;
7764 if (TARGET_TOC
7765 && reg_offset_p
7766 && GET_CODE (x) == SYMBOL_REF
7767 && use_toc_relative_ref (x))
7769 x = create_TOC_reference (x, NULL_RTX);
7770 if (TARGET_CMODEL != CMODEL_SMALL)
7771 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7772 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7773 opnum, (enum reload_type) type);
7774 *win = 1;
7775 return x;
7777 *win = 0;
7778 return x;
7781 /* Debug version of rs6000_legitimize_reload_address. */
7782 static rtx
7783 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7784 int opnum, int type,
7785 int ind_levels, int *win)
7787 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7788 ind_levels, win);
7789 fprintf (stderr,
7790 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7791 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7792 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7793 debug_rtx (x);
7795 if (x == ret)
7796 fprintf (stderr, "Same address returned\n");
7797 else if (!ret)
7798 fprintf (stderr, "NULL returned\n");
7799 else
7801 fprintf (stderr, "New address:\n");
7802 debug_rtx (ret);
7805 return ret;
7808 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7809 that is a valid memory address for an instruction.
7810 The MODE argument is the machine mode for the MEM expression
7811 that wants to use this address.
7813 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7814 refers to a constant pool entry of an address (or the sum of it
7815 plus a constant), a short (16-bit signed) constant plus a register,
7816 the sum of two registers, or a register indirect, possibly with an
7817 auto-increment. For DFmode, DDmode and DImode with a constant plus
7818 register, we must ensure that both words are addressable, or on
7819 PowerPC64 that the offset is word aligned.
7821 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7822 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7823 because adjacent memory cells are accessed by adding word-sized offsets
7824 during assembly output. */
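/* Illustrative sketch of the accepted forms (register numbers invented):

       (reg 9)                            register indirect
       (plus (reg 9) (const_int 16))      16-bit signed offset
       (plus (reg 9) (reg 10))            indexed, reg + reg
       (pre_inc (reg 9))                  auto-increment variants
       (lo_sum (reg 9) (symbol_ref ...))  lo_sum / TOC references  */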
7825 static bool
7826 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7828 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7830 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7831 if (VECTOR_MEM_ALTIVEC_P (mode)
7832 && GET_CODE (x) == AND
7833 && GET_CODE (XEXP (x, 1)) == CONST_INT
7834 && INTVAL (XEXP (x, 1)) == -16)
7835 x = XEXP (x, 0);
7837 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7838 return 0;
7839 if (legitimate_indirect_address_p (x, reg_ok_strict))
7840 return 1;
7841 if (TARGET_UPDATE
7842 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7843 && mode_supports_pre_incdec_p (mode)
7844 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7845 return 1;
7846 if (virtual_stack_registers_memory_p (x))
7847 return 1;
7848 if (reg_offset_p && legitimate_small_data_p (mode, x))
7849 return 1;
7850 if (reg_offset_p
7851 && legitimate_constant_pool_address_p (x, mode,
7852 reg_ok_strict || lra_in_progress))
7853 return 1;
7854 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7855 allow register indirect addresses. This will allow the values to go in
7856 either GPRs or VSX registers without reloading. The vector types would
7857 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7858 somewhat split, in that some uses are GPR based, and some VSX based. */
7859 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7860 return 0;
7861 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7862 if (! reg_ok_strict
7863 && reg_offset_p
7864 && GET_CODE (x) == PLUS
7865 && GET_CODE (XEXP (x, 0)) == REG
7866 && (XEXP (x, 0) == virtual_stack_vars_rtx
7867 || XEXP (x, 0) == arg_pointer_rtx)
7868 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7869 return 1;
7870 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7871 return 1;
7872 if (mode != TFmode
7873 && mode != TDmode
7874 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7875 || TARGET_POWERPC64
7876 || (mode != DFmode && mode != DDmode)
7877 || (TARGET_E500_DOUBLE && mode != DDmode))
7878 && (TARGET_POWERPC64 || mode != DImode)
7879 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7880 && mode != PTImode
7881 && !avoiding_indexed_address_p (mode)
7882 && legitimate_indexed_address_p (x, reg_ok_strict))
7883 return 1;
7884 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7885 && mode_supports_pre_modify_p (mode)
7886 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7887 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7888 reg_ok_strict, false)
7889 || (!avoiding_indexed_address_p (mode)
7890 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7891 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7892 return 1;
7893 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7894 return 1;
7895 return 0;
7898 /* Debug version of rs6000_legitimate_address_p. */
7899 static bool
7900 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7901 bool reg_ok_strict)
7903 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7904 fprintf (stderr,
7905 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7906 "strict = %d, reload = %s, code = %s\n",
7907 ret ? "true" : "false",
7908 GET_MODE_NAME (mode),
7909 reg_ok_strict,
7910 (reload_completed
7911 ? "after"
7912 : (reload_in_progress ? "progress" : "before")),
7913 GET_RTX_NAME (GET_CODE (x)));
7914 debug_rtx (x);
7916 return ret;
7919 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7921 static bool
7922 rs6000_mode_dependent_address_p (const_rtx addr,
7923 addr_space_t as ATTRIBUTE_UNUSED)
7925 return rs6000_mode_dependent_address_ptr (addr);
7928 /* Go to LABEL if ADDR (a legitimate address expression)
7929 has an effect that depends on the machine mode it is used for.
7931 On the RS/6000 this is true of all integral offsets (since AltiVec
7932 and VSX modes don't allow them) and of any pre-increment or decrement.
7934 ??? Except that due to conceptual problems in offsettable_address_p
7935 we can't really report the problems of integral offsets. So leave
7936 this assuming that the adjustable offset must be valid for the
7937 sub-words of a TFmode operand, which is what we had before. */
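/* Worked example for the PLUS case below (sketch): on 32-bit, a TFmode
   access spans four words, so the last sub-word lives at offset + 12.
   With offset 0x7ff8, 0x7ff8 + 12 = 0x8004 no longer fits a signed
   16-bit displacement, and indeed 0x7ff8 + 0x8000 >= 0x10000 - 12.  */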
7939 static bool
7940 rs6000_mode_dependent_address (const_rtx addr)
7942 switch (GET_CODE (addr))
7944 case PLUS:
7945 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7946 is considered a legitimate address before reload, so there
7947 are no offset restrictions in that case. Note that this
7948 condition is safe in strict mode because any address involving
7949 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7950 been rejected as illegitimate. */
7951 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7952 && XEXP (addr, 0) != arg_pointer_rtx
7953 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7955 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7956 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7958 break;
7960 case LO_SUM:
7961 /* Anything in the constant pool is sufficiently aligned that
7962 all bytes have the same high part address. */
7963 return !legitimate_constant_pool_address_p (addr, QImode, false);
7965 /* Auto-increment cases are now treated generically in recog.c. */
7966 case PRE_MODIFY:
7967 return TARGET_UPDATE;
7969 /* AND is only allowed in Altivec loads. */
7970 case AND:
7971 return true;
7973 default:
7974 break;
7977 return false;
7980 /* Debug version of rs6000_mode_dependent_address. */
7981 static bool
7982 rs6000_debug_mode_dependent_address (const_rtx addr)
7984 bool ret = rs6000_mode_dependent_address (addr);
7986 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7987 ret ? "true" : "false");
7988 debug_rtx (addr);
7990 return ret;
7993 /* Implement FIND_BASE_TERM. */
7995 rtx
7996 rs6000_find_base_term (rtx op)
7998 rtx base;
8000 base = op;
8001 if (GET_CODE (base) == CONST)
8002 base = XEXP (base, 0);
8003 if (GET_CODE (base) == PLUS)
8004 base = XEXP (base, 0);
8005 if (GET_CODE (base) == UNSPEC)
8006 switch (XINT (base, 1))
8008 case UNSPEC_TOCREL:
8009 case UNSPEC_MACHOPIC_OFFSET:
8010 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8011 for aliasing purposes. */
8012 return XVECEXP (base, 0, 0);
8015 return op;
8018 /* More elaborate version of recog's offsettable_memref_p predicate
8019 that works around the ??? note of rs6000_mode_dependent_address.
8020 In particular it accepts
8022 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8024 in 32-bit mode, which the recog predicate rejects. */
8026 static bool
8027 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8029 bool worst_case;
8031 if (!MEM_P (op))
8032 return false;
8034 /* First mimic offsettable_memref_p. */
8035 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8036 return true;
8038 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8039 the latter predicate knows nothing about the mode of the memory
8040 reference and, therefore, assumes that it is the largest supported
8041 mode (TFmode). As a consequence, legitimate offsettable memory
8042 references are rejected. rs6000_legitimate_offset_address_p contains
8043 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8044 at least with a little bit of help here given that we know the
8045 actual registers used. */
8046 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8047 || GET_MODE_SIZE (reg_mode) == 4);
8048 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8049 true, worst_case);
8052 /* Change register usage conditional on target flags. */
8053 static void
8054 rs6000_conditional_register_usage (void)
8056 int i;
8058 if (TARGET_DEBUG_TARGET)
8059 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8061 /* Set MQ register fixed (already call_used) so that it will not be
8062 allocated. */
8063 fixed_regs[64] = 1;
8065 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8066 if (TARGET_64BIT)
8067 fixed_regs[13] = call_used_regs[13]
8068 = call_really_used_regs[13] = 1;
8070 /* Conditionally disable FPRs. */
8071 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8072 for (i = 32; i < 64; i++)
8073 fixed_regs[i] = call_used_regs[i]
8074 = call_really_used_regs[i] = 1;
8076 /* The TOC register is not killed across calls in a way that is
8077 visible to the compiler. */
8078 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8079 call_really_used_regs[2] = 0;
8081 if (DEFAULT_ABI == ABI_V4
8082 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8083 && flag_pic == 2)
8084 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8086 if (DEFAULT_ABI == ABI_V4
8087 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8088 && flag_pic == 1)
8089 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8090 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8091 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8093 if (DEFAULT_ABI == ABI_DARWIN
8094 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8095 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8096 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8097 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8099 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8100 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8101 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8103 if (TARGET_SPE)
8105 global_regs[SPEFSCR_REGNO] = 1;
8106 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8107 registers in prologues and epilogues. We no longer use r14
8108 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8109 pool for link-compatibility with older versions of GCC. Once
8110 "old" code has died out, we can return r14 to the allocation
8111 pool. */
8112 fixed_regs[14]
8113 = call_used_regs[14]
8114 = call_really_used_regs[14] = 1;
8117 if (!TARGET_ALTIVEC && !TARGET_VSX)
8119 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8120 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8121 call_really_used_regs[VRSAVE_REGNO] = 1;
8124 if (TARGET_ALTIVEC || TARGET_VSX)
8125 global_regs[VSCR_REGNO] = 1;
8127 if (TARGET_ALTIVEC_ABI)
8129 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8130 call_used_regs[i] = call_really_used_regs[i] = 1;
8132 /* AIX reserves VR20:31 in non-extended ABI mode. */
8133 if (TARGET_XCOFF)
8134 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8135 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8140 /* Output insns to set DEST equal to the constant SOURCE as a series of
8141 lis, ori and shl instructions and return TRUE. */
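/* E.g. (sketch), for SImode c = 0x12345678 the SImode arm below emits:

       lis rD,0x1234       ; rD = c & ~0xffff
       ori rD,rD,0x5678    ; rD |= c & 0xffff

   "rD" is an invented register name.  */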
8143 bool
8144 rs6000_emit_set_const (rtx dest, rtx source)
8146 machine_mode mode = GET_MODE (dest);
8147 rtx temp, set;
8148 rtx_insn *insn;
8149 HOST_WIDE_INT c;
8151 gcc_checking_assert (CONST_INT_P (source));
8152 c = INTVAL (source);
8153 switch (mode)
8155 case QImode:
8156 case HImode:
8157 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8158 return true;
8160 case SImode:
8161 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8163 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8164 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8165 emit_insn (gen_rtx_SET (VOIDmode, dest,
8166 gen_rtx_IOR (SImode, copy_rtx (temp),
8167 GEN_INT (c & 0xffff))));
8168 break;
8170 case DImode:
8171 if (!TARGET_POWERPC64)
8173 rtx hi, lo;
8175 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8176 DImode);
8177 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8178 DImode);
8179 emit_move_insn (hi, GEN_INT (c >> 32));
8180 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8181 emit_move_insn (lo, GEN_INT (c));
8183 else
8184 rs6000_emit_set_long_const (dest, c);
8185 break;
8187 default:
8188 gcc_unreachable ();
8191 insn = get_last_insn ();
8192 set = single_set (insn);
8193 if (! CONSTANT_P (SET_SRC (set)))
8194 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8196 return true;
8199 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8200 Output insns to set DEST equal to the constant C as a series of
8201 lis, ori and shl instructions. */
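/* Worst-case sketch: a full 64-bit constant such as 0x1234567890abcdef
   takes five insns on the path at the end of this function:

       lis   rD,0x1234
       ori   rD,rD,0x5678
       sldi  rD,rD,32
       oris  rD,rD,0x90ab
       ori   rD,rD,0xcdef

   "rD" is an invented register name.  */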
8203 static void
8204 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8206 rtx temp;
8207 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8209 ud1 = c & 0xffff;
8210 c = c >> 16;
8211 ud2 = c & 0xffff;
8212 c = c >> 16;
8213 ud3 = c & 0xffff;
8214 c = c >> 16;
8215 ud4 = c & 0xffff;
8217 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8218 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8219 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8221 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8222 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8224 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8226 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8227 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8228 if (ud1 != 0)
8229 emit_move_insn (dest,
8230 gen_rtx_IOR (DImode, copy_rtx (temp),
8231 GEN_INT (ud1)));
8233 else if (ud3 == 0 && ud4 == 0)
8235 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8237 gcc_assert (ud2 & 0x8000);
8238 emit_move_insn (copy_rtx (temp),
8239 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8240 if (ud1 != 0)
8241 emit_move_insn (copy_rtx (temp),
8242 gen_rtx_IOR (DImode, copy_rtx (temp),
8243 GEN_INT (ud1)));
8244 emit_move_insn (dest,
8245 gen_rtx_ZERO_EXTEND (DImode,
8246 gen_lowpart (SImode,
8247 copy_rtx (temp))));
8249 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8250 || (ud4 == 0 && ! (ud3 & 0x8000)))
8252 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8254 emit_move_insn (copy_rtx (temp),
8255 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8256 if (ud2 != 0)
8257 emit_move_insn (copy_rtx (temp),
8258 gen_rtx_IOR (DImode, copy_rtx (temp),
8259 GEN_INT (ud2)));
8260 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8261 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8262 GEN_INT (16)));
8263 if (ud1 != 0)
8264 emit_move_insn (dest,
8265 gen_rtx_IOR (DImode, copy_rtx (temp),
8266 GEN_INT (ud1)));
8268 else
8270 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8272 emit_move_insn (copy_rtx (temp),
8273 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8274 if (ud3 != 0)
8275 emit_move_insn (copy_rtx (temp),
8276 gen_rtx_IOR (DImode, copy_rtx (temp),
8277 GEN_INT (ud3)));
8279 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8280 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8281 GEN_INT (32)));
8282 if (ud2 != 0)
8283 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8284 gen_rtx_IOR (DImode, copy_rtx (temp),
8285 GEN_INT (ud2 << 16)));
8286 if (ud1 != 0)
8287 emit_move_insn (dest,
8288 gen_rtx_IOR (DImode, copy_rtx (temp),
8289 GEN_INT (ud1)));
8293 /* Helper for rs6000_emit_move below. Get rid of [r+r] memory refs
8294 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
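/* E.g. (sketch): for a TImode move, (mem:TI (plus (reg 9) (reg 10)))
   has its address copied into a fresh pseudo, yielding
   (mem:TI (reg tmp)), since [r+r] addressing cannot be used here.  */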
8296 static void
8297 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8299 if (reload_in_progress)
8300 return;
8302 if (GET_CODE (operands[0]) == MEM
8303 && GET_CODE (XEXP (operands[0], 0)) != REG
8304 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8305 GET_MODE (operands[0]), false))
8306 operands[0]
8307 = replace_equiv_address (operands[0],
8308 copy_addr_to_reg (XEXP (operands[0], 0)));
8310 if (GET_CODE (operands[1]) == MEM
8311 && GET_CODE (XEXP (operands[1], 0)) != REG
8312 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8313 GET_MODE (operands[1]), false))
8314 operands[1]
8315 = replace_equiv_address (operands[1],
8316 copy_addr_to_reg (XEXP (operands[1], 0)));
8319 /* Generate a vector of constants to permute MODE for a little-endian
8320 storage operation by swapping the two halves of a vector. */
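/* E.g. for V4SImode the selector built below is {2, 3, 0, 1}, i.e. the
   two doubleword halves of the vector are swapped (sketch).  */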
8321 static rtvec
8322 rs6000_const_vec (machine_mode mode)
8324 int i, subparts;
8325 rtvec v;
8327 switch (mode)
8329 case V1TImode:
8330 subparts = 1;
8331 break;
8332 case V2DFmode:
8333 case V2DImode:
8334 subparts = 2;
8335 break;
8336 case V4SFmode:
8337 case V4SImode:
8338 subparts = 4;
8339 break;
8340 case V8HImode:
8341 subparts = 8;
8342 break;
8343 case V16QImode:
8344 subparts = 16;
8345 break;
8346 default:
8347 gcc_unreachable();
8350 v = rtvec_alloc (subparts);
8352 for (i = 0; i < subparts / 2; ++i)
8353 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8354 for (i = subparts / 2; i < subparts; ++i)
8355 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8357 return v;
8360 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8361 for a VSX load or store operation. */
8362 rtx
8363 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8365 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8366 return gen_rtx_VEC_SELECT (mode, source, par);
8369 /* Emit a little-endian load from vector memory location SOURCE to VSX
8370 register DEST in mode MODE. The load is done with two permuting
8371 insns that represent an lxvd2x and an xxpermdi. */
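/* Sketch of the resulting sequence for a little-endian V4SI load;
   register names are invented:

       lxvd2x   vsT,0,rA       ; doubleword-swapped load
       xxpermdi vsD,vsT,vsT,2  ; swap the halves back into place  */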
8372 void
8373 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8375 rtx tmp, permute_mem, permute_reg;
8377 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8378 V1TImode). */
8379 if (mode == TImode || mode == V1TImode)
8381 mode = V2DImode;
8382 dest = gen_lowpart (V2DImode, dest);
8383 source = adjust_address (source, V2DImode, 0);
8386 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8387 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8388 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8389 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8390 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8393 /* Emit a little-endian store to vector memory location DEST from VSX
8394 register SOURCE in mode MODE. The store is done with two permuting
8395 insns that represent an xxpermdi and an stxvd2x. */
8396 void
8397 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8399 rtx tmp, permute_src, permute_tmp;
8401 /* This should never be called during or after reload, because it does
8402 not re-permute the source register. It is intended only for use
8403 during expand. */
8404 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8406 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8407 V1TImode). */
8408 if (mode == TImode || mode == V1TImode)
8410 mode = V2DImode;
8411 dest = adjust_address (dest, V2DImode, 0);
8412 source = gen_lowpart (V2DImode, source);
8415 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8416 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8417 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8418 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8419 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8422 /* Emit a sequence representing a little-endian VSX load or store,
8423 moving data from SOURCE to DEST in mode MODE. This is done
8424 separately from rs6000_emit_move to ensure it is called only
8425 during expand. LE VSX loads and stores introduced later are
8426 handled with a split. The expand-time RTL generation allows
8427 us to optimize away redundant pairs of register-permutes. */
8428 void
8429 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8431 gcc_assert (!BYTES_BIG_ENDIAN
8432 && VECTOR_MEM_VSX_P (mode)
8433 && !gpr_or_gpr_p (dest, source)
8434 && (MEM_P (source) ^ MEM_P (dest)));
8436 if (MEM_P (source))
8438 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8439 rs6000_emit_le_vsx_load (dest, source, mode);
8441 else
8443 if (!REG_P (source))
8444 source = force_reg (mode, source);
8445 rs6000_emit_le_vsx_store (dest, source, mode);
8449 /* Emit a move from SOURCE to DEST in mode MODE. */
8450 void
8451 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8453 rtx operands[2];
8454 operands[0] = dest;
8455 operands[1] = source;
8457 if (TARGET_DEBUG_ADDR)
8459 fprintf (stderr,
8460 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8461 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8462 GET_MODE_NAME (mode),
8463 reload_in_progress,
8464 reload_completed,
8465 can_create_pseudo_p ());
8466 debug_rtx (dest);
8467 fprintf (stderr, "source:\n");
8468 debug_rtx (source);
8471 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
8472 if (CONST_WIDE_INT_P (operands[1])
8473 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8475 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8476 gcc_unreachable ();
8479 /* Check if GCC is setting up a block move that will end up using FP
8480 registers as temporaries. We must make sure this is acceptable. */
8481 if (GET_CODE (operands[0]) == MEM
8482 && GET_CODE (operands[1]) == MEM
8483 && mode == DImode
8484 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8485 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8486 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8487 ? 32 : MEM_ALIGN (operands[0])))
8488 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8489 ? 32
8490 : MEM_ALIGN (operands[1]))))
8491 && ! MEM_VOLATILE_P (operands [0])
8492 && ! MEM_VOLATILE_P (operands [1]))
8494 emit_move_insn (adjust_address (operands[0], SImode, 0),
8495 adjust_address (operands[1], SImode, 0));
8496 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8497 adjust_address (copy_rtx (operands[1]), SImode, 4));
8498 return;
8501 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8502 && !gpc_reg_operand (operands[1], mode))
8503 operands[1] = force_reg (mode, operands[1]);
8505 /* Recognize the case where operands[1] is a reference to thread-local
8506 data and load its address into a register. */
8507 if (tls_referenced_p (operands[1]))
8509 enum tls_model model;
8510 rtx tmp = operands[1];
8511 rtx addend = NULL;
8513 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8515 addend = XEXP (XEXP (tmp, 0), 1);
8516 tmp = XEXP (XEXP (tmp, 0), 0);
8519 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8520 model = SYMBOL_REF_TLS_MODEL (tmp);
8521 gcc_assert (model != 0);
8523 tmp = rs6000_legitimize_tls_address (tmp, model);
8524 if (addend)
8526 tmp = gen_rtx_PLUS (mode, tmp, addend);
8527 tmp = force_operand (tmp, operands[0]);
8529 operands[1] = tmp;
8532 /* Handle the case where reload calls us with an invalid address. */
8533 if (reload_in_progress && mode == Pmode
8534 && (! general_operand (operands[1], mode)
8535 || ! nonimmediate_operand (operands[0], mode)))
8536 goto emit_set;
8538 /* 128-bit constant floating-point values on Darwin should really be loaded
8539 as two parts. However, this premature splitting is a problem when DFmode
8540 values can go into Altivec registers. */
8541 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8542 && !reg_addr[DFmode].scalar_in_vmx_p
8543 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8545 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8546 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8547 DFmode);
8548 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8549 GET_MODE_SIZE (DFmode)),
8550 simplify_gen_subreg (DFmode, operands[1], mode,
8551 GET_MODE_SIZE (DFmode)),
8552 DFmode);
8553 return;
8556 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8557 cfun->machine->sdmode_stack_slot =
8558 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8561 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8562 p1:SD) if p1 is not of the floating point class and p0 is spilled,
8563 since there is no analogous movsd_store for this case. */
8564 if (lra_in_progress && mode == DDmode
8565 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8566 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8567 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8568 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8570 enum reg_class cl;
8571 int regno = REGNO (SUBREG_REG (operands[1]));
8573 if (regno >= FIRST_PSEUDO_REGISTER)
8575 cl = reg_preferred_class (regno);
8576 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8578 if (regno >= 0 && ! FP_REGNO_P (regno))
8580 mode = SDmode;
8581 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8582 operands[1] = SUBREG_REG (operands[1]);
8585 if (lra_in_progress
8586 && mode == SDmode
8587 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8588 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8589 && (REG_P (operands[1])
8590 || (GET_CODE (operands[1]) == SUBREG
8591 && REG_P (SUBREG_REG (operands[1])))))
8593 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8594 ? SUBREG_REG (operands[1]) : operands[1]);
8595 enum reg_class cl;
8597 if (regno >= FIRST_PSEUDO_REGISTER)
8599 cl = reg_preferred_class (regno);
8600 gcc_assert (cl != NO_REGS);
8601 regno = ira_class_hard_regs[cl][0];
8603 if (FP_REGNO_P (regno))
8605 if (GET_MODE (operands[0]) != DDmode)
8606 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8607 emit_insn (gen_movsd_store (operands[0], operands[1]));
8609 else if (INT_REGNO_P (regno))
8610 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8611 else
8612 gcc_unreachable();
8613 return;
8615 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8616 p1:DD)) if p0 is not of the floating point class and p1 is spilled,
8617 since there is no analogous movsd_load for this case. */
8618 if (lra_in_progress && mode == DDmode
8619 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8620 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8621 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8622 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8624 enum reg_class cl;
8625 int regno = REGNO (SUBREG_REG (operands[0]));
8627 if (regno >= FIRST_PSEUDO_REGISTER)
8629 cl = reg_preferred_class (regno);
8630 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8632 if (regno >= 0 && ! FP_REGNO_P (regno))
8634 mode = SDmode;
8635 operands[0] = SUBREG_REG (operands[0]);
8636 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8639 if (lra_in_progress
8640 && mode == SDmode
8641 && (REG_P (operands[0])
8642 || (GET_CODE (operands[0]) == SUBREG
8643 && REG_P (SUBREG_REG (operands[0]))))
8644 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8645 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8647 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8648 ? SUBREG_REG (operands[0]) : operands[0]);
8649 enum reg_class cl;
8651 if (regno >= FIRST_PSEUDO_REGISTER)
8653 cl = reg_preferred_class (regno);
8654 gcc_assert (cl != NO_REGS);
8655 regno = ira_class_hard_regs[cl][0];
8657 if (FP_REGNO_P (regno))
8659 if (GET_MODE (operands[1]) != DDmode)
8660 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8661 emit_insn (gen_movsd_load (operands[0], operands[1]));
8663 else if (INT_REGNO_P (regno))
8664 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8665 else
8666 gcc_unreachable();
8667 return;
8670 if (reload_in_progress
8671 && mode == SDmode
8672 && cfun->machine->sdmode_stack_slot != NULL_RTX
8673 && MEM_P (operands[0])
8674 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8675 && REG_P (operands[1]))
8677 if (FP_REGNO_P (REGNO (operands[1])))
8679 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8680 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8681 emit_insn (gen_movsd_store (mem, operands[1]));
8683 else if (INT_REGNO_P (REGNO (operands[1])))
8685 rtx mem = operands[0];
8686 if (BYTES_BIG_ENDIAN)
8687 mem = adjust_address_nv (mem, mode, 4);
8688 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8689 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8691 else
8692 gcc_unreachable();
8693 return;
8695 if (reload_in_progress
8696 && mode == SDmode
8697 && REG_P (operands[0])
8698 && MEM_P (operands[1])
8699 && cfun->machine->sdmode_stack_slot != NULL_RTX
8700 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8702 if (FP_REGNO_P (REGNO (operands[0])))
8704 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8705 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8706 emit_insn (gen_movsd_load (operands[0], mem));
8708 else if (INT_REGNO_P (REGNO (operands[0])))
8710 rtx mem = operands[1];
8711 if (BYTES_BIG_ENDIAN)
8712 mem = adjust_address_nv (mem, mode, 4);
8713 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8714 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8716 else
8717 gcc_unreachable();
8718 return;
8721 /* FIXME: In the long term, this switch statement should go away
8722 and be replaced by a sequence of tests based on things like
8723 mode == Pmode. */
8724 switch (mode)
8726 case HImode:
8727 case QImode:
8728 if (CONSTANT_P (operands[1])
8729 && GET_CODE (operands[1]) != CONST_INT)
8730 operands[1] = force_const_mem (mode, operands[1]);
8731 break;
8733 case TFmode:
8734 case TDmode:
8735 rs6000_eliminate_indexed_memrefs (operands);
8736 /* fall through */
8738 case DFmode:
8739 case DDmode:
8740 case SFmode:
8741 case SDmode:
8742 if (CONSTANT_P (operands[1])
8743 && ! easy_fp_constant (operands[1], mode))
8744 operands[1] = force_const_mem (mode, operands[1]);
8745 break;
8747 case V16QImode:
8748 case V8HImode:
8749 case V4SFmode:
8750 case V4SImode:
8751 case V4HImode:
8752 case V2SFmode:
8753 case V2SImode:
8754 case V1DImode:
8755 case V2DFmode:
8756 case V2DImode:
8757 case V1TImode:
8758 if (CONSTANT_P (operands[1])
8759 && !easy_vector_constant (operands[1], mode))
8760 operands[1] = force_const_mem (mode, operands[1]);
8761 break;
8763 case SImode:
8764 case DImode:
8765 /* Use the default pattern for the address of ELF small data. */
8766 if (TARGET_ELF
8767 && mode == Pmode
8768 && DEFAULT_ABI == ABI_V4
8769 && (GET_CODE (operands[1]) == SYMBOL_REF
8770 || GET_CODE (operands[1]) == CONST)
8771 && small_data_operand (operands[1], mode))
8773 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8774 return;
8777 if (DEFAULT_ABI == ABI_V4
8778 && mode == Pmode && mode == SImode
8779 && flag_pic == 1 && got_operand (operands[1], mode))
8781 emit_insn (gen_movsi_got (operands[0], operands[1]));
8782 return;
8785 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8786 && TARGET_NO_TOC
8787 && ! flag_pic
8788 && mode == Pmode
8789 && CONSTANT_P (operands[1])
8790 && GET_CODE (operands[1]) != HIGH
8791 && GET_CODE (operands[1]) != CONST_INT)
8793 rtx target = (!can_create_pseudo_p ()
8794 ? operands[0]
8795 : gen_reg_rtx (mode));
8797 /* If this is a function address on -mcall-aixdesc,
8798 convert it to the address of the descriptor. */
8799 if (DEFAULT_ABI == ABI_AIX
8800 && GET_CODE (operands[1]) == SYMBOL_REF
8801 && XSTR (operands[1], 0)[0] == '.')
8803 const char *name = XSTR (operands[1], 0);
8804 rtx new_ref;
8805 while (*name == '.')
8806 name++;
8807 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8808 CONSTANT_POOL_ADDRESS_P (new_ref)
8809 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8810 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8811 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8812 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8813 operands[1] = new_ref;
8816 if (DEFAULT_ABI == ABI_DARWIN)
8818 #if TARGET_MACHO
8819 if (MACHO_DYNAMIC_NO_PIC_P)
8821 /* Take care of any required data indirection. */
8822 operands[1] = rs6000_machopic_legitimize_pic_address (
8823 operands[1], mode, operands[0]);
8824 if (operands[0] != operands[1])
8825 emit_insn (gen_rtx_SET (VOIDmode,
8826 operands[0], operands[1]));
8827 return;
8829 #endif
8830 emit_insn (gen_macho_high (target, operands[1]));
8831 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8832 return;
8835 emit_insn (gen_elf_high (target, operands[1]));
8836 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8837 return;
8840 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8841 and we have put it in the TOC, we just need to make a TOC-relative
8842 reference to it. */
8843 if (TARGET_TOC
8844 && GET_CODE (operands[1]) == SYMBOL_REF
8845 && use_toc_relative_ref (operands[1]))
8846 operands[1] = create_TOC_reference (operands[1], operands[0]);
8847 else if (mode == Pmode
8848 && CONSTANT_P (operands[1])
8849 && GET_CODE (operands[1]) != HIGH
8850 && ((GET_CODE (operands[1]) != CONST_INT
8851 && ! easy_fp_constant (operands[1], mode))
8852 || (GET_CODE (operands[1]) == CONST_INT
8853 && (num_insns_constant (operands[1], mode)
8854 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8855 || (GET_CODE (operands[0]) == REG
8856 && FP_REGNO_P (REGNO (operands[0]))))
8857 && !toc_relative_expr_p (operands[1], false)
8858 && (TARGET_CMODEL == CMODEL_SMALL
8859 || can_create_pseudo_p ()
8860 || (REG_P (operands[0])
8861 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8864 #if TARGET_MACHO
8865 /* Darwin uses a special PIC legitimizer. */
8866 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8868 operands[1] =
8869 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8870 operands[0]);
8871 if (operands[0] != operands[1])
8872 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8873 return;
8875 #endif
8877 /* If we are to limit the number of things we put in the TOC and
8878 this is a symbol plus a constant we can add in one insn,
8879 just put the symbol in the TOC and add the constant. Don't do
8880 this if reload is in progress. */
8881 if (GET_CODE (operands[1]) == CONST
8882 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8883 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8884 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8885 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8886 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8887 && ! side_effects_p (operands[0]))
8889 rtx sym =
8890 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8891 rtx other = XEXP (XEXP (operands[1], 0), 1);
8893 sym = force_reg (mode, sym);
8894 emit_insn (gen_add3_insn (operands[0], sym, other));
8895 return;
8898 operands[1] = force_const_mem (mode, operands[1]);
8900 if (TARGET_TOC
8901 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8902 && constant_pool_expr_p (XEXP (operands[1], 0))
8903 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8904 get_pool_constant (XEXP (operands[1], 0)),
8905 get_pool_mode (XEXP (operands[1], 0))))
8907 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8908 operands[0]);
8909 operands[1] = gen_const_mem (mode, tocref);
8910 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8913 break;
8915 case TImode:
8916 if (!VECTOR_MEM_VSX_P (TImode))
8917 rs6000_eliminate_indexed_memrefs (operands);
8918 break;
8920 case PTImode:
8921 rs6000_eliminate_indexed_memrefs (operands);
8922 break;
8924 default:
8925 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8928 /* Above, we may have called force_const_mem which may have returned
8929 an invalid address. If we can, fix this up; otherwise, reload will
8930 have to deal with it. */
8931 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8932 operands[1] = validize_mem (operands[1]);
8934 emit_set:
8935 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8938 /* Return true if a structure, union or array containing FIELD should be
8939 accessed using `BLKmode'.
8941 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8942 entire thing in a DI and use subregs to access the internals.
8943 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8944 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8945 best thing to do is set structs to BLKmode and avoid Severe Tire
8946 Damage.
8948 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8949 fit into 1, whereas DI still needs two. */
8951 static bool
8952 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8954 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8955 || (TARGET_E500_DOUBLE && mode == DFmode));
8958 /* Nonzero if we can use a floating-point register to pass this arg. */
8959 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8960 (SCALAR_FLOAT_MODE_P (MODE) \
8961 && (CUM)->fregno <= FP_ARG_MAX_REG \
8962 && TARGET_HARD_FLOAT && TARGET_FPRS)
8964 /* Nonzero if we can use an AltiVec register to pass this arg. */
8965 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8966 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8967 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8968 && TARGET_ALTIVEC_ABI \
8969 && (NAMED))
8971 /* Walk down the type tree of TYPE counting consecutive base elements.
8972 If *MODEP is VOIDmode, then set it to the first valid floating point
8973 or vector type. If a non-floating point or vector type is found, or
8974 if a floating point or vector type that doesn't match a non-VOIDmode
8975 *MODEP is found, then return -1, otherwise return the count in the
8976 sub-tree. */
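/* E.g. (sketch): for struct { double a, b; } the walk sets *MODEP to
   DFmode and returns 2; for struct { double a; float b; } the second
   field's mode differs, so the walk returns -1.  */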
8978 static int
8979 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8981 machine_mode mode;
8982 HOST_WIDE_INT size;
8984 switch (TREE_CODE (type))
8986 case REAL_TYPE:
8987 mode = TYPE_MODE (type);
8988 if (!SCALAR_FLOAT_MODE_P (mode))
8989 return -1;
8991 if (*modep == VOIDmode)
8992 *modep = mode;
8994 if (*modep == mode)
8995 return 1;
8997 break;
8999 case COMPLEX_TYPE:
9000 mode = TYPE_MODE (TREE_TYPE (type));
9001 if (!SCALAR_FLOAT_MODE_P (mode))
9002 return -1;
9004 if (*modep == VOIDmode)
9005 *modep = mode;
9007 if (*modep == mode)
9008 return 2;
9010 break;
9012 case VECTOR_TYPE:
9013 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9014 return -1;
9016 /* Use V4SImode as representative of all 128-bit vector types. */
9017 size = int_size_in_bytes (type);
9018 switch (size)
9020 case 16:
9021 mode = V4SImode;
9022 break;
9023 default:
9024 return -1;
9027 if (*modep == VOIDmode)
9028 *modep = mode;
9030 /* Vector modes are considered to be opaque: two vectors are
9031 equivalent for the purposes of being homogeneous aggregates
9032 if they are the same size. */
9033 if (*modep == mode)
9034 return 1;
9036 break;
9038 case ARRAY_TYPE:
9040 int count;
9041 tree index = TYPE_DOMAIN (type);
9043 /* Can't handle incomplete types nor sizes that are not
9044 fixed. */
9045 if (!COMPLETE_TYPE_P (type)
9046 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9047 return -1;
9049 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9050 if (count == -1
9051 || !index
9052 || !TYPE_MAX_VALUE (index)
9053 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9054 || !TYPE_MIN_VALUE (index)
9055 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9056 || count < 0)
9057 return -1;
9059 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9060 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9062 /* There must be no padding. */
9063 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9064 return -1;
9066 return count;
9069 case RECORD_TYPE:
9071 int count = 0;
9072 int sub_count;
9073 tree field;
9075 /* Can't handle incomplete types nor sizes that are not
9076 fixed. */
9077 if (!COMPLETE_TYPE_P (type)
9078 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9079 return -1;
9081 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9083 if (TREE_CODE (field) != FIELD_DECL)
9084 continue;
9086 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9087 if (sub_count < 0)
9088 return -1;
9089 count += sub_count;
9092 /* There must be no padding. */
9093 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9094 return -1;
9096 return count;
9099 case UNION_TYPE:
9100 case QUAL_UNION_TYPE:
9102 /* These aren't very interesting except in a degenerate case. */
9103 int count = 0;
9104 int sub_count;
9105 tree field;
9107 /* Can't handle incomplete types nor sizes that are not
9108 fixed. */
9109 if (!COMPLETE_TYPE_P (type)
9110 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9111 return -1;
9113 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9115 if (TREE_CODE (field) != FIELD_DECL)
9116 continue;
9118 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9119 if (sub_count < 0)
9120 return -1;
9121 count = count > sub_count ? count : sub_count;
9124 /* There must be no padding. */
9125 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9126 return -1;
9128 return count;
9131 default:
9132 break;
9135 return -1;
9138 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9139 float or vector aggregate that shall be passed in FP/vector registers
9140 according to the ELFv2 ABI, return the homogeneous element mode in
9141 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9143 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
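/* Under the ELFv2 ABI a homogeneous aggregate may occupy at most
   AGGR_ARG_NUM_REG (eight) FP or vector registers, so e.g.
   "struct { double d[8]; }" still qualifies while a nine-double
   struct does not.  */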
9145 static bool
9146 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9147 machine_mode *elt_mode,
9148 int *n_elts)
9150 /* Note that we do not accept complex types at the top level as
9151 homogeneous aggregates; these types are handled via the
9152 targetm.calls.split_complex_arg mechanism. Complex types
9153 can be elements of homogeneous aggregates, however. */
9154 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9156 machine_mode field_mode = VOIDmode;
9157 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9159 if (field_count > 0)
9161 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9162 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9164 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9165 up to AGGR_ARG_NUM_REG registers. */
9166 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9168 if (elt_mode)
9169 *elt_mode = field_mode;
9170 if (n_elts)
9171 *n_elts = field_count;
9172 return true;
9177 if (elt_mode)
9178 *elt_mode = mode;
9179 if (n_elts)
9180 *n_elts = 1;
9181 return false;
9184 /* Return a nonzero value to say to return the function value in
9185 memory, just as large structures are always returned. TYPE will be
9186 the data type of the value, and FNTYPE will be the type of the
9187 function doing the returning, or @code{NULL} for libcalls.
9189 The AIX ABI for the RS/6000 specifies that all structures are
9190 returned in memory. The Darwin ABI does the same.
9192 For the Darwin 64 Bit ABI, a function result can be returned in
9193 registers or in memory, depending on the size of the return data
9194 type. If it is returned in registers, the value occupies the same
9195 registers as it would if it were the first and only function
9196 argument. Otherwise, the function places its result in memory at
9197 the location pointed to by GPR3.
9199 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9200 but a draft put them in memory, and GCC used to implement the draft
9201 instead of the final standard. Therefore, aix_struct_return
9202 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9203 compatibility can change DRAFT_V4_STRUCT_RET to override the
9204 default, and -m switches get the final word. See
9205 rs6000_option_override_internal for more details.
9207 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9208 long double support is enabled. These values are returned in memory.
9210 int_size_in_bytes returns -1 for variable size objects, which go in
9211 memory always. The cast to unsigned makes -1 > 8. */
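/* For example, under ELFv2 "struct { double a, b; }" is returned in
   FPRs via the homogeneous-aggregate rule below, and any other
   aggregate of at most 16 bytes is returned in GPRs; larger
   non-homogeneous aggregates go in memory.  */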
9213 static bool
9214 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9216 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9217 if (TARGET_MACHO
9218 && rs6000_darwin64_abi
9219 && TREE_CODE (type) == RECORD_TYPE
9220 && int_size_in_bytes (type) > 0)
9222 CUMULATIVE_ARGS valcum;
9223 rtx valret;
9225 valcum.words = 0;
9226 valcum.fregno = FP_ARG_MIN_REG;
9227 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9228 /* Do a trial code generation as if this were going to be passed
9229 as an argument; if any part goes in memory, we return NULL. */
9230 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9231 if (valret)
9232 return false;
9233 /* Otherwise fall through to more conventional ABI rules. */
9236 #if HAVE_UPC_PTS_STRUCT_REP
9237 if (POINTER_TYPE_P (type) && upc_shared_type_p (TREE_TYPE (type)))
9238 return true;
9239 #endif
9241 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9242 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9243 NULL, NULL))
9244 return false;
9246 /* The ELFv2 ABI returns aggregates up to 16 bytes in registers. */
9247 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9248 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9249 return false;
9251 if (AGGREGATE_TYPE_P (type)
9252 && (aix_struct_return
9253 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9254 return true;
9256 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9257 modes only exist for GCC vector types if -maltivec. */
9258 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9259 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9260 return false;
9262 /* Return synthetic vectors in memory. */
9263 if (TREE_CODE (type) == VECTOR_TYPE
9264 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9266 static bool warned_for_return_big_vectors = false;
9267 if (!warned_for_return_big_vectors)
9269 warning (0, "GCC vector returned by reference: "
9270 "non-standard ABI extension with no compatibility guarantee");
9271 warned_for_return_big_vectors = true;
9273 return true;
9276 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9277 return true;
9279 return false;
9282 /* Specify whether values returned in registers should be at the most
9283 significant end of a register. We want aggregates returned by
9284 value to match the way aggregates are passed to functions. */
9286 static bool
9287 rs6000_return_in_msb (const_tree valtype)
9289 return (DEFAULT_ABI == ABI_ELFv2
9290 && BYTES_BIG_ENDIAN
9291 && AGGREGATE_TYPE_P (valtype)
9292 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9295 #ifdef HAVE_AS_GNU_ATTRIBUTE
9296 /* Return TRUE if a call to function FNDECL may be one that
9297 potentially affects the function calling ABI of the object file. */
9299 static bool
9300 call_ABI_of_interest (tree fndecl)
9302 if (symtab->state == EXPANSION)
9304 struct cgraph_node *c_node;
9306 /* Libcalls are always interesting. */
9307 if (fndecl == NULL_TREE)
9308 return true;
9310 /* Any call to an external function is interesting. */
9311 if (DECL_EXTERNAL (fndecl))
9312 return true;
9314 /* Interesting functions that we are emitting in this object file. */
9315 c_node = cgraph_node::get (fndecl);
9316 c_node = c_node->ultimate_alias_target ();
9317 return !c_node->only_called_directly_p ();
9319 return false;
9321 #endif
9323 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9324 for a call to a function whose data type is FNTYPE.
9325 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9327 For incoming args we set the number of arguments in the prototype large
9328 so we never return a PARALLEL. */
9330 void
9331 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9332 rtx libname ATTRIBUTE_UNUSED, int incoming,
9333 int libcall, int n_named_args,
9334 tree fndecl ATTRIBUTE_UNUSED,
9335 machine_mode return_mode ATTRIBUTE_UNUSED)
9337 static CUMULATIVE_ARGS zero_cumulative;
9339 *cum = zero_cumulative;
9340 cum->words = 0;
9341 cum->fregno = FP_ARG_MIN_REG;
9342 cum->vregno = ALTIVEC_ARG_MIN_REG;
9343 cum->prototype = (fntype && prototype_p (fntype));
9344 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9345 ? CALL_LIBCALL : CALL_NORMAL);
9346 cum->sysv_gregno = GP_ARG_MIN_REG;
9347 cum->stdarg = stdarg_p (fntype);
9349 cum->nargs_prototype = 0;
9350 if (incoming || cum->prototype)
9351 cum->nargs_prototype = n_named_args;
9353 /* Check for a longcall attribute. */
9354 if ((!fntype && rs6000_default_long_calls)
9355 || (fntype
9356 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9357 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9358 cum->call_cookie |= CALL_LONG;
9360 if (TARGET_DEBUG_ARG)
9362 fprintf (stderr, "\ninit_cumulative_args:");
9363 if (fntype)
9365 tree ret_type = TREE_TYPE (fntype);
9366 fprintf (stderr, " ret code = %s,",
9367 get_tree_code_name (TREE_CODE (ret_type)));
9370 if (cum->call_cookie & CALL_LONG)
9371 fprintf (stderr, " longcall,");
9373 fprintf (stderr, " proto = %d, nargs = %d\n",
9374 cum->prototype, cum->nargs_prototype);
9377 #ifdef HAVE_AS_GNU_ATTRIBUTE
9378 if (DEFAULT_ABI == ABI_V4)
9380 cum->escapes = call_ABI_of_interest (fndecl);
9381 if (cum->escapes)
9383 tree return_type;
9385 if (fntype)
9387 return_type = TREE_TYPE (fntype);
9388 return_mode = TYPE_MODE (return_type);
9390 else
9391 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9393 if (return_type != NULL)
9395 if (TREE_CODE (return_type) == RECORD_TYPE
9396 && TYPE_TRANSPARENT_AGGR (return_type))
9398 return_type = TREE_TYPE (first_field (return_type));
9399 return_mode = TYPE_MODE (return_type);
9401 if (AGGREGATE_TYPE_P (return_type)
9402 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9403 <= 8))
9404 rs6000_returns_struct = true;
9406 if (SCALAR_FLOAT_MODE_P (return_mode))
9407 rs6000_passes_float = true;
9408 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9409 || SPE_VECTOR_MODE (return_mode))
9410 rs6000_passes_vector = true;
9413 #endif
9415 if (fntype
9416 && !TARGET_ALTIVEC
9417 && TARGET_ALTIVEC_ABI
9418 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9420 error ("cannot return value in vector register because"
9421 " altivec instructions are disabled, use -maltivec"
9422 " to enable them");
9426 /* The mode the ABI uses for a word. This is not the same as word_mode
9427 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9429 static machine_mode
9430 rs6000_abi_word_mode (void)
9432 return TARGET_32BIT ? SImode : DImode;
9435 /* On rs6000, function arguments are promoted, as are function return
9436 values. */
9438 static machine_mode
9439 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9440 machine_mode mode,
9441 int *punsignedp ATTRIBUTE_UNUSED,
9442 const_tree, int)
9444 PROMOTE_MODE (mode, *punsignedp, type);
9446 return mode;
9449 /* Return true if TYPE must be passed on the stack and not in registers. */
9451 static bool
9452 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9454 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9455 return must_pass_in_stack_var_size (mode, type);
9456 else
9457 return must_pass_in_stack_var_size_or_pad (mode, type);
9460 /* If defined, a C expression which determines whether, and in which
9461 direction, to pad out an argument with extra space. The value
9462 should be of type `enum direction': either `upward' to pad above
9463 the argument, `downward' to pad below, or `none' to inhibit
9464 padding.
9466 For the AIX ABI structs are always stored left shifted in their
9467 argument slot. */
9469 enum direction
9470 function_arg_padding (machine_mode mode, const_tree type)
9472 #ifndef AGGREGATE_PADDING_FIXED
9473 #define AGGREGATE_PADDING_FIXED 0
9474 #endif
9475 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9476 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9477 #endif
9479 if (!AGGREGATE_PADDING_FIXED)
9481 /* GCC used to pass structures of the same size as integer types as
9482 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9483 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9484 passed padded downward, except that -mstrict-align further
9485 muddied the water in that multi-component structures of 2 and 4
9486 bytes in size were passed padded upward.
9488 The following arranges for best compatibility with previous
9489 versions of gcc, but removes the -mstrict-align dependency. */
9490 if (BYTES_BIG_ENDIAN)
9492 HOST_WIDE_INT size = 0;
9494 if (mode == BLKmode)
9496 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9497 size = int_size_in_bytes (type);
9499 else
9500 size = GET_MODE_SIZE (mode);
9502 if (size == 1 || size == 2 || size == 4)
9503 return downward;
9505 return upward;
9508 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9510 if (type != 0 && AGGREGATE_TYPE_P (type))
9511 return upward;
9514 /* Fall back to the default. */
9515 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9518 /* If defined, a C expression that gives the alignment boundary, in bits,
9519 of an argument with the specified mode and type. If it is not defined,
9520 PARM_BOUNDARY is used for all arguments.
9522 V.4 wants long longs and doubles to be double word aligned. Just
9523 testing the mode size is a boneheaded way to do this as it means
9524 that other types such as complex int are also double word aligned.
9525 However, we're stuck with this because changing the ABI might break
9526 existing library interfaces.
9528 Doubleword align SPE vectors.
9529 Quadword align Altivec/VSX vectors.
9530 Quadword align large synthetic vector types. */
9532 static unsigned int
9533 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9535 machine_mode elt_mode;
9536 int n_elts;
9538 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9540 if (DEFAULT_ABI == ABI_V4
9541 && (GET_MODE_SIZE (mode) == 8
9542 || (TARGET_HARD_FLOAT
9543 && TARGET_FPRS
9544 && (mode == TFmode || mode == TDmode))))
9545 return 64;
9546 else if (SPE_VECTOR_MODE (mode)
9547 || (type && TREE_CODE (type) == VECTOR_TYPE
9548 && int_size_in_bytes (type) >= 8
9549 && int_size_in_bytes (type) < 16))
9550 return 64;
9551 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9552 || (type && TREE_CODE (type) == VECTOR_TYPE
9553 && int_size_in_bytes (type) >= 16))
9554 return 128;
9556 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9557 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9558 -mcompat-align-parm is used. */
9559 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9560 || DEFAULT_ABI == ABI_ELFv2)
9561 && type && TYPE_ALIGN (type) > 64)
9563 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9564 or homogeneous float/vector aggregates here. We already handled
9565 vector aggregates above, but still need to check for float here. */
9566 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9567 && !SCALAR_FLOAT_MODE_P (elt_mode));
9569 /* We used to check for BLKmode instead of the above aggregate type
9570 check. Warn when this results in any difference to the ABI. */
9571 if (aggregate_p != (mode == BLKmode))
9573 static bool warned;
9574 if (!warned && warn_psabi)
9576 warned = true;
9577 inform (input_location,
9578 "the ABI of passing aggregates with %d-byte alignment"
9579 " has changed in GCC 5",
9580 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9584 if (aggregate_p)
9585 return 128;
9588 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9589 implement the "aggregate type" check as a BLKmode check here; this
9590 means certain aggregate types are in fact not aligned. */
9591 if (TARGET_MACHO && rs6000_darwin64_abi
9592 && mode == BLKmode
9593 && type && TYPE_ALIGN (type) > 64)
9594 return 128;
9596 return PARM_BOUNDARY;
9599 /* The offset in words to the start of the parameter save area. */
9601 static unsigned int
9602 rs6000_parm_offset (void)
9604 return (DEFAULT_ABI == ABI_V4 ? 2
9605 : DEFAULT_ABI == ABI_ELFv2 ? 4
9606 : 6);
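/* These counts reflect the linkage area: back chain and LR save for
   V.4; back chain, CR save, LR save, and TOC save for ELFv2; AIX
   adds the two reserved (compiler and linker) words, for a total of
   six.  */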
9609 /* For a function parm of MODE and TYPE, return the starting word in
9610 the parameter area. NWORDS of the parameter area are already used. */
9612 static unsigned int
9613 rs6000_parm_start (machine_mode mode, const_tree type,
9614 unsigned int nwords)
9616 unsigned int align;
9618 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9619 return nwords + (-(rs6000_parm_offset () + nwords) & align);
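/* E.g. on 64-bit ELFv2 (offset 4), a quadword-aligned argument with
   NWORDS == 1 gets ALIGN == 1 and starts at word
   1 + (-(4 + 1) & 1) == 2, i.e. on an even doubleword.  */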
9622 /* Compute the size (in words) of a function argument. */
9624 static unsigned long
9625 rs6000_arg_size (machine_mode mode, const_tree type)
9627 unsigned long size;
9629 if (mode != BLKmode)
9630 size = GET_MODE_SIZE (mode);
9631 else
9632 size = int_size_in_bytes (type);
9634 if (TARGET_32BIT)
9635 return (size + 3) >> 2;
9636 else
9637 return (size + 7) >> 3;
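/* E.g. a 12-byte struct occupies (12 + 3) >> 2 == 3 words under
   -m32 but (12 + 7) >> 3 == 2 words under -m64.  */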
9640 /* Use this to flush pending int fields. */
9642 static void
9643 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9644 HOST_WIDE_INT bitpos, int final)
9646 unsigned int startbit, endbit;
9647 int intregs, intoffset;
9648 machine_mode mode;
9650 /* Handle the situations where a float is taking up the first half
9651 of the GPR, and the other half is empty (typically due to
9652 alignment restrictions). We can detect this by an 8-byte-aligned
9653 int field, or by seeing that this is the final flush for this
9654 argument. Count the word and continue on. */
9655 if (cum->floats_in_gpr == 1
9656 && (cum->intoffset % 64 == 0
9657 || (cum->intoffset == -1 && final)))
9659 cum->words++;
9660 cum->floats_in_gpr = 0;
9663 if (cum->intoffset == -1)
9664 return;
9666 intoffset = cum->intoffset;
9667 cum->intoffset = -1;
9668 cum->floats_in_gpr = 0;
9670 if (intoffset % BITS_PER_WORD != 0)
9672 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9673 MODE_INT, 0);
9674 if (mode == BLKmode)
9676 /* We couldn't find an appropriate mode, which happens,
9677 e.g., in packed structs when there are 3 bytes to load.
9678 Back intoffset back to the beginning of the word in this
9679 case. */
9680 intoffset = intoffset & -BITS_PER_WORD;
9684 startbit = intoffset & -BITS_PER_WORD;
9685 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9686 intregs = (endbit - startbit) / BITS_PER_WORD;
9687 cum->words += intregs;
9688 /* words should be unsigned. */
9689 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9691 int pad = (endbit/BITS_PER_WORD) - cum->words;
9692 cum->words += pad;
9696 /* The darwin64 ABI calls for us to recurse down through structs,
9697 looking for elements passed in registers. Unfortunately, we have
9698 to track int register count here also because of misalignments
9699 in powerpc alignment mode. */
9701 static void
9702 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9703 const_tree type,
9704 HOST_WIDE_INT startbitpos)
9706 tree f;
9708 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9709 if (TREE_CODE (f) == FIELD_DECL)
9711 HOST_WIDE_INT bitpos = startbitpos;
9712 tree ftype = TREE_TYPE (f);
9713 machine_mode mode;
9714 if (ftype == error_mark_node)
9715 continue;
9716 mode = TYPE_MODE (ftype);
9718 if (DECL_SIZE (f) != 0
9719 && tree_fits_uhwi_p (bit_position (f)))
9720 bitpos += int_bit_position (f);
9722 /* ??? FIXME: else assume zero offset. */
9724 if (TREE_CODE (ftype) == RECORD_TYPE)
9725 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9726 else if (USE_FP_FOR_ARG_P (cum, mode))
9728 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9729 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9730 cum->fregno += n_fpregs;
9731 /* Single-precision floats present a special problem for
9732 us, because they are smaller than an 8-byte GPR, and so
9733 the structure-packing rules combined with the standard
9734 varargs behavior mean that we want to pack float/float
9735 and float/int combinations into a single register's
9736 space. This is complicated by the arg advance flushing,
9737 which works on arbitrarily large groups of int-type
9738 fields. */
9739 if (mode == SFmode)
9741 if (cum->floats_in_gpr == 1)
9743 /* Two floats in a word; count the word and reset
9744 the float count. */
9745 cum->words++;
9746 cum->floats_in_gpr = 0;
9748 else if (bitpos % 64 == 0)
9750 /* A float at the beginning of an 8-byte word;
9751 count it and put off adjusting cum->words until
9752 we see if an arg advance flush is going to do it
9753 for us. */
9754 cum->floats_in_gpr++;
9756 else
9758 /* The float is at the end of a word, preceded
9759 by integer fields, so the arg advance flush
9760 just above has already set cum->words and
9761 everything is taken care of. */
9764 else
9765 cum->words += n_fpregs;
9767 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9769 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9770 cum->vregno++;
9771 cum->words += 2;
9773 else if (cum->intoffset == -1)
9774 cum->intoffset = bitpos;
9778 /* Check for an item that needs to be considered specially under the darwin 64
9779 bit ABI. These are record types where the mode is BLK or the structure is
9780 8 bytes in size. */
9781 static int
9782 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9784 return rs6000_darwin64_abi
9785 && ((mode == BLKmode
9786 && TREE_CODE (type) == RECORD_TYPE
9787 && int_size_in_bytes (type) > 0)
9788 || (type && TREE_CODE (type) == RECORD_TYPE
9789 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9792 /* Update the data in CUM to advance over an argument
9793 of mode MODE and data type TYPE.
9794 (TYPE is null for libcalls where that information may not be available.)
9796 Note that for args passed by reference, function_arg will be called
9797 with MODE and TYPE set to that of the pointer to the arg, not the arg
9798 itself. */
9800 static void
9801 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9802 const_tree type, bool named, int depth)
9804 machine_mode elt_mode;
9805 int n_elts;
9807 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9809 /* Only tick off an argument if we're not recursing. */
9810 if (depth == 0)
9811 cum->nargs_prototype--;
9813 #ifdef HAVE_AS_GNU_ATTRIBUTE
9814 if (DEFAULT_ABI == ABI_V4
9815 && cum->escapes)
9817 if (SCALAR_FLOAT_MODE_P (mode))
9818 rs6000_passes_float = true;
9819 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9820 rs6000_passes_vector = true;
9821 else if (SPE_VECTOR_MODE (mode)
9822 && !cum->stdarg
9823 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9824 rs6000_passes_vector = true;
9826 #endif
9828 if (TARGET_ALTIVEC_ABI
9829 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9830 || (type && TREE_CODE (type) == VECTOR_TYPE
9831 && int_size_in_bytes (type) == 16)))
9833 bool stack = false;
9835 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9837 cum->vregno += n_elts;
9839 if (!TARGET_ALTIVEC)
9840 error ("cannot pass argument in vector register because"
9841 " altivec instructions are disabled, use -maltivec"
9842 " to enable them");
9844 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9845 even if it is going to be passed in a vector register.
9846 Darwin does the same for variable-argument functions. */
9847 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9848 && TARGET_64BIT)
9849 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9850 stack = true;
9852 else
9853 stack = true;
9855 if (stack)
9857 int align;
9859 /* Vector parameters must be 16-byte aligned. In 32-bit
9860 mode this means we need to take into account the offset
9861 to the parameter save area. In 64-bit mode, they just
9862 have to start on an even word, since the parameter save
9863 area is 16-byte aligned. */
9864 if (TARGET_32BIT)
9865 align = -(rs6000_parm_offset () + cum->words) & 3;
9866 else
9867 align = cum->words & 1;
9868 cum->words += align + rs6000_arg_size (mode, type);
9870 if (TARGET_DEBUG_ARG)
9872 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9873 cum->words, align);
9874 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9875 cum->nargs_prototype, cum->prototype,
9876 GET_MODE_NAME (mode));
9880 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9881 && !cum->stdarg
9882 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9883 cum->sysv_gregno++;
9885 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9887 int size = int_size_in_bytes (type);
9888 /* Variable sized types have size == -1 and are
9889 treated as if consisting entirely of ints.
9890 Pad to 16 byte boundary if needed. */
9891 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9892 && (cum->words % 2) != 0)
9893 cum->words++;
9894 /* For varargs, we can just go up by the size of the struct. */
9895 if (!named)
9896 cum->words += (size + 7) / 8;
9897 else
9899 /* It is tempting to say int register count just goes up by
9900 sizeof(type)/8, but this is wrong in a case such as
9901 { int; double; int; } [powerpc alignment]. We have to
9902 grovel through the fields for these too. */
9903 cum->intoffset = 0;
9904 cum->floats_in_gpr = 0;
9905 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9906 rs6000_darwin64_record_arg_advance_flush (cum,
9907 size * BITS_PER_UNIT, 1);
9909 if (TARGET_DEBUG_ARG)
9911 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9912 cum->words, TYPE_ALIGN (type), size);
9913 fprintf (stderr,
9914 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9915 cum->nargs_prototype, cum->prototype,
9916 GET_MODE_NAME (mode));
9919 else if (DEFAULT_ABI == ABI_V4)
9921 if (TARGET_HARD_FLOAT && TARGET_FPRS
9922 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9923 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9924 || (mode == TFmode && !TARGET_IEEEQUAD)
9925 || mode == SDmode || mode == DDmode || mode == TDmode))
9927 /* _Decimal128 must use an even/odd register pair. This assumes
9928 that the register number is odd when fregno is odd. */
9929 if (mode == TDmode && (cum->fregno % 2) == 1)
9930 cum->fregno++;
9932 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9933 <= FP_ARG_V4_MAX_REG)
9934 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9935 else
9937 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9938 if (mode == DFmode || mode == TFmode
9939 || mode == DDmode || mode == TDmode)
9940 cum->words += cum->words & 1;
9941 cum->words += rs6000_arg_size (mode, type);
9944 else
9946 int n_words = rs6000_arg_size (mode, type);
9947 int gregno = cum->sysv_gregno;
9949 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9950 (r7,r8) or (r9,r10). As does any other 2 word item such
9951 as complex int due to a historical mistake. */
9952 if (n_words == 2)
9953 gregno += (1 - gregno) & 1;
9955 /* Multi-reg args are not split between registers and stack. */
9956 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9958 /* Long long and SPE vectors are aligned on the stack.
9959 So are other 2 word items such as complex int due to
9960 a historical mistake. */
9961 if (n_words == 2)
9962 cum->words += cum->words & 1;
9963 cum->words += n_words;
9966 /* Note: we keep accumulating gregno even after we have started
9967 spilling to the stack, so that expand_builtin_saveregs can tell
9968 that spilling has begun. */
9969 cum->sysv_gregno = gregno + n_words;
9972 if (TARGET_DEBUG_ARG)
9974 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9975 cum->words, cum->fregno);
9976 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9977 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9978 fprintf (stderr, "mode = %4s, named = %d\n",
9979 GET_MODE_NAME (mode), named);
9982 else
9984 int n_words = rs6000_arg_size (mode, type);
9985 int start_words = cum->words;
9986 int align_words = rs6000_parm_start (mode, type, start_words);
9988 cum->words = align_words + n_words;
9990 if (SCALAR_FLOAT_MODE_P (elt_mode)
9991 && TARGET_HARD_FLOAT && TARGET_FPRS)
9993 /* _Decimal128 must be passed in an even/odd float register pair.
9994 This assumes that the register number is odd when fregno is
9995 odd. */
9996 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9997 cum->fregno++;
9998 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
10001 if (TARGET_DEBUG_ARG)
10003 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10004 cum->words, cum->fregno);
10005 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
10006 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
10007 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
10008 named, align_words - start_words, depth);
10013 static void
10014 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10015 const_tree type, bool named)
10017 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named, 0);
10021 static rtx
10022 spe_build_register_parallel (machine_mode mode, int gregno)
10024 rtx r1, r3, r5, r7;
10026 switch (mode)
10028 case DFmode:
10029 r1 = gen_rtx_REG (DImode, gregno);
10030 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10031 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10033 case DCmode:
10034 case TFmode:
10035 r1 = gen_rtx_REG (DImode, gregno);
10036 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10037 r3 = gen_rtx_REG (DImode, gregno + 2);
10038 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10039 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10041 case TCmode:
10042 r1 = gen_rtx_REG (DImode, gregno);
10043 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10044 r3 = gen_rtx_REG (DImode, gregno + 2);
10045 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10046 r5 = gen_rtx_REG (DImode, gregno + 4);
10047 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10048 r7 = gen_rtx_REG (DImode, gregno + 6);
10049 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10050 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10052 default:
10053 gcc_unreachable ();
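/* E.g. for DCmode starting at r5 this builds
   (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                 (expr_list (reg:DI 7) (const_int 8))]).  */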
10057 /* Determine where to put a SIMD argument on the SPE. */
10058 static rtx
10059 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10060 const_tree type)
10062 int gregno = cum->sysv_gregno;
10064 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10065 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
10066 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10067 || mode == DCmode || mode == TCmode))
10069 int n_words = rs6000_arg_size (mode, type);
10071 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10072 if (mode == DFmode)
10073 gregno += (1 - gregno) & 1;
10075 /* Multi-reg args are not split between registers and stack. */
10076 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10077 return NULL_RTX;
10079 return spe_build_register_parallel (mode, gregno);
10081 if (cum->stdarg)
10083 int n_words = rs6000_arg_size (mode, type);
10085 /* SPE vectors are put in odd registers. */
10086 if (n_words == 2 && (gregno & 1) == 0)
10087 gregno += 1;
10089 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10091 rtx r1, r2;
10092 machine_mode m = SImode;
10094 r1 = gen_rtx_REG (m, gregno);
10095 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10096 r2 = gen_rtx_REG (m, gregno + 1);
10097 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10098 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10100 else
10101 return NULL_RTX;
10103 else
10105 if (gregno <= GP_ARG_MAX_REG)
10106 return gen_rtx_REG (mode, gregno);
10107 else
10108 return NULL_RTX;
10112 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10113 structure between cum->intoffset and bitpos to integer registers. */
10115 static void
10116 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10117 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10119 machine_mode mode;
10120 unsigned int regno;
10121 unsigned int startbit, endbit;
10122 int this_regno, intregs, intoffset;
10123 rtx reg;
10125 if (cum->intoffset == -1)
10126 return;
10128 intoffset = cum->intoffset;
10129 cum->intoffset = -1;
10131 /* If this is the trailing part of a word, try to only load that
10132 much into the register. Otherwise load the whole register. Note
10133 that in the latter case we may pick up unwanted bits. It's not a
10134 problem at the moment but may wish to revisit. */
10136 if (intoffset % BITS_PER_WORD != 0)
10138 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10139 MODE_INT, 0);
10140 if (mode == BLKmode)
10142 /* We couldn't find an appropriate mode, which happens,
10143 e.g., in packed structs when there are 3 bytes to load.
10144 Back intoffset back to the beginning of the word in this
10145 case. */
10146 intoffset = intoffset & -BITS_PER_WORD;
10147 mode = word_mode;
10150 else
10151 mode = word_mode;
10153 startbit = intoffset & -BITS_PER_WORD;
10154 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10155 intregs = (endbit - startbit) / BITS_PER_WORD;
10156 this_regno = cum->words + intoffset / BITS_PER_WORD;
10158 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10159 cum->use_stack = 1;
10161 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10162 if (intregs <= 0)
10163 return;
10165 intoffset /= BITS_PER_UNIT;
10168 regno = GP_ARG_MIN_REG + this_regno;
10169 reg = gen_rtx_REG (mode, regno);
10170 rvec[(*k)++] =
10171 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10173 this_regno += 1;
10174 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10175 mode = word_mode;
10176 intregs -= 1;
10178 while (intregs > 0);
10181 /* Recursive workhorse for the following. */
10183 static void
10184 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10185 HOST_WIDE_INT startbitpos, rtx rvec[],
10186 int *k)
10188 tree f;
10190 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10191 if (TREE_CODE (f) == FIELD_DECL)
10193 HOST_WIDE_INT bitpos = startbitpos;
10194 tree ftype = TREE_TYPE (f);
10195 machine_mode mode;
10196 if (ftype == error_mark_node)
10197 continue;
10198 mode = TYPE_MODE (ftype);
10200 if (DECL_SIZE (f) != 0
10201 && tree_fits_uhwi_p (bit_position (f)))
10202 bitpos += int_bit_position (f);
10204 /* ??? FIXME: else assume zero offset. */
10206 if (TREE_CODE (ftype) == RECORD_TYPE)
10207 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10208 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10210 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10211 #if 0
10212 switch (mode)
10214 case SCmode: mode = SFmode; break;
10215 case DCmode: mode = DFmode; break;
10216 case TCmode: mode = TFmode; break;
10217 default: break;
10219 #endif
10220 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10221 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10223 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10224 && (mode == TFmode || mode == TDmode));
10225 /* Long double or _Decimal128 split over regs and memory. */
10226 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10227 cum->use_stack = 1;
10229 rvec[(*k)++]
10230 = gen_rtx_EXPR_LIST (VOIDmode,
10231 gen_rtx_REG (mode, cum->fregno++),
10232 GEN_INT (bitpos / BITS_PER_UNIT));
10233 if (mode == TFmode || mode == TDmode)
10234 cum->fregno++;
10236 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10238 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10239 rvec[(*k)++]
10240 = gen_rtx_EXPR_LIST (VOIDmode,
10241 gen_rtx_REG (mode, cum->vregno++),
10242 GEN_INT (bitpos / BITS_PER_UNIT));
10244 else if (cum->intoffset == -1)
10245 cum->intoffset = bitpos;
10249 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10250 the register(s) to be used for each field and subfield of a struct
10251 being passed by value, along with the offset of where the
10252 register's value may be found in the block. FP fields go in FP
10253 register, vector fields go in vector registers, and everything
10254 else goes in int registers, packed as in memory.
10256 This code is also used for function return values. RETVAL indicates
10257 whether this is the case.
10259 Much of this is taken from the SPARC V9 port, which has a similar
10260 calling convention. */
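/* For instance, "struct { double d; int i; }" passed by value might
   yield (parallel [(expr_list (reg:DF f1) 0)
                    (expr_list (reg:DI r3) 8)]):
   the FP field goes in the next FPR and the flushed int chunk in a
   GPR.  (The register numbers here are illustrative.)  */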
10262 static rtx
10263 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10264 bool named, bool retval)
10266 rtx rvec[FIRST_PSEUDO_REGISTER];
10267 int k = 1, kbase = 1;
10268 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10269 /* This is a copy; modifications are not visible to our caller. */
10270 CUMULATIVE_ARGS copy_cum = *orig_cum;
10271 CUMULATIVE_ARGS *cum = &copy_cum;
10273 /* Pad to 16 byte boundary if needed. */
10274 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10275 && (cum->words % 2) != 0)
10276 cum->words++;
10278 cum->intoffset = 0;
10279 cum->use_stack = 0;
10280 cum->named = named;
10282 /* Put entries into rvec[] for individual FP and vector fields, and
10283 for the chunks of memory that go in int regs. Note we start at
10284 element 1; 0 is reserved for an indication of using memory, and
10285 may or may not be filled in below. */
10286 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10287 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10289 /* If any part of the struct went on the stack put all of it there.
10290 This hack is because the generic code for
10291 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10292 parts of the struct are not at the beginning. */
10293 if (cum->use_stack)
10295 if (retval)
10296 return NULL_RTX; /* doesn't go in registers at all */
10297 kbase = 0;
10298 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10300 if (k > 1 || cum->use_stack)
10301 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10302 else
10303 return NULL_RTX;
10306 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10308 static rtx
10309 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10310 int align_words)
10312 int n_units;
10313 int i, k;
10314 rtx rvec[GP_ARG_NUM_REG + 1];
10316 if (align_words >= GP_ARG_NUM_REG)
10317 return NULL_RTX;
10319 n_units = rs6000_arg_size (mode, type);
10321 /* Optimize the simple case where the arg fits in one gpr, except in
10322 the case of BLKmode due to assign_parms assuming that registers are
10323 BITS_PER_WORD wide. */
10324 if (n_units == 0
10325 || (n_units == 1 && mode != BLKmode))
10326 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10328 k = 0;
10329 if (align_words + n_units > GP_ARG_NUM_REG)
10330 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10331 using a magic NULL_RTX component.
10332 This is not strictly correct. Only some of the arg belongs in
10333 memory, not all of it. However, the normal scheme using
10334 function_arg_partial_nregs can result in unusual subregs, eg.
10335 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10336 store the whole arg to memory is often more efficient than code
10337 to store pieces, and we know that space is available in the right
10338 place for the whole arg. */
10339 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10341 i = 0;
10344 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10345 rtx off = GEN_INT (i++ * 4);
10346 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10348 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10350 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
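/* E.g. a DFmode arg with ALIGN_WORDS == 7 returns
   (parallel:DF [(expr_list (nil) (const_int 0))
                 (expr_list (reg:SI 10) (const_int 0))]):
   half the value is in r10, and the whole arg is also stored to
   memory, per the comment above.  */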
10353 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10354 but must also be copied into the parameter save area starting at
10355 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10356 to the GPRs and/or memory. Return the number of elements used. */
10358 static int
10359 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10360 int align_words, rtx *rvec)
10362 int k = 0;
10364 if (align_words < GP_ARG_NUM_REG)
10366 int n_words = rs6000_arg_size (mode, type);
10368 if (align_words + n_words > GP_ARG_NUM_REG
10369 || mode == BLKmode
10370 || (TARGET_32BIT && TARGET_POWERPC64))
10372 /* If this is partially on the stack, then we only
10373 include the portion actually in registers here. */
10374 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10375 int i = 0;
10377 if (align_words + n_words > GP_ARG_NUM_REG)
10379 /* Not all of the arg fits in gprs. Say that it goes in memory
10380 too, using a magic NULL_RTX component. Also see comment in
10381 rs6000_mixed_function_arg for why the normal
10382 function_arg_partial_nregs scheme doesn't work in this case. */
10383 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10388 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10389 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10390 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10392 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10394 else
10396 /* The whole arg fits in gprs. */
10397 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10398 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10401 else
10403 /* It's entirely in memory. */
10404 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10407 return k;
10410 /* RVEC is a vector of K components of an argument of mode MODE.
10411 Construct the final function_arg return value from it. */
10413 static rtx
10414 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10416 gcc_assert (k >= 1);
10418 /* Avoid returning a PARALLEL in the trivial cases. */
10419 if (k == 1)
10421 if (XEXP (rvec[0], 0) == NULL_RTX)
10422 return NULL_RTX;
10424 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10425 return XEXP (rvec[0], 0);
10428 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10431 /* Determine where to put an argument to a function.
10432 Value is zero to push the argument on the stack,
10433 or a hard register in which to store the argument.
10435 MODE is the argument's machine mode.
10436 TYPE is the data type of the argument (as a tree).
10437 This is null for libcalls where that information may
10438 not be available.
10439 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10440 the preceding args and about the function being called. It is
10441 not modified in this routine.
10442 NAMED is nonzero if this argument is a named parameter
10443 (otherwise it is an extra parameter matching an ellipsis).
10445 On RS/6000 the first eight words of non-FP are normally in registers
10446 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10447 Under V.4, the first 8 FP args are in registers.
10449 If this is floating-point and no prototype is specified, we use
10450 both an FP and integer register (or possibly FP reg and stack). Library
10451 functions (when CALL_LIBCALL is set) always have the proper types for args,
10452 so we can pass the FP value just in one register. emit_library_function
10453 doesn't support PARALLEL anyway.
10455 Note that for args passed by reference, function_arg will be called
10456 with MODE and TYPE set to that of the pointer to the arg, not the arg
10457 itself. */
10459 static rtx
10460 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10461 const_tree type, bool named)
10463 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10464 enum rs6000_abi abi = DEFAULT_ABI;
10465 machine_mode elt_mode;
10466 int n_elts;
10468 /* Return a marker to indicate whether CR1 needs to set or clear the
10469 bit that V.4 uses to say fp args were passed in registers.
10470 Assume that we don't need the marker for software floating point,
10471 or compiler generated library calls. */
10472 if (mode == VOIDmode)
10474 if (abi == ABI_V4
10475 && (cum->call_cookie & CALL_LIBCALL) == 0
10476 && (cum->stdarg
10477 || (cum->nargs_prototype < 0
10478 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10480 /* For the SPE, we need to crxor CR6 always. */
10481 if (TARGET_SPE_ABI)
10482 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10483 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10484 return GEN_INT (cum->call_cookie
10485 | ((cum->fregno == FP_ARG_MIN_REG)
10486 ? CALL_V4_SET_FP_ARGS
10487 : CALL_V4_CLEAR_FP_ARGS));
10490 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10493 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10495 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10497 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10498 if (rslt != NULL_RTX)
10499 return rslt;
10500 /* Else fall through to usual handling. */
10503 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10505 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10506 rtx r, off;
10507 int i, k = 0;
10509 /* Do we also need to pass this argument in the parameter
10510 save area? */
10511 if (TARGET_64BIT && ! cum->prototype)
10513 int align_words = (cum->words + 1) & ~1;
10514 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10517 /* Describe where this argument goes in the vector registers. */
10518 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10520 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10521 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10522 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10525 return rs6000_finish_function_arg (mode, rvec, k);
10527 else if (TARGET_ALTIVEC_ABI
10528 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10529 || (type && TREE_CODE (type) == VECTOR_TYPE
10530 && int_size_in_bytes (type) == 16)))
10532 if (named || abi == ABI_V4)
10533 return NULL_RTX;
10534 else
10536 /* Vector parameters to varargs functions under AIX or Darwin
10537 get passed in memory and possibly also in GPRs. */
10538 int align, align_words, n_words;
10539 machine_mode part_mode;
10541 /* Vector parameters must be 16-byte aligned. In 32-bit
10542 mode this means we need to take into account the offset
10543 to the parameter save area. In 64-bit mode, they just
10544 have to start on an even word, since the parameter save
10545 area is 16-byte aligned. */
10546 if (TARGET_32BIT)
10547 align = -(rs6000_parm_offset () + cum->words) & 3;
10548 else
10549 align = cum->words & 1;
10550 align_words = cum->words + align;
10552 /* Out of registers? Memory, then. */
10553 if (align_words >= GP_ARG_NUM_REG)
10554 return NULL_RTX;
10556 if (TARGET_32BIT && TARGET_POWERPC64)
10557 return rs6000_mixed_function_arg (mode, type, align_words);
10559 /* The vector value goes in GPRs. Only the part of the
10560 value in GPRs is reported here. */
10561 part_mode = mode;
10562 n_words = rs6000_arg_size (mode, type);
10563 if (align_words + n_words > GP_ARG_NUM_REG)
10564 /* Fortunately, there are only two possibilities, the value
10565 is either wholly in GPRs or half in GPRs and half not. */
10566 part_mode = DImode;
10568 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10571 else if (TARGET_SPE_ABI && TARGET_SPE
10572 && (SPE_VECTOR_MODE (mode)
10573 || (TARGET_E500_DOUBLE && (mode == DFmode
10574 || mode == DCmode
10575 || mode == TFmode
10576 || mode == TCmode))))
10577 return rs6000_spe_function_arg (cum, mode, type);
10579 else if (abi == ABI_V4)
10581 if (TARGET_HARD_FLOAT && TARGET_FPRS
10582 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10583 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10584 || (mode == TFmode && !TARGET_IEEEQUAD)
10585 || mode == SDmode || mode == DDmode || mode == TDmode))
10587 /* _Decimal128 must use an even/odd register pair. This assumes
10588 that the register number is odd when fregno is odd. */
10589 if (mode == TDmode && (cum->fregno % 2) == 1)
10590 cum->fregno++;
10592 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10593 <= FP_ARG_V4_MAX_REG)
10594 return gen_rtx_REG (mode, cum->fregno);
10595 else
10596 return NULL_RTX;
10598 else
10600 int n_words = rs6000_arg_size (mode, type);
10601 int gregno = cum->sysv_gregno;
10603 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10604 (r7,r8) or (r9,r10). As does any other 2 word item such
10605 as complex int due to a historical mistake. */
10606 if (n_words == 2)
10607 gregno += (1 - gregno) & 1;
10609 /* Multi-reg args are not split between registers and stack. */
10610 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10611 return NULL_RTX;
10613 if (TARGET_32BIT && TARGET_POWERPC64)
10614 return rs6000_mixed_function_arg (mode, type,
10615 gregno - GP_ARG_MIN_REG);
10616 return gen_rtx_REG (mode, gregno);
10619 else
10621 int align_words = rs6000_parm_start (mode, type, cum->words);
10623 /* _Decimal128 must be passed in an even/odd float register pair.
10624 This assumes that the register number is odd when fregno is odd. */
10625 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10626 cum->fregno++;
10628 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10630 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10631 rtx r, off;
10632 int i, k = 0;
10633 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10634 int fpr_words;
10636 /* Do we also need to pass this argument in the parameter
10637 save area? */
10638 if (type && (cum->nargs_prototype <= 0
10639 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10640 && TARGET_XL_COMPAT
10641 && align_words >= GP_ARG_NUM_REG)))
10642 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10644 /* Describe where this argument goes in the fprs. */
10645 for (i = 0; i < n_elts
10646 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10648 /* Check if the argument is split over registers and memory.
10649 This can only ever happen for long double or _Decimal128;
10650 complex types are handled via split_complex_arg. */
10651 machine_mode fmode = elt_mode;
10652 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10654 gcc_assert (fmode == TFmode || fmode == TDmode);
10655 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10658 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10659 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10660 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10663 /* If there were not enough FPRs to hold the argument, the rest
10664 usually goes into memory. However, if the current position
10665 is still within the register parameter area, a portion may
10666 actually have to go into GPRs.
10668 Note that it may happen that the portion of the argument
10669 passed in the first "half" of the first GPR was already
10670 passed in the last FPR as well.
10672 For unnamed arguments, we already set up GPRs to cover the
10673 whole argument in rs6000_psave_function_arg, so there is
10674 nothing further to do at this point. */
10675 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10676 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10677 && cum->nargs_prototype > 0)
10679 static bool warned;
10681 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10682 int n_words = rs6000_arg_size (mode, type);
10684 align_words += fpr_words;
10685 n_words -= fpr_words;
10689 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10690 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10691 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10693 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10695 if (!warned && warn_psabi)
10697 warned = true;
10698 inform (input_location,
10699 "the ABI of passing homogeneous float aggregates"
10700 " has changed in GCC 5");
10704 return rs6000_finish_function_arg (mode, rvec, k);
10706 else if (align_words < GP_ARG_NUM_REG)
10708 if (TARGET_32BIT && TARGET_POWERPC64)
10709 return rs6000_mixed_function_arg (mode, type, align_words);
10711 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10713 else
10714 return NULL_RTX;
10718 /* For an arg passed partly in registers and partly in memory, this is
10719 the number of bytes passed in registers. For args passed entirely in
10720 registers or entirely in memory, zero. When an arg is described by a
10721 PARALLEL, perhaps using more than one register type, this function
10722 returns the number of bytes used by the first element of the PARALLEL. */
10724 static int
10725 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10726 tree type, bool named)
10728 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10729 bool passed_in_gprs = true;
10730 int ret = 0;
10731 int align_words;
10732 machine_mode elt_mode;
10733 int n_elts;
10735 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10737 if (DEFAULT_ABI == ABI_V4)
10738 return 0;
10740 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10742 /* If we are passing this arg in the fixed parameter save area
10743 (gprs or memory) as well as VRs, we do not use the partial
10744 bytes mechanism; instead, rs6000_function_arg will return a
10745 PARALLEL including a memory element as necessary. */
10746 if (TARGET_64BIT && ! cum->prototype)
10747 return 0;
10749 /* Otherwise, we pass in VRs only. Check for partial copies. */
10750 passed_in_gprs = false;
10751 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10752 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10755 /* In this complicated case we just disable the partial_nregs code. */
10756 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10757 return 0;
10759 align_words = rs6000_parm_start (mode, type, cum->words);
10761 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10763 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10765 /* If we are passing this arg in the fixed parameter save area
10766 (gprs or memory) as well as FPRs, we do not use the partial
10767 bytes mechanism; instead, rs6000_function_arg will return a
10768 PARALLEL including a memory element as necessary. */
10769 if (type
10770 && (cum->nargs_prototype <= 0
10771 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10772 && TARGET_XL_COMPAT
10773 && align_words >= GP_ARG_NUM_REG)))
10774 return 0;
10776 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10777 passed_in_gprs = false;
10778 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10780 /* Compute number of bytes / words passed in FPRs. If there
10781 is still space available in the register parameter area
10782 *after* that amount, a part of the argument will be passed
10783 in GPRs. In that case, the total amount passed in any
10784 registers is equal to the amount that would have been passed
10785 in GPRs if everything were passed there, so we fall back to
10786 the GPR code below to compute the appropriate value. */
10787 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10788 * MIN (8, GET_MODE_SIZE (elt_mode)));
10789 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10791 if (align_words + fpr_words < GP_ARG_NUM_REG)
10792 passed_in_gprs = true;
10793 else
10794 ret = fpr;
10798 if (passed_in_gprs
10799 && align_words < GP_ARG_NUM_REG
10800 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10801 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10803 if (ret != 0 && TARGET_DEBUG_ARG)
10804 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10806 return ret;
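/* E.g. on a 64-bit target a 24-byte BLKmode struct starting at
   ALIGN_WORDS == 6 reports (8 - 6) * 8 == 16 bytes in GPRs (r9/r10),
   with the remaining 8 bytes in memory.  */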
10809 /* A C expression that indicates when an argument must be passed by
10810 reference. If nonzero for an argument, a copy of that argument is
10811 made in memory and a pointer to the argument is passed instead of
10812 the argument itself. The pointer is passed in whatever way is
10813 appropriate for passing a pointer to that type.
10815 Under V.4, aggregates and long double are passed by reference.
10817 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10818 reference unless the AltiVec vector extension ABI is in force.
10820 As an extension to all ABIs, variable sized types are passed by
10821 reference. */
10823 static bool
10824 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10825 machine_mode mode, const_tree type,
10826 bool named ATTRIBUTE_UNUSED)
10828 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10830 if (TARGET_DEBUG_ARG)
10831 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10832 return 1;
10835 if (!type)
10836 return 0;
10838 #if HAVE_UPC_PTS_STRUCT_REP
10839 if (DEFAULT_ABI == ABI_V4 && POINTER_TYPE_P (type)
10840 && upc_shared_type_p (TREE_TYPE (type)))
10842 if (TARGET_DEBUG_ARG)
10843 fprintf (stderr,
10844 "function_arg_pass_by_reference: V4 UPC ptr to shared\n");
10845 return 1;
10847 #endif
10849 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10851 if (TARGET_DEBUG_ARG)
10852 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10853 return 1;
10856 if (int_size_in_bytes (type) < 0)
10858 if (TARGET_DEBUG_ARG)
10859 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10860 return 1;
10863 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10864 modes only exist for GCC vector types if -maltivec. */
10865 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10867 if (TARGET_DEBUG_ARG)
10868 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10869 return 1;
10872 /* Pass synthetic vectors in memory. */
10873 if (TREE_CODE (type) == VECTOR_TYPE
10874 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10876 static bool warned_for_pass_big_vectors = false;
10877 if (TARGET_DEBUG_ARG)
10878 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10879 if (!warned_for_pass_big_vectors)
10881 warning (0, "GCC vector passed by reference: "
10882 "non-standard ABI extension with no compatibility guarantee");
10883 warned_for_pass_big_vectors = true;
10885 return 1;
10888 return 0;
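/* Illustrative sketch of parameter types that the V.4 rules above pass
   by invisible reference.  Hypothetical prototype, assuming a 32-bit
   SVR4 (powerpc-linux) compilation.  */
#if 0
struct big { int a[8]; };       /* aggregate: caller copies, passes a pointer */
typedef int v4si __attribute__ ((vector_size (16)));
extern void callee (struct big b,
                    long double ld,   /* by reference when IEEE 128-bit (TFmode) */
                    v4si v);          /* by reference unless -mabi=altivec */
#endif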
10891 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10892 already processed. Return true if the parameter must be passed
10893 (fully or partially) on the stack. */
10895 static bool
10896 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10898 machine_mode mode;
10899 int unsignedp;
10900 rtx entry_parm;
10902 /* Catch errors. */
10903 if (type == NULL || type == error_mark_node)
10904 return true;
10906 /* Handle types with no storage requirement. */
10907 if (TYPE_MODE (type) == VOIDmode)
10908 return false;
10910 /* Handle complex types; each recursive call advances ARGS_SO_FAR past one half. */
10911 if (TREE_CODE (type) == COMPLEX_TYPE)
10912 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10913 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10915 /* Handle transparent aggregates. */
10916 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10917 && TYPE_TRANSPARENT_AGGR (type))
10918 type = TREE_TYPE (first_field (type));
10920 /* See if this arg was passed by invisible reference. */
10921 if (pass_by_reference (get_cumulative_args (args_so_far),
10922 TYPE_MODE (type), type, true))
10923 type = build_pointer_type (type);
10925 /* Find mode as it is passed by the ABI. */
10926 unsignedp = TYPE_UNSIGNED (type);
10927 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10929 /* If we must pass on the stack, we need a stack. */
10930 if (rs6000_must_pass_in_stack (mode, type))
10931 return true;
10933 /* If there is no incoming register, we need a stack. */
10934 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10935 if (entry_parm == NULL)
10936 return true;
10938 /* Likewise if we need to pass both in registers and on the stack. */
10939 if (GET_CODE (entry_parm) == PARALLEL
10940 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10941 return true;
10943 /* Also true if we're partially in registers and partially not. */
10944 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10945 return true;
10947 /* Update info on where next arg arrives in registers. */
10948 rs6000_function_arg_advance (args_so_far, mode, type, true);
10949 return false;
10952 /* Return true if FUN has no prototype, has a variable argument
10953 list, or passes any parameter in memory. */
10955 static bool
10956 rs6000_function_parms_need_stack (tree fun, bool incoming)
10958 tree fntype, result;
10959 CUMULATIVE_ARGS args_so_far_v;
10960 cumulative_args_t args_so_far;
10962 if (!fun)
10963 /* Must be a libcall; libcalls only use reg parms. */
10964 return false;
10966 fntype = fun;
10967 if (!TYPE_P (fun))
10968 fntype = TREE_TYPE (fun);
10970 /* Varargs functions need the parameter save area. */
10971 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10972 return true;
10974 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10975 args_so_far = pack_cumulative_args (&args_so_far_v);
10977 /* When incoming, we will have been passed the function decl.
10978 It is necessary to use the decl to handle K&R style functions,
10979 where TYPE_ARG_TYPES may not be available. */
10980 if (incoming)
10982 gcc_assert (DECL_P (fun));
10983 result = DECL_RESULT (fun);
10985 else
10986 result = TREE_TYPE (fntype);
10988 if (result && aggregate_value_p (result, fntype))
10990 if (!TYPE_P (result))
10991 result = TREE_TYPE (result);
10992 result = build_pointer_type (result);
10993 rs6000_parm_needs_stack (args_so_far, result);
10996 if (incoming)
10998 tree parm;
11000 for (parm = DECL_ARGUMENTS (fun);
11001 parm && parm != void_list_node;
11002 parm = TREE_CHAIN (parm))
11003 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11004 return true;
11006 else
11008 function_args_iterator args_iter;
11009 tree arg_type;
11011 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11012 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11013 return true;
11016 return false;
11019 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11020 usually a constant depending on the ABI. However, in the ELFv2 ABI
11021 the register parameter area is optional when calling a function that
11022 has a prototype in scope, has no variable argument list, and passes
11023 all parameters in registers. */
11025 int
11026 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11028 int reg_parm_stack_space;
11030 switch (DEFAULT_ABI)
11032 default:
11033 reg_parm_stack_space = 0;
11034 break;
11036 case ABI_AIX:
11037 case ABI_DARWIN:
11038 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11039 break;
11041 case ABI_ELFv2:
11042 /* ??? Recomputing this every time is a bit expensive. Is there
11043 a place to cache this information? */
11044 if (rs6000_function_parms_need_stack (fun, incoming))
11045 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11046 else
11047 reg_parm_stack_space = 0;
11048 break;
11051 return reg_parm_stack_space;
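/* Illustrative sketch of the ELFv2 distinction computed above, with
   hypothetical declarations.  */
#if 0
extern int f (int a, double b);       /* prototyped, fits in r3 and f1:
                                         no parameter save area needed */
extern int g (const char *fmt, ...);  /* varargs: the full 64-byte area */
#endif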
11054 static void
11055 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11057 int i;
11058 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11060 if (nregs == 0)
11061 return;
11063 for (i = 0; i < nregs; i++)
11065 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11066 if (reload_completed)
11068 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11069 tem = NULL_RTX;
11070 else
11071 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11072 i * GET_MODE_SIZE (reg_mode));
11074 else
11075 tem = replace_equiv_address (tem, XEXP (tem, 0));
11077 gcc_assert (tem);
11079 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11083 /* Perform any actions needed for a function that is receiving a
11084 variable number of arguments.
11086 CUM is as above.
11088 MODE and TYPE are the mode and type of the current parameter.
11090 PRETEND_SIZE is a variable that should be set to the amount of stack
11091 that must be pushed by the prolog to pretend that our caller pushed
11092 it.
11094 Normally, this macro will push all remaining incoming registers on the
11095 stack and set PRETEND_SIZE to the length of the registers pushed. */
11097 static void
11098 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11099 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11100 int no_rtl)
11102 CUMULATIVE_ARGS next_cum;
11103 int reg_size = TARGET_32BIT ? 4 : 8;
11104 rtx save_area = NULL_RTX, mem;
11105 int first_reg_offset;
11106 alias_set_type set;
11108 /* Skip the last named argument. */
11109 next_cum = *get_cumulative_args (cum);
11110 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11112 if (DEFAULT_ABI == ABI_V4)
11114 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11116 if (! no_rtl)
11118 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11119 HOST_WIDE_INT offset = 0;
11121 /* Try to optimize the size of the varargs save area.
11122 The ABI requires that ap.reg_save_area is doubleword
11123 aligned, but we don't need to allocate space for all
11124 the bytes, only those to which we actually will save
11125 anything. */
11126 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11127 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11128 if (TARGET_HARD_FLOAT && TARGET_FPRS
11129 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11130 && cfun->va_list_fpr_size)
11132 if (gpr_reg_num)
11133 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11134 * UNITS_PER_FP_WORD;
11135 if (cfun->va_list_fpr_size
11136 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11137 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11138 else
11139 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11140 * UNITS_PER_FP_WORD;
11142 if (gpr_reg_num)
11144 offset = -((first_reg_offset * reg_size) & ~7);
11145 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11147 gpr_reg_num = cfun->va_list_gpr_size;
11148 if (reg_size == 4 && (first_reg_offset & 1))
11149 gpr_reg_num++;
11151 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11153 else if (fpr_size)
11154 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11155 * UNITS_PER_FP_WORD
11156 - (int) (GP_ARG_NUM_REG * reg_size);
11158 if (gpr_size + fpr_size)
11160 rtx reg_save_area
11161 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11162 gcc_assert (GET_CODE (reg_save_area) == MEM);
11163 reg_save_area = XEXP (reg_save_area, 0);
11164 if (GET_CODE (reg_save_area) == PLUS)
11166 gcc_assert (XEXP (reg_save_area, 0)
11167 == virtual_stack_vars_rtx);
11168 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11169 offset += INTVAL (XEXP (reg_save_area, 1));
11171 else
11172 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11175 cfun->machine->varargs_save_offset = offset;
11176 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11179 else
11181 first_reg_offset = next_cum.words;
11182 save_area = virtual_incoming_args_rtx;
11184 if (targetm.calls.must_pass_in_stack (mode, type))
11185 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11188 set = get_varargs_alias_set ();
11189 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11190 && cfun->va_list_gpr_size)
11192 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11194 if (va_list_gpr_counter_field)
11195 /* V4 va_list_gpr_size counts number of registers needed. */
11196 n_gpr = cfun->va_list_gpr_size;
11197 else
11198 /* char * va_list instead counts number of bytes needed. */
11199 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11201 if (nregs > n_gpr)
11202 nregs = n_gpr;
11204 mem = gen_rtx_MEM (BLKmode,
11205 plus_constant (Pmode, save_area,
11206 first_reg_offset * reg_size));
11207 MEM_NOTRAP_P (mem) = 1;
11208 set_mem_alias_set (mem, set);
11209 set_mem_align (mem, BITS_PER_WORD);
11211 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11212 nregs);
11215 /* Save FP registers if needed. */
11216 if (DEFAULT_ABI == ABI_V4
11217 && TARGET_HARD_FLOAT && TARGET_FPRS
11218 && ! no_rtl
11219 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11220 && cfun->va_list_fpr_size)
11222 int fregno = next_cum.fregno, nregs;
11223 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11224 rtx lab = gen_label_rtx ();
11225 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11226 * UNITS_PER_FP_WORD);
11228 emit_jump_insn
11229 (gen_rtx_SET (VOIDmode,
11230 pc_rtx,
11231 gen_rtx_IF_THEN_ELSE (VOIDmode,
11232 gen_rtx_NE (VOIDmode, cr1,
11233 const0_rtx),
11234 gen_rtx_LABEL_REF (VOIDmode, lab),
11235 pc_rtx)));
11237 for (nregs = 0;
11238 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11239 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11241 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11242 ? DFmode : SFmode,
11243 plus_constant (Pmode, save_area, off));
11244 MEM_NOTRAP_P (mem) = 1;
11245 set_mem_alias_set (mem, set);
11246 set_mem_align (mem, GET_MODE_ALIGNMENT (
11247 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11248 ? DFmode : SFmode));
11249 emit_move_insn (mem, gen_rtx_REG (
11250 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11251 ? DFmode : SFmode, fregno));
11254 emit_label (lab);
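/* Illustrative sketch of a function whose prologue exercises the code
   above: assuming 32-bit V.4, the unnamed-argument GPRs (r4..r10 here)
   are stored, and the FPR stores are executed only when the caller set
   CR1 to signal that it passed something in floating-point registers.  */
#if 0
#include <stdarg.h>
int
sum (int n, ...)
{
  va_list ap;
  int i, s = 0;
  va_start (ap, n);             /* reads the save area set up above */
  for (i = 0; i < n; i++)
    s += va_arg (ap, int);
  va_end (ap);
  return s;
}
#endif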
11258 /* Create the va_list data type. */
11260 static tree
11261 rs6000_build_builtin_va_list (void)
11263 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11265 /* For AIX, prefer 'char *' because that's what the system
11266 header files like. */
11267 if (DEFAULT_ABI != ABI_V4)
11268 return build_pointer_type (char_type_node);
11270 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11271 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11272 get_identifier ("__va_list_tag"), record);
11274 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11275 unsigned_char_type_node);
11276 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11277 unsigned_char_type_node);
11278 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11279 every user file. */
11280 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11281 get_identifier ("reserved"), short_unsigned_type_node);
11282 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11283 get_identifier ("overflow_arg_area"),
11284 ptr_type_node);
11285 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11286 get_identifier ("reg_save_area"),
11287 ptr_type_node);
11289 va_list_gpr_counter_field = f_gpr;
11290 va_list_fpr_counter_field = f_fpr;
11292 DECL_FIELD_CONTEXT (f_gpr) = record;
11293 DECL_FIELD_CONTEXT (f_fpr) = record;
11294 DECL_FIELD_CONTEXT (f_res) = record;
11295 DECL_FIELD_CONTEXT (f_ovf) = record;
11296 DECL_FIELD_CONTEXT (f_sav) = record;
11298 TYPE_STUB_DECL (record) = type_decl;
11299 TYPE_NAME (record) = type_decl;
11300 TYPE_FIELDS (record) = f_gpr;
11301 DECL_CHAIN (f_gpr) = f_fpr;
11302 DECL_CHAIN (f_fpr) = f_res;
11303 DECL_CHAIN (f_res) = f_ovf;
11304 DECL_CHAIN (f_ovf) = f_sav;
11306 layout_type (record);
11308 /* The correct type is an array type of one element. */
11309 return build_array_type (record, build_index_type (size_zero_node));
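/* C-level picture of the record built above, as a sketch that mirrors
   the five fields and the one-element array type; the typedef name is
   invented for illustration.  */
#if 0
typedef struct __va_list_tag
{
  unsigned char gpr;            /* GPRs consumed by named arguments */
  unsigned char fpr;            /* FPRs consumed by named arguments */
  unsigned short reserved;      /* named padding, keeps -Wpadded quiet */
  void *overflow_arg_area;      /* arguments that went to the stack */
  void *reg_save_area;          /* where the prologue spilled r3..r10/f1..f8 */
} va_list_sketch[1];
#endif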
11312 /* Implement va_start. */
11314 static void
11315 rs6000_va_start (tree valist, rtx nextarg)
11317 HOST_WIDE_INT words, n_gpr, n_fpr;
11318 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11319 tree gpr, fpr, ovf, sav, t;
11321 /* Only SVR4 needs something special. */
11322 if (DEFAULT_ABI != ABI_V4)
11324 std_expand_builtin_va_start (valist, nextarg);
11325 return;
11328 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11329 f_fpr = DECL_CHAIN (f_gpr);
11330 f_res = DECL_CHAIN (f_fpr);
11331 f_ovf = DECL_CHAIN (f_res);
11332 f_sav = DECL_CHAIN (f_ovf);
11334 valist = build_simple_mem_ref (valist);
11335 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11336 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11337 f_fpr, NULL_TREE);
11338 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11339 f_ovf, NULL_TREE);
11340 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11341 f_sav, NULL_TREE);
11343 /* Count number of gp and fp argument registers used. */
11344 words = crtl->args.info.words;
11345 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11346 GP_ARG_NUM_REG);
11347 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11348 FP_ARG_NUM_REG);
11350 if (TARGET_DEBUG_ARG)
11351 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC ", n_gpr = "
11352 HOST_WIDE_INT_PRINT_DEC ", n_fpr = " HOST_WIDE_INT_PRINT_DEC "\n",
11353 words, n_gpr, n_fpr);
11355 if (cfun->va_list_gpr_size)
11357 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11358 build_int_cst (NULL_TREE, n_gpr));
11359 TREE_SIDE_EFFECTS (t) = 1;
11360 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11363 if (cfun->va_list_fpr_size)
11365 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11366 build_int_cst (NULL_TREE, n_fpr));
11367 TREE_SIDE_EFFECTS (t) = 1;
11368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11370 #ifdef HAVE_AS_GNU_ATTRIBUTE
11371 if (call_ABI_of_interest (cfun->decl))
11372 rs6000_passes_float = true;
11373 #endif
11376 /* Find the overflow area. */
11377 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11378 if (words != 0)
11379 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11380 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11381 TREE_SIDE_EFFECTS (t) = 1;
11382 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11384 /* If there were no va_arg invocations, don't set up the register
11385 save area. */
11386 if (!cfun->va_list_gpr_size
11387 && !cfun->va_list_fpr_size
11388 && n_gpr < GP_ARG_NUM_REG
11389 && n_fpr < FP_ARG_V4_MAX_REG)
11390 return;
11392 /* Find the register save area. */
11393 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11394 if (cfun->machine->varargs_save_offset)
11395 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11396 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11397 TREE_SIDE_EFFECTS (t) = 1;
11398 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
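/* Rough C model of the assignments expanded above for V.4; all names
   are invented and the layout follows the va_list sketch given earlier.  */
#if 0
struct va_tag { unsigned char gpr, fpr; unsigned short reserved;
                void *overflow_arg_area; void *reg_save_area; };

void
va_start_model (struct va_tag *ap, unsigned char n_gpr, unsigned char n_fpr,
                char *incoming_args, char *frame_base,
                long words, long save_offset)
{
  ap->gpr = n_gpr;              /* counters taken from crtl->args.info */
  ap->fpr = n_fpr;
  ap->overflow_arg_area = incoming_args + words * 4;   /* 32-bit words */
  ap->reg_save_area = frame_base + save_offset;        /* varargs_save_offset */
}
#endif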
11401 /* Implement va_arg. */
11403 static tree
11404 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11405 gimple_seq *post_p)
11407 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11408 tree gpr, fpr, ovf, sav, reg, t, u;
11409 int size, rsize, n_reg, sav_ofs, sav_scale;
11410 tree lab_false, lab_over, addr;
11411 int align;
11412 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11413 int regalign = 0;
11414 gimple stmt;
11416 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11418 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11419 return build_va_arg_indirect_ref (t);
11422 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11423 earlier version of gcc, with the property that it always applied alignment
11424 adjustments to the va-args (even for zero-sized types). The cheapest way
11425 to deal with this is to replicate the effect of the part of
11426 std_gimplify_va_arg_expr that carries out the align adjust, for the
11427 relevant case.
11428 We don't need to check for pass-by-reference because of the test above.
11429 We can return a simplified answer, since we know there's no offset to add. */
11431 if (((TARGET_MACHO
11432 && rs6000_darwin64_abi)
11433 || DEFAULT_ABI == ABI_ELFv2
11434 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11435 && integer_zerop (TYPE_SIZE (type)))
11437 unsigned HOST_WIDE_INT align, boundary;
11438 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11439 align = PARM_BOUNDARY / BITS_PER_UNIT;
11440 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11441 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11442 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11443 boundary /= BITS_PER_UNIT;
11444 if (boundary > align)
11446 tree t;
11447 /* This updates arg ptr by the amount that would be necessary
11448 to align the zero-sized (but not zero-alignment) item. */
11449 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11450 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11451 gimplify_and_add (t, pre_p);
11453 t = fold_convert (sizetype, valist_tmp);
11454 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11455 fold_convert (TREE_TYPE (valist),
11456 fold_build2 (BIT_AND_EXPR, sizetype, t,
11457 size_int (-boundary))));
11458 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11459 gimplify_and_add (t, pre_p);
11461 /* Since it is zero-sized there's no increment for the item itself. */
11462 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11463 return build_va_arg_indirect_ref (valist_tmp);
11466 if (DEFAULT_ABI != ABI_V4)
11468 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11470 tree elem_type = TREE_TYPE (type);
11471 machine_mode elem_mode = TYPE_MODE (elem_type);
11472 int elem_size = GET_MODE_SIZE (elem_mode);
11474 if (elem_size < UNITS_PER_WORD)
11476 tree real_part, imag_part;
11477 gimple_seq post = NULL;
11479 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11480 &post);
11481 /* Copy the value into a temporary, lest the formal temporary
11482 be reused out from under us. */
11483 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11484 gimple_seq_add_seq (pre_p, post);
11486 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11487 post_p);
11489 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11493 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11496 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11497 f_fpr = DECL_CHAIN (f_gpr);
11498 f_res = DECL_CHAIN (f_fpr);
11499 f_ovf = DECL_CHAIN (f_res);
11500 f_sav = DECL_CHAIN (f_ovf);
11502 valist = build_va_arg_indirect_ref (valist);
11503 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11504 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11505 f_fpr, NULL_TREE);
11506 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11507 f_ovf, NULL_TREE);
11508 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11509 f_sav, NULL_TREE);
11511 size = int_size_in_bytes (type);
11512 rsize = (size + 3) / 4;
11513 align = 1;
11515 if (TARGET_HARD_FLOAT && TARGET_FPRS
11516 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11517 || (TARGET_DOUBLE_FLOAT
11518 && (TYPE_MODE (type) == DFmode
11519 || TYPE_MODE (type) == TFmode
11520 || TYPE_MODE (type) == SDmode
11521 || TYPE_MODE (type) == DDmode
11522 || TYPE_MODE (type) == TDmode))))
11524 /* FP args go in FP registers, if present. */
11525 reg = fpr;
11526 n_reg = (size + 7) / 8;
11527 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11528 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11529 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11530 align = 8;
11532 else
11534 /* Otherwise into GP registers. */
11535 reg = gpr;
11536 n_reg = rsize;
11537 sav_ofs = 0;
11538 sav_scale = 4;
11539 if (n_reg == 2)
11540 align = 8;
11543 /* Pull the value out of the saved registers.... */
11545 lab_over = NULL;
11546 addr = create_tmp_var (ptr_type_node, "addr");
11548 /* AltiVec vectors never go in registers when -mabi=altivec. */
11549 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11550 align = 16;
11551 else
11553 lab_false = create_artificial_label (input_location);
11554 lab_over = create_artificial_label (input_location);
11556 /* Long long and SPE vectors are aligned in the registers.
11557 As is any other 2-gpr item, such as complex int, due to a
11558 historical mistake. */
11559 u = reg;
11560 if (n_reg == 2 && reg == gpr)
11562 regalign = 1;
11563 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11564 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11565 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11566 unshare_expr (reg), u);
11568 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11569 reg number is 0 for f1, so we want to make it odd. */
11570 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11572 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11573 build_int_cst (TREE_TYPE (reg), 1));
11574 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11577 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11578 t = build2 (GE_EXPR, boolean_type_node, u, t);
11579 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11580 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11581 gimplify_and_add (t, pre_p);
11583 t = sav;
11584 if (sav_ofs)
11585 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11587 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11588 build_int_cst (TREE_TYPE (reg), n_reg));
11589 u = fold_convert (sizetype, u);
11590 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11591 t = fold_build_pointer_plus (t, u);
11593 /* _Decimal32 varargs are located in the second word of the 64-bit
11594 FP register for 32-bit binaries. */
11595 if (TARGET_32BIT
11596 && TARGET_HARD_FLOAT && TARGET_FPRS
11597 && TYPE_MODE (type) == SDmode)
11598 t = fold_build_pointer_plus_hwi (t, size);
11600 gimplify_assign (addr, t, pre_p);
11602 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11604 stmt = gimple_build_label (lab_false);
11605 gimple_seq_add_stmt (pre_p, stmt);
11607 if ((n_reg == 2 && !regalign) || n_reg > 2)
11609 /* Ensure that we don't find any more args in regs.
11610 Alignment has been taken care of for the special cases. */
11611 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11615 /* ... otherwise out of the overflow area. */
11617 /* Care for on-stack alignment if needed. */
11618 t = ovf;
11619 if (align != 1)
11621 t = fold_build_pointer_plus_hwi (t, align - 1);
11622 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11623 build_int_cst (TREE_TYPE (t), -align));
11625 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11627 gimplify_assign (unshare_expr (addr), t, pre_p);
11629 t = fold_build_pointer_plus_hwi (t, size);
11630 gimplify_assign (unshare_expr (ovf), t, pre_p);
11632 if (lab_over)
11634 stmt = gimple_build_label (lab_over);
11635 gimple_seq_add_stmt (pre_p, stmt);
11638 if (STRICT_ALIGNMENT
11639 && (TYPE_ALIGN (type)
11640 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11642 /* The value (of type complex double, for example) may not be
11643 aligned in memory in the saved registers, so copy via a
11644 temporary. (This is the same code as used for SPARC.) */
11645 tree tmp = create_tmp_var (type, "va_arg_tmp");
11646 tree dest_addr = build_fold_addr_expr (tmp);
11648 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11649 3, dest_addr, addr, size_int (rsize * 4));
11651 gimplify_and_add (copy, pre_p);
11652 addr = dest_addr;
11655 addr = fold_convert (ptrtype, addr);
11656 return build_va_arg_indirect_ref (addr);
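/* Illustrative use of the V.4 va_arg lowering above: each fetch comes
   from the register save area while the matching counter is below 8,
   and from overflow_arg_area afterwards.  Hypothetical function.  */
#if 0
#include <stdarg.h>
double
take_two (va_list ap)
{
  int    i = va_arg (ap, int);     /* GPR save area while ap->gpr < 8 */
  double d = va_arg (ap, double);  /* FPR save area while ap->fpr < 8 */
  return i + d;                    /* later fetches fall back to the
                                      overflow (stack) area */
}
#endif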
11659 /* Builtins. */
11661 static void
11662 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11664 tree t;
11665 unsigned classify = rs6000_builtin_info[(int)code].attr;
11666 const char *attr_string = "";
11668 gcc_assert (name != NULL);
11669 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11671 if (rs6000_builtin_decls[(int)code])
11672 fatal_error (input_location,
11673 "internal error: builtin function %s already processed", name);
11675 rs6000_builtin_decls[(int)code] = t =
11676 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11678 /* Set any special attributes. */
11679 if ((classify & RS6000_BTC_CONST) != 0)
11681 /* const function, function only depends on the inputs. */
11682 TREE_READONLY (t) = 1;
11683 TREE_NOTHROW (t) = 1;
11684 attr_string = ", const";
11686 else if ((classify & RS6000_BTC_PURE) != 0)
11688 /* pure function, function can read global memory, but does not set any
11689 external state. */
11690 DECL_PURE_P (t) = 1;
11691 TREE_NOTHROW (t) = 1;
11692 attr_string = ", pure";
11694 else if ((classify & RS6000_BTC_FP) != 0)
11696 /* Function is a math function. If rounding mode is on, then treat the
11697 function as not reading global memory, but it can have arbitrary side
11698 effects. If it is off, then assume the function is a const function.
11699 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11700 builtin-attribute.def that is used for the math functions. */
11701 TREE_NOTHROW (t) = 1;
11702 if (flag_rounding_math)
11704 DECL_PURE_P (t) = 1;
11705 DECL_IS_NOVOPS (t) = 1;
11706 attr_string = ", fp, pure";
11708 else
11710 TREE_READONLY (t) = 1;
11711 attr_string = ", fp, const";
11714 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11715 gcc_unreachable ();
11717 if (TARGET_DEBUG_BUILTIN)
11718 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11719 (int)code, name, attr_string);
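/* Sketch of how def_builtin is driven elsewhere in this file; the type
   node is assumed to have been built with build_function_type_list
   beforehand, so this shows the call shape only, not a complete call
   site.  */
#if 0
def_builtin ("__builtin_altivec_vaddsws", v4si_ftype_v4si_v4si,
             ALTIVEC_BUILTIN_VADDSWS);
#endif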
11722 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11724 #undef RS6000_BUILTIN_1
11725 #undef RS6000_BUILTIN_2
11726 #undef RS6000_BUILTIN_3
11727 #undef RS6000_BUILTIN_A
11728 #undef RS6000_BUILTIN_D
11729 #undef RS6000_BUILTIN_E
11730 #undef RS6000_BUILTIN_H
11731 #undef RS6000_BUILTIN_P
11732 #undef RS6000_BUILTIN_Q
11733 #undef RS6000_BUILTIN_S
11734 #undef RS6000_BUILTIN_X
11736 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11737 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11738 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11739 { MASK, ICODE, NAME, ENUM },
11741 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11742 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11743 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11744 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11745 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11746 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11747 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11748 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11750 static const struct builtin_description bdesc_3arg[] =
11752 #include "rs6000-builtin.def"
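/* The #undef/#define blocks above are the classic X-macro technique:
   one .def file re-expanded with a different definition per table.  A
   minimal stand-alone model with invented names:  */
#if 0
/* ops.def would contain lines such as:
     OP_2 (ADD, "add", CODE_FOR_add)
     OP_3 (FMA, "fma", CODE_FOR_fma)  */
#define OP_2(ENUM, NAME, ICODE) { NAME, ICODE },
#define OP_3(ENUM, NAME, ICODE)               /* filtered out of this table */
static const struct { const char *name; int icode; } two_arg_table[] = {
#include "ops.def"
};
#undef OP_2
#undef OP_3
#endif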
11755 /* DST operations: void foo (void *, const int, const char). */
11757 #undef RS6000_BUILTIN_1
11758 #undef RS6000_BUILTIN_2
11759 #undef RS6000_BUILTIN_3
11760 #undef RS6000_BUILTIN_A
11761 #undef RS6000_BUILTIN_D
11762 #undef RS6000_BUILTIN_E
11763 #undef RS6000_BUILTIN_H
11764 #undef RS6000_BUILTIN_P
11765 #undef RS6000_BUILTIN_Q
11766 #undef RS6000_BUILTIN_S
11767 #undef RS6000_BUILTIN_X
11769 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11770 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11771 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11772 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11773 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11774 { MASK, ICODE, NAME, ENUM },
11776 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11777 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11778 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11779 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11780 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11781 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11783 static const struct builtin_description bdesc_dst[] =
11785 #include "rs6000-builtin.def"
11788 /* Simple binary operations: VECc = foo (VECa, VECb). */
11790 #undef RS6000_BUILTIN_1
11791 #undef RS6000_BUILTIN_2
11792 #undef RS6000_BUILTIN_3
11793 #undef RS6000_BUILTIN_A
11794 #undef RS6000_BUILTIN_D
11795 #undef RS6000_BUILTIN_E
11796 #undef RS6000_BUILTIN_H
11797 #undef RS6000_BUILTIN_P
11798 #undef RS6000_BUILTIN_Q
11799 #undef RS6000_BUILTIN_S
11800 #undef RS6000_BUILTIN_X
11802 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11803 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11804 { MASK, ICODE, NAME, ENUM },
11806 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11807 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11808 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11809 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11810 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11811 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11812 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11813 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11814 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11816 static const struct builtin_description bdesc_2arg[] =
11818 #include "rs6000-builtin.def"
11821 #undef RS6000_BUILTIN_1
11822 #undef RS6000_BUILTIN_2
11823 #undef RS6000_BUILTIN_3
11824 #undef RS6000_BUILTIN_A
11825 #undef RS6000_BUILTIN_D
11826 #undef RS6000_BUILTIN_E
11827 #undef RS6000_BUILTIN_H
11828 #undef RS6000_BUILTIN_P
11829 #undef RS6000_BUILTIN_Q
11830 #undef RS6000_BUILTIN_S
11831 #undef RS6000_BUILTIN_X
11833 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11834 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11835 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11836 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11837 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11838 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11839 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11840 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11841 { MASK, ICODE, NAME, ENUM },
11843 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11844 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11845 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11847 /* AltiVec predicates. */
11849 static const struct builtin_description bdesc_altivec_preds[] =
11851 #include "rs6000-builtin.def"
11854 /* SPE predicates. */
11855 #undef RS6000_BUILTIN_1
11856 #undef RS6000_BUILTIN_2
11857 #undef RS6000_BUILTIN_3
11858 #undef RS6000_BUILTIN_A
11859 #undef RS6000_BUILTIN_D
11860 #undef RS6000_BUILTIN_E
11861 #undef RS6000_BUILTIN_H
11862 #undef RS6000_BUILTIN_P
11863 #undef RS6000_BUILTIN_Q
11864 #undef RS6000_BUILTIN_S
11865 #undef RS6000_BUILTIN_X
11867 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11868 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11869 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11870 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11871 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11872 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11873 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11874 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11875 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11876 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11877 { MASK, ICODE, NAME, ENUM },
11879 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11881 static const struct builtin_description bdesc_spe_predicates[] =
11883 #include "rs6000-builtin.def"
11886 /* SPE evsel predicates. */
11887 #undef RS6000_BUILTIN_1
11888 #undef RS6000_BUILTIN_2
11889 #undef RS6000_BUILTIN_3
11890 #undef RS6000_BUILTIN_A
11891 #undef RS6000_BUILTIN_D
11892 #undef RS6000_BUILTIN_E
11893 #undef RS6000_BUILTIN_H
11894 #undef RS6000_BUILTIN_P
11895 #undef RS6000_BUILTIN_Q
11896 #undef RS6000_BUILTIN_S
11897 #undef RS6000_BUILTIN_X
11899 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11900 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11901 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11902 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11903 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11904 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11905 { MASK, ICODE, NAME, ENUM },
11907 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11908 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11909 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11910 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11911 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11913 static const struct builtin_description bdesc_spe_evsel[] =
11915 #include "rs6000-builtin.def"
11918 /* PAIRED predicates. */
11919 #undef RS6000_BUILTIN_1
11920 #undef RS6000_BUILTIN_2
11921 #undef RS6000_BUILTIN_3
11922 #undef RS6000_BUILTIN_A
11923 #undef RS6000_BUILTIN_D
11924 #undef RS6000_BUILTIN_E
11925 #undef RS6000_BUILTIN_H
11926 #undef RS6000_BUILTIN_P
11927 #undef RS6000_BUILTIN_Q
11928 #undef RS6000_BUILTIN_S
11929 #undef RS6000_BUILTIN_X
11931 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11932 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11933 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11934 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11935 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11936 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11937 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11938 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11939 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11940 { MASK, ICODE, NAME, ENUM },
11942 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11943 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11945 static const struct builtin_description bdesc_paired_preds[] =
11947 #include "rs6000-builtin.def"
11950 /* ABS* operations. */
11952 #undef RS6000_BUILTIN_1
11953 #undef RS6000_BUILTIN_2
11954 #undef RS6000_BUILTIN_3
11955 #undef RS6000_BUILTIN_A
11956 #undef RS6000_BUILTIN_D
11957 #undef RS6000_BUILTIN_E
11958 #undef RS6000_BUILTIN_H
11959 #undef RS6000_BUILTIN_P
11960 #undef RS6000_BUILTIN_Q
11961 #undef RS6000_BUILTIN_S
11962 #undef RS6000_BUILTIN_X
11964 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11965 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11966 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11967 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11968 { MASK, ICODE, NAME, ENUM },
11970 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11971 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11972 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11973 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11974 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11975 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11976 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11978 static const struct builtin_description bdesc_abs[] =
11980 #include "rs6000-builtin.def"
11983 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11984 foo (VECa). */
11986 #undef RS6000_BUILTIN_1
11987 #undef RS6000_BUILTIN_2
11988 #undef RS6000_BUILTIN_3
11989 #undef RS6000_BUILTIN_A
11990 #undef RS6000_BUILTIN_D
11991 #undef RS6000_BUILTIN_E
11992 #undef RS6000_BUILTIN_H
11993 #undef RS6000_BUILTIN_P
11994 #undef RS6000_BUILTIN_Q
11995 #undef RS6000_BUILTIN_S
11996 #undef RS6000_BUILTIN_X
11998 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11999 { MASK, ICODE, NAME, ENUM },
12001 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12002 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12003 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12004 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12005 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12006 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12007 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12008 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12009 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12010 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12012 static const struct builtin_description bdesc_1arg[] =
12014 #include "rs6000-builtin.def"
12017 /* HTM builtins. */
12018 #undef RS6000_BUILTIN_1
12019 #undef RS6000_BUILTIN_2
12020 #undef RS6000_BUILTIN_3
12021 #undef RS6000_BUILTIN_A
12022 #undef RS6000_BUILTIN_D
12023 #undef RS6000_BUILTIN_E
12024 #undef RS6000_BUILTIN_H
12025 #undef RS6000_BUILTIN_P
12026 #undef RS6000_BUILTIN_Q
12027 #undef RS6000_BUILTIN_S
12028 #undef RS6000_BUILTIN_X
12030 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12031 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12032 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12033 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12034 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12035 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12036 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12037 { MASK, ICODE, NAME, ENUM },
12039 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12040 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12041 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12042 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12044 static const struct builtin_description bdesc_htm[] =
12046 #include "rs6000-builtin.def"
12049 #undef RS6000_BUILTIN_1
12050 #undef RS6000_BUILTIN_2
12051 #undef RS6000_BUILTIN_3
12052 #undef RS6000_BUILTIN_A
12053 #undef RS6000_BUILTIN_D
12054 #undef RS6000_BUILTIN_E
12055 #undef RS6000_BUILTIN_H
12056 #undef RS6000_BUILTIN_P
12057 #undef RS6000_BUILTIN_Q
12058 #undef RS6000_BUILTIN_S
12060 /* Return true if a builtin function is overloaded. */
12061 bool
12062 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12064 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12067 /* Expand an expression EXP that calls a builtin without arguments. */
12068 static rtx
12069 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12071 rtx pat;
12072 machine_mode tmode = insn_data[icode].operand[0].mode;
12074 if (icode == CODE_FOR_nothing)
12075 /* Builtin not supported on this processor. */
12076 return 0;
12078 if (target == 0
12079 || GET_MODE (target) != tmode
12080 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12081 target = gen_reg_rtx (tmode);
12083 pat = GEN_FCN (icode) (target);
12084 if (! pat)
12085 return 0;
12086 emit_insn (pat);
12088 return target;
12092 static rtx
12093 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12095 rtx pat;
12096 tree arg0 = CALL_EXPR_ARG (exp, 0);
12097 tree arg1 = CALL_EXPR_ARG (exp, 1);
12098 rtx op0 = expand_normal (arg0);
12099 rtx op1 = expand_normal (arg1);
12100 machine_mode mode0 = insn_data[icode].operand[0].mode;
12101 machine_mode mode1 = insn_data[icode].operand[1].mode;
12103 if (icode == CODE_FOR_nothing)
12104 /* Builtin not supported on this processor. */
12105 return 0;
12107 /* If we got invalid arguments, bail out before generating bad rtl. */
12108 if (arg0 == error_mark_node || arg1 == error_mark_node)
12109 return const0_rtx;
12111 if (GET_CODE (op0) != CONST_INT
12112 || INTVAL (op0) > 255
12113 || INTVAL (op0) < 0)
12115 error ("argument 1 must be an 8-bit field value");
12116 return const0_rtx;
12119 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12120 op0 = copy_to_mode_reg (mode0, op0);
12122 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12123 op1 = copy_to_mode_reg (mode1, op1);
12125 pat = GEN_FCN (icode) (op0, op1);
12126 if (! pat)
12127 return const0_rtx;
12128 emit_insn (pat);
12130 return NULL_RTX;
12134 static rtx
12135 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12137 rtx pat;
12138 tree arg0 = CALL_EXPR_ARG (exp, 0);
12139 rtx op0 = expand_normal (arg0);
12140 machine_mode tmode = insn_data[icode].operand[0].mode;
12141 machine_mode mode0 = insn_data[icode].operand[1].mode;
12143 if (icode == CODE_FOR_nothing)
12144 /* Builtin not supported on this processor. */
12145 return 0;
12147 /* If we got invalid arguments, bail out before generating bad rtl. */
12148 if (arg0 == error_mark_node)
12149 return const0_rtx;
12151 if (icode == CODE_FOR_altivec_vspltisb
12152 || icode == CODE_FOR_altivec_vspltish
12153 || icode == CODE_FOR_altivec_vspltisw
12154 || icode == CODE_FOR_spe_evsplatfi
12155 || icode == CODE_FOR_spe_evsplati)
12157 /* Only allow 5-bit *signed* literals. */
12158 if (GET_CODE (op0) != CONST_INT
12159 || INTVAL (op0) > 15
12160 || INTVAL (op0) < -16)
12162 error ("argument 1 must be a 5-bit signed literal");
12163 return const0_rtx;
12167 if (target == 0
12168 || GET_MODE (target) != tmode
12169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12170 target = gen_reg_rtx (tmode);
12172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12173 op0 = copy_to_mode_reg (mode0, op0);
12175 pat = GEN_FCN (icode) (target, op0);
12176 if (! pat)
12177 return 0;
12178 emit_insn (pat);
12180 return target;
12183 static rtx
12184 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12186 rtx pat, scratch1, scratch2;
12187 tree arg0 = CALL_EXPR_ARG (exp, 0);
12188 rtx op0 = expand_normal (arg0);
12189 machine_mode tmode = insn_data[icode].operand[0].mode;
12190 machine_mode mode0 = insn_data[icode].operand[1].mode;
12192 /* If we have invalid arguments, bail out before generating bad rtl. */
12193 if (arg0 == error_mark_node)
12194 return const0_rtx;
12196 if (target == 0
12197 || GET_MODE (target) != tmode
12198 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12199 target = gen_reg_rtx (tmode);
12201 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12202 op0 = copy_to_mode_reg (mode0, op0);
12204 scratch1 = gen_reg_rtx (mode0);
12205 scratch2 = gen_reg_rtx (mode0);
12207 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12208 if (! pat)
12209 return 0;
12210 emit_insn (pat);
12212 return target;
12215 static rtx
12216 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12218 rtx pat;
12219 tree arg0 = CALL_EXPR_ARG (exp, 0);
12220 tree arg1 = CALL_EXPR_ARG (exp, 1);
12221 rtx op0 = expand_normal (arg0);
12222 rtx op1 = expand_normal (arg1);
12223 machine_mode tmode = insn_data[icode].operand[0].mode;
12224 machine_mode mode0 = insn_data[icode].operand[1].mode;
12225 machine_mode mode1 = insn_data[icode].operand[2].mode;
12227 if (icode == CODE_FOR_nothing)
12228 /* Builtin not supported on this processor. */
12229 return 0;
12231 /* If we got invalid arguments, bail out before generating bad rtl. */
12232 if (arg0 == error_mark_node || arg1 == error_mark_node)
12233 return const0_rtx;
12235 if (icode == CODE_FOR_altivec_vcfux
12236 || icode == CODE_FOR_altivec_vcfsx
12237 || icode == CODE_FOR_altivec_vctsxs
12238 || icode == CODE_FOR_altivec_vctuxs
12239 || icode == CODE_FOR_altivec_vspltb
12240 || icode == CODE_FOR_altivec_vsplth
12241 || icode == CODE_FOR_altivec_vspltw
12242 || icode == CODE_FOR_spe_evaddiw
12243 || icode == CODE_FOR_spe_evldd
12244 || icode == CODE_FOR_spe_evldh
12245 || icode == CODE_FOR_spe_evldw
12246 || icode == CODE_FOR_spe_evlhhesplat
12247 || icode == CODE_FOR_spe_evlhhossplat
12248 || icode == CODE_FOR_spe_evlhhousplat
12249 || icode == CODE_FOR_spe_evlwhe
12250 || icode == CODE_FOR_spe_evlwhos
12251 || icode == CODE_FOR_spe_evlwhou
12252 || icode == CODE_FOR_spe_evlwhsplat
12253 || icode == CODE_FOR_spe_evlwwsplat
12254 || icode == CODE_FOR_spe_evrlwi
12255 || icode == CODE_FOR_spe_evslwi
12256 || icode == CODE_FOR_spe_evsrwis
12257 || icode == CODE_FOR_spe_evsubifw
12258 || icode == CODE_FOR_spe_evsrwiu)
12260 /* Only allow 5-bit unsigned literals. */
12261 STRIP_NOPS (arg1);
12262 if (TREE_CODE (arg1) != INTEGER_CST
12263 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12265 error ("argument 2 must be a 5-bit unsigned literal");
12266 return const0_rtx;
12270 if (target == 0
12271 || GET_MODE (target) != tmode
12272 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12273 target = gen_reg_rtx (tmode);
12275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12276 op0 = copy_to_mode_reg (mode0, op0);
12277 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12278 op1 = copy_to_mode_reg (mode1, op1);
12280 pat = GEN_FCN (icode) (target, op0, op1);
12281 if (! pat)
12282 return 0;
12283 emit_insn (pat);
12285 return target;
12288 static rtx
12289 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12291 rtx pat, scratch;
12292 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12293 tree arg0 = CALL_EXPR_ARG (exp, 1);
12294 tree arg1 = CALL_EXPR_ARG (exp, 2);
12295 rtx op0 = expand_normal (arg0);
12296 rtx op1 = expand_normal (arg1);
12297 machine_mode tmode = SImode;
12298 machine_mode mode0 = insn_data[icode].operand[1].mode;
12299 machine_mode mode1 = insn_data[icode].operand[2].mode;
12300 int cr6_form_int;
12302 if (TREE_CODE (cr6_form) != INTEGER_CST)
12304 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12305 return const0_rtx;
12307 else
12308 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12310 gcc_assert (mode0 == mode1);
12312 /* If we have invalid arguments, bail out before generating bad rtl. */
12313 if (arg0 == error_mark_node || arg1 == error_mark_node)
12314 return const0_rtx;
12316 if (target == 0
12317 || GET_MODE (target) != tmode
12318 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12319 target = gen_reg_rtx (tmode);
12321 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12322 op0 = copy_to_mode_reg (mode0, op0);
12323 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12324 op1 = copy_to_mode_reg (mode1, op1);
12326 scratch = gen_reg_rtx (mode0);
12328 pat = GEN_FCN (icode) (scratch, op0, op1);
12329 if (! pat)
12330 return 0;
12331 emit_insn (pat);
12333 /* The vec_any* and vec_all* predicates use the same opcodes for two
12334 different operations, but the bits in CR6 will be different
12335 depending on what information we want. So we have to play tricks
12336 with CR6 to get the right bits out.
12338 If you think this is disgusting, look at the specs for the
12339 AltiVec predicates. */
12341 switch (cr6_form_int)
12343 case 0:
12344 emit_insn (gen_cr6_test_for_zero (target));
12345 break;
12346 case 1:
12347 emit_insn (gen_cr6_test_for_zero_reverse (target));
12348 break;
12349 case 2:
12350 emit_insn (gen_cr6_test_for_lt (target));
12351 break;
12352 case 3:
12353 emit_insn (gen_cr6_test_for_lt_reverse (target));
12354 break;
12355 default:
12356 error ("argument 1 of __builtin_altivec_predicate is out of range");
12357 break;
12360 return target;
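/* User-level view of the CR6 handling above: the all/any intrinsics
   expand to the same compare instruction and differ only in which CR6
   test (the four switch cases) is emitted afterwards.  Sketch assuming
   <altivec.h>.  */
#if 0
#include <altivec.h>
int all_eq (vector signed int a, vector signed int b)
{ return vec_all_eq (a, b); }   /* tests the "all elements true" bit */
int any_eq (vector signed int a, vector signed int b)
{ return vec_any_eq (a, b); }   /* tests the inverse of "none true" */
#endif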
12363 static rtx
12364 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12366 rtx pat, addr;
12367 tree arg0 = CALL_EXPR_ARG (exp, 0);
12368 tree arg1 = CALL_EXPR_ARG (exp, 1);
12369 machine_mode tmode = insn_data[icode].operand[0].mode;
12370 machine_mode mode0 = Pmode;
12371 machine_mode mode1 = Pmode;
12372 rtx op0 = expand_normal (arg0);
12373 rtx op1 = expand_normal (arg1);
12375 if (icode == CODE_FOR_nothing)
12376 /* Builtin not supported on this processor. */
12377 return 0;
12379 /* If we got invalid arguments, bail out before generating bad rtl. */
12380 if (arg0 == error_mark_node || arg1 == error_mark_node)
12381 return const0_rtx;
12383 if (target == 0
12384 || GET_MODE (target) != tmode
12385 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12386 target = gen_reg_rtx (tmode);
12388 op1 = copy_to_mode_reg (mode1, op1);
12390 if (op0 == const0_rtx)
12392 addr = gen_rtx_MEM (tmode, op1);
12394 else
12396 op0 = copy_to_mode_reg (mode0, op0);
12397 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12400 pat = GEN_FCN (icode) (target, addr);
12402 if (! pat)
12403 return 0;
12404 emit_insn (pat);
12406 return target;
12409 /* Return a constant vector for use as a little-endian permute control vector
12410 to reverse the order of elements of the given vector mode. */
12411 static rtx
12412 swap_selector_for_mode (machine_mode mode)
12414 /* These are little endian vectors, so their elements are reversed
12415 from what you would normally expect for a permute control vector. */
12416 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12417 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12418 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12419 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12420 unsigned int *swaparray, i;
12421 rtx perm[16];
12423 switch (mode)
12425 case V2DFmode:
12426 case V2DImode:
12427 swaparray = swap2;
12428 break;
12429 case V4SFmode:
12430 case V4SImode:
12431 swaparray = swap4;
12432 break;
12433 case V8HImode:
12434 swaparray = swap8;
12435 break;
12436 case V16QImode:
12437 swaparray = swap16;
12438 break;
12439 default:
12440 gcc_unreachable ();
12443 for (i = 0; i < 16; ++i)
12444 perm[i] = GEN_INT (swaparray[i]);
12446 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
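/* Net effect of the selectors above on a little-endian target, modelled
   in scalar C for the V4SI case: the resulting vperm reverses element
   order.  */
#if 0
static void
reverse_v4si (unsigned int dst[4], const unsigned int src[4])
{
  int i;
  for (i = 0; i < 4; i++)
    dst[i] = src[3 - i];        /* what vperm with swap4 achieves */
}
#endif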
12449 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12450 with -maltivec=be specified. Issue the load followed by an element-reversing
12451 permute. */
12452 void
12453 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12455 rtx tmp = gen_reg_rtx (mode);
12456 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12457 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12458 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12459 rtx sel = swap_selector_for_mode (mode);
12460 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12462 gcc_assert (REG_P (op0));
12463 emit_insn (par);
12464 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12467 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12468 with -maltivec=be specified. Issue the store preceded by an element-reversing
12469 permute. */
12470 void
12471 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12473 rtx tmp = gen_reg_rtx (mode);
12474 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12475 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12476 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12477 rtx sel = swap_selector_for_mode (mode);
12478 rtx vperm;
12480 gcc_assert (REG_P (op1));
12481 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12482 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12483 emit_insn (par);
12486 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12487 specified. Issue the store preceded by an element-reversing permute. */
12488 void
12489 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12491 machine_mode inner_mode = GET_MODE_INNER (mode);
12492 rtx tmp = gen_reg_rtx (mode);
12493 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12494 rtx sel = swap_selector_for_mode (mode);
12495 rtx vperm;
12497 gcc_assert (REG_P (op1));
12498 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12499 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12500 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12503 static rtx
12504 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12506 rtx pat, addr;
12507 tree arg0 = CALL_EXPR_ARG (exp, 0);
12508 tree arg1 = CALL_EXPR_ARG (exp, 1);
12509 machine_mode tmode = insn_data[icode].operand[0].mode;
12510 machine_mode mode0 = Pmode;
12511 machine_mode mode1 = Pmode;
12512 rtx op0 = expand_normal (arg0);
12513 rtx op1 = expand_normal (arg1);
12515 if (icode == CODE_FOR_nothing)
12516 /* Builtin not supported on this processor. */
12517 return 0;
12519 /* If we got invalid arguments, bail out before generating bad rtl. */
12520 if (arg0 == error_mark_node || arg1 == error_mark_node)
12521 return const0_rtx;
12523 if (target == 0
12524 || GET_MODE (target) != tmode
12525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12526 target = gen_reg_rtx (tmode);
12528 op1 = copy_to_mode_reg (mode1, op1);
12530 if (op0 == const0_rtx)
12532 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12534 else
12536 op0 = copy_to_mode_reg (mode0, op0);
12537 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12540 pat = GEN_FCN (icode) (target, addr);
12542 if (! pat)
12543 return 0;
12544 emit_insn (pat);
12546 return target;
12549 static rtx
12550 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12552 tree arg0 = CALL_EXPR_ARG (exp, 0);
12553 tree arg1 = CALL_EXPR_ARG (exp, 1);
12554 tree arg2 = CALL_EXPR_ARG (exp, 2);
12555 rtx op0 = expand_normal (arg0);
12556 rtx op1 = expand_normal (arg1);
12557 rtx op2 = expand_normal (arg2);
12558 rtx pat;
12559 machine_mode mode0 = insn_data[icode].operand[0].mode;
12560 machine_mode mode1 = insn_data[icode].operand[1].mode;
12561 machine_mode mode2 = insn_data[icode].operand[2].mode;
12563 /* Invalid arguments. Bail before doing anything stoopid! */
12564 if (arg0 == error_mark_node
12565 || arg1 == error_mark_node
12566 || arg2 == error_mark_node)
12567 return const0_rtx;
12569 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12570 op0 = copy_to_mode_reg (mode2, op0);
12571 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12572 op1 = copy_to_mode_reg (mode0, op1);
12573 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12574 op2 = copy_to_mode_reg (mode1, op2);
12576 pat = GEN_FCN (icode) (op1, op2, op0);
12577 if (pat)
12578 emit_insn (pat);
12579 return NULL_RTX;
12582 static rtx
12583 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12585 tree arg0 = CALL_EXPR_ARG (exp, 0);
12586 tree arg1 = CALL_EXPR_ARG (exp, 1);
12587 tree arg2 = CALL_EXPR_ARG (exp, 2);
12588 rtx op0 = expand_normal (arg0);
12589 rtx op1 = expand_normal (arg1);
12590 rtx op2 = expand_normal (arg2);
12591 rtx pat, addr;
12592 machine_mode tmode = insn_data[icode].operand[0].mode;
12593 machine_mode mode1 = Pmode;
12594 machine_mode mode2 = Pmode;
12596 /* Invalid arguments. Bail before doing anything stoopid! */
12597 if (arg0 == error_mark_node
12598 || arg1 == error_mark_node
12599 || arg2 == error_mark_node)
12600 return const0_rtx;
12602 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12603 op0 = copy_to_mode_reg (tmode, op0);
12605 op2 = copy_to_mode_reg (mode2, op2);
12607 if (op1 == const0_rtx)
12609 addr = gen_rtx_MEM (tmode, op2);
12611 else
12613 op1 = copy_to_mode_reg (mode1, op1);
12614 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12617 pat = GEN_FCN (icode) (addr, op0);
12618 if (pat)
12619 emit_insn (pat);
12620 return NULL_RTX;
12623 static rtx
12624 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12626 tree arg0 = CALL_EXPR_ARG (exp, 0);
12627 tree arg1 = CALL_EXPR_ARG (exp, 1);
12628 tree arg2 = CALL_EXPR_ARG (exp, 2);
12629 rtx op0 = expand_normal (arg0);
12630 rtx op1 = expand_normal (arg1);
12631 rtx op2 = expand_normal (arg2);
12632 rtx pat, addr;
12633 machine_mode tmode = insn_data[icode].operand[0].mode;
12634 machine_mode smode = insn_data[icode].operand[1].mode;
12635 machine_mode mode1 = Pmode;
12636 machine_mode mode2 = Pmode;
12638 /* Invalid arguments. Bail out before doing anything stupid! */
12639 if (arg0 == error_mark_node
12640 || arg1 == error_mark_node
12641 || arg2 == error_mark_node)
12642 return const0_rtx;
12644 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12645 op0 = copy_to_mode_reg (smode, op0);
12647 op2 = copy_to_mode_reg (mode2, op2);
12649 if (op1 == const0_rtx)
12651 addr = gen_rtx_MEM (tmode, op2);
12653 else
12655 op1 = copy_to_mode_reg (mode1, op1);
12656 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12659 pat = GEN_FCN (icode) (addr, op0);
12660 if (pat)
12661 emit_insn (pat);
12662 return NULL_RTX;
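/* A user-level sketch of the store path, assuming vec_st maps to
   ALTIVEC_BUILTIN_STVX and reaches this expander (see the STVX cases
   further below):

     vector int v;
     vector int *p;
     vec_st (v, 0, p);     // offset 0: store to MEM (p)
     vec_st (v, 16, p);    // nonzero: store to MEM (p + 16), indexed

   Both forms emit one stvx-style pattern with the address formed
   above.  */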
12665 /* Return the appropriate SPR number associated with the given builtin. */
12666 static inline HOST_WIDE_INT
12667 htm_spr_num (enum rs6000_builtins code)
12669 if (code == HTM_BUILTIN_GET_TFHAR
12670 || code == HTM_BUILTIN_SET_TFHAR)
12671 return TFHAR_SPR;
12672 else if (code == HTM_BUILTIN_GET_TFIAR
12673 || code == HTM_BUILTIN_SET_TFIAR)
12674 return TFIAR_SPR;
12675 else if (code == HTM_BUILTIN_GET_TEXASR
12676 || code == HTM_BUILTIN_SET_TEXASR)
12677 return TEXASR_SPR;
12678 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12679 || code == HTM_BUILTIN_SET_TEXASRU);
12680 return TEXASRU_SPR;
12683 /* Return the appropriate SPR regno associated with the given builtin. */
12684 static inline HOST_WIDE_INT
12685 htm_spr_regno (enum rs6000_builtins code)
12687 if (code == HTM_BUILTIN_GET_TFHAR
12688 || code == HTM_BUILTIN_SET_TFHAR)
12689 return TFHAR_REGNO;
12690 else if (code == HTM_BUILTIN_GET_TFIAR
12691 || code == HTM_BUILTIN_SET_TFIAR)
12692 return TFIAR_REGNO;
12693 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12694 || code == HTM_BUILTIN_SET_TEXASR
12695 || code == HTM_BUILTIN_GET_TEXASRU
12696 || code == HTM_BUILTIN_SET_TEXASRU);
12697 return TEXASR_REGNO;
12700 /* Return the correct ICODE value depending on whether we are
12701 setting or reading the HTM SPRs. */
12702 static inline enum insn_code
12703 rs6000_htm_spr_icode (bool nonvoid)
12705 if (nonvoid)
12706 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12707 else
12708 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
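/* A minimal sketch of the resulting mapping, assuming a 64-bit target
   compiled with -mhtm:

     unsigned long t = __builtin_get_texasr ();  // nonvoid: htm_mfspr_di
     __builtin_set_texasr (0);                   // void:    htm_mtspr_di

   The "get" builtins have a non-void result and read the SPR; the
   "set" builtins return void and write it.  */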
12711 /* Expand the HTM builtin in EXP and store the result in TARGET.
12712 Store true in *EXPANDEDP if we found a builtin to expand. */
12713 static rtx
12714 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12716 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12717 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12718 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12719 const struct builtin_description *d;
12720 size_t i;
12722 *expandedp = false;
12724 /* Expand the HTM builtins. */
12725 d = bdesc_htm;
12726 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12727 if (d->code == fcode)
12729 rtx op[MAX_HTM_OPERANDS], pat;
12730 int nopnds = 0;
12731 tree arg;
12732 call_expr_arg_iterator iter;
12733 unsigned attr = rs6000_builtin_info[fcode].attr;
12734 enum insn_code icode = d->icode;
12736 if (attr & RS6000_BTC_SPR)
12737 icode = rs6000_htm_spr_icode (nonvoid);
12739 if (nonvoid)
12741 machine_mode tmode = insn_data[icode].operand[0].mode;
12742 if (!target
12743 || GET_MODE (target) != tmode
12744 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12745 target = gen_reg_rtx (tmode);
12746 op[nopnds++] = target;
12749 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12751 const struct insn_operand_data *insn_op;
12753 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12754 return NULL_RTX;
12756 insn_op = &insn_data[icode].operand[nopnds];
12758 op[nopnds] = expand_normal (arg);
12760 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12762 if (!strcmp (insn_op->constraint, "n"))
12764 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12765 if (!CONST_INT_P (op[nopnds]))
12766 error ("argument %d must be an unsigned literal", arg_num);
12767 else
12768 error ("argument %d is an unsigned literal that is "
12769 "out of range", arg_num);
12770 return const0_rtx;
12772 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12775 nopnds++;
12778 /* Handle the builtins for extended mnemonics. These accept
12779 no arguments, but map to builtins that take arguments. */
12780 switch (fcode)
12782 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12783 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12784 op[nopnds++] = GEN_INT (1);
12785 #ifdef ENABLE_CHECKING
12786 attr |= RS6000_BTC_UNARY;
12787 #endif
12788 break;
12789 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12790 op[nopnds++] = GEN_INT (0);
12791 #ifdef ENABLE_CHECKING
12792 attr |= RS6000_BTC_UNARY;
12793 #endif
12794 break;
12795 default:
12796 break;
12799 /* If this builtin accesses SPRs, then pass in the appropriate
12800 SPR number and SPR regno as the last two operands. */
12801 if (attr & RS6000_BTC_SPR)
12803 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12804 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12807 #ifdef ENABLE_CHECKING
12808 int expected_nopnds = 0;
12809 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12810 expected_nopnds = 1;
12811 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12812 expected_nopnds = 2;
12813 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12814 expected_nopnds = 3;
12815 if (!(attr & RS6000_BTC_VOID))
12816 expected_nopnds += 1;
12817 if (attr & RS6000_BTC_SPR)
12818 expected_nopnds += 2;
12820 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12821 #endif
12823 switch (nopnds)
12825 case 1:
12826 pat = GEN_FCN (icode) (op[0]);
12827 break;
12828 case 2:
12829 pat = GEN_FCN (icode) (op[0], op[1]);
12830 break;
12831 case 3:
12832 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12833 break;
12834 case 4:
12835 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12836 break;
12837 default:
12838 gcc_unreachable ();
12840 if (!pat)
12841 return NULL_RTX;
12842 emit_insn (pat);
12844 *expandedp = true;
12845 if (nonvoid)
12846 return target;
12847 return const0_rtx;
12850 return NULL_RTX;
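/* Usage sketch for the extended-mnemonic aliases handled above
   (assuming -mhtm); each takes no source-level arguments, and the
   constant operand is supplied during expansion:

     __builtin_tendall ();   // roughly __builtin_tend (1)
     __builtin_tresume ();   // roughly __builtin_tsr (1)
     __builtin_tsuspend ();  // roughly __builtin_tsr (0)  */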
12853 static rtx
12854 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12856 rtx pat;
12857 tree arg0 = CALL_EXPR_ARG (exp, 0);
12858 tree arg1 = CALL_EXPR_ARG (exp, 1);
12859 tree arg2 = CALL_EXPR_ARG (exp, 2);
12860 rtx op0 = expand_normal (arg0);
12861 rtx op1 = expand_normal (arg1);
12862 rtx op2 = expand_normal (arg2);
12863 machine_mode tmode = insn_data[icode].operand[0].mode;
12864 machine_mode mode0 = insn_data[icode].operand[1].mode;
12865 machine_mode mode1 = insn_data[icode].operand[2].mode;
12866 machine_mode mode2 = insn_data[icode].operand[3].mode;
12868 if (icode == CODE_FOR_nothing)
12869 /* Builtin not supported on this processor. */
12870 return 0;
12872 /* If we got invalid arguments, bail out before generating bad rtl. */
12873 if (arg0 == error_mark_node
12874 || arg1 == error_mark_node
12875 || arg2 == error_mark_node)
12876 return const0_rtx;
12878 /* Check and prepare argument depending on the instruction code.
12880 Note that a switch statement instead of the sequence of tests
12881 would be incorrect as many of the CODE_FOR values could be
12882 CODE_FOR_nothing and that would yield multiple alternatives
12883 with identical values. We'd never reach here at runtime in
12884 this case. */
12885 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12886 || icode == CODE_FOR_altivec_vsldoi_v4si
12887 || icode == CODE_FOR_altivec_vsldoi_v8hi
12888 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12890 /* Only allow 4-bit unsigned literals. */
12891 STRIP_NOPS (arg2);
12892 if (TREE_CODE (arg2) != INTEGER_CST
12893 || TREE_INT_CST_LOW (arg2) & ~0xf)
12895 error ("argument 3 must be a 4-bit unsigned literal");
12896 return const0_rtx;
12899 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12900 || icode == CODE_FOR_vsx_xxpermdi_v2di
12901 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12902 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12903 || icode == CODE_FOR_vsx_xxsldwi_v4si
12904 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12905 || icode == CODE_FOR_vsx_xxsldwi_v2di
12906 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12908 /* Only allow 2-bit unsigned literals. */
12909 STRIP_NOPS (arg2);
12910 if (TREE_CODE (arg2) != INTEGER_CST
12911 || TREE_INT_CST_LOW (arg2) & ~0x3)
12913 error ("argument 3 must be a 2-bit unsigned literal");
12914 return const0_rtx;
12917 else if (icode == CODE_FOR_vsx_set_v2df
12918 || icode == CODE_FOR_vsx_set_v2di
12919 || icode == CODE_FOR_bcdadd
12920 || icode == CODE_FOR_bcdadd_lt
12921 || icode == CODE_FOR_bcdadd_eq
12922 || icode == CODE_FOR_bcdadd_gt
12923 || icode == CODE_FOR_bcdsub
12924 || icode == CODE_FOR_bcdsub_lt
12925 || icode == CODE_FOR_bcdsub_eq
12926 || icode == CODE_FOR_bcdsub_gt)
12928 /* Only allow 1-bit unsigned literals. */
12929 STRIP_NOPS (arg2);
12930 if (TREE_CODE (arg2) != INTEGER_CST
12931 || TREE_INT_CST_LOW (arg2) & ~0x1)
12933 error ("argument 3 must be a 1-bit unsigned literal");
12934 return const0_rtx;
12937 else if (icode == CODE_FOR_dfp_ddedpd_dd
12938 || icode == CODE_FOR_dfp_ddedpd_td)
12940 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12941 STRIP_NOPS (arg0);
12942 if (TREE_CODE (arg0) != INTEGER_CST
12943 || TREE_INT_CST_LOW (arg0) & ~0x3)
12945 error ("argument 1 must be 0 or 2");
12946 return const0_rtx;
12949 else if (icode == CODE_FOR_dfp_denbcd_dd
12950 || icode == CODE_FOR_dfp_denbcd_td)
12952 /* Only allow 1-bit unsigned literals. */
12953 STRIP_NOPS (arg0);
12954 if (TREE_CODE (arg0) != INTEGER_CST
12955 || TREE_INT_CST_LOW (arg0) & ~0x1)
12957 error ("argument 1 must be a 1-bit unsigned literal");
12958 return const0_rtx;
12961 else if (icode == CODE_FOR_dfp_dscli_dd
12962 || icode == CODE_FOR_dfp_dscli_td
12963 || icode == CODE_FOR_dfp_dscri_dd
12964 || icode == CODE_FOR_dfp_dscri_td)
12966 /* Only allow 6-bit unsigned literals. */
12967 STRIP_NOPS (arg1);
12968 if (TREE_CODE (arg1) != INTEGER_CST
12969 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12971 error ("argument 2 must be a 6-bit unsigned literal");
12972 return const0_rtx;
12975 else if (icode == CODE_FOR_crypto_vshasigmaw
12976 || icode == CODE_FOR_crypto_vshasigmad)
12978 /* Check whether the 2nd and 3rd arguments are integer constants and in
12979 range and prepare arguments. */
12980 STRIP_NOPS (arg1);
12981 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12983 error ("argument 2 must be 0 or 1");
12984 return const0_rtx;
12987 STRIP_NOPS (arg2);
12988 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12990 error ("argument 3 must be in the range 0..15");
12991 return const0_rtx;
12995 if (target == 0
12996 || GET_MODE (target) != tmode
12997 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12998 target = gen_reg_rtx (tmode);
13000 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13001 op0 = copy_to_mode_reg (mode0, op0);
13002 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13003 op1 = copy_to_mode_reg (mode1, op1);
13004 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13005 op2 = copy_to_mode_reg (mode2, op2);
13007 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13008 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13009 else
13010 pat = GEN_FCN (icode) (target, op0, op1, op2);
13011 if (! pat)
13012 return 0;
13013 emit_insn (pat);
13015 return target;
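/* For instance, the vsldoi-based intrinsics funnel through the 4-bit
   literal check above (a sketch, assuming -maltivec):

     vector int r1 = vec_sld (a, b, 3);   // OK: 3 fits in 4 bits
     vector int r2 = vec_sld (a, b, 16);  // error: argument 3 must be
                                          //   a 4-bit unsigned literal

   The shift count is encoded directly in the instruction, so it must
   be a compile-time constant.  */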
13018 /* Expand the lvx builtins. */
13019 static rtx
13020 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13022 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13023 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13024 tree arg0;
13025 machine_mode tmode, mode0;
13026 rtx pat, op0;
13027 enum insn_code icode;
13029 switch (fcode)
13031 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13032 icode = CODE_FOR_vector_altivec_load_v16qi;
13033 break;
13034 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13035 icode = CODE_FOR_vector_altivec_load_v8hi;
13036 break;
13037 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13038 icode = CODE_FOR_vector_altivec_load_v4si;
13039 break;
13040 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13041 icode = CODE_FOR_vector_altivec_load_v4sf;
13042 break;
13043 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13044 icode = CODE_FOR_vector_altivec_load_v2df;
13045 break;
13046 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
13047 icode = CODE_FOR_vector_altivec_load_v2di;
break;
13048 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13049 icode = CODE_FOR_vector_altivec_load_v1ti;
13050 break;
13051 default:
13052 *expandedp = false;
13053 return NULL_RTX;
13056 *expandedp = true;
13058 arg0 = CALL_EXPR_ARG (exp, 0);
13059 op0 = expand_normal (arg0);
13060 tmode = insn_data[icode].operand[0].mode;
13061 mode0 = insn_data[icode].operand[1].mode;
13063 if (target == 0
13064 || GET_MODE (target) != tmode
13065 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13066 target = gen_reg_rtx (tmode);
13068 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13069 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13071 pat = GEN_FCN (icode) (target, op0);
13072 if (! pat)
13073 return 0;
13074 emit_insn (pat);
13075 return target;
13078 /* Expand the stvx builtins. */
13079 static rtx
13080 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13081 bool *expandedp)
13083 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13084 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13085 tree arg0, arg1;
13086 machine_mode mode0, mode1;
13087 rtx pat, op0, op1;
13088 enum insn_code icode;
13090 switch (fcode)
13092 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13093 icode = CODE_FOR_vector_altivec_store_v16qi;
13094 break;
13095 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13096 icode = CODE_FOR_vector_altivec_store_v8hi;
13097 break;
13098 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13099 icode = CODE_FOR_vector_altivec_store_v4si;
13100 break;
13101 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13102 icode = CODE_FOR_vector_altivec_store_v4sf;
13103 break;
13104 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13105 icode = CODE_FOR_vector_altivec_store_v2df;
13106 break;
13107 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13108 icode = CODE_FOR_vector_altivec_store_v2di;
break;
13109 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13110 icode = CODE_FOR_vector_altivec_store_v1ti;
13111 break;
13112 default:
13113 *expandedp = false;
13114 return NULL_RTX;
13117 arg0 = CALL_EXPR_ARG (exp, 0);
13118 arg1 = CALL_EXPR_ARG (exp, 1);
13119 op0 = expand_normal (arg0);
13120 op1 = expand_normal (arg1);
13121 mode0 = insn_data[icode].operand[0].mode;
13122 mode1 = insn_data[icode].operand[1].mode;
13124 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13125 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13126 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13127 op1 = copy_to_mode_reg (mode1, op1);
13129 pat = GEN_FCN (icode) (op0, op1);
13130 if (pat)
13131 emit_insn (pat);
13133 *expandedp = true;
13134 return NULL_RTX;
13137 /* Expand the dst builtins. */
13138 static rtx
13139 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13140 bool *expandedp)
13142 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13143 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13144 tree arg0, arg1, arg2;
13145 machine_mode mode0, mode1;
13146 rtx pat, op0, op1, op2;
13147 const struct builtin_description *d;
13148 size_t i;
13150 *expandedp = false;
13152 /* Handle DST variants. */
13153 d = bdesc_dst;
13154 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13155 if (d->code == fcode)
13157 arg0 = CALL_EXPR_ARG (exp, 0);
13158 arg1 = CALL_EXPR_ARG (exp, 1);
13159 arg2 = CALL_EXPR_ARG (exp, 2);
13160 op0 = expand_normal (arg0);
13161 op1 = expand_normal (arg1);
13162 op2 = expand_normal (arg2);
13163 mode0 = insn_data[d->icode].operand[0].mode;
13164 mode1 = insn_data[d->icode].operand[1].mode;
13166 /* Invalid arguments, bail out before generating bad rtl. */
13167 if (arg0 == error_mark_node
13168 || arg1 == error_mark_node
13169 || arg2 == error_mark_node)
13170 return const0_rtx;
13172 *expandedp = true;
13173 STRIP_NOPS (arg2);
13174 if (TREE_CODE (arg2) != INTEGER_CST
13175 || TREE_INT_CST_LOW (arg2) & ~0x3)
13177 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13178 return const0_rtx;
13181 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13182 op0 = copy_to_mode_reg (Pmode, op0);
13183 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13184 op1 = copy_to_mode_reg (mode1, op1);
13186 pat = GEN_FCN (d->icode) (op0, op1, op2);
13187 if (pat != 0)
13188 emit_insn (pat);
13190 return NULL_RTX;
13193 return NULL_RTX;
13196 /* Expand vec_init builtin. */
13197 static rtx
13198 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13200 machine_mode tmode = TYPE_MODE (type);
13201 machine_mode inner_mode = GET_MODE_INNER (tmode);
13202 int i, n_elt = GET_MODE_NUNITS (tmode);
13204 gcc_assert (VECTOR_MODE_P (tmode));
13205 gcc_assert (n_elt == call_expr_nargs (exp));
13207 if (!target || !register_operand (target, tmode))
13208 target = gen_reg_rtx (tmode);
13210 /* If we have a vector composed of a single element, such as V1TImode, do
13211 the initialization directly. */
13212 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13214 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13215 emit_move_insn (target, gen_lowpart (tmode, x));
13217 else
13219 rtvec v = rtvec_alloc (n_elt);
13221 for (i = 0; i < n_elt; ++i)
13223 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13224 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13227 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13230 return target;
13233 /* Return the integer constant in ARG. Constrain it to be in the range
13234 of the subparts of VEC_TYPE; issue an error if not. */
13236 static int
13237 get_element_number (tree vec_type, tree arg)
13239 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13241 if (!tree_fits_uhwi_p (arg)
13242 || (elt = tree_to_uhwi (arg), elt > max))
13244 error ("selector must be an integer constant in the range 0..%wi", max);
13245 return 0;
13248 return elt;
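/* E.g. for a V4SI vector type TYPE_VECTOR_SUBPARTS is 4, so MAX is 3
   and only selectors 0..3 are accepted; a constant selector of 5 is
   diagnosed as "selector must be an integer constant in the range
   0..3" (a sketch of the intent, not an exhaustive list of callers).  */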
13251 /* Expand vec_set builtin. */
13252 static rtx
13253 altivec_expand_vec_set_builtin (tree exp)
13255 machine_mode tmode, mode1;
13256 tree arg0, arg1, arg2;
13257 int elt;
13258 rtx op0, op1;
13260 arg0 = CALL_EXPR_ARG (exp, 0);
13261 arg1 = CALL_EXPR_ARG (exp, 1);
13262 arg2 = CALL_EXPR_ARG (exp, 2);
13264 tmode = TYPE_MODE (TREE_TYPE (arg0));
13265 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13266 gcc_assert (VECTOR_MODE_P (tmode));
13268 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13269 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13270 elt = get_element_number (TREE_TYPE (arg0), arg2);
13272 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13273 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13275 op0 = force_reg (tmode, op0);
13276 op1 = force_reg (mode1, op1);
13278 rs6000_expand_vector_set (op0, op1, elt);
13280 return op0;
13283 /* Expand vec_ext builtin. */
13284 static rtx
13285 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13287 machine_mode tmode, mode0;
13288 tree arg0, arg1;
13289 int elt;
13290 rtx op0;
13292 arg0 = CALL_EXPR_ARG (exp, 0);
13293 arg1 = CALL_EXPR_ARG (exp, 1);
13295 op0 = expand_normal (arg0);
13296 elt = get_element_number (TREE_TYPE (arg0), arg1);
13298 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13299 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13300 gcc_assert (VECTOR_MODE_P (mode0));
13302 op0 = force_reg (mode0, op0);
13304 if (optimize || !target || !register_operand (target, tmode))
13305 target = gen_reg_rtx (tmode);
13307 rs6000_expand_vector_extract (target, op0, elt);
13309 return target;
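/* A short sketch tying the two expanders together, assuming the
   AltiVec vec_insert/vec_extract intrinsics resolve to the vec_set
   and vec_ext builtins handled here:

     vector int v;
     v = vec_insert (42, v, 1);     // vec_set path: element 1 := 42
     int x = vec_extract (v, 1);    // vec_ext path: x == 42

   Both paths force their operands into registers and defer to the
   generic rs6000_expand_vector_set/extract helpers.  */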
13312 /* Expand the builtin in EXP and store the result in TARGET. Store
13313 true in *EXPANDEDP if we found a builtin to expand. */
13314 static rtx
13315 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13317 const struct builtin_description *d;
13318 size_t i;
13319 enum insn_code icode;
13320 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13321 tree arg0;
13322 rtx op0, pat;
13323 machine_mode tmode, mode0;
13324 enum rs6000_builtins fcode
13325 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13327 if (rs6000_overloaded_builtin_p (fcode))
13329 *expandedp = true;
13330 error ("unresolved overload for Altivec builtin %qF", fndecl);
13332 /* Given it is invalid, just generate a normal call. */
13333 return expand_call (exp, target, false);
13336 target = altivec_expand_ld_builtin (exp, target, expandedp);
13337 if (*expandedp)
13338 return target;
13340 target = altivec_expand_st_builtin (exp, target, expandedp);
13341 if (*expandedp)
13342 return target;
13344 target = altivec_expand_dst_builtin (exp, target, expandedp);
13345 if (*expandedp)
13346 return target;
13348 *expandedp = true;
13350 switch (fcode)
13352 case ALTIVEC_BUILTIN_STVX_V2DF:
13353 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13354 case ALTIVEC_BUILTIN_STVX_V2DI:
13355 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13356 case ALTIVEC_BUILTIN_STVX_V4SF:
13357 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13358 case ALTIVEC_BUILTIN_STVX:
13359 case ALTIVEC_BUILTIN_STVX_V4SI:
13360 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13361 case ALTIVEC_BUILTIN_STVX_V8HI:
13362 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13363 case ALTIVEC_BUILTIN_STVX_V16QI:
13364 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13365 case ALTIVEC_BUILTIN_STVEBX:
13366 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13367 case ALTIVEC_BUILTIN_STVEHX:
13368 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13369 case ALTIVEC_BUILTIN_STVEWX:
13370 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13371 case ALTIVEC_BUILTIN_STVXL_V2DF:
13372 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13373 case ALTIVEC_BUILTIN_STVXL_V2DI:
13374 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13375 case ALTIVEC_BUILTIN_STVXL_V4SF:
13376 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13377 case ALTIVEC_BUILTIN_STVXL:
13378 case ALTIVEC_BUILTIN_STVXL_V4SI:
13379 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13380 case ALTIVEC_BUILTIN_STVXL_V8HI:
13381 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13382 case ALTIVEC_BUILTIN_STVXL_V16QI:
13383 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13385 case ALTIVEC_BUILTIN_STVLX:
13386 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13387 case ALTIVEC_BUILTIN_STVLXL:
13388 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13389 case ALTIVEC_BUILTIN_STVRX:
13390 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13391 case ALTIVEC_BUILTIN_STVRXL:
13392 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13394 case VSX_BUILTIN_STXVD2X_V1TI:
13395 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13396 case VSX_BUILTIN_STXVD2X_V2DF:
13397 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13398 case VSX_BUILTIN_STXVD2X_V2DI:
13399 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13400 case VSX_BUILTIN_STXVW4X_V4SF:
13401 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13402 case VSX_BUILTIN_STXVW4X_V4SI:
13403 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13404 case VSX_BUILTIN_STXVW4X_V8HI:
13405 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13406 case VSX_BUILTIN_STXVW4X_V16QI:
13407 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13409 case ALTIVEC_BUILTIN_MFVSCR:
13410 icode = CODE_FOR_altivec_mfvscr;
13411 tmode = insn_data[icode].operand[0].mode;
13413 if (target == 0
13414 || GET_MODE (target) != tmode
13415 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13416 target = gen_reg_rtx (tmode);
13418 pat = GEN_FCN (icode) (target);
13419 if (! pat)
13420 return 0;
13421 emit_insn (pat);
13422 return target;
13424 case ALTIVEC_BUILTIN_MTVSCR:
13425 icode = CODE_FOR_altivec_mtvscr;
13426 arg0 = CALL_EXPR_ARG (exp, 0);
13427 op0 = expand_normal (arg0);
13428 mode0 = insn_data[icode].operand[0].mode;
13430 /* If we got invalid arguments, bail out before generating bad rtl. */
13431 if (arg0 == error_mark_node)
13432 return const0_rtx;
13434 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13435 op0 = copy_to_mode_reg (mode0, op0);
13437 pat = GEN_FCN (icode) (op0);
13438 if (pat)
13439 emit_insn (pat);
13440 return NULL_RTX;
13442 case ALTIVEC_BUILTIN_DSSALL:
13443 emit_insn (gen_altivec_dssall ());
13444 return NULL_RTX;
13446 case ALTIVEC_BUILTIN_DSS:
13447 icode = CODE_FOR_altivec_dss;
13448 arg0 = CALL_EXPR_ARG (exp, 0);
13449 STRIP_NOPS (arg0);
13450 op0 = expand_normal (arg0);
13451 mode0 = insn_data[icode].operand[0].mode;
13453 /* If we got invalid arguments, bail out before generating bad rtl. */
13454 if (arg0 == error_mark_node)
13455 return const0_rtx;
13457 if (TREE_CODE (arg0) != INTEGER_CST
13458 || TREE_INT_CST_LOW (arg0) & ~0x3)
13460 error ("argument to dss must be a 2-bit unsigned literal");
13461 return const0_rtx;
13464 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13465 op0 = copy_to_mode_reg (mode0, op0);
13467 emit_insn (gen_altivec_dss (op0));
13468 return NULL_RTX;
13470 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13471 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13472 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13473 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13474 case VSX_BUILTIN_VEC_INIT_V2DF:
13475 case VSX_BUILTIN_VEC_INIT_V2DI:
13476 case VSX_BUILTIN_VEC_INIT_V1TI:
13477 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13479 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13480 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13481 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13482 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13483 case VSX_BUILTIN_VEC_SET_V2DF:
13484 case VSX_BUILTIN_VEC_SET_V2DI:
13485 case VSX_BUILTIN_VEC_SET_V1TI:
13486 return altivec_expand_vec_set_builtin (exp);
13488 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13489 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13490 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13491 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13492 case VSX_BUILTIN_VEC_EXT_V2DF:
13493 case VSX_BUILTIN_VEC_EXT_V2DI:
13494 case VSX_BUILTIN_VEC_EXT_V1TI:
13495 return altivec_expand_vec_ext_builtin (exp, target);
13497 default:
13498 break;
13502 /* Expand abs* operations. */
13503 d = bdesc_abs;
13504 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13505 if (d->code == fcode)
13506 return altivec_expand_abs_builtin (d->icode, exp, target);
13508 /* Expand the AltiVec predicates. */
13509 d = bdesc_altivec_preds;
13510 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13511 if (d->code == fcode)
13512 return altivec_expand_predicate_builtin (d->icode, exp, target);
13514 /* The LV* builtins are initialized differently from the rest, so expand them by hand here. */
13515 switch (fcode)
13517 case ALTIVEC_BUILTIN_LVSL:
13518 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13519 exp, target, false);
13520 case ALTIVEC_BUILTIN_LVSR:
13521 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13522 exp, target, false);
13523 case ALTIVEC_BUILTIN_LVEBX:
13524 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13525 exp, target, false);
13526 case ALTIVEC_BUILTIN_LVEHX:
13527 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13528 exp, target, false);
13529 case ALTIVEC_BUILTIN_LVEWX:
13530 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13531 exp, target, false);
13532 case ALTIVEC_BUILTIN_LVXL_V2DF:
13533 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13534 exp, target, false);
13535 case ALTIVEC_BUILTIN_LVXL_V2DI:
13536 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13537 exp, target, false);
13538 case ALTIVEC_BUILTIN_LVXL_V4SF:
13539 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13540 exp, target, false);
13541 case ALTIVEC_BUILTIN_LVXL:
13542 case ALTIVEC_BUILTIN_LVXL_V4SI:
13543 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13544 exp, target, false);
13545 case ALTIVEC_BUILTIN_LVXL_V8HI:
13546 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13547 exp, target, false);
13548 case ALTIVEC_BUILTIN_LVXL_V16QI:
13549 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13550 exp, target, false);
13551 case ALTIVEC_BUILTIN_LVX_V2DF:
13552 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13553 exp, target, false);
13554 case ALTIVEC_BUILTIN_LVX_V2DI:
13555 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13556 exp, target, false);
13557 case ALTIVEC_BUILTIN_LVX_V4SF:
13558 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13559 exp, target, false);
13560 case ALTIVEC_BUILTIN_LVX:
13561 case ALTIVEC_BUILTIN_LVX_V4SI:
13562 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13563 exp, target, false);
13564 case ALTIVEC_BUILTIN_LVX_V8HI:
13565 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13566 exp, target, false);
13567 case ALTIVEC_BUILTIN_LVX_V16QI:
13568 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13569 exp, target, false);
13570 case ALTIVEC_BUILTIN_LVLX:
13571 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13572 exp, target, true);
13573 case ALTIVEC_BUILTIN_LVLXL:
13574 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13575 exp, target, true);
13576 case ALTIVEC_BUILTIN_LVRX:
13577 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13578 exp, target, true);
13579 case ALTIVEC_BUILTIN_LVRXL:
13580 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13581 exp, target, true);
13582 case VSX_BUILTIN_LXVD2X_V1TI:
13583 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13584 exp, target, false);
13585 case VSX_BUILTIN_LXVD2X_V2DF:
13586 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13587 exp, target, false);
13588 case VSX_BUILTIN_LXVD2X_V2DI:
13589 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13590 exp, target, false);
13591 case VSX_BUILTIN_LXVW4X_V4SF:
13592 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13593 exp, target, false);
13594 case VSX_BUILTIN_LXVW4X_V4SI:
13595 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13596 exp, target, false);
13597 case VSX_BUILTIN_LXVW4X_V8HI:
13598 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13599 exp, target, false);
13600 case VSX_BUILTIN_LXVW4X_V16QI:
13601 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13602 exp, target, false);
13604 default:
13605 break;
13609 *expandedp = false;
13610 return NULL_RTX;
13613 /* Expand the builtin in EXP and store the result in TARGET. Store
13614 true in *EXPANDEDP if we found a builtin to expand. */
13615 static rtx
13616 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13618 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13619 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13620 const struct builtin_description *d;
13621 size_t i;
13623 *expandedp = true;
13625 switch (fcode)
13627 case PAIRED_BUILTIN_STX:
13628 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13629 case PAIRED_BUILTIN_LX:
13630 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13631 default:
13632 break;
13636 /* Expand the paired predicates. */
13637 d = bdesc_paired_preds;
13638 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13639 if (d->code == fcode)
13640 return paired_expand_predicate_builtin (d->icode, exp, target);
13642 *expandedp = false;
13643 return NULL_RTX;
13646 /* Binops that need to be initialized manually, but can be expanded
13647 automagically by rs6000_expand_binop_builtin. */
13648 static const struct builtin_description bdesc_2arg_spe[] =
13650 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13651 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13652 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13653 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13654 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13655 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13656 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13657 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13658 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13659 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13660 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13661 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13662 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13663 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13664 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13665 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13666 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13667 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13668 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13669 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13670 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13671 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13674 /* Expand the builtin in EXP and store the result in TARGET. Store
13675 true in *EXPANDEDP if we found a builtin to expand.
13677 This expands the SPE builtins that are not simple unary and binary
13678 operations. */
13679 static rtx
13680 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13682 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13683 tree arg1, arg0;
13684 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13685 enum insn_code icode;
13686 machine_mode tmode, mode0;
13687 rtx pat, op0;
13688 const struct builtin_description *d;
13689 size_t i;
13691 *expandedp = true;
13693 /* Syntax check for a 5-bit unsigned immediate. */
13694 switch (fcode)
13696 case SPE_BUILTIN_EVSTDD:
13697 case SPE_BUILTIN_EVSTDH:
13698 case SPE_BUILTIN_EVSTDW:
13699 case SPE_BUILTIN_EVSTWHE:
13700 case SPE_BUILTIN_EVSTWHO:
13701 case SPE_BUILTIN_EVSTWWE:
13702 case SPE_BUILTIN_EVSTWWO:
13703 arg1 = CALL_EXPR_ARG (exp, 2);
13704 if (TREE_CODE (arg1) != INTEGER_CST
13705 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13707 error ("argument 2 must be a 5-bit unsigned literal");
13708 return const0_rtx;
13710 break;
13711 default:
13712 break;
13715 /* The evsplat*i instructions are not quite generic. */
13716 switch (fcode)
13718 case SPE_BUILTIN_EVSPLATFI:
13719 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13720 exp, target);
13721 case SPE_BUILTIN_EVSPLATI:
13722 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13723 exp, target);
13724 default:
13725 break;
13728 d = bdesc_2arg_spe;
13729 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13730 if (d->code == fcode)
13731 return rs6000_expand_binop_builtin (d->icode, exp, target);
13733 d = bdesc_spe_predicates;
13734 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13735 if (d->code == fcode)
13736 return spe_expand_predicate_builtin (d->icode, exp, target);
13738 d = bdesc_spe_evsel;
13739 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13740 if (d->code == fcode)
13741 return spe_expand_evsel_builtin (d->icode, exp, target);
13743 switch (fcode)
13745 case SPE_BUILTIN_EVSTDDX:
13746 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13747 case SPE_BUILTIN_EVSTDHX:
13748 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13749 case SPE_BUILTIN_EVSTDWX:
13750 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13751 case SPE_BUILTIN_EVSTWHEX:
13752 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13753 case SPE_BUILTIN_EVSTWHOX:
13754 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13755 case SPE_BUILTIN_EVSTWWEX:
13756 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13757 case SPE_BUILTIN_EVSTWWOX:
13758 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13759 case SPE_BUILTIN_EVSTDD:
13760 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13761 case SPE_BUILTIN_EVSTDH:
13762 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13763 case SPE_BUILTIN_EVSTDW:
13764 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13765 case SPE_BUILTIN_EVSTWHE:
13766 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13767 case SPE_BUILTIN_EVSTWHO:
13768 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13769 case SPE_BUILTIN_EVSTWWE:
13770 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13771 case SPE_BUILTIN_EVSTWWO:
13772 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13773 case SPE_BUILTIN_MFSPEFSCR:
13774 icode = CODE_FOR_spe_mfspefscr;
13775 tmode = insn_data[icode].operand[0].mode;
13777 if (target == 0
13778 || GET_MODE (target) != tmode
13779 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13780 target = gen_reg_rtx (tmode);
13782 pat = GEN_FCN (icode) (target);
13783 if (! pat)
13784 return 0;
13785 emit_insn (pat);
13786 return target;
13787 case SPE_BUILTIN_MTSPEFSCR:
13788 icode = CODE_FOR_spe_mtspefscr;
13789 arg0 = CALL_EXPR_ARG (exp, 0);
13790 op0 = expand_normal (arg0);
13791 mode0 = insn_data[icode].operand[0].mode;
13793 if (arg0 == error_mark_node)
13794 return const0_rtx;
13796 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13797 op0 = copy_to_mode_reg (mode0, op0);
13799 pat = GEN_FCN (icode) (op0);
13800 if (pat)
13801 emit_insn (pat);
13802 return NULL_RTX;
13803 default:
13804 break;
13807 *expandedp = false;
13808 return NULL_RTX;
13811 static rtx
13812 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13814 rtx pat, scratch, tmp;
13815 tree form = CALL_EXPR_ARG (exp, 0);
13816 tree arg0 = CALL_EXPR_ARG (exp, 1);
13817 tree arg1 = CALL_EXPR_ARG (exp, 2);
13818 rtx op0 = expand_normal (arg0);
13819 rtx op1 = expand_normal (arg1);
13820 machine_mode mode0 = insn_data[icode].operand[1].mode;
13821 machine_mode mode1 = insn_data[icode].operand[2].mode;
13822 int form_int;
13823 enum rtx_code code;
13825 if (TREE_CODE (form) != INTEGER_CST)
13827 error ("argument 1 of __builtin_paired_predicate must be a constant");
13828 return const0_rtx;
13830 else
13831 form_int = TREE_INT_CST_LOW (form);
13833 gcc_assert (mode0 == mode1);
13835 if (arg0 == error_mark_node || arg1 == error_mark_node)
13836 return const0_rtx;
13838 if (target == 0
13839 || GET_MODE (target) != SImode
13840 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13841 target = gen_reg_rtx (SImode);
13842 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13843 op0 = copy_to_mode_reg (mode0, op0);
13844 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13845 op1 = copy_to_mode_reg (mode1, op1);
13847 scratch = gen_reg_rtx (CCFPmode);
13849 pat = GEN_FCN (icode) (scratch, op0, op1);
13850 if (!pat)
13851 return const0_rtx;
13853 emit_insn (pat);
13855 switch (form_int)
13857 /* LT bit. */
13858 case 0:
13859 code = LT;
13860 break;
13861 /* GT bit. */
13862 case 1:
13863 code = GT;
13864 break;
13865 /* EQ bit. */
13866 case 2:
13867 code = EQ;
13868 break;
13869 /* UN bit. */
13870 case 3:
13871 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13872 return target;
13873 default:
13874 error ("argument 1 of __builtin_paired_predicate is out of range");
13875 return const0_rtx;
13878 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13879 emit_move_insn (target, tmp);
13880 return target;
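/* Sketch of the FORM argument decoded above; the diagnostics refer to
   the builtin as __builtin_paired_predicate:

     __builtin_paired_predicate (0, a, b);   // LT bit
     __builtin_paired_predicate (1, a, b);   // GT bit
     __builtin_paired_predicate (2, a, b);   // EQ bit
     __builtin_paired_predicate (3, a, b);   // UN bit, via the CR move

   FORM must be an integer literal; anything outside 0..3 is rejected
   with the out-of-range error above.  */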
13883 static rtx
13884 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13886 rtx pat, scratch, tmp;
13887 tree form = CALL_EXPR_ARG (exp, 0);
13888 tree arg0 = CALL_EXPR_ARG (exp, 1);
13889 tree arg1 = CALL_EXPR_ARG (exp, 2);
13890 rtx op0 = expand_normal (arg0);
13891 rtx op1 = expand_normal (arg1);
13892 machine_mode mode0 = insn_data[icode].operand[1].mode;
13893 machine_mode mode1 = insn_data[icode].operand[2].mode;
13894 int form_int;
13895 enum rtx_code code;
13897 if (TREE_CODE (form) != INTEGER_CST)
13899 error ("argument 1 of __builtin_spe_predicate must be a constant");
13900 return const0_rtx;
13902 else
13903 form_int = TREE_INT_CST_LOW (form);
13905 gcc_assert (mode0 == mode1);
13907 if (arg0 == error_mark_node || arg1 == error_mark_node)
13908 return const0_rtx;
13910 if (target == 0
13911 || GET_MODE (target) != SImode
13912 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13913 target = gen_reg_rtx (SImode);
13915 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13916 op0 = copy_to_mode_reg (mode0, op0);
13917 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13918 op1 = copy_to_mode_reg (mode1, op1);
13920 scratch = gen_reg_rtx (CCmode);
13922 pat = GEN_FCN (icode) (scratch, op0, op1);
13923 if (! pat)
13924 return const0_rtx;
13925 emit_insn (pat);
13927 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13928 _lower_. We use one compare, but look in different bits of the
13929 CR for each variant.
13931 There are 2 elements in each SPE simd type (upper/lower). The CR
13932 bits are set as follows:
13934 BIT0 | BIT 1 | BIT 2 | BIT 3
13935 U | L | (U | L) | (U & L)
13937 So, for an "all" relationship, BIT 3 would be set.
13938 For an "any" relationship, BIT 2 would be set. Etc.
13940 Following traditional nomenclature, these bits map to:
13942 BIT0 | BIT 1 | BIT 2 | BIT 3
13943 LT | GT | EQ | OV
13945 Later, we will generate rtl to look in the OV/EQ/LT/GT bits (for forms 0..3 respectively).
13948 switch (form_int)
13950 /* All variant. OV bit. */
13951 case 0:
13952 /* We need to get to the OV bit, which is the ORDERED bit. We
13953 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13954 that's ugly and will make validate_condition_mode die.
13955 So let's just use another pattern. */
13956 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13957 return target;
13958 /* Any variant. EQ bit. */
13959 case 1:
13960 code = EQ;
13961 break;
13962 /* Upper variant. LT bit. */
13963 case 2:
13964 code = LT;
13965 break;
13966 /* Lower variant. GT bit. */
13967 case 3:
13968 code = GT;
13969 break;
13970 default:
13971 error ("argument 1 of __builtin_spe_predicate is out of range");
13972 return const0_rtx;
13975 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13976 emit_move_insn (target, tmp);
13978 return target;
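/* Sketch of the FORM decoding for the SPE predicates, mirroring the
   CR-bit table in the comment above; the diagnostics refer to the
   builtin as __builtin_spe_predicate:

     __builtin_spe_predicate (0, a, b);   // "all"   variant: OV bit
     __builtin_spe_predicate (1, a, b);   // "any"   variant: EQ bit
     __builtin_spe_predicate (2, a, b);   // "upper" variant: LT bit
     __builtin_spe_predicate (3, a, b);   // "lower" variant: GT bit  */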
13981 /* The evsel builtins look like this:
13983 e = __builtin_spe_evsel_OP (a, b, c, d);
13985 and work like this:
13987 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13988 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13991 static rtx
13992 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13994 rtx pat, scratch;
13995 tree arg0 = CALL_EXPR_ARG (exp, 0);
13996 tree arg1 = CALL_EXPR_ARG (exp, 1);
13997 tree arg2 = CALL_EXPR_ARG (exp, 2);
13998 tree arg3 = CALL_EXPR_ARG (exp, 3);
13999 rtx op0 = expand_normal (arg0);
14000 rtx op1 = expand_normal (arg1);
14001 rtx op2 = expand_normal (arg2);
14002 rtx op3 = expand_normal (arg3);
14003 machine_mode mode0 = insn_data[icode].operand[1].mode;
14004 machine_mode mode1 = insn_data[icode].operand[2].mode;
14006 gcc_assert (mode0 == mode1);
14008 if (arg0 == error_mark_node || arg1 == error_mark_node
14009 || arg2 == error_mark_node || arg3 == error_mark_node)
14010 return const0_rtx;
14012 if (target == 0
14013 || GET_MODE (target) != mode0
14014 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14015 target = gen_reg_rtx (mode0);
14017 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14018 op0 = copy_to_mode_reg (mode0, op0);
14019 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14020 op1 = copy_to_mode_reg (mode0, op1);
14021 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14022 op2 = copy_to_mode_reg (mode0, op2);
14023 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14024 op3 = copy_to_mode_reg (mode0, op3);
14026 /* Generate the compare. */
14027 scratch = gen_reg_rtx (CCmode);
14028 pat = GEN_FCN (icode) (scratch, op0, op1);
14029 if (! pat)
14030 return const0_rtx;
14031 emit_insn (pat);
14033 if (mode0 == V2SImode)
14034 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14035 else
14036 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14038 return target;
14041 /* Raise an error message for a builtin function that is called without the
14042 appropriate target options being set. */
14044 static void
14045 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14047 size_t uns_fncode = (size_t)fncode;
14048 const char *name = rs6000_builtin_info[uns_fncode].name;
14049 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14051 gcc_assert (name != NULL);
14052 if ((fnmask & RS6000_BTM_CELL) != 0)
14053 error ("builtin function %s is only valid for the cell processor", name);
14054 else if ((fnmask & RS6000_BTM_VSX) != 0)
14055 error ("builtin function %s requires the -mvsx option", name);
14056 else if ((fnmask & RS6000_BTM_HTM) != 0)
14057 error ("builtin function %s requires the -mhtm option", name);
14058 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14059 error ("builtin function %s requires the -maltivec option", name);
14060 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14061 error ("builtin function %s requires the -mpaired option", name);
14062 else if ((fnmask & RS6000_BTM_SPE) != 0)
14063 error ("builtin function %s requires the -mspe option", name);
14064 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14065 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14066 error ("builtin function %s requires the -mhard-dfp and"
14067 " -mpower8-vector options", name);
14068 else if ((fnmask & RS6000_BTM_DFP) != 0)
14069 error ("builtin function %s requires the -mhard-dfp option", name);
14070 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14071 error ("builtin function %s requires the -mpower8-vector option", name);
14072 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14073 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14074 error ("builtin function %s requires the -mhard-float and"
14075 " -mlong-double-128 options", name);
14076 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14077 error ("builtin function %s requires the -mhard-float option", name);
14078 else
14079 error ("builtin function %s is not supported with the current options",
14080 name);
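/* For example, calling the HTM builtin __builtin_tbegin in a unit
   compiled without -mhtm lands here and emits (a sketch of the
   diagnostic):

     error: builtin function __builtin_tbegin requires the -mhtm option

   after which the caller falls back to expanding a normal call.  */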
14083 /* Expand an expression EXP that calls a built-in function,
14084 with result going to TARGET if that's convenient
14085 (and in mode MODE if that's convenient).
14086 SUBTARGET may be used as the target for computing one of EXP's operands.
14087 IGNORE is nonzero if the value is to be ignored. */
14089 static rtx
14090 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14091 machine_mode mode ATTRIBUTE_UNUSED,
14092 int ignore ATTRIBUTE_UNUSED)
14094 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14095 enum rs6000_builtins fcode
14096 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14097 size_t uns_fcode = (size_t)fcode;
14098 const struct builtin_description *d;
14099 size_t i;
14100 rtx ret;
14101 bool success;
14102 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14103 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14105 if (TARGET_DEBUG_BUILTIN)
14107 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14108 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14109 const char *name2 = ((icode != CODE_FOR_nothing)
14110 ? get_insn_name ((int)icode)
14111 : "nothing");
14112 const char *name3;
14114 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14116 default: name3 = "unknown"; break;
14117 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14118 case RS6000_BTC_UNARY: name3 = "unary"; break;
14119 case RS6000_BTC_BINARY: name3 = "binary"; break;
14120 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14121 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14122 case RS6000_BTC_ABS: name3 = "abs"; break;
14123 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14124 case RS6000_BTC_DST: name3 = "dst"; break;
14128 fprintf (stderr,
14129 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14130 (name1) ? name1 : "---", fcode,
14131 (name2) ? name2 : "---", (int)icode,
14132 name3,
14133 func_valid_p ? "" : ", not valid");
14136 if (!func_valid_p)
14138 rs6000_invalid_builtin (fcode);
14140 /* Given it is invalid, just generate a normal call. */
14141 return expand_call (exp, target, ignore);
14144 switch (fcode)
14146 case RS6000_BUILTIN_RECIP:
14147 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14149 case RS6000_BUILTIN_RECIPF:
14150 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14152 case RS6000_BUILTIN_RSQRTF:
14153 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14155 case RS6000_BUILTIN_RSQRT:
14156 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14158 case POWER7_BUILTIN_BPERMD:
14159 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14160 ? CODE_FOR_bpermd_di
14161 : CODE_FOR_bpermd_si), exp, target);
14163 case RS6000_BUILTIN_GET_TB:
14164 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14165 target);
14167 case RS6000_BUILTIN_MFTB:
14168 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14169 ? CODE_FOR_rs6000_mftb_di
14170 : CODE_FOR_rs6000_mftb_si),
14171 target);
14173 case RS6000_BUILTIN_MFFS:
14174 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14176 case RS6000_BUILTIN_MTFSF:
14177 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14179 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14180 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14182 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14183 : (int) CODE_FOR_altivec_lvsl_direct);
14184 machine_mode tmode = insn_data[icode].operand[0].mode;
14185 machine_mode mode = insn_data[icode].operand[1].mode;
14186 tree arg;
14187 rtx op, addr, pat;
14189 gcc_assert (TARGET_ALTIVEC);
14191 arg = CALL_EXPR_ARG (exp, 0);
14192 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14193 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14194 addr = memory_address (mode, op);
14195 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14196 op = addr;
14197 else
14199 /* For the load case, we need to negate the address. */
14200 op = gen_reg_rtx (GET_MODE (addr));
14201 emit_insn (gen_rtx_SET (VOIDmode, op,
14202 gen_rtx_NEG (GET_MODE (addr), addr)));
14204 op = gen_rtx_MEM (mode, op);
14206 if (target == 0
14207 || GET_MODE (target) != tmode
14208 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14209 target = gen_reg_rtx (tmode);
14211 pat = GEN_FCN (icode) (target, op);
14212 if (!pat)
14213 return 0;
14214 emit_insn (pat);
14216 return target;
14219 case ALTIVEC_BUILTIN_VCFUX:
14220 case ALTIVEC_BUILTIN_VCFSX:
14221 case ALTIVEC_BUILTIN_VCTUXS:
14222 case ALTIVEC_BUILTIN_VCTSXS:
14223 /* FIXME: There's got to be a nicer way to handle this case than
14224 constructing a new CALL_EXPR. */
14225 if (call_expr_nargs (exp) == 1)
14227 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14228 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14230 break;
14232 default:
14233 break;
14236 if (TARGET_ALTIVEC)
14238 ret = altivec_expand_builtin (exp, target, &success);
14240 if (success)
14241 return ret;
14243 if (TARGET_SPE)
14245 ret = spe_expand_builtin (exp, target, &success);
14247 if (success)
14248 return ret;
14250 if (TARGET_PAIRED_FLOAT)
14252 ret = paired_expand_builtin (exp, target, &success);
14254 if (success)
14255 return ret;
14257 if (TARGET_HTM)
14259 ret = htm_expand_builtin (exp, target, &success);
14261 if (success)
14262 return ret;
14265 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14266 gcc_assert (attr == RS6000_BTC_UNARY
14267 || attr == RS6000_BTC_BINARY
14268 || attr == RS6000_BTC_TERNARY);
14270 /* Handle simple unary operations. */
14271 d = bdesc_1arg;
14272 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14273 if (d->code == fcode)
14274 return rs6000_expand_unop_builtin (d->icode, exp, target);
14276 /* Handle simple binary operations. */
14277 d = bdesc_2arg;
14278 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14279 if (d->code == fcode)
14280 return rs6000_expand_binop_builtin (d->icode, exp, target);
14282 /* Handle simple ternary operations. */
14283 d = bdesc_3arg;
14284 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14285 if (d->code == fcode)
14286 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14288 gcc_unreachable ();
14291 static void
14292 rs6000_init_builtins (void)
14294 tree tdecl;
14295 tree ftype;
14296 machine_mode mode;
14298 if (TARGET_DEBUG_BUILTIN)
14299 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14300 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14301 (TARGET_SPE) ? ", spe" : "",
14302 (TARGET_ALTIVEC) ? ", altivec" : "",
14303 (TARGET_VSX) ? ", vsx" : "");
14305 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14306 V2SF_type_node = build_vector_type (float_type_node, 2);
14307 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14308 V2DF_type_node = build_vector_type (double_type_node, 2);
14309 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14310 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14311 V4SF_type_node = build_vector_type (float_type_node, 4);
14312 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14313 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14315 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14316 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14317 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14318 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14320 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14321 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14322 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14323 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14325 /* We use V1TI mode as a special container to hold __int128_t items that
14326 must live in VSX registers. */
14327 if (intTI_type_node)
14329 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14330 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14333 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14334 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14335 'vector unsigned short'. */
14337 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14338 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14339 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14340 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14341 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14343 long_integer_type_internal_node = long_integer_type_node;
14344 long_unsigned_type_internal_node = long_unsigned_type_node;
14345 long_long_integer_type_internal_node = long_long_integer_type_node;
14346 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14347 intQI_type_internal_node = intQI_type_node;
14348 uintQI_type_internal_node = unsigned_intQI_type_node;
14349 intHI_type_internal_node = intHI_type_node;
14350 uintHI_type_internal_node = unsigned_intHI_type_node;
14351 intSI_type_internal_node = intSI_type_node;
14352 uintSI_type_internal_node = unsigned_intSI_type_node;
14353 intDI_type_internal_node = intDI_type_node;
14354 uintDI_type_internal_node = unsigned_intDI_type_node;
14355 intTI_type_internal_node = intTI_type_node;
14356 uintTI_type_internal_node = unsigned_intTI_type_node;
14357 float_type_internal_node = float_type_node;
14358 double_type_internal_node = double_type_node;
14359 long_double_type_internal_node = long_double_type_node;
14360 dfloat64_type_internal_node = dfloat64_type_node;
14361 dfloat128_type_internal_node = dfloat128_type_node;
14362 void_type_internal_node = void_type_node;
14364 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14365 tree type node. */
14366 builtin_mode_to_type[QImode][0] = integer_type_node;
14367 builtin_mode_to_type[HImode][0] = integer_type_node;
14368 builtin_mode_to_type[SImode][0] = intSI_type_node;
14369 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14370 builtin_mode_to_type[DImode][0] = intDI_type_node;
14371 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14372 builtin_mode_to_type[TImode][0] = intTI_type_node;
14373 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14374 builtin_mode_to_type[SFmode][0] = float_type_node;
14375 builtin_mode_to_type[DFmode][0] = double_type_node;
14376 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14377 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14378 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14379 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14380 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14381 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14382 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14383 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14384 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14385 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14386 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14387 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14388 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14389 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14390 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14391 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14392 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14393 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
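/* Sketch (not in the original source): builtin_function_type below
   resolves a (machine mode, unsignedness) pair through this table:

     tree t = builtin_mode_to_type[V4SImode][1];  // unsigned_V4SI_type_node

   When the unsigned slot is NULL, builtin_function_type falls back to
   the signed entry for the same mode.  */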
14395 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14396 TYPE_NAME (bool_char_type_node) = tdecl;
14398 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14399 TYPE_NAME (bool_short_type_node) = tdecl;
14401 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14402 TYPE_NAME (bool_int_type_node) = tdecl;
14404 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14405 TYPE_NAME (pixel_type_node) = tdecl;
14407 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14408 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14409 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14410 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14411 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14413 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14414 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14416 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14417 TYPE_NAME (V16QI_type_node) = tdecl;
14419 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14420   TYPE_NAME (bool_V16QI_type_node) = tdecl;
14422 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14423 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14425 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14426 TYPE_NAME (V8HI_type_node) = tdecl;
14428 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14429 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14431 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14432 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14434 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14435 TYPE_NAME (V4SI_type_node) = tdecl;
14437 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14438 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14440 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14441 TYPE_NAME (V4SF_type_node) = tdecl;
14443 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14444 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14446 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14447 TYPE_NAME (V2DF_type_node) = tdecl;
14449 if (TARGET_POWERPC64)
14451 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14452 TYPE_NAME (V2DI_type_node) = tdecl;
14454 tdecl = add_builtin_type ("__vector unsigned long",
14455 unsigned_V2DI_type_node);
14456 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14458 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14459 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14461 else
14463 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14464 TYPE_NAME (V2DI_type_node) = tdecl;
14466 tdecl = add_builtin_type ("__vector unsigned long long",
14467 unsigned_V2DI_type_node);
14468 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14470 tdecl = add_builtin_type ("__vector __bool long long",
14471 bool_V2DI_type_node);
14472 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14475 if (V1TI_type_node)
14477 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14478 TYPE_NAME (V1TI_type_node) = tdecl;
14480 tdecl = add_builtin_type ("__vector unsigned __int128",
14481 unsigned_V1TI_type_node);
14482 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14485 /* Paired and SPE builtins are only available if you build a compiler with
14486 the appropriate options, so only create those builtins with the
14487 appropriate compiler option. Create Altivec and VSX builtins on machines
14488 with at least the general purpose extensions (970 and newer) to allow the
14489 use of the target attribute. */
14490 if (TARGET_PAIRED_FLOAT)
14491 paired_init_builtins ();
14492 if (TARGET_SPE)
14493 spe_init_builtins ();
14494 if (TARGET_EXTRA_BUILTINS)
14495 altivec_init_builtins ();
14496 if (TARGET_HTM)
14497 htm_init_builtins ();
14499 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14500 rs6000_common_init_builtins ();
14502 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14503 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14504 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14506 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14507 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14508 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14510 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14511 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14512 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14514 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14515 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14516 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
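/* Usage sketch (illustrative, not part of the original source):

     double q = __builtin_recipdiv (x, y);  // x/y via reciprocal estimate
     float  r = __builtin_rsqrtf (z);       // approximates 1.0f / sqrtf (z)

   Both are estimate-based and intended for use with -mrecip style
   refinement sequences; exact IEEE results are not guaranteed.  */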
14518 mode = (TARGET_64BIT) ? DImode : SImode;
14519 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14520 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14521 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
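/* Illustrative usage (not in the original source): bpermd gathers the
   source bits selected by the index bytes of the first operand:

     unsigned long bits = __builtin_bpermd (selector, source);

   The DImode/SImode choice above makes the builtin operate on the
   natural word size of the target.  */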
14523 ftype = build_function_type_list (unsigned_intDI_type_node,
14524 NULL_TREE);
14525 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14527 if (TARGET_64BIT)
14528 ftype = build_function_type_list (unsigned_intDI_type_node,
14529 NULL_TREE);
14530 else
14531 ftype = build_function_type_list (unsigned_intSI_type_node,
14532 NULL_TREE);
14533 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
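/* Usage sketch (not part of the original source): both builtins read
   the time base register; get_timebase always yields the full 64-bit
   value, while mftb matches the word size selected above:

     unsigned long long t0 = __builtin_ppc_get_timebase ();
     do_work ();                          // stand-in for measured code
     unsigned long long t1 = __builtin_ppc_get_timebase ();
     // elapsed time base ticks: t1 - t0
*/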
14535 ftype = build_function_type_list (double_type_node, NULL_TREE);
14536 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14538 ftype = build_function_type_list (void_type_node,
14539 intSI_type_node, double_type_node,
14540 NULL_TREE);
14541 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
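/* Illustrative usage (not in the original source): __builtin_mffs
   returns the FPSCR as a double and __builtin_mtfsf writes selected
   fields back, mirroring the mffs/mtfsf instructions:

     double fpscr = __builtin_mffs ();
     __builtin_mtfsf (0xff, fpscr);   // FM mask 0xff: restore all fields
*/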
14543 #if TARGET_XCOFF
14544 /* AIX libm provides clog as __clog. */
14545 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14546 set_user_assembler_name (tdecl, "__clog");
14547 #endif
14549 #ifdef SUBTARGET_INIT_BUILTINS
14550 SUBTARGET_INIT_BUILTINS;
14551 #endif
14554 /* Returns the rs6000 builtin decl for CODE. */
14556 static tree
14557 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14559 HOST_WIDE_INT fnmask;
14561 if (code >= RS6000_BUILTIN_COUNT)
14562 return error_mark_node;
14564 fnmask = rs6000_builtin_info[code].mask;
14565 if ((fnmask & rs6000_builtin_mask) != fnmask)
14567 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14568 return error_mark_node;
14571 return rs6000_builtin_decls[code];
14574 static void
14575 spe_init_builtins (void)
14577 tree puint_type_node = build_pointer_type (unsigned_type_node);
14578 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14579 const struct builtin_description *d;
14580 size_t i;
14582 tree v2si_ftype_4_v2si
14583 = build_function_type_list (opaque_V2SI_type_node,
14584 opaque_V2SI_type_node,
14585 opaque_V2SI_type_node,
14586 opaque_V2SI_type_node,
14587 opaque_V2SI_type_node,
14588 NULL_TREE);
14590 tree v2sf_ftype_4_v2sf
14591 = build_function_type_list (opaque_V2SF_type_node,
14592 opaque_V2SF_type_node,
14593 opaque_V2SF_type_node,
14594 opaque_V2SF_type_node,
14595 opaque_V2SF_type_node,
14596 NULL_TREE);
14598 tree int_ftype_int_v2si_v2si
14599 = build_function_type_list (integer_type_node,
14600 integer_type_node,
14601 opaque_V2SI_type_node,
14602 opaque_V2SI_type_node,
14603 NULL_TREE);
14605 tree int_ftype_int_v2sf_v2sf
14606 = build_function_type_list (integer_type_node,
14607 integer_type_node,
14608 opaque_V2SF_type_node,
14609 opaque_V2SF_type_node,
14610 NULL_TREE);
14612 tree void_ftype_v2si_puint_int
14613 = build_function_type_list (void_type_node,
14614 opaque_V2SI_type_node,
14615 puint_type_node,
14616 integer_type_node,
14617 NULL_TREE);
14619 tree void_ftype_v2si_puint_char
14620 = build_function_type_list (void_type_node,
14621 opaque_V2SI_type_node,
14622 puint_type_node,
14623 char_type_node,
14624 NULL_TREE);
14626 tree void_ftype_v2si_pv2si_int
14627 = build_function_type_list (void_type_node,
14628 opaque_V2SI_type_node,
14629 opaque_p_V2SI_type_node,
14630 integer_type_node,
14631 NULL_TREE);
14633 tree void_ftype_v2si_pv2si_char
14634 = build_function_type_list (void_type_node,
14635 opaque_V2SI_type_node,
14636 opaque_p_V2SI_type_node,
14637 char_type_node,
14638 NULL_TREE);
14640 tree void_ftype_int
14641 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14643 tree int_ftype_void
14644 = build_function_type_list (integer_type_node, NULL_TREE);
14646 tree v2si_ftype_pv2si_int
14647 = build_function_type_list (opaque_V2SI_type_node,
14648 opaque_p_V2SI_type_node,
14649 integer_type_node,
14650 NULL_TREE);
14652 tree v2si_ftype_puint_int
14653 = build_function_type_list (opaque_V2SI_type_node,
14654 puint_type_node,
14655 integer_type_node,
14656 NULL_TREE);
14658 tree v2si_ftype_pushort_int
14659 = build_function_type_list (opaque_V2SI_type_node,
14660 pushort_type_node,
14661 integer_type_node,
14662 NULL_TREE);
14664 tree v2si_ftype_signed_char
14665 = build_function_type_list (opaque_V2SI_type_node,
14666 signed_char_type_node,
14667 NULL_TREE);
14669 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14671 /* Initialize irregular SPE builtins. */
14673 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14674 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14675 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14676 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14677 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14678 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14679 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14680 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14681 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14682 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14683 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14684 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14685 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14686 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14687 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14688 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14689 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14690 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14692 /* Loads. */
14693 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14694 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14695 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14696 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14697 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14698 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14699 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14700 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14701 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14702 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14703 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14704 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14705 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14706 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14707 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14708 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14709 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14710 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14711 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14712 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14713 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14714 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
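/* Illustrative usage (not part of the original source): SPE code
   normally reaches these through <spe.h>, e.g.

     __ev64_opaque__ v = __builtin_spe_evldd (p, 0);
     __builtin_spe_evstdd (v, p, 0);

   where p stands for a suitably aligned pointer.  The opaque V2SI
   argument/return types let one builtin serve every interpretation of
   a 64-bit SPE vector.  */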
14716 /* Predicates. */
14717 d = bdesc_spe_predicates;
14718 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14720 tree type;
14722 switch (insn_data[d->icode].operand[1].mode)
14724 case V2SImode:
14725 type = int_ftype_int_v2si_v2si;
14726 break;
14727 case V2SFmode:
14728 type = int_ftype_int_v2sf_v2sf;
14729 break;
14730 default:
14731 gcc_unreachable ();
14734 def_builtin (d->name, type, d->code);
14737 /* Evsel predicates. */
14738 d = bdesc_spe_evsel;
14739 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14741 tree type;
14743 switch (insn_data[d->icode].operand[1].mode)
14745 case V2SImode:
14746 type = v2si_ftype_4_v2si;
14747 break;
14748 case V2SFmode:
14749 type = v2sf_ftype_4_v2sf;
14750 break;
14751 default:
14752 gcc_unreachable ();
14755 def_builtin (d->name, type, d->code);
14759 static void
14760 paired_init_builtins (void)
14762 const struct builtin_description *d;
14763 size_t i;
14765 tree int_ftype_int_v2sf_v2sf
14766 = build_function_type_list (integer_type_node,
14767 integer_type_node,
14768 V2SF_type_node,
14769 V2SF_type_node,
14770 NULL_TREE);
14771 tree pcfloat_type_node =
14772 build_pointer_type (build_qualified_type
14773 (float_type_node, TYPE_QUAL_CONST));
14775 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14776 long_integer_type_node,
14777 pcfloat_type_node,
14778 NULL_TREE);
14779 tree void_ftype_v2sf_long_pcfloat =
14780 build_function_type_list (void_type_node,
14781 V2SF_type_node,
14782 long_integer_type_node,
14783 pcfloat_type_node,
14784 NULL_TREE);
14787 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14788 PAIRED_BUILTIN_LX);
14791 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14792 PAIRED_BUILTIN_STX);
14794 /* Predicates. */
14795 d = bdesc_paired_preds;
14796 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14798 tree type;
14800 if (TARGET_DEBUG_BUILTIN)
14801 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14802 (int)i, get_insn_name (d->icode), (int)d->icode,
14803 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14805 switch (insn_data[d->icode].operand[1].mode)
14807 case V2SFmode:
14808 type = int_ftype_int_v2sf_v2sf;
14809 break;
14810 default:
14811 gcc_unreachable ();
14814 def_builtin (d->name, type, d->code);
14818 static void
14819 altivec_init_builtins (void)
14821 const struct builtin_description *d;
14822 size_t i;
14823 tree ftype;
14824 tree decl;
14826 tree pvoid_type_node = build_pointer_type (void_type_node);
14828 tree pcvoid_type_node
14829 = build_pointer_type (build_qualified_type (void_type_node,
14830 TYPE_QUAL_CONST));
14832 tree int_ftype_opaque
14833 = build_function_type_list (integer_type_node,
14834 opaque_V4SI_type_node, NULL_TREE);
14835 tree opaque_ftype_opaque
14836 = build_function_type_list (integer_type_node, NULL_TREE);
14837 tree opaque_ftype_opaque_int
14838 = build_function_type_list (opaque_V4SI_type_node,
14839 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14840 tree opaque_ftype_opaque_opaque_int
14841 = build_function_type_list (opaque_V4SI_type_node,
14842 opaque_V4SI_type_node, opaque_V4SI_type_node,
14843 integer_type_node, NULL_TREE);
14844 tree int_ftype_int_opaque_opaque
14845 = build_function_type_list (integer_type_node,
14846 integer_type_node, opaque_V4SI_type_node,
14847 opaque_V4SI_type_node, NULL_TREE);
14848 tree int_ftype_int_v4si_v4si
14849 = build_function_type_list (integer_type_node,
14850 integer_type_node, V4SI_type_node,
14851 V4SI_type_node, NULL_TREE);
14852 tree int_ftype_int_v2di_v2di
14853 = build_function_type_list (integer_type_node,
14854 integer_type_node, V2DI_type_node,
14855 V2DI_type_node, NULL_TREE);
14856 tree void_ftype_v4si
14857 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14858 tree v8hi_ftype_void
14859 = build_function_type_list (V8HI_type_node, NULL_TREE);
14860 tree void_ftype_void
14861 = build_function_type_list (void_type_node, NULL_TREE);
14862 tree void_ftype_int
14863 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14865 tree opaque_ftype_long_pcvoid
14866 = build_function_type_list (opaque_V4SI_type_node,
14867 long_integer_type_node, pcvoid_type_node,
14868 NULL_TREE);
14869 tree v16qi_ftype_long_pcvoid
14870 = build_function_type_list (V16QI_type_node,
14871 long_integer_type_node, pcvoid_type_node,
14872 NULL_TREE);
14873 tree v8hi_ftype_long_pcvoid
14874 = build_function_type_list (V8HI_type_node,
14875 long_integer_type_node, pcvoid_type_node,
14876 NULL_TREE);
14877 tree v4si_ftype_long_pcvoid
14878 = build_function_type_list (V4SI_type_node,
14879 long_integer_type_node, pcvoid_type_node,
14880 NULL_TREE);
14881 tree v4sf_ftype_long_pcvoid
14882 = build_function_type_list (V4SF_type_node,
14883 long_integer_type_node, pcvoid_type_node,
14884 NULL_TREE);
14885 tree v2df_ftype_long_pcvoid
14886 = build_function_type_list (V2DF_type_node,
14887 long_integer_type_node, pcvoid_type_node,
14888 NULL_TREE);
14889 tree v2di_ftype_long_pcvoid
14890 = build_function_type_list (V2DI_type_node,
14891 long_integer_type_node, pcvoid_type_node,
14892 NULL_TREE);
14894 tree void_ftype_opaque_long_pvoid
14895 = build_function_type_list (void_type_node,
14896 opaque_V4SI_type_node, long_integer_type_node,
14897 pvoid_type_node, NULL_TREE);
14898 tree void_ftype_v4si_long_pvoid
14899 = build_function_type_list (void_type_node,
14900 V4SI_type_node, long_integer_type_node,
14901 pvoid_type_node, NULL_TREE);
14902 tree void_ftype_v16qi_long_pvoid
14903 = build_function_type_list (void_type_node,
14904 V16QI_type_node, long_integer_type_node,
14905 pvoid_type_node, NULL_TREE);
14906 tree void_ftype_v8hi_long_pvoid
14907 = build_function_type_list (void_type_node,
14908 V8HI_type_node, long_integer_type_node,
14909 pvoid_type_node, NULL_TREE);
14910 tree void_ftype_v4sf_long_pvoid
14911 = build_function_type_list (void_type_node,
14912 V4SF_type_node, long_integer_type_node,
14913 pvoid_type_node, NULL_TREE);
14914 tree void_ftype_v2df_long_pvoid
14915 = build_function_type_list (void_type_node,
14916 V2DF_type_node, long_integer_type_node,
14917 pvoid_type_node, NULL_TREE);
14918 tree void_ftype_v2di_long_pvoid
14919 = build_function_type_list (void_type_node,
14920 V2DI_type_node, long_integer_type_node,
14921 pvoid_type_node, NULL_TREE);
14922 tree int_ftype_int_v8hi_v8hi
14923 = build_function_type_list (integer_type_node,
14924 integer_type_node, V8HI_type_node,
14925 V8HI_type_node, NULL_TREE);
14926 tree int_ftype_int_v16qi_v16qi
14927 = build_function_type_list (integer_type_node,
14928 integer_type_node, V16QI_type_node,
14929 V16QI_type_node, NULL_TREE);
14930 tree int_ftype_int_v4sf_v4sf
14931 = build_function_type_list (integer_type_node,
14932 integer_type_node, V4SF_type_node,
14933 V4SF_type_node, NULL_TREE);
14934 tree int_ftype_int_v2df_v2df
14935 = build_function_type_list (integer_type_node,
14936 integer_type_node, V2DF_type_node,
14937 V2DF_type_node, NULL_TREE);
14938 tree v2di_ftype_v2di
14939 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14940 tree v4si_ftype_v4si
14941 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14942 tree v8hi_ftype_v8hi
14943 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14944 tree v16qi_ftype_v16qi
14945 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14946 tree v4sf_ftype_v4sf
14947 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14948 tree v2df_ftype_v2df
14949 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14950 tree void_ftype_pcvoid_int_int
14951 = build_function_type_list (void_type_node,
14952 pcvoid_type_node, integer_type_node,
14953 integer_type_node, NULL_TREE);
14955 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14956 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14957 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14958 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14959 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14960 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14961 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14962 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14963 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14964 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14965 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14966 ALTIVEC_BUILTIN_LVXL_V2DF);
14967 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14968 ALTIVEC_BUILTIN_LVXL_V2DI);
14969 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14970 ALTIVEC_BUILTIN_LVXL_V4SF);
14971 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14972 ALTIVEC_BUILTIN_LVXL_V4SI);
14973 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14974 ALTIVEC_BUILTIN_LVXL_V8HI);
14975 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14976 ALTIVEC_BUILTIN_LVXL_V16QI);
14977 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14978 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14979 ALTIVEC_BUILTIN_LVX_V2DF);
14980 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14981 ALTIVEC_BUILTIN_LVX_V2DI);
14982 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14983 ALTIVEC_BUILTIN_LVX_V4SF);
14984 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14985 ALTIVEC_BUILTIN_LVX_V4SI);
14986 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14987 ALTIVEC_BUILTIN_LVX_V8HI);
14988 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14989 ALTIVEC_BUILTIN_LVX_V16QI);
14990 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14991 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14992 ALTIVEC_BUILTIN_STVX_V2DF);
14993 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14994 ALTIVEC_BUILTIN_STVX_V2DI);
14995 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14996 ALTIVEC_BUILTIN_STVX_V4SF);
14997 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14998 ALTIVEC_BUILTIN_STVX_V4SI);
14999 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15000 ALTIVEC_BUILTIN_STVX_V8HI);
15001 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15002 ALTIVEC_BUILTIN_STVX_V16QI);
15003 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15004 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15005 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15006 ALTIVEC_BUILTIN_STVXL_V2DF);
15007 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15008 ALTIVEC_BUILTIN_STVXL_V2DI);
15009 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15010 ALTIVEC_BUILTIN_STVXL_V4SF);
15011 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15012 ALTIVEC_BUILTIN_STVXL_V4SI);
15013 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15014 ALTIVEC_BUILTIN_STVXL_V8HI);
15015 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15016 ALTIVEC_BUILTIN_STVXL_V16QI);
15017 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15018 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15019 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15020 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15021 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15022 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15023 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15024 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15025 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15026 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15027 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15028 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15029 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15030 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15031 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15032 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
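/* Sketch (not part of the original source): the __builtin_vec_*
   entries above are the overloaded forms underlying <altivec.h>:

     vector unsigned int v = vec_ld (0, p);   // -> __builtin_vec_ld
     vec_st (v, 0, q);                        // -> __builtin_vec_st

   p and q stand for suitably aligned pointers.  The front end later
   resolves the opaque operand types to a concrete
   ALTIVEC_BUILTIN_LVX_* / STVX_* variant.  */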
15034 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15035 VSX_BUILTIN_LXVD2X_V2DF);
15036 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15037 VSX_BUILTIN_LXVD2X_V2DI);
15038 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15039 VSX_BUILTIN_LXVW4X_V4SF);
15040 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15041 VSX_BUILTIN_LXVW4X_V4SI);
15042 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15043 VSX_BUILTIN_LXVW4X_V8HI);
15044 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15045 VSX_BUILTIN_LXVW4X_V16QI);
15046 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15047 VSX_BUILTIN_STXVD2X_V2DF);
15048 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15049 VSX_BUILTIN_STXVD2X_V2DI);
15050 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15051 VSX_BUILTIN_STXVW4X_V4SF);
15052 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15053 VSX_BUILTIN_STXVW4X_V4SI);
15054 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15055 VSX_BUILTIN_STXVW4X_V8HI);
15056 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15057 VSX_BUILTIN_STXVW4X_V16QI);
15058 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
15059 VSX_BUILTIN_VEC_LD);
15060 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
15061 VSX_BUILTIN_VEC_ST);
15063 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
15064 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
15065 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
15067 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
15068 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
15069 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
15070 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
15071 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
15072 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
15073 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
15074 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
15075 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
15076 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
15077 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
15078 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
15080 /* Cell builtins. */
15081 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
15082 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
15083 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
15084 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
15086 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
15087 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
15088 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
15089 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
15091 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
15092 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
15093 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
15094 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
15096 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
15097 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
15098 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15099 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15101 /* Add the DST variants. */
15102 d = bdesc_dst;
15103 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15104 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
15106 /* Initialize the predicates. */
15107 d = bdesc_altivec_preds;
15108 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15110 machine_mode mode1;
15111 tree type;
15113 if (rs6000_overloaded_builtin_p (d->code))
15114 mode1 = VOIDmode;
15115 else
15116 mode1 = insn_data[d->icode].operand[1].mode;
15118 switch (mode1)
15120 case VOIDmode:
15121 type = int_ftype_int_opaque_opaque;
15122 break;
15123 case V2DImode:
15124 type = int_ftype_int_v2di_v2di;
15125 break;
15126 case V4SImode:
15127 type = int_ftype_int_v4si_v4si;
15128 break;
15129 case V8HImode:
15130 type = int_ftype_int_v8hi_v8hi;
15131 break;
15132 case V16QImode:
15133 type = int_ftype_int_v16qi_v16qi;
15134 break;
15135 case V4SFmode:
15136 type = int_ftype_int_v4sf_v4sf;
15137 break;
15138 case V2DFmode:
15139 type = int_ftype_int_v2df_v2df;
15140 break;
15141 default:
15142 gcc_unreachable ();
15145 def_builtin (d->name, type, d->code);
15148 /* Initialize the abs* operators. */
15149 d = bdesc_abs;
15150 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15152 machine_mode mode0;
15153 tree type;
15155 mode0 = insn_data[d->icode].operand[0].mode;
15157 switch (mode0)
15159 case V2DImode:
15160 type = v2di_ftype_v2di;
15161 break;
15162 case V4SImode:
15163 type = v4si_ftype_v4si;
15164 break;
15165 case V8HImode:
15166 type = v8hi_ftype_v8hi;
15167 break;
15168 case V16QImode:
15169 type = v16qi_ftype_v16qi;
15170 break;
15171 case V4SFmode:
15172 type = v4sf_ftype_v4sf;
15173 break;
15174 case V2DFmode:
15175 type = v2df_ftype_v2df;
15176 break;
15177 default:
15178 gcc_unreachable ();
15181 def_builtin (d->name, type, d->code);
15184   /* Initialize the target builtin that implements
15185 targetm.vectorize.builtin_mask_for_load. */
15187 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15188 v16qi_ftype_long_pcvoid,
15189 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15190 BUILT_IN_MD, NULL, NULL_TREE);
15191 TREE_READONLY (decl) = 1;
15192 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15193 altivec_builtin_mask_for_load = decl;
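/* Illustration (not in the original source): the autovectorizer uses
   this hook to realign a misaligned load, roughly

     mask = __builtin_altivec_mask_for_load (0, addr);   // lvsl
     lo   = vec_ld (0, addr);
     hi   = vec_ld (15, addr);
     val  = vec_perm (lo, hi, mask);

   Marking the decl TREE_READONLY lets redundant mask computations be
   CSEd away.  */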
15195 /* Access to the vec_init patterns. */
15196 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15197 integer_type_node, integer_type_node,
15198 integer_type_node, NULL_TREE);
15199 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15201 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15202 short_integer_type_node,
15203 short_integer_type_node,
15204 short_integer_type_node,
15205 short_integer_type_node,
15206 short_integer_type_node,
15207 short_integer_type_node,
15208 short_integer_type_node, NULL_TREE);
15209 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15211 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15212 char_type_node, char_type_node,
15213 char_type_node, char_type_node,
15214 char_type_node, char_type_node,
15215 char_type_node, char_type_node,
15216 char_type_node, char_type_node,
15217 char_type_node, char_type_node,
15218 char_type_node, char_type_node,
15219 char_type_node, NULL_TREE);
15220 def_builtin ("__builtin_vec_init_v16qi", ftype,
15221 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15223 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15224 float_type_node, float_type_node,
15225 float_type_node, NULL_TREE);
15226 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15228 /* VSX builtins. */
15229 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15230 double_type_node, NULL_TREE);
15231 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15233 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15234 intDI_type_node, NULL_TREE);
15235 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15237 /* Access to the vec_set patterns. */
15238 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15239 intSI_type_node,
15240 integer_type_node, NULL_TREE);
15241 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15243 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15244 intHI_type_node,
15245 integer_type_node, NULL_TREE);
15246 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15248 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15249 intQI_type_node,
15250 integer_type_node, NULL_TREE);
15251 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15253 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15254 float_type_node,
15255 integer_type_node, NULL_TREE);
15256 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15258 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15259 double_type_node,
15260 integer_type_node, NULL_TREE);
15261 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15263 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15264 intDI_type_node,
15265 integer_type_node, NULL_TREE);
15266 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15268 /* Access to the vec_extract patterns. */
15269 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15270 integer_type_node, NULL_TREE);
15271 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15273 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15274 integer_type_node, NULL_TREE);
15275 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15277 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15278 integer_type_node, NULL_TREE);
15279 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15281 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15282 integer_type_node, NULL_TREE);
15283 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15285 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15286 integer_type_node, NULL_TREE);
15287 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15289 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15290 integer_type_node, NULL_TREE);
15291 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
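/* Usage sketch (not part of the original source):

     vector float v;
     float x = __builtin_vec_ext_v4sf (v, 2);      // read element 2
     v = __builtin_vec_set_v4sf (v, 1.0f, 0);      // write element 0

   These provide the expanders behind the generic vec_extract and
   vec_set operations for each supported vector mode.  */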
15294 if (V1TI_type_node)
15296 tree v1ti_ftype_long_pcvoid
15297 = build_function_type_list (V1TI_type_node,
15298 long_integer_type_node, pcvoid_type_node,
15299 NULL_TREE);
15300 tree void_ftype_v1ti_long_pvoid
15301 = build_function_type_list (void_type_node,
15302 V1TI_type_node, long_integer_type_node,
15303 pvoid_type_node, NULL_TREE);
15304 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15305 VSX_BUILTIN_LXVD2X_V1TI);
15306 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15307 VSX_BUILTIN_STXVD2X_V1TI);
15308 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15309                                     NULL_TREE);
15310 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15311 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15312 intTI_type_node,
15313 integer_type_node, NULL_TREE);
15314 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15315 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15316 integer_type_node, NULL_TREE);
15317 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15322 static void
15323 htm_init_builtins (void)
15325 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15326 const struct builtin_description *d;
15327 size_t i;
15329 d = bdesc_htm;
15330 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15332 tree op[MAX_HTM_OPERANDS], type;
15333 HOST_WIDE_INT mask = d->mask;
15334 unsigned attr = rs6000_builtin_info[d->code].attr;
15335 bool void_func = (attr & RS6000_BTC_VOID);
15336 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15337 int nopnds = 0;
15338 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15339 : unsigned_type_node;
15341 if ((mask & builtin_mask) != mask)
15343 if (TARGET_DEBUG_BUILTIN)
15344           fprintf (stderr, "htm_builtin, skip %s\n", d->name);
15345 continue;
15348 if (d->name == 0)
15350 if (TARGET_DEBUG_BUILTIN)
15351             fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15352 (long unsigned) i);
15353 continue;
15356 op[nopnds++] = (void_func) ? void_type_node : argtype;
15358 if (attr_args == RS6000_BTC_UNARY)
15359 op[nopnds++] = argtype;
15360 else if (attr_args == RS6000_BTC_BINARY)
15362 op[nopnds++] = argtype;
15363 op[nopnds++] = argtype;
15365 else if (attr_args == RS6000_BTC_TERNARY)
15367 op[nopnds++] = argtype;
15368 op[nopnds++] = argtype;
15369 op[nopnds++] = argtype;
15372 switch (nopnds)
15374 case 1:
15375 type = build_function_type_list (op[0], NULL_TREE);
15376 break;
15377 case 2:
15378 type = build_function_type_list (op[0], op[1], NULL_TREE);
15379 break;
15380 case 3:
15381 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15382 break;
15383 case 4:
15384 type = build_function_type_list (op[0], op[1], op[2], op[3],
15385 NULL_TREE);
15386 break;
15387 default:
15388 gcc_unreachable ();
15391 def_builtin (d->name, type, d->code);
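/* Worked example (not in the original source): for a non-void binary
   HTM builtin without RS6000_BTC_SPR, op[] becomes
   { unsigned, unsigned, unsigned } and the builtin is registered with
   type  unsigned int (unsigned int, unsigned int); SPR builtins use
   long_unsigned_type_node instead.  */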
15395 /* Hash function for builtin functions with up to 3 arguments and a return
15396 type. */
15397 hashval_t
15398 builtin_hasher::hash (builtin_hash_struct *bh)
15400 unsigned ret = 0;
15401 int i;
15403 for (i = 0; i < 4; i++)
15405 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15406 ret = (ret * 2) + bh->uns_p[i];
15409 return ret;
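/* Worked example (not in the original source): each of the four
   (mode, uns_p) pairs is folded in as

     ret = (ret * MAX_MACHINE_MODE + mode[i]) * 2 + uns_p[i];

   so two signatures differing in any mode or signedness bit hash
   differently (modulo wrap-around collisions).  */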
15412 /* Compare builtin hash entries H1 and H2 for equivalence. */
15413 bool
15414 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15416 return ((p1->mode[0] == p2->mode[0])
15417 && (p1->mode[1] == p2->mode[1])
15418 && (p1->mode[2] == p2->mode[2])
15419 && (p1->mode[3] == p2->mode[3])
15420 && (p1->uns_p[0] == p2->uns_p[0])
15421 && (p1->uns_p[1] == p2->uns_p[1])
15422 && (p1->uns_p[2] == p2->uns_p[2])
15423 && (p1->uns_p[3] == p2->uns_p[3]));
15426 /* Map types for builtin functions with an explicit return type and up to 3
15427    arguments.  Functions with fewer than 3 arguments use VOIDmode as the mode
15428    of the unused arguments.  */
15429 static tree
15430 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15431 machine_mode mode_arg1, machine_mode mode_arg2,
15432 enum rs6000_builtins builtin, const char *name)
15434 struct builtin_hash_struct h;
15435 struct builtin_hash_struct *h2;
15436 int num_args = 3;
15437 int i;
15438 tree ret_type = NULL_TREE;
15439 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15441 /* Create builtin_hash_table. */
15442 if (builtin_hash_table == NULL)
15443 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15445 h.type = NULL_TREE;
15446 h.mode[0] = mode_ret;
15447 h.mode[1] = mode_arg0;
15448 h.mode[2] = mode_arg1;
15449 h.mode[3] = mode_arg2;
15450 h.uns_p[0] = 0;
15451 h.uns_p[1] = 0;
15452 h.uns_p[2] = 0;
15453 h.uns_p[3] = 0;
15455 /* If the builtin is a type that produces unsigned results or takes unsigned
15456 arguments, and it is returned as a decl for the vectorizer (such as
15457 widening multiplies, permute), make sure the arguments and return value
15458 are type correct. */
15459 switch (builtin)
15461 /* unsigned 1 argument functions. */
15462 case CRYPTO_BUILTIN_VSBOX:
15463 case P8V_BUILTIN_VGBBD:
15464 case MISC_BUILTIN_CDTBCD:
15465 case MISC_BUILTIN_CBCDTD:
15466 h.uns_p[0] = 1;
15467 h.uns_p[1] = 1;
15468 break;
15470 /* unsigned 2 argument functions. */
15471 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15472 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15473 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15474 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15475 case CRYPTO_BUILTIN_VCIPHER:
15476 case CRYPTO_BUILTIN_VCIPHERLAST:
15477 case CRYPTO_BUILTIN_VNCIPHER:
15478 case CRYPTO_BUILTIN_VNCIPHERLAST:
15479 case CRYPTO_BUILTIN_VPMSUMB:
15480 case CRYPTO_BUILTIN_VPMSUMH:
15481 case CRYPTO_BUILTIN_VPMSUMW:
15482 case CRYPTO_BUILTIN_VPMSUMD:
15483 case CRYPTO_BUILTIN_VPMSUM:
15484 case MISC_BUILTIN_ADDG6S:
15485 case MISC_BUILTIN_DIVWEU:
15486 case MISC_BUILTIN_DIVWEUO:
15487 case MISC_BUILTIN_DIVDEU:
15488 case MISC_BUILTIN_DIVDEUO:
15489 h.uns_p[0] = 1;
15490 h.uns_p[1] = 1;
15491 h.uns_p[2] = 1;
15492 break;
15494 /* unsigned 3 argument functions. */
15495 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15496 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15497 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15498 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15499 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15500 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15501 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15502 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15503 case VSX_BUILTIN_VPERM_16QI_UNS:
15504 case VSX_BUILTIN_VPERM_8HI_UNS:
15505 case VSX_BUILTIN_VPERM_4SI_UNS:
15506 case VSX_BUILTIN_VPERM_2DI_UNS:
15507 case VSX_BUILTIN_XXSEL_16QI_UNS:
15508 case VSX_BUILTIN_XXSEL_8HI_UNS:
15509 case VSX_BUILTIN_XXSEL_4SI_UNS:
15510 case VSX_BUILTIN_XXSEL_2DI_UNS:
15511 case CRYPTO_BUILTIN_VPERMXOR:
15512 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15513 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15514 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15515 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15516 case CRYPTO_BUILTIN_VSHASIGMAW:
15517 case CRYPTO_BUILTIN_VSHASIGMAD:
15518 case CRYPTO_BUILTIN_VSHASIGMA:
15519 h.uns_p[0] = 1;
15520 h.uns_p[1] = 1;
15521 h.uns_p[2] = 1;
15522 h.uns_p[3] = 1;
15523 break;
15525 /* signed permute functions with unsigned char mask. */
15526 case ALTIVEC_BUILTIN_VPERM_16QI:
15527 case ALTIVEC_BUILTIN_VPERM_8HI:
15528 case ALTIVEC_BUILTIN_VPERM_4SI:
15529 case ALTIVEC_BUILTIN_VPERM_4SF:
15530 case ALTIVEC_BUILTIN_VPERM_2DI:
15531 case ALTIVEC_BUILTIN_VPERM_2DF:
15532 case VSX_BUILTIN_VPERM_16QI:
15533 case VSX_BUILTIN_VPERM_8HI:
15534 case VSX_BUILTIN_VPERM_4SI:
15535 case VSX_BUILTIN_VPERM_4SF:
15536 case VSX_BUILTIN_VPERM_2DI:
15537 case VSX_BUILTIN_VPERM_2DF:
15538 h.uns_p[3] = 1;
15539 break;
15541 /* unsigned args, signed return. */
15542 case VSX_BUILTIN_XVCVUXDDP_UNS:
15543 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15544 h.uns_p[1] = 1;
15545 break;
15547 /* signed args, unsigned return. */
15548 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15549 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15550 case MISC_BUILTIN_UNPACK_TD:
15551 case MISC_BUILTIN_UNPACK_V1TI:
15552 h.uns_p[0] = 1;
15553 break;
15555 /* unsigned arguments for 128-bit pack instructions. */
15556 case MISC_BUILTIN_PACK_TD:
15557 case MISC_BUILTIN_PACK_V1TI:
15558 h.uns_p[1] = 1;
15559 h.uns_p[2] = 1;
15560 break;
15562 default:
15563 break;
15566 /* Figure out how many args are present. */
15567 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15568 num_args--;
15570 if (num_args == 0)
15571 fatal_error (input_location,
15572 "internal error: builtin function %s had no type", name);
15574 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15575 if (!ret_type && h.uns_p[0])
15576 ret_type = builtin_mode_to_type[h.mode[0]][0];
15578 if (!ret_type)
15579 fatal_error (input_location,
15580 "internal error: builtin function %s had an unexpected "
15581 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15583 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15584 arg_type[i] = NULL_TREE;
15586 for (i = 0; i < num_args; i++)
15588 int m = (int) h.mode[i+1];
15589 int uns_p = h.uns_p[i+1];
15591 arg_type[i] = builtin_mode_to_type[m][uns_p];
15592 if (!arg_type[i] && uns_p)
15593 arg_type[i] = builtin_mode_to_type[m][0];
15595 if (!arg_type[i])
15596 fatal_error (input_location,
15597 "internal error: builtin function %s, argument %d "
15598 "had unexpected argument type %s", name, i,
15599 GET_MODE_NAME (m));
15602 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15603 if (*found == NULL)
15605 h2 = ggc_alloc<builtin_hash_struct> ();
15606 *h2 = h;
15607 *found = h2;
15609 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15610 arg_type[2], NULL_TREE);
15613 return (*found)->type;
15616 static void
15617 rs6000_common_init_builtins (void)
15619 const struct builtin_description *d;
15620 size_t i;
15622 tree opaque_ftype_opaque = NULL_TREE;
15623 tree opaque_ftype_opaque_opaque = NULL_TREE;
15624 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15625 tree v2si_ftype_qi = NULL_TREE;
15626 tree v2si_ftype_v2si_qi = NULL_TREE;
15627 tree v2si_ftype_int_qi = NULL_TREE;
15628 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15630 if (!TARGET_PAIRED_FLOAT)
15632 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15633 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15636 /* Paired and SPE builtins are only available if you build a compiler with
15637 the appropriate options, so only create those builtins with the
15638 appropriate compiler option. Create Altivec and VSX builtins on machines
15639 with at least the general purpose extensions (970 and newer) to allow the
15640      use of the target attribute.  */
15642 if (TARGET_EXTRA_BUILTINS)
15643 builtin_mask |= RS6000_BTM_COMMON;
15645 /* Add the ternary operators. */
15646 d = bdesc_3arg;
15647 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15649 tree type;
15650 HOST_WIDE_INT mask = d->mask;
15652 if ((mask & builtin_mask) != mask)
15654 if (TARGET_DEBUG_BUILTIN)
15655 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15656 continue;
15659 if (rs6000_overloaded_builtin_p (d->code))
15661 if (! (type = opaque_ftype_opaque_opaque_opaque))
15662 type = opaque_ftype_opaque_opaque_opaque
15663 = build_function_type_list (opaque_V4SI_type_node,
15664 opaque_V4SI_type_node,
15665 opaque_V4SI_type_node,
15666 opaque_V4SI_type_node,
15667 NULL_TREE);
15669 else
15671 enum insn_code icode = d->icode;
15672 if (d->name == 0)
15674 if (TARGET_DEBUG_BUILTIN)
15675             fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15676 (long unsigned)i);
15678 continue;
15681 if (icode == CODE_FOR_nothing)
15683 if (TARGET_DEBUG_BUILTIN)
15684 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15685 d->name);
15687 continue;
15690 type = builtin_function_type (insn_data[icode].operand[0].mode,
15691 insn_data[icode].operand[1].mode,
15692 insn_data[icode].operand[2].mode,
15693 insn_data[icode].operand[3].mode,
15694 d->code, d->name);
15697 def_builtin (d->name, type, d->code);
15700 /* Add the binary operators. */
15701 d = bdesc_2arg;
15702 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15704 machine_mode mode0, mode1, mode2;
15705 tree type;
15706 HOST_WIDE_INT mask = d->mask;
15708 if ((mask & builtin_mask) != mask)
15710 if (TARGET_DEBUG_BUILTIN)
15711 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15712 continue;
15715 if (rs6000_overloaded_builtin_p (d->code))
15717 if (! (type = opaque_ftype_opaque_opaque))
15718 type = opaque_ftype_opaque_opaque
15719 = build_function_type_list (opaque_V4SI_type_node,
15720 opaque_V4SI_type_node,
15721 opaque_V4SI_type_node,
15722 NULL_TREE);
15724 else
15726 enum insn_code icode = d->icode;
15727 if (d->name == 0)
15729 if (TARGET_DEBUG_BUILTIN)
15730             fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
15731 (long unsigned)i);
15733 continue;
15736 if (icode == CODE_FOR_nothing)
15738 if (TARGET_DEBUG_BUILTIN)
15739 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15740 d->name);
15742 continue;
15745 mode0 = insn_data[icode].operand[0].mode;
15746 mode1 = insn_data[icode].operand[1].mode;
15747 mode2 = insn_data[icode].operand[2].mode;
15749 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15751 if (! (type = v2si_ftype_v2si_qi))
15752 type = v2si_ftype_v2si_qi
15753 = build_function_type_list (opaque_V2SI_type_node,
15754 opaque_V2SI_type_node,
15755 char_type_node,
15756 NULL_TREE);
15759 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15760 && mode2 == QImode)
15762 if (! (type = v2si_ftype_int_qi))
15763 type = v2si_ftype_int_qi
15764 = build_function_type_list (opaque_V2SI_type_node,
15765 integer_type_node,
15766 char_type_node,
15767 NULL_TREE);
15770 else
15771 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15772 d->code, d->name);
15775 def_builtin (d->name, type, d->code);
15778 /* Add the simple unary operators. */
15779 d = bdesc_1arg;
15780 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15782 machine_mode mode0, mode1;
15783 tree type;
15784 HOST_WIDE_INT mask = d->mask;
15786 if ((mask & builtin_mask) != mask)
15788 if (TARGET_DEBUG_BUILTIN)
15789 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15790 continue;
15793 if (rs6000_overloaded_builtin_p (d->code))
15795 if (! (type = opaque_ftype_opaque))
15796 type = opaque_ftype_opaque
15797 = build_function_type_list (opaque_V4SI_type_node,
15798 opaque_V4SI_type_node,
15799 NULL_TREE);
15801 else
15803 enum insn_code icode = d->icode;
15804 if (d->name == 0)
15806 if (TARGET_DEBUG_BUILTIN)
15807             fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
15808 (long unsigned)i);
15810 continue;
15813 if (icode == CODE_FOR_nothing)
15815 if (TARGET_DEBUG_BUILTIN)
15816 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15817 d->name);
15819 continue;
15822 mode0 = insn_data[icode].operand[0].mode;
15823 mode1 = insn_data[icode].operand[1].mode;
15825 if (mode0 == V2SImode && mode1 == QImode)
15827 if (! (type = v2si_ftype_qi))
15828 type = v2si_ftype_qi
15829 = build_function_type_list (opaque_V2SI_type_node,
15830 char_type_node,
15831 NULL_TREE);
15834 else
15835 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15836 d->code, d->name);
15839 def_builtin (d->name, type, d->code);
15843 static void
15844 rs6000_init_libfuncs (void)
15846 if (!TARGET_IEEEQUAD)
15847 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15848 if (!TARGET_XL_COMPAT)
15850 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15851 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15852 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15853 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15855 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15857 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15858 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15859 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15860 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15861 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15862 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15863 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15865 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15866 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15867 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15868 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15869 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15870 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15871 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15872 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15875 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15876 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15878 else
15880 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15881 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15882 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15883 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15885 else
15887 /* 32-bit SVR4 quad floating point routines. */
15889 set_optab_libfunc (add_optab, TFmode, "_q_add");
15890 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15891 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15892 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15893 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15894 if (TARGET_PPC_GPOPT)
15895 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15897 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15898 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15899 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15900 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15901 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15902 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15904 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15905 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15906 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15907 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15908 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15909 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15910 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15911 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
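/* For example, assuming 128-bit long double in the IBM extended format
   (!TARGET_IEEEQUAD) and !TARGET_XL_COMPAT, a TFmode addition lowers to a
   call to __gcc_qadd on AIX/Darwin/64-bit Linux, while the 32-bit SVR4
   (TARGET_IEEEQUAD) path above uses _q_add instead.  */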
15916 /* Expand a block clear operation, and return 1 if successful. Return 0
15917 if we should let the compiler generate normal code.
15919 operands[0] is the destination
15920 operands[1] is the length
15921 operands[3] is the alignment */
15923 int
15924 expand_block_clear (rtx operands[])
15926 rtx orig_dest = operands[0];
15927 rtx bytes_rtx = operands[1];
15928 rtx align_rtx = operands[3];
15929 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15930 HOST_WIDE_INT align;
15931 HOST_WIDE_INT bytes;
15932 int offset;
15933 int clear_bytes;
15934 int clear_step;
15936 /* If this is not a fixed size clear, just call memset. */
15937 if (! constp)
15938 return 0;
15940 /* This must be a fixed size alignment */
15941 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15942 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15944 /* Anything to clear? */
15945 bytes = INTVAL (bytes_rtx);
15946 if (bytes <= 0)
15947 return 1;
15949 /* Use the builtin memset after a point, to avoid huge code bloat.
15950 When optimize_size, avoid any significant code bloat; calling
15951 memset is about 4 instructions, so allow for one instruction to
15952 load zero and three to do clearing. */
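/* Worked example: with TARGET_ALTIVEC and 128-bit alignment, clear_step
   is 16, so under optimize_size an inline clear is limited to
   3 * 16 = 48 bytes (three stores), and otherwise to 8 * 16 = 128 bytes;
   anything larger falls through to the memset call.  */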
15953 if (TARGET_ALTIVEC && align >= 128)
15954 clear_step = 16;
15955 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15956 clear_step = 8;
15957 else if (TARGET_SPE && align >= 64)
15958 clear_step = 8;
15959 else
15960 clear_step = 4;
15962 if (optimize_size && bytes > 3 * clear_step)
15963 return 0;
15964 if (! optimize_size && bytes > 8 * clear_step)
15965 return 0;
15967 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15969 machine_mode mode = BLKmode;
15970 rtx dest;
15972 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15974 clear_bytes = 16;
15975 mode = V4SImode;
15977 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15979 clear_bytes = 8;
15980 mode = V2SImode;
15982 else if (bytes >= 8 && TARGET_POWERPC64
15983 && (align >= 64 || !STRICT_ALIGNMENT))
15985 clear_bytes = 8;
15986 mode = DImode;
15987 if (offset == 0 && align < 64)
15989 rtx addr;
15991 /* If the address form is reg+offset with offset not a
15992 multiple of four, reload into reg indirect form here
15993 rather than waiting for reload. This way we get one
15994 reload, not one per store. */
15995 addr = XEXP (orig_dest, 0);
15996 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15997 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15998 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16000 addr = copy_addr_to_reg (addr);
16001 orig_dest = replace_equiv_address (orig_dest, addr);
16005 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16006 { /* move 4 bytes */
16007 clear_bytes = 4;
16008 mode = SImode;
16010 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16011 { /* move 2 bytes */
16012 clear_bytes = 2;
16013 mode = HImode;
16015 else /* move 1 byte at a time */
16017 clear_bytes = 1;
16018 mode = QImode;
16021 dest = adjust_address (orig_dest, mode, offset);
16023 emit_move_insn (dest, CONST0_RTX (mode));
16026 return 1;
16030 /* Expand a block move operation, and return 1 if successful. Return 0
16031 if we should let the compiler generate normal code.
16033 operands[0] is the destination
16034 operands[1] is the source
16035 operands[2] is the length
16036 operands[3] is the alignment */
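/* Illustrative example: assuming TARGET_ALTIVEC, 16-byte alignment, and a
   length within rs6000_block_move_inline_limit, a 35-byte copy expands to
   two V4SImode moves, one HImode move, and one QImode move
   (16 + 16 + 2 + 1 bytes).  */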
16038 #define MAX_MOVE_REG 4
16040 int
16041 expand_block_move (rtx operands[])
16043 rtx orig_dest = operands[0];
16044 rtx orig_src = operands[1];
16045 rtx bytes_rtx = operands[2];
16046 rtx align_rtx = operands[3];
16047 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
16048 int align;
16049 int bytes;
16050 int offset;
16051 int move_bytes;
16052 rtx stores[MAX_MOVE_REG];
16053 int num_reg = 0;
16055 /* If this is not a fixed size move, just call memcpy */
16056 if (! constp)
16057 return 0;
16059 /* This must be a fixed size alignment */
16060 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16061 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16063 /* Anything to move? */
16064 bytes = INTVAL (bytes_rtx);
16065 if (bytes <= 0)
16066 return 1;
16068 if (bytes > rs6000_block_move_inline_limit)
16069 return 0;
16071 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
16073 union {
16074 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
16075 rtx (*mov) (rtx, rtx);
16076 } gen_func;
16077 machine_mode mode = BLKmode;
16078 rtx src, dest;
16080 /* Altivec first, since it will be faster than a string move
16081 when it applies, and usually not significantly larger. */
16082 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16084 move_bytes = 16;
16085 mode = V4SImode;
16086 gen_func.mov = gen_movv4si;
16088 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16090 move_bytes = 8;
16091 mode = V2SImode;
16092 gen_func.mov = gen_movv2si;
16094 else if (TARGET_STRING
16095 && bytes > 24 /* move up to 32 bytes at a time */
16096 && ! fixed_regs[5]
16097 && ! fixed_regs[6]
16098 && ! fixed_regs[7]
16099 && ! fixed_regs[8]
16100 && ! fixed_regs[9]
16101 && ! fixed_regs[10]
16102 && ! fixed_regs[11]
16103 && ! fixed_regs[12])
16105 move_bytes = (bytes > 32) ? 32 : bytes;
16106 gen_func.movmemsi = gen_movmemsi_8reg;
16108 else if (TARGET_STRING
16109 && bytes > 16 /* move up to 24 bytes at a time */
16110 && ! fixed_regs[5]
16111 && ! fixed_regs[6]
16112 && ! fixed_regs[7]
16113 && ! fixed_regs[8]
16114 && ! fixed_regs[9]
16115 && ! fixed_regs[10])
16117 move_bytes = (bytes > 24) ? 24 : bytes;
16118 gen_func.movmemsi = gen_movmemsi_6reg;
16120 else if (TARGET_STRING
16121 && bytes > 8 /* move up to 16 bytes at a time */
16122 && ! fixed_regs[5]
16123 && ! fixed_regs[6]
16124 && ! fixed_regs[7]
16125 && ! fixed_regs[8])
16127 move_bytes = (bytes > 16) ? 16 : bytes;
16128 gen_func.movmemsi = gen_movmemsi_4reg;
16130 else if (bytes >= 8 && TARGET_POWERPC64
16131 && (align >= 64 || !STRICT_ALIGNMENT))
16133 move_bytes = 8;
16134 mode = DImode;
16135 gen_func.mov = gen_movdi;
16136 if (offset == 0 && align < 64)
16138 rtx addr;
16140 /* If the address form is reg+offset with offset not a
16141 multiple of four, reload into reg indirect form here
16142 rather than waiting for reload. This way we get one
16143 reload, not one per load and/or store. */
16144 addr = XEXP (orig_dest, 0);
16145 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16146 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16147 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16149 addr = copy_addr_to_reg (addr);
16150 orig_dest = replace_equiv_address (orig_dest, addr);
16152 addr = XEXP (orig_src, 0);
16153 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16154 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16155 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16157 addr = copy_addr_to_reg (addr);
16158 orig_src = replace_equiv_address (orig_src, addr);
16162 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16163 { /* move up to 8 bytes at a time */
16164 move_bytes = (bytes > 8) ? 8 : bytes;
16165 gen_func.movmemsi = gen_movmemsi_2reg;
16167 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16168 { /* move 4 bytes */
16169 move_bytes = 4;
16170 mode = SImode;
16171 gen_func.mov = gen_movsi;
16173 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16174 { /* move 2 bytes */
16175 move_bytes = 2;
16176 mode = HImode;
16177 gen_func.mov = gen_movhi;
16179 else if (TARGET_STRING && bytes > 1)
16180 { /* move up to 4 bytes at a time */
16181 move_bytes = (bytes > 4) ? 4 : bytes;
16182 gen_func.movmemsi = gen_movmemsi_1reg;
16184 else /* move 1 byte at a time */
16186 move_bytes = 1;
16187 mode = QImode;
16188 gen_func.mov = gen_movqi;
16191 src = adjust_address (orig_src, mode, offset);
16192 dest = adjust_address (orig_dest, mode, offset);
16194 if (mode != BLKmode)
16196 rtx tmp_reg = gen_reg_rtx (mode);
16198 emit_insn ((*gen_func.mov) (tmp_reg, src));
16199 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16202 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16204 int i;
16205 for (i = 0; i < num_reg; i++)
16206 emit_insn (stores[i]);
16207 num_reg = 0;
16210 if (mode == BLKmode)
16212 /* Move the address into scratch registers. The movmemsi
16213 patterns require zero offset. */
16214 if (!REG_P (XEXP (src, 0)))
16216 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16217 src = replace_equiv_address (src, src_reg);
16219 set_mem_size (src, move_bytes);
16221 if (!REG_P (XEXP (dest, 0)))
16223 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16224 dest = replace_equiv_address (dest, dest_reg);
16226 set_mem_size (dest, move_bytes);
16228 emit_insn ((*gen_func.movmemsi) (dest, src,
16229 GEN_INT (move_bytes & 31),
16230 align_rtx));
16234 return 1;
16238 /* Return a string to perform a load_multiple operation.
16239 operands[0] is the vector.
16240 operands[1] is the source address.
16241 operands[2] is the first destination register. */
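/* Illustrative example: loading three words into r5..r7 from the address
   in r4 normally emits "lswi 5,4,12"; when the address register is itself
   one of the destinations, the code below reorders the loads so that the
   address register is overwritten last.  */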
16243 const char *
16244 rs6000_output_load_multiple (rtx operands[3])
16246 /* We have to handle the case where the pseudo used to contain the address
16247 is assigned to one of the output registers. */
16248 int i, j;
16249 int words = XVECLEN (operands[0], 0);
16250 rtx xop[10];
16252 if (XVECLEN (operands[0], 0) == 1)
16253 return "lwz %2,0(%1)";
16255 for (i = 0; i < words; i++)
16256 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16258 if (i == words-1)
16260 xop[0] = GEN_INT (4 * (words-1));
16261 xop[1] = operands[1];
16262 xop[2] = operands[2];
16263 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16264 return "";
16266 else if (i == 0)
16268 xop[0] = GEN_INT (4 * (words-1));
16269 xop[1] = operands[1];
16270 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16271 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16272 return "";
16274 else
16276 for (j = 0; j < words; j++)
16277 if (j != i)
16279 xop[0] = GEN_INT (j * 4);
16280 xop[1] = operands[1];
16281 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16282 output_asm_insn ("lwz %2,%0(%1)", xop);
16284 xop[0] = GEN_INT (i * 4);
16285 xop[1] = operands[1];
16286 output_asm_insn ("lwz %1,%0(%1)", xop);
16287 return "";
16291 return "lswi %2,%1,%N0";
16295 /* A validation routine: say whether CODE, a condition code, and MODE
16296 match. The other alternatives either don't make sense or should
16297 never be generated. */
16299 void
16300 validate_condition_mode (enum rtx_code code, machine_mode mode)
16302 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16303 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16304 && GET_MODE_CLASS (mode) == MODE_CC);
16306 /* These don't make sense. */
16307 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16308 || mode != CCUNSmode);
16310 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16311 || mode == CCUNSmode);
16313 gcc_assert (mode == CCFPmode
16314 || (code != ORDERED && code != UNORDERED
16315 && code != UNEQ && code != LTGT
16316 && code != UNGT && code != UNLT
16317 && code != UNGE && code != UNLE));
16319 /* These should never be generated except for
16320 flag_finite_math_only. */
16321 gcc_assert (mode != CCFPmode
16322 || flag_finite_math_only
16323 || (code != LE && code != GE
16324 && code != UNEQ && code != LTGT
16325 && code != UNGT && code != UNLT));
16327 /* These are invalid; the information is not there. */
16328 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16332 /* Return 1 if ANDOP is a mask with no bits set outside the mask
16333 required to convert the result of a rotate insn into a shift
16334 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
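/* Worked example: for SHIFTOP == 8 the shift mask is 0xffffff00 in the
   low word, so ANDOP qualifies exactly when its low eight bits are clear;
   rotating left by 8 and then masking gives the same result as shifting
   left by 8.  */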
16336 int
16337 includes_lshift_p (rtx shiftop, rtx andop)
16339 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16341 shift_mask <<= INTVAL (shiftop);
16343 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16346 /* Similar, but for right shift. */
16348 int
16349 includes_rshift_p (rtx shiftop, rtx andop)
16351 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16353 shift_mask >>= INTVAL (shiftop);
16355 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16358 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16359 to perform a left shift. It must have exactly SHIFTOP least
16360 significant 0's, then one or more 1's, then zero or more 0's. */
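/* Worked example: for SHIFTOP == 4, ANDOP == 0xff0 is accepted (four low
   0's, then eight 1's, then 0's), while 0xf0f0 is rejected because a
   second group of 1's follows the first transition.  */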
16362 int
16363 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16365 if (GET_CODE (andop) == CONST_INT)
16367 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16369 c = INTVAL (andop);
16370 if (c == 0 || c == HOST_WIDE_INT_M1U)
16371 return 0;
16373 shift_mask = HOST_WIDE_INT_M1U;
16374 shift_mask <<= INTVAL (shiftop);
16376 /* Find the least significant one bit. */
16377 lsb = c & -c;
16379 /* It must coincide with the LSB of the shift mask. */
16380 if (-lsb != shift_mask)
16381 return 0;
16383 /* Invert to look for the next transition (if any). */
16384 c = ~c;
16386 /* Remove the low group of ones (originally low group of zeros). */
16387 c &= -lsb;
16389 /* Again find the lsb, and check we have all 1's above. */
16390 lsb = c & -c;
16391 return c == -lsb;
16393 else
16394 return 0;
16397 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16398 to perform a left shift. It must have SHIFTOP or more least
16399 significant 0's, with the remainder of the word 1's. */
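/* Worked example: for SHIFTOP == 4, ANDOP == 0xffffffffffffff00 is
   accepted: its least significant set bit (0x100) lies inside the shift
   mask, and every higher bit is a 1.  The all-ones mask is rejected by
   the lsb != 1 test.  */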
16401 int
16402 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16404 if (GET_CODE (andop) == CONST_INT)
16406 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16408 shift_mask = HOST_WIDE_INT_M1U;
16409 shift_mask <<= INTVAL (shiftop);
16410 c = INTVAL (andop);
16412 /* Find the least significant one bit. */
16413 lsb = c & -c;
16415 /* It must be covered by the shift mask.
16416 This test also rejects c == 0. */
16417 if ((lsb & shift_mask) == 0)
16418 return 0;
16420 /* Check we have all 1's above the transition, and reject all 1's. */
16421 return c == -lsb && lsb != 1;
16423 else
16424 return 0;
16427 /* Return 1 if the operands will generate valid arguments to the rlwimi
16428 instruction for an insert with right shift in 64-bit mode. The mask may
16429 not start on the first bit or stop on the last bit because the wrap-around
16430 effects of the instruction do not correspond to the semantics of the RTL insn. */
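/* Worked example: SIZEOP == 8, STARTOP == 40, SHIFTOP == 8 passes all of
   the tests (40 > 32, 40 + 8 < 64, 8 + 8 < 32, 64 - 8 >= 8) and so
   returns 1, while STARTOP == 32 fails the first test because the mask
   would start on the boundary bit.  */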
16432 int
16433 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16435 if (INTVAL (startop) > 32
16436 && INTVAL (startop) < 64
16437 && INTVAL (sizeop) > 1
16438 && INTVAL (sizeop) + INTVAL (startop) < 64
16439 && INTVAL (shiftop) > 0
16440 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16441 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16442 return 1;
16444 return 0;
16447 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16448 for lfq and stfq insns iff the registers are hard registers. */
16450 int
16451 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16453 /* We might have been passed a SUBREG. */
16454 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16455 return 0;
16457 /* We might have been passed non-floating-point registers. */
16458 if (!FP_REGNO_P (REGNO (reg1))
16459 || !FP_REGNO_P (REGNO (reg2)))
16460 return 0;
16462 return (REGNO (reg1) == REGNO (reg2) - 1);
16465 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16466 addr1 and addr2 must be in consecutive memory locations
16467 (addr2 == addr1 + 8). */
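/* Illustrative example: (mem (plus (reg 3) (const_int 16))) followed by
   (mem (plus (reg 3) (const_int 24))) qualifies: the base registers match
   and the second offset is exactly 8 more than the first.  */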
16469 int
16470 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16472 rtx addr1, addr2;
16473 unsigned int reg1, reg2;
16474 int offset1, offset2;
16476 /* The mems cannot be volatile. */
16477 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16478 return 0;
16480 addr1 = XEXP (mem1, 0);
16481 addr2 = XEXP (mem2, 0);
16483 /* Extract an offset (if used) from the first addr. */
16484 if (GET_CODE (addr1) == PLUS)
16486 /* If not a REG, return zero. */
16487 if (GET_CODE (XEXP (addr1, 0)) != REG)
16488 return 0;
16489 else
16491 reg1 = REGNO (XEXP (addr1, 0));
16492 /* The offset must be constant! */
16493 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16494 return 0;
16495 offset1 = INTVAL (XEXP (addr1, 1));
16498 else if (GET_CODE (addr1) != REG)
16499 return 0;
16500 else
16502 reg1 = REGNO (addr1);
16503 /* This was a simple (mem (reg)) expression. Offset is 0. */
16504 offset1 = 0;
16507 /* And now for the second addr. */
16508 if (GET_CODE (addr2) == PLUS)
16510 /* If not a REG, return zero. */
16511 if (GET_CODE (XEXP (addr2, 0)) != REG)
16512 return 0;
16513 else
16515 reg2 = REGNO (XEXP (addr2, 0));
16516 /* The offset must be constant. */
16517 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16518 return 0;
16519 offset2 = INTVAL (XEXP (addr2, 1));
16522 else if (GET_CODE (addr2) != REG)
16523 return 0;
16524 else
16526 reg2 = REGNO (addr2);
16527 /* This was a simple (mem (reg)) expression. Offset is 0. */
16528 offset2 = 0;
16531 /* Both of these must have the same base register. */
16532 if (reg1 != reg2)
16533 return 0;
16535 /* The offset for the second addr must be 8 more than the first addr. */
16536 if (offset2 != offset1 + 8)
16537 return 0;
16539 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16540 instructions. */
16541 return 1;
16545 static rtx
16546 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16548 static bool eliminated = false;
16549 rtx ret;
16551 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16552 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16553 else
16555 rtx mem = cfun->machine->sdmode_stack_slot;
16556 gcc_assert (mem != NULL_RTX);
16558 if (!eliminated)
16560 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16561 cfun->machine->sdmode_stack_slot = mem;
16562 eliminated = true;
16564 ret = mem;
16567 if (TARGET_DEBUG_ADDR)
16569 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16570 GET_MODE_NAME (mode));
16571 if (!ret)
16572 fprintf (stderr, "\tNULL_RTX\n");
16573 else
16574 debug_rtx (ret);
16577 return ret;
16580 /* Return the mode to be used for memory when a secondary memory
16581 location is needed. For SDmode values we need to use DDmode, in
16582 all other cases we can use the same mode. */
16583 machine_mode
16584 rs6000_secondary_memory_needed_mode (machine_mode mode)
16586 if (lra_in_progress && mode == SDmode)
16587 return DDmode;
16588 return mode;
16591 static tree
16592 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16594 /* Don't walk into types. */
16595 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16597 *walk_subtrees = 0;
16598 return NULL_TREE;
16601 switch (TREE_CODE (*tp))
16603 case VAR_DECL:
16604 case PARM_DECL:
16605 case FIELD_DECL:
16606 case RESULT_DECL:
16607 case SSA_NAME:
16608 case REAL_CST:
16609 case MEM_REF:
16610 case VIEW_CONVERT_EXPR:
16611 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16612 return *tp;
16613 break;
16614 default:
16615 break;
16618 return NULL_TREE;
16621 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16622 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16623 only work on the traditional altivec registers, note if an altivec register
16624 was chosen. */
16626 static enum rs6000_reg_type
16627 register_to_reg_type (rtx reg, bool *is_altivec)
16629 HOST_WIDE_INT regno;
16630 enum reg_class rclass;
16632 if (GET_CODE (reg) == SUBREG)
16633 reg = SUBREG_REG (reg);
16635 if (!REG_P (reg))
16636 return NO_REG_TYPE;
16638 regno = REGNO (reg);
16639 if (regno >= FIRST_PSEUDO_REGISTER)
16641 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16642 return PSEUDO_REG_TYPE;
16644 regno = true_regnum (reg);
16645 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16646 return PSEUDO_REG_TYPE;
16649 gcc_assert (regno >= 0);
16651 if (is_altivec && ALTIVEC_REGNO_P (regno))
16652 *is_altivec = true;
16654 rclass = rs6000_regno_regclass[regno];
16655 return reg_class_to_reg_type[(int)rclass];
16658 /* Helper function to return the cost of adding a TOC entry address. */
16660 static inline int
16661 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16663 int ret;
16665 if (TARGET_CMODEL != CMODEL_SMALL)
16666 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16668 else
16669 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16671 return ret;
16674 /* Helper function for rs6000_secondary_reload to determine whether the memory
16675 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16676 needs reloading. Return negative if the memory is not handled by the memory
16677 helper functions (so that a different reload method should be tried), 0 if no
16678 additional instructions are needed, and positive to give the extra cost of
16679 accessing the memory. */
16681 static int
16682 rs6000_secondary_reload_memory (rtx addr,
16683 enum reg_class rclass,
16684 enum machine_mode mode)
16686 int extra_cost = 0;
16687 rtx reg, and_arg, plus_arg0, plus_arg1;
16688 addr_mask_type addr_mask;
16689 const char *type = NULL;
16690 const char *fail_msg = NULL;
16692 if (GPR_REG_CLASS_P (rclass))
16693 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16695 else if (rclass == FLOAT_REGS)
16696 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16698 else if (rclass == ALTIVEC_REGS)
16699 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16701 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16702 else if (rclass == VSX_REGS)
16703 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16704 & ~RELOAD_REG_AND_M16);
16706 else
16708 if (TARGET_DEBUG_ADDR)
16709 fprintf (stderr,
16710 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16711 "class is not GPR, FPR, VMX\n",
16712 GET_MODE_NAME (mode), reg_class_names[rclass]);
16714 return -1;
16717 /* If the register isn't valid in this register class, just return now. */
16718 if ((addr_mask & RELOAD_REG_VALID) == 0)
16720 if (TARGET_DEBUG_ADDR)
16721 fprintf (stderr,
16722 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16723 "not valid in class\n",
16724 GET_MODE_NAME (mode), reg_class_names[rclass]);
16726 return -1;
16729 switch (GET_CODE (addr))
16731 /* Does the register class support auto update forms for this mode? We
16732 don't need a scratch register, since the powerpc only supports
16733 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16734 case PRE_INC:
16735 case PRE_DEC:
16736 reg = XEXP (addr, 0);
16737 if (!base_reg_operand (addr, GET_MODE (reg)))
16739 fail_msg = "no base register #1";
16740 extra_cost = -1;
16743 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16745 extra_cost = 1;
16746 type = "update";
16748 break;
16750 case PRE_MODIFY:
16751 reg = XEXP (addr, 0);
16752 plus_arg1 = XEXP (addr, 1);
16753 if (!base_reg_operand (reg, GET_MODE (reg))
16754 || GET_CODE (plus_arg1) != PLUS
16755 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16757 fail_msg = "bad PRE_MODIFY";
16758 extra_cost = -1;
16761 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16763 extra_cost = 1;
16764 type = "update";
16766 break;
16768 /* Do we need to simulate AND -16 to clear the bottom address bits used
16769 in VMX load/stores? Only allow the AND for vector sizes. */
16770 case AND:
16771 and_arg = XEXP (addr, 0);
16772 if (GET_MODE_SIZE (mode) != 16
16773 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16774 || INTVAL (XEXP (addr, 1)) != -16)
16776 fail_msg = "bad Altivec AND #1";
16777 extra_cost = -1;
16780 if (rclass != ALTIVEC_REGS)
16782 if (legitimate_indirect_address_p (and_arg, false))
16783 extra_cost = 1;
16785 else if (legitimate_indexed_address_p (and_arg, false))
16786 extra_cost = 2;
16788 else
16790 fail_msg = "bad Altivec AND #2";
16791 extra_cost = -1;
16794 type = "and";
16796 break;
16798 /* If this is an indirect address, make sure it is a base register. */
16799 case REG:
16800 case SUBREG:
16801 if (!legitimate_indirect_address_p (addr, false))
16803 extra_cost = 1;
16804 type = "move";
16806 break;
16808 /* If this is an indexed address, make sure the register class can handle
16809 indexed addresses for this mode. */
16810 case PLUS:
16811 plus_arg0 = XEXP (addr, 0);
16812 plus_arg1 = XEXP (addr, 1);
16814 /* (plus (plus (reg) (constant)) (constant)) is generated during
16815 push_reload processing, so handle it now. */
16816 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16818 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16820 extra_cost = 1;
16821 type = "offset";
16825 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16826 push_reload processing, so handle it now. */
16827 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16829 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16831 extra_cost = 1;
16832 type = "indexed #2";
16836 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16838 fail_msg = "no base register #2";
16839 extra_cost = -1;
16842 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16844 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16845 || !legitimate_indexed_address_p (addr, false))
16847 extra_cost = 1;
16848 type = "indexed";
16852 /* Make sure the register class can handle offset addresses. */
16853 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16855 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16857 extra_cost = 1;
16858 type = "offset";
16862 else
16864 fail_msg = "bad PLUS";
16865 extra_cost = -1;
16868 break;
16870 case LO_SUM:
16871 if (!legitimate_lo_sum_address_p (mode, addr, false))
16873 fail_msg = "bad LO_SUM";
16874 extra_cost = -1;
16877 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16879 extra_cost = 1;
16880 type = "lo_sum";
16882 break;
16884 /* Static addresses need to create a TOC entry. */
16885 case CONST:
16886 case SYMBOL_REF:
16887 case LABEL_REF:
16888 type = "address";
16889 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16890 break;
16892 /* TOC references look like offsetable memory. */
16893 case UNSPEC:
16894 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16896 fail_msg = "bad UNSPEC";
16897 extra_cost = -1;
16900 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16902 extra_cost = 1;
16903 type = "toc reference";
16905 break;
16907 default:
16909 fail_msg = "bad address";
16910 extra_cost = -1;
16914 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16916 if (extra_cost < 0)
16917 fprintf (stderr,
16918 "rs6000_secondary_reload_memory error: mode = %s, "
16919 "class = %s, addr_mask = '%s', %s\n",
16920 GET_MODE_NAME (mode),
16921 reg_class_names[rclass],
16922 rs6000_debug_addr_mask (addr_mask, false),
16923 (fail_msg != NULL) ? fail_msg : "<bad address>");
16925 else
16926 fprintf (stderr,
16927 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16928 "addr_mask = '%s', extra cost = %d, %s\n",
16929 GET_MODE_NAME (mode),
16930 reg_class_names[rclass],
16931 rs6000_debug_addr_mask (addr_mask, false),
16932 extra_cost,
16933 (type) ? type : "<none>");
16935 debug_rtx (addr);
16938 return extra_cost;
16941 /* Helper function for rs6000_secondary_reload to return true if a move to a
16942 different register class is really a simple move. */
16944 static bool
16945 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16946 enum rs6000_reg_type from_type,
16947 machine_mode mode)
16949 int size;
16951 /* Add support for various direct moves available. In this function, we only
16952 look at cases where we don't need any extra registers, and one or more
16953 simple move insns are issued. At present, 32-bit integers are not allowed
16954 in FPR/VSX registers. Single precision binary floating point is not a simple
16955 move because we need to convert to the single precision memory layout.
16956 The 4-byte SDmode can be moved. */
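/* Illustrative example: with TARGET_DIRECT_MOVE and TARGET_POWERPC64, a
   DImode copy between a GPR and a VSX register is a single mtvsrd or
   mfvsrd, hence a simple move; SFmode does not qualify here because it
   also needs the xscvdpspn/xscvspdpn layout conversion.  */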
16957 size = GET_MODE_SIZE (mode);
16958 if (TARGET_DIRECT_MOVE
16959 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16960 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16961 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16962 return true;
16964 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16965 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16966 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16967 return true;
16969 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16970 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16971 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16972 return true;
16974 return false;
16977 /* Power8 helper function for rs6000_secondary_reload; handle all of the
16978 special direct moves that involve allocating an extra register. Return
16979 true if there is such a move, recording the insn code of the helper and
16980 the extra cost in SRI, or false if not. */
16982 static bool
16983 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16984 enum rs6000_reg_type from_type,
16985 machine_mode mode,
16986 secondary_reload_info *sri,
16987 bool altivec_p)
16989 bool ret = false;
16990 enum insn_code icode = CODE_FOR_nothing;
16991 int cost = 0;
16992 int size = GET_MODE_SIZE (mode);
16994 if (TARGET_POWERPC64)
16996 if (size == 16)
16998 /* Handle moving 128-bit values from GPRs to VSX registers on
16999 power8 when running in 64-bit mode using XXPERMDI to glue the two
17000 64-bit values back together. */
17001 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17003 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
17004 icode = reg_addr[mode].reload_vsx_gpr;
17007 /* Handle moving 128-bit values from VSX registers to GPRs on
17008 power8 when running in 64-bit mode using XXPERMDI to get access to the
17009 bottom 64-bit value. */
17010 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17012 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
17013 icode = reg_addr[mode].reload_gpr_vsx;
17017 else if (mode == SFmode)
17019 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17021 cost = 3; /* xscvdpspn, mfvsrd, and. */
17022 icode = reg_addr[mode].reload_gpr_vsx;
17025 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17027 cost = 2; /* mtvsrz, xscvspdpn. */
17028 icode = reg_addr[mode].reload_vsx_gpr;
17033 if (TARGET_POWERPC64 && size == 16)
17035 /* Handle moving 128-bit values from GPRs to VSX registers on
17036 power8 when running in 64-bit mode using XXPERMDI to glue the two
17037 64-bit values back together. */
17038 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17040 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
17041 icode = reg_addr[mode].reload_vsx_gpr;
17044 /* Handle moving 128-bit values from VSX registers to GPRs on
17045 power8 when running in 64-bit mode using XXPERMDI to get access to the
17046 bottom 64-bit value. */
17047 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17049 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
17050 icode = reg_addr[mode].reload_gpr_vsx;
17054 else if (!TARGET_POWERPC64 && size == 8)
17056 /* Handle moving 64-bit values from GPRs to floating point registers on
17057 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
17058 values back together. Altivec register classes must be handled
17059 specially since a different instruction is used, and the secondary
17060 reload support requires a single instruction class in the scratch
17061 register constraint. However, right now TFmode is not allowed in
17062 Altivec registers, so the pattern will never match. */
17063 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
17065 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
17066 icode = reg_addr[mode].reload_fpr_gpr;
17070 if (icode != CODE_FOR_nothing)
17072 ret = true;
17073 if (sri)
17075 sri->icode = icode;
17076 sri->extra_cost = cost;
17080 return ret;
17083 /* Return whether a move between two register classes can be done either
17084 directly (simple move) or via a pattern that uses a single extra temporary
17085 (using power8's direct move in this case). */
17087 static bool
17088 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
17089 enum rs6000_reg_type from_type,
17090 machine_mode mode,
17091 secondary_reload_info *sri,
17092 bool altivec_p)
17094 /* Fall back to load/store reloads if either type is not a register. */
17095 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
17096 return false;
17098 /* If we haven't allocated registers yet, assume the move can be done for the
17099 standard register types. */
17100 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
17101 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
17102 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
17103 return true;
17105 /* Moves to the same set of registers is a simple move for non-specialized
17106 registers. */
17107 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
17108 return true;
17110 /* Check whether a simple move can be done directly. */
17111 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
17113 if (sri)
17115 sri->icode = CODE_FOR_nothing;
17116 sri->extra_cost = 0;
17118 return true;
17121 /* Now check if we can do it in a few steps. */
17122 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
17123 altivec_p);
17126 /* Inform reload about cases where moving X with a mode MODE to a register in
17127 RCLASS requires an extra scratch or immediate register. Return the class
17128 needed for the immediate register.
17130 For VSX and Altivec, we may need a register to convert sp+offset into
17131 reg+sp.
17133 For misaligned 64-bit gpr loads and stores we need a register to
17134 convert an offset address to indirect. */
17136 static reg_class_t
17137 rs6000_secondary_reload (bool in_p,
17138 rtx x,
17139 reg_class_t rclass_i,
17140 machine_mode mode,
17141 secondary_reload_info *sri)
17143 enum reg_class rclass = (enum reg_class) rclass_i;
17144 reg_class_t ret = ALL_REGS;
17145 enum insn_code icode;
17146 bool default_p = false;
17147 bool done_p = false;
17149 /* Allow subreg of memory before/during reload. */
17150 bool memory_p = (MEM_P (x)
17151 || (!reload_completed && GET_CODE (x) == SUBREG
17152 && MEM_P (SUBREG_REG (x))));
17154 sri->icode = CODE_FOR_nothing;
17155 sri->extra_cost = 0;
17156 icode = ((in_p)
17157 ? reg_addr[mode].reload_load
17158 : reg_addr[mode].reload_store);
17160 if (REG_P (x) || register_operand (x, mode))
17162 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17163 bool altivec_p = (rclass == ALTIVEC_REGS);
17164 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17166 if (!in_p)
17168 enum rs6000_reg_type exchange = to_type;
17169 to_type = from_type;
17170 from_type = exchange;
17173 /* Can we do a direct move of some sort? */
17174 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17175 altivec_p))
17177 icode = (enum insn_code)sri->icode;
17178 default_p = false;
17179 done_p = true;
17180 ret = NO_REGS;
17184 /* Make sure 0.0 is not reloaded or forced into memory. */
17185 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17187 ret = NO_REGS;
17188 default_p = false;
17189 done_p = true;
17192 /* If this is a scalar floating point value and we want to load it into the
17193 traditional Altivec registers, do it by way of a traditional floating
17194 point register. Also make sure that non-zero constants use an FPR. */
17195 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17196 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17197 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17199 ret = FLOAT_REGS;
17200 default_p = false;
17201 done_p = true;
17204 /* Handle reload of load/stores if we have reload helper functions. */
17205 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17207 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17208 mode);
17210 if (extra_cost >= 0)
17212 done_p = true;
17213 ret = NO_REGS;
17214 if (extra_cost > 0)
17216 sri->extra_cost = extra_cost;
17217 sri->icode = icode;
17222 /* Handle unaligned loads and stores of integer registers. */
17223 if (!done_p && TARGET_POWERPC64
17224 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17225 && memory_p
17226 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17228 rtx addr = XEXP (x, 0);
17229 rtx off = address_offset (addr);
17231 if (off != NULL_RTX)
17233 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17234 unsigned HOST_WIDE_INT offset = INTVAL (off);
17236 /* We need a secondary reload when our legitimate_address_p
17237 says the address is good (as otherwise the entire address
17238 will be reloaded), and the offset is not a multiple of
17239 four or we have an address wrap. Address wrap will only
17240 occur for LO_SUMs since legitimate_offset_address_p
17241 rejects addresses for 16-byte mems that will wrap. */
17242 if (GET_CODE (addr) == LO_SUM
17243 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17244 && ((offset & 3) != 0
17245 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17246 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17247 && (offset & 3) != 0))
17249 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17250 if (in_p)
17251 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17252 : CODE_FOR_reload_di_load);
17253 else
17254 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17255 : CODE_FOR_reload_di_store);
17256 sri->extra_cost = 2;
17257 ret = NO_REGS;
17258 done_p = true;
17260 else
17261 default_p = true;
17263 else
17264 default_p = true;
17267 if (!done_p && !TARGET_POWERPC64
17268 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17269 && memory_p
17270 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17272 rtx addr = XEXP (x, 0);
17273 rtx off = address_offset (addr);
17275 if (off != NULL_RTX)
17277 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17278 unsigned HOST_WIDE_INT offset = INTVAL (off);
17280 /* We need a secondary reload when our legitimate_address_p
17281 says the address is good (as otherwise the entire address
17282 will be reloaded), and we have a wrap.
17284 legitimate_lo_sum_address_p allows LO_SUM addresses to
17285 have any offset so test for wrap in the low 16 bits.
17287 legitimate_offset_address_p checks for the range
17288 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17289 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17290 [0x7ff4,0x7fff] respectively, so test for the
17291 intersection of these ranges, [0x7ffc,0x7fff] and
17292 [0x7ff4,0x7ff7] respectively.
17294 Note that the address we see here may have been
17295 manipulated by legitimize_reload_address. */
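/* Worked instance: with 4-byte words and an 8-byte mode, extra is 4; an
   offset of 0x7ffc places the second word at 0x8000, outside the signed
   16-bit displacement range, and 0x7ffc - (0x8000 - 4) == 0 < 4 triggers
   the secondary reload.  */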
17296 if (GET_CODE (addr) == LO_SUM
17297 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17298 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17300 if (in_p)
17301 sri->icode = CODE_FOR_reload_si_load;
17302 else
17303 sri->icode = CODE_FOR_reload_si_store;
17304 sri->extra_cost = 2;
17305 ret = NO_REGS;
17306 done_p = true;
17308 else
17309 default_p = true;
17311 else
17312 default_p = true;
17315 if (!done_p)
17316 default_p = true;
17318 if (default_p)
17319 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17321 gcc_assert (ret != ALL_REGS);
17323 if (TARGET_DEBUG_ADDR)
17325 fprintf (stderr,
17326 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17327 "mode = %s",
17328 reg_class_names[ret],
17329 in_p ? "true" : "false",
17330 reg_class_names[rclass],
17331 GET_MODE_NAME (mode));
17333 if (reload_completed)
17334 fputs (", after reload", stderr);
17336 if (!done_p)
17337 fputs (", done_p not set", stderr);
17339 if (default_p)
17340 fputs (", default secondary reload", stderr);
17342 if (sri->icode != CODE_FOR_nothing)
17343 fprintf (stderr, ", reload func = %s, extra cost = %d",
17344 insn_data[sri->icode].name, sri->extra_cost);
17346 fputs ("\n", stderr);
17347 debug_rtx (x);
17350 return ret;
17353 /* Better tracing for rs6000_secondary_reload_inner. */
17355 static void
17356 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17357 bool store_p)
17359 rtx set, clobber;
17361 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17363 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17364 store_p ? "store" : "load");
17366 if (store_p)
17367 set = gen_rtx_SET (VOIDmode, mem, reg);
17368 else
17369 set = gen_rtx_SET (VOIDmode, reg, mem);
17371 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17372 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17375 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17376 ATTRIBUTE_NORETURN;
17378 static void
17379 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17380 bool store_p)
17382 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17383 gcc_unreachable ();
17386 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17387 reload helper functions. These were identified in
17388 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17389 reload, it calls the insns:
17390 reload_<RELOAD:mode>_<P:mptrsize>_store
17391 reload_<RELOAD:mode>_<P:mptrsize>_load
17393 which in turn calls this function, to do whatever is necessary to create
17394 valid addresses. */
17396 void
17397 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17399 int regno = true_regnum (reg);
17400 machine_mode mode = GET_MODE (reg);
17401 addr_mask_type addr_mask;
17402 rtx addr;
17403 rtx new_addr;
17404 rtx op_reg, op0, op1;
17405 rtx and_op;
17406 rtx cc_clobber;
17407 rtvec rv;
17409 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17410 || !base_reg_operand (scratch, GET_MODE (scratch)))
17411 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17413 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17414 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17416 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17417 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17419 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17420 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17422 else
17423 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17425 /* Make sure the mode is valid in this register class. */
17426 if ((addr_mask & RELOAD_REG_VALID) == 0)
17427 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17429 if (TARGET_DEBUG_ADDR)
17430 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17432 new_addr = addr = XEXP (mem, 0);
17433 switch (GET_CODE (addr))
17435 /* Does the register class support auto update forms for this mode? If
17436 not, do the update now. We don't need a scratch register, since the
17437 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17438 case PRE_INC:
17439 case PRE_DEC:
17440 op_reg = XEXP (addr, 0);
17441 if (!base_reg_operand (op_reg, Pmode))
17442 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17444 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17446 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17447 new_addr = op_reg;
17449 break;
17451 case PRE_MODIFY:
17452 op0 = XEXP (addr, 0);
17453 op1 = XEXP (addr, 1);
17454 if (!base_reg_operand (op0, Pmode)
17455 || GET_CODE (op1) != PLUS
17456 || !rtx_equal_p (op0, XEXP (op1, 0)))
17457 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17459 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17461 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17462 new_addr = reg;
17464 break;
17466 /* Do we need to simulate AND -16 to clear the bottom address bits used
17467 in VMX load/stores? */
17468 case AND:
17469 op0 = XEXP (addr, 0);
17470 op1 = XEXP (addr, 1);
17471 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17473 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17474 op_reg = op0;
17476 else if (GET_CODE (op1) == PLUS)
17478 emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
17479 op_reg = scratch;
17482 else
17483 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17485 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17486 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17487 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17488 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17489 new_addr = scratch;
17491 break;
17493 /* If this is an indirect address, make sure it is a base register. */
17494 case REG:
17495 case SUBREG:
17496 if (!base_reg_operand (addr, GET_MODE (addr)))
17498 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17499 new_addr = scratch;
17501 break;
17503 /* If this is an indexed address, make sure the register class can handle
17504 indexed addresses for this mode. */
17505 case PLUS:
17506 op0 = XEXP (addr, 0);
17507 op1 = XEXP (addr, 1);
17508 if (!base_reg_operand (op0, Pmode))
17509 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17511 else if (int_reg_operand (op1, Pmode))
17513 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17515 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17516 new_addr = scratch;
17520 /* Make sure the register class can handle offset addresses. */
17521 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17523 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17525 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17526 new_addr = scratch;
17530 else
17531 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17533 break;
17535 case LO_SUM:
17536 op0 = XEXP (addr, 0);
17537 op1 = XEXP (addr, 1);
17538 if (!base_reg_operand (op0, Pmode))
17539 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17541 else if (int_reg_operand (op1, Pmode))
17543 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17545 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17546 new_addr = scratch;
17550 /* Make sure the register class can handle offset addresses. */
17551 else if (legitimate_lo_sum_address_p (mode, addr, false))
17553 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17555 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17556 new_addr = scratch;
17560 else
17561 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17563 break;
17565 case SYMBOL_REF:
17566 case CONST:
17567 case LABEL_REF:
17568 rs6000_emit_move (scratch, addr, Pmode);
17569 new_addr = scratch;
17570 break;
17572 default:
17573 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17576 /* Adjust the address if it changed. */
17577 if (addr != new_addr)
17579 mem = replace_equiv_address_nv (mem, new_addr);
17580 if (TARGET_DEBUG_ADDR)
17581 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17584 /* Now create the move. */
17585 if (store_p)
17586 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17587 else
17588 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17590 return;
17593 /* Convert reloads involving 64-bit gprs and misaligned offset
17594 addressing, or multiple 32-bit gprs and offsets that are too large,
17595 to use indirect addressing. */
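/* Illustrative example: "ld rD,6(rA)" is invalid because the DS-form
   displacement must be a multiple of 4, so the reload copies rA+6 into
   the scratch register and performs the load indirectly through it.  */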
17597 void
17598 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17600 int regno = true_regnum (reg);
17601 enum reg_class rclass;
17602 rtx addr;
17603 rtx scratch_or_premodify = scratch;
17605 if (TARGET_DEBUG_ADDR)
17607 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17608 store_p ? "store" : "load");
17609 fprintf (stderr, "reg:\n");
17610 debug_rtx (reg);
17611 fprintf (stderr, "mem:\n");
17612 debug_rtx (mem);
17613 fprintf (stderr, "scratch:\n");
17614 debug_rtx (scratch);
17617 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17618 gcc_assert (GET_CODE (mem) == MEM);
17619 rclass = REGNO_REG_CLASS (regno);
17620 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17621 addr = XEXP (mem, 0);
17623 if (GET_CODE (addr) == PRE_MODIFY)
17625 scratch_or_premodify = XEXP (addr, 0);
17626 gcc_assert (REG_P (scratch_or_premodify));
17627 addr = XEXP (addr, 1);
17629 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17631 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17633 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17635 /* Now create the move. */
17636 if (store_p)
17637 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17638 else
17639 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17641 return;
17644 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17645 this function has any SDmode references. If we are on a power7 or later, we
17646 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17647 can load/store the value. */
17649 static void
17650 rs6000_alloc_sdmode_stack_slot (void)
17652 tree t;
17653 basic_block bb;
17654 gimple_stmt_iterator gsi;
17656 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17657 /* We use a different approach for dealing with the secondary
17658 memory in LRA. */
17659 if (ira_use_lra_p)
17660 return;
17662 if (TARGET_NO_SDMODE_STACK)
17663 return;
17665 FOR_EACH_BB_FN (bb, cfun)
17666 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17668 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17669 if (ret)
17671 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17672 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17673 SDmode, 0);
17674 return;
17678 /* Check for any SDmode parameters of the function. */
17679 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17681 if (TREE_TYPE (t) == error_mark_node)
17682 continue;
17684 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17685 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17687 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17688 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17689 SDmode, 0);
17690 return;
17695 static void
17696 rs6000_instantiate_decls (void)
17698 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17699 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17702 /* Given an rtx X being reloaded into a reg required to be
17703 in class CLASS, return the class of reg to actually use.
17704 In general this is just CLASS; but on some machines
17705 in some cases it is preferable to use a more restrictive class.
17707 On the RS/6000, we have to return NO_REGS when we want to reload a
17708 floating-point CONST_DOUBLE to force it to be copied to memory.
17710 We also don't want to reload integer values into floating-point
17711 registers if we can at all help it. In fact, this can
17712 cause reload to die, if it tries to generate a reload of CTR
17713 into a FP register and discovers it doesn't have the memory location
17714 required.
17716 ??? Would it be a good idea to have reload do the converse, that is
17717 try to reload floating modes into FP registers if possible?
17720 static enum reg_class
17721 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17723 machine_mode mode = GET_MODE (x);
17724 bool is_constant = CONSTANT_P (x);
17726 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
17727 the reloading of address expressions using PLUS into floating point
17728 registers. */
17729 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
17731 if (is_constant)
17733 /* Zero is always allowed in all VSX registers. */
17734 if (x == CONST0_RTX (mode))
17735 return rclass;
17737 /* If this is a vector constant that can be formed with a few Altivec
17738 instructions, we want altivec registers. */
17739 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17740 return ALTIVEC_REGS;
17742 /* Force constant to memory. */
17743 return NO_REGS;
17746 /* If this is a scalar floating point value, prefer the traditional
17747 floating point registers so that we can use D-form (register+offset)
17748 addressing. */
17749 if (GET_MODE_SIZE (mode) < 16)
17750 return FLOAT_REGS;
17752 /* Prefer the Altivec registers if Altivec is handling the vector
17753 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17754 loads. */
17755 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17756 || mode == V1TImode)
17757 return ALTIVEC_REGS;
17759 return rclass;
17762 if (is_constant || GET_CODE (x) == PLUS)
17764 if (reg_class_subset_p (GENERAL_REGS, rclass))
17765 return GENERAL_REGS;
17766 if (reg_class_subset_p (BASE_REGS, rclass))
17767 return BASE_REGS;
17768 return NO_REGS;
17771 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17772 return GENERAL_REGS;
17774 return rclass;
17777 /* Debug version of rs6000_preferred_reload_class. */
17778 static enum reg_class
17779 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17781 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17783 fprintf (stderr,
17784 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17785 "mode = %s, x:\n",
17786 reg_class_names[ret], reg_class_names[rclass],
17787 GET_MODE_NAME (GET_MODE (x)));
17788 debug_rtx (x);
17790 return ret;
17793 /* If we are copying between FP or AltiVec registers and anything else, we need
17794 a memory location. The exception is when we are targeting ppc64 and the
17795 move to/from fpr to gpr instructions are available. Also, under VSX, you
17796 can copy vector registers from the FP register set to the Altivec register
17797 set and vice versa. */
17799 static bool
17800 rs6000_secondary_memory_needed (enum reg_class from_class,
17801 enum reg_class to_class,
17802 machine_mode mode)
17804 enum rs6000_reg_type from_type, to_type;
17805 bool altivec_p = ((from_class == ALTIVEC_REGS)
17806 || (to_class == ALTIVEC_REGS));
17808 /* If a simple/direct move is available, we don't need secondary memory */
17809 from_type = reg_class_to_reg_type[(int)from_class];
17810 to_type = reg_class_to_reg_type[(int)to_class];
17812 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17813 (secondary_reload_info *)0, altivec_p))
17814 return false;
17816 /* If we have a floating point or vector register class, we need to use
17817 memory to transfer the data. */
17818 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17819 return true;
17821 return false;
17824 /* Debug version of rs6000_secondary_memory_needed. */
17825 static bool
17826 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17827 enum reg_class to_class,
17828 machine_mode mode)
17830 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17832 fprintf (stderr,
17833 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17834 "to_class = %s, mode = %s\n",
17835 ret ? "true" : "false",
17836 reg_class_names[from_class],
17837 reg_class_names[to_class],
17838 GET_MODE_NAME (mode));
17840 return ret;
17843 /* Return the register class of a scratch register needed to copy IN into
17844 or out of a register in RCLASS in MODE. If it can be done directly,
17845 NO_REGS is returned. */
17847 static enum reg_class
17848 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17849 rtx in)
17851 int regno;
17853 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17854 #if TARGET_MACHO
17855 && MACHOPIC_INDIRECT
17856 #endif
17859 /* We cannot copy a symbolic operand directly into anything
17860 other than BASE_REGS for TARGET_ELF. So indicate that a
17861 register from BASE_REGS is needed as an intermediate
17862 register.
17864 On Darwin, pic addresses require a load from memory, which
17865 needs a base register. */
17866 if (rclass != BASE_REGS
17867 && (GET_CODE (in) == SYMBOL_REF
17868 || GET_CODE (in) == HIGH
17869 || GET_CODE (in) == LABEL_REF
17870 || GET_CODE (in) == CONST))
17871 return BASE_REGS;
17874 if (GET_CODE (in) == REG)
17876 regno = REGNO (in);
17877 if (regno >= FIRST_PSEUDO_REGISTER)
17879 regno = true_regnum (in);
17880 if (regno >= FIRST_PSEUDO_REGISTER)
17881 regno = -1;
17884 else if (GET_CODE (in) == SUBREG)
17886 regno = true_regnum (in);
17887 if (regno >= FIRST_PSEUDO_REGISTER)
17888 regno = -1;
17890 else
17891 regno = -1;
17893 /* If we have VSX register moves, prefer moving scalar values between
17894    Altivec registers and GPRs by going via an FPR (and then via memory)
17895 instead of reloading the secondary memory address for Altivec moves. */
17896 if (TARGET_VSX
17897 && GET_MODE_SIZE (mode) < 16
17898 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17899 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17900 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17901 && (regno >= 0 && INT_REGNO_P (regno)))))
17902 return FLOAT_REGS;
17904 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17905 into anything. */
17906 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17907 || (regno >= 0 && INT_REGNO_P (regno)))
17908 return NO_REGS;
17910 /* Constants, memory, and VSX registers can go into VSX registers (both the
17911 traditional floating point and the altivec registers). */
17912 if (rclass == VSX_REGS
17913 && (regno == -1 || VSX_REGNO_P (regno)))
17914 return NO_REGS;
17916 /* Constants, memory, and FP registers can go into FP registers. */
17917 if ((regno == -1 || FP_REGNO_P (regno))
17918 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17919 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17921   /* Memory and AltiVec registers can go into AltiVec registers.  */
17922 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17923 && rclass == ALTIVEC_REGS)
17924 return NO_REGS;
17926 /* We can copy among the CR registers. */
17927 if ((rclass == CR_REGS || rclass == CR0_REGS)
17928 && regno >= 0 && CR_REGNO_P (regno))
17929 return NO_REGS;
17931 /* Otherwise, we need GENERAL_REGS. */
17932 return GENERAL_REGS;
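/* Examples of the cases above (sketch): reloading a SYMBOL_REF into
   FLOAT_REGS on ELF returns BASE_REGS, since the address must first be
   materialized in a base register; a DFmode FPR-to-FPR copy returns
   NO_REGS because no scratch is needed; and copying a CR field into
   FLOAT_REGS falls through to the final GENERAL_REGS case.  */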
17935 /* Debug version of rs6000_secondary_reload_class. */
17936 static enum reg_class
17937 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17938 machine_mode mode, rtx in)
17940 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17941 fprintf (stderr,
17942 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17943 "mode = %s, input rtx:\n",
17944 reg_class_names[ret], reg_class_names[rclass],
17945 GET_MODE_NAME (mode));
17946 debug_rtx (in);
17948 return ret;
17951 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17953 static bool
17954 rs6000_cannot_change_mode_class (machine_mode from,
17955 machine_mode to,
17956 enum reg_class rclass)
17958 unsigned from_size = GET_MODE_SIZE (from);
17959 unsigned to_size = GET_MODE_SIZE (to);
17961 if (from_size != to_size)
17963 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17965 if (reg_classes_intersect_p (xclass, rclass))
17967 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17968 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17970 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17971 single register under VSX because the scalar part of the register
17972      is in the upper 64 bits, and not the lower 64 bits.  Types like
17973      TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
17974 IEEE floating point can't overlap, and neither can small
17975 values. */
17977 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17978 return true;
17980 /* TDmode in floating-mode registers must always go into a register
17981 pair with the most significant word in the even-numbered register
17982 to match ISA requirements. In little-endian mode, this does not
17983 match subreg numbering, so we cannot allow subregs. */
17984 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17985 return true;
17987 if (from_size < 8 || to_size < 8)
17988 return true;
17990 if (from_size == 8 && (8 * to_nregs) != to_size)
17991 return true;
17993 if (to_size == 8 && (8 * from_nregs) != from_size)
17994 return true;
17996 return false;
17998 else
17999 return false;
18002 if (TARGET_E500_DOUBLE
18003 && ((((to) == DFmode) + ((from) == DFmode)) == 1
18004 || (((to) == TFmode) + ((from) == TFmode)) == 1
18005 || (((to) == DDmode) + ((from) == DDmode)) == 1
18006 || (((to) == TDmode) + ((from) == TDmode)) == 1
18007 || (((to) == DImode) + ((from) == DImode)) == 1))
18008 return true;
18010 /* Since the VSX register set includes traditional floating point registers
18011 and altivec registers, just check for the size being different instead of
18012 trying to check whether the modes are vector modes. Otherwise it won't
18013    allow, say, DF and DI to change classes.  For types like TFmode and TDmode
18014    that take 2 64-bit registers, rather than a single 128-bit register, don't
18015    allow subregs of those types to other 128-bit types.  */
18016 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
18018 unsigned num_regs = (from_size + 15) / 16;
18019 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
18020 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
18021 return true;
18023 return (from_size != 8 && from_size != 16);
18026 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
18027 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
18028 return true;
18030 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
18031 && reg_classes_intersect_p (GENERAL_REGS, rclass))
18032 return true;
18034 return false;
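/* Worked example of the VSX rule above (illustrative): (subreg:DI
   (reg:V2DF vs0) 0) is rejected, because V2DF occupies one 16-byte
   register whose scalar part lives in the upper 64 bits, so an 8-byte
   view would name the wrong half; DF <-> DI subregs of the same FPR are
   fine, since both sides are 8 bytes wide.  */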
18037 /* Debug version of rs6000_cannot_change_mode_class. */
18038 static bool
18039 rs6000_debug_cannot_change_mode_class (machine_mode from,
18040 machine_mode to,
18041 enum reg_class rclass)
18043 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
18045 fprintf (stderr,
18046 "rs6000_cannot_change_mode_class, return %s, from = %s, "
18047 "to = %s, rclass = %s\n",
18048 ret ? "true" : "false",
18049 GET_MODE_NAME (from), GET_MODE_NAME (to),
18050 reg_class_names[rclass]);
18052 return ret;
18055 /* Return a string to do a move operation of 128 bits of data. */
18057 const char *
18058 rs6000_output_move_128bit (rtx operands[])
18060 rtx dest = operands[0];
18061 rtx src = operands[1];
18062 machine_mode mode = GET_MODE (dest);
18063 int dest_regno;
18064 int src_regno;
18065 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
18066 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
18068 if (REG_P (dest))
18070 dest_regno = REGNO (dest);
18071 dest_gpr_p = INT_REGNO_P (dest_regno);
18072 dest_fp_p = FP_REGNO_P (dest_regno);
18073 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
18074 dest_vsx_p = dest_fp_p | dest_vmx_p;
18076 else
18078 dest_regno = -1;
18079 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
18082 if (REG_P (src))
18084 src_regno = REGNO (src);
18085 src_gpr_p = INT_REGNO_P (src_regno);
18086 src_fp_p = FP_REGNO_P (src_regno);
18087 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
18088 src_vsx_p = src_fp_p | src_vmx_p;
18090 else
18092 src_regno = -1;
18093 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
18096 /* Register moves. */
18097 if (dest_regno >= 0 && src_regno >= 0)
18099 if (dest_gpr_p)
18101 if (src_gpr_p)
18102 return "#";
18104 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
18105 return "#";
18108 else if (TARGET_VSX && dest_vsx_p)
18110 if (src_vsx_p)
18111 return "xxlor %x0,%x1,%x1";
18113 else if (TARGET_DIRECT_MOVE && src_gpr_p)
18114 return "#";
18117 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
18118 return "vor %0,%1,%1";
18120 else if (dest_fp_p && src_fp_p)
18121 return "#";
18124 /* Loads. */
18125 else if (dest_regno >= 0 && MEM_P (src))
18127 if (dest_gpr_p)
18129 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18130 return "lq %0,%1";
18131 else
18132 return "#";
18135 else if (TARGET_ALTIVEC && dest_vmx_p
18136 && altivec_indexed_or_indirect_operand (src, mode))
18137 return "lvx %0,%y1";
18139 else if (TARGET_VSX && dest_vsx_p)
18141 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18142 return "lxvw4x %x0,%y1";
18143 else
18144 return "lxvd2x %x0,%y1";
18147 else if (TARGET_ALTIVEC && dest_vmx_p)
18148 return "lvx %0,%y1";
18150 else if (dest_fp_p)
18151 return "#";
18154 /* Stores. */
18155 else if (src_regno >= 0 && MEM_P (dest))
18157 if (src_gpr_p)
18159 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18160 return "stq %1,%0";
18161 else
18162 return "#";
18165 else if (TARGET_ALTIVEC && src_vmx_p
18166            && altivec_indexed_or_indirect_operand (dest, mode))
18167 return "stvx %1,%y0";
18169 else if (TARGET_VSX && src_vsx_p)
18171 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18172 return "stxvw4x %x1,%y0";
18173 else
18174 return "stxvd2x %x1,%y0";
18177 else if (TARGET_ALTIVEC && src_vmx_p)
18178 return "stvx %1,%y0";
18180 else if (src_fp_p)
18181 return "#";
18184 /* Constants. */
18185 else if (dest_regno >= 0
18186 && (GET_CODE (src) == CONST_INT
18187 || GET_CODE (src) == CONST_WIDE_INT
18188 || GET_CODE (src) == CONST_DOUBLE
18189 || GET_CODE (src) == CONST_VECTOR))
18191 if (dest_gpr_p)
18192 return "#";
18194 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18195 return "xxlxor %x0,%x0,%x0";
18197 else if (TARGET_ALTIVEC && dest_vmx_p)
18198 return output_vec_const_move (operands);
18201 if (TARGET_DEBUG_ADDR)
18203 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18204 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18207 gcc_unreachable ();
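/* For instance (sketch of the table above): a V4SI copy between two VSX
   registers emits "xxlor %x0,%x1,%x1", a V16QI load emits
   "lxvw4x %x0,%y1", and any move with a GPR on either side returns "#"
   so the post-reload splitter can break it into word-sized moves.  */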
18210 /* Validate a 128-bit move. */
18211 bool
18212 rs6000_move_128bit_ok_p (rtx operands[])
18214 machine_mode mode = GET_MODE (operands[0]);
18215 return (gpc_reg_operand (operands[0], mode)
18216 || gpc_reg_operand (operands[1], mode));
18219 /* Return true if a 128-bit move needs to be split. */
18220 bool
18221 rs6000_split_128bit_ok_p (rtx operands[])
18223 if (!reload_completed)
18224 return false;
18226 if (!gpr_or_gpr_p (operands[0], operands[1]))
18227 return false;
18229 if (quad_load_store_p (operands[0], operands[1]))
18230 return false;
18232 return true;
18236 /* Given a comparison operation, return the bit number in CCR to test. We
18237 know this is a valid comparison.
18239 SCC_P is 1 if this is for an scc. That means that %D will have been
18240 used instead of %C, so the bits will be in different places.
18242 Return -1 if OP isn't a valid comparison for some reason. */
18244 int
18245 ccr_bit (rtx op, int scc_p)
18247 enum rtx_code code = GET_CODE (op);
18248 machine_mode cc_mode;
18249 int cc_regnum;
18250 int base_bit;
18251 rtx reg;
18253 if (!COMPARISON_P (op))
18254 return -1;
18256 reg = XEXP (op, 0);
18258 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18260 cc_mode = GET_MODE (reg);
18261 cc_regnum = REGNO (reg);
18262 base_bit = 4 * (cc_regnum - CR0_REGNO);
18264 validate_condition_mode (code, cc_mode);
18266 /* When generating a sCOND operation, only positive conditions are
18267 allowed. */
18268 gcc_assert (!scc_p
18269 || code == EQ || code == GT || code == LT || code == UNORDERED
18270 || code == GTU || code == LTU);
18272 switch (code)
18274 case NE:
18275 return scc_p ? base_bit + 3 : base_bit + 2;
18276 case EQ:
18277 return base_bit + 2;
18278 case GT: case GTU: case UNLE:
18279 return base_bit + 1;
18280 case LT: case LTU: case UNGE:
18281 return base_bit;
18282 case ORDERED: case UNORDERED:
18283 return base_bit + 3;
18285 case GE: case GEU:
18286 /* If scc, we will have done a cror to put the bit in the
18287 unordered position. So test that bit. For integer, this is ! LT
18288 unless this is an scc insn. */
18289 return scc_p ? base_bit + 3 : base_bit;
18291 case LE: case LEU:
18292 return scc_p ? base_bit + 3 : base_bit + 1;
18294 default:
18295 gcc_unreachable ();
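/* Worked example: each CR field holds four bits ordered LT, GT, EQ,
   SO/UN, and BASE_BIT is 4 * the field number.  A GT test against cr2
   therefore yields bit 4*2 + 1 = 9; an scc-style GE yields the field's
   bit 3, because the cror emitted for the scc sequence has already
   parked the result in the unordered position.  */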
18299 /* Return the GOT register. */
18301 rtx
18302 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18304 /* The second flow pass currently (June 1999) can't update
18305 regs_ever_live without disturbing other parts of the compiler, so
18306 update it here to make the prolog/epilogue code happy. */
18307 if (!can_create_pseudo_p ()
18308 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18309 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18311 crtl->uses_pic_offset_table = 1;
18313 return pic_offset_table_rtx;
18316 static rs6000_stack_t stack_info;
18318 /* Function to init struct machine_function.
18319 This will be called, via a pointer variable,
18320 from push_function_context. */
18322 static struct machine_function *
18323 rs6000_init_machine_status (void)
18325 stack_info.reload_completed = 0;
18326 return ggc_cleared_alloc<machine_function> ();
18329 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18331 int
18332 extract_MB (rtx op)
18334 int i;
18335 unsigned long val = INTVAL (op);
18337 /* If the high bit is zero, the value is the first 1 bit we find
18338 from the left. */
18339 if ((val & 0x80000000) == 0)
18341 gcc_assert (val & 0xffffffff);
18343 i = 1;
18344 while (((val <<= 1) & 0x80000000) == 0)
18345 ++i;
18346 return i;
18349 /* If the high bit is set and the low bit is not, or the mask is all
18350 1's, the value is zero. */
18351 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18352 return 0;
18354 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18355 from the right. */
18356 i = 31;
18357 while (((val >>= 1) & 1) != 0)
18358 --i;
18360 return i;
18363 int
18364 extract_ME (rtx op)
18366 int i;
18367 unsigned long val = INTVAL (op);
18369 /* If the low bit is zero, the value is the first 1 bit we find from
18370 the right. */
18371 if ((val & 1) == 0)
18373 gcc_assert (val & 0xffffffff);
18375 i = 30;
18376 while (((val >>= 1) & 1) == 0)
18377 --i;
18379 return i;
18382 /* If the low bit is set and the high bit is not, or the mask is all
18383 1's, the value is 31. */
18384 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18385 return 31;
18387 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18388 from the left. */
18389 i = 0;
18390 while (((val <<= 1) & 0x80000000) != 0)
18391 ++i;
18393 return i;
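/* Worked example of the two extractors above (illustrative only; this
   helper is hypothetical and kept out of the build).  Mask bits are
   numbered IBM-style, bit 0 = MSB.  */
#if 0
static void
example_mask_extraction (void)
{
  rtx m1 = GEN_INT (0x0ff00000);	/* bits 4..11 set */
  rtx m2 = GEN_INT (0xf000000f);	/* wrap-around mask */
  gcc_assert (extract_MB (m1) == 4 && extract_ME (m1) == 11);
  gcc_assert (extract_MB (m2) == 28 && extract_ME (m2) == 3);
}
#endif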
18396 /* Write out a function code label. */
18398 void
18399 rs6000_output_function_entry (FILE *file, const char *fname)
18401 if (fname[0] != '.')
18403 switch (DEFAULT_ABI)
18405 default:
18406 gcc_unreachable ();
18408 case ABI_AIX:
18409 if (DOT_SYMBOLS)
18410 putc ('.', file);
18411 else
18412 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18413 break;
18415 case ABI_ELFv2:
18416 case ABI_V4:
18417 case ABI_DARWIN:
18418 break;
18422 RS6000_OUTPUT_BASENAME (file, fname);
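/* Example (sketch): under ABI_AIX with dot-symbols, "foo" is written as
   ".foo", the traditional entry-point name; without dot-symbols an
   internal "L.foo"-style label is emitted instead, and the ELFv2, V4 and
   Darwin ABIs write the name unchanged.  */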
18425 /* Print an operand. Recognize special options, documented below. */
18427 #if TARGET_ELF
18428 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18429 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18430 #else
18431 #define SMALL_DATA_RELOC "sda21"
18432 #define SMALL_DATA_REG 0
18433 #endif
18435 void
18436 print_operand (FILE *file, rtx x, int code)
18438 int i;
18439 unsigned HOST_WIDE_INT uval;
18441 switch (code)
18443 /* %a is output_address. */
18445 case 'b':
18446 /* If constant, low-order 16 bits of constant, unsigned.
18447 Otherwise, write normally. */
18448 if (INT_P (x))
18449 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18450 else
18451 print_operand (file, x, 0);
18452 return;
18454 case 'B':
18455 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18456 for 64-bit mask direction. */
18457 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18458 return;
18460 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18461 output_operand. */
18463 case 'D':
18464 /* Like 'J' but get to the GT bit only. */
18465 gcc_assert (REG_P (x));
18467 /* Bit 1 is GT bit. */
18468 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18470 /* Add one for shift count in rlinm for scc. */
18471 fprintf (file, "%d", i + 1);
18472 return;
18474 case 'e':
18475 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18476 if (! INT_P (x))
18478 output_operand_lossage ("invalid %%e value");
18479 return;
18482 uval = INTVAL (x);
18483 if ((uval & 0xffff) == 0 && uval != 0)
18484 putc ('s', file);
18485 return;
18487 case 'E':
18488 /* X is a CR register. Print the number of the EQ bit of the CR */
18489 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18490 output_operand_lossage ("invalid %%E value");
18491 else
18492 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18493 return;
18495 case 'f':
18496 /* X is a CR register. Print the shift count needed to move it
18497 to the high-order four bits. */
18498 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18499 output_operand_lossage ("invalid %%f value");
18500 else
18501 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18502 return;
18504 case 'F':
18505 /* Similar, but print the count for the rotate in the opposite
18506 direction. */
18507 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18508 output_operand_lossage ("invalid %%F value");
18509 else
18510 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18511 return;
18513 case 'G':
18514 /* X is a constant integer. If it is negative, print "m",
18515 otherwise print "z". This is to make an aze or ame insn. */
18516 if (GET_CODE (x) != CONST_INT)
18517 output_operand_lossage ("invalid %%G value");
18518 else if (INTVAL (x) >= 0)
18519 putc ('z', file);
18520 else
18521 putc ('m', file);
18522 return;
18524 case 'h':
18525 /* If constant, output low-order five bits. Otherwise, write
18526 normally. */
18527 if (INT_P (x))
18528 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18529 else
18530 print_operand (file, x, 0);
18531 return;
18533 case 'H':
18534 /* If constant, output low-order six bits. Otherwise, write
18535 normally. */
18536 if (INT_P (x))
18537 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18538 else
18539 print_operand (file, x, 0);
18540 return;
18542 case 'I':
18543 /* Print `i' if this is a constant, else nothing. */
18544 if (INT_P (x))
18545 putc ('i', file);
18546 return;
18548 case 'j':
18549 /* Write the bit number in CCR for jump. */
18550 i = ccr_bit (x, 0);
18551 if (i == -1)
18552 output_operand_lossage ("invalid %%j code");
18553 else
18554 fprintf (file, "%d", i);
18555 return;
18557 case 'J':
18558 /* Similar, but add one for shift count in rlinm for scc and pass
18559 scc flag to `ccr_bit'. */
18560 i = ccr_bit (x, 1);
18561 if (i == -1)
18562 output_operand_lossage ("invalid %%J code");
18563 else
18564 /* If we want bit 31, write a shift count of zero, not 32. */
18565 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18566 return;
18568 case 'k':
18569 /* X must be a constant. Write the 1's complement of the
18570 constant. */
18571 if (! INT_P (x))
18572 output_operand_lossage ("invalid %%k value");
18573 else
18574 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18575 return;
18577 case 'K':
18578 /* X must be a symbolic constant on ELF. Write an
18579 expression suitable for an 'addi' that adds in the low 16
18580 bits of the MEM. */
18581 if (GET_CODE (x) == CONST)
18583 if (GET_CODE (XEXP (x, 0)) != PLUS
18584 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18585 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18586 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18587 output_operand_lossage ("invalid %%K value");
18589 print_operand_address (file, x);
18590 fputs ("@l", file);
18591 return;
18593 /* %l is output_asm_label. */
18595 case 'L':
18596 /* Write second word of DImode or DFmode reference. Works on register
18597 or non-indexed memory only. */
18598 if (REG_P (x))
18599 fputs (reg_names[REGNO (x) + 1], file);
18600 else if (MEM_P (x))
18602 /* Handle possible auto-increment. Since it is pre-increment and
18603      we have already done it, we can just use an offset of one word.  */
18604 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18605 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18606 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18607 UNITS_PER_WORD));
18608 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18609 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18610 UNITS_PER_WORD));
18611 else
18612 output_address (XEXP (adjust_address_nv (x, SImode,
18613 UNITS_PER_WORD),
18614 0));
18616 if (small_data_operand (x, GET_MODE (x)))
18617 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18618 reg_names[SMALL_DATA_REG]);
18620 return;
18622 case 'm':
18623 /* MB value for a mask operand. */
18624 if (! mask_operand (x, SImode))
18625 output_operand_lossage ("invalid %%m value");
18627 fprintf (file, "%d", extract_MB (x));
18628 return;
18630 case 'M':
18631 /* ME value for a mask operand. */
18632 if (! mask_operand (x, SImode))
18633 output_operand_lossage ("invalid %%M value");
18635 fprintf (file, "%d", extract_ME (x));
18636 return;
18638 /* %n outputs the negative of its operand. */
18640 case 'N':
18641 /* Write the number of elements in the vector times 4. */
18642 if (GET_CODE (x) != PARALLEL)
18643 output_operand_lossage ("invalid %%N value");
18644 else
18645 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18646 return;
18648 case 'O':
18649 /* Similar, but subtract 1 first. */
18650 if (GET_CODE (x) != PARALLEL)
18651 output_operand_lossage ("invalid %%O value");
18652 else
18653 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18654 return;
18656 case 'p':
18657 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18658 if (! INT_P (x)
18659 || INTVAL (x) < 0
18660 || (i = exact_log2 (INTVAL (x))) < 0)
18661 output_operand_lossage ("invalid %%p value");
18662 else
18663 fprintf (file, "%d", i);
18664 return;
18666 case 'P':
18667 /* The operand must be an indirect memory reference. The result
18668 is the register name. */
18669 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18670 || REGNO (XEXP (x, 0)) >= 32)
18671 output_operand_lossage ("invalid %%P value");
18672 else
18673 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18674 return;
18676 case 'q':
18677 /* This outputs the logical code corresponding to a boolean
18678 expression. The expression may have one or both operands
18679 negated (if one, only the first one). For condition register
18680 logical operations, it will also treat the negated
18681 CR codes as NOTs, but not handle NOTs of them. */
18683 const char *const *t = 0;
18684 const char *s;
18685 enum rtx_code code = GET_CODE (x);
18686 static const char * const tbl[3][3] = {
18687 { "and", "andc", "nor" },
18688 { "or", "orc", "nand" },
18689 { "xor", "eqv", "xor" } };
18691 if (code == AND)
18692 t = tbl[0];
18693 else if (code == IOR)
18694 t = tbl[1];
18695 else if (code == XOR)
18696 t = tbl[2];
18697 else
18698 output_operand_lossage ("invalid %%q value");
18700 if (GET_CODE (XEXP (x, 0)) != NOT)
18701 s = t[0];
18702 else
18704 if (GET_CODE (XEXP (x, 1)) == NOT)
18705 s = t[2];
18706 else
18707 s = t[1];
18710 fputs (s, file);
18712 return;
18714 case 'Q':
18715 if (! TARGET_MFCRF)
18716 return;
18717 fputc (',', file);
18718 /* FALLTHRU */
18720 case 'R':
18721 /* X is a CR register. Print the mask for `mtcrf'. */
18722 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18723 output_operand_lossage ("invalid %%R value");
18724 else
18725 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18726 return;
18728 case 's':
18729 /* Low 5 bits of 32 - value */
18730 if (! INT_P (x))
18731 output_operand_lossage ("invalid %%s value");
18732 else
18733 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18734 return;
18736 case 'S':
18737 /* PowerPC64 mask position.  A mask of all zeros is excluded.
18738    A 32-bit CONST_INT mask is considered sign-extended, so any
18739 transition must occur within the CONST_INT, not on the boundary. */
18740 if (! mask64_operand (x, DImode))
18741 output_operand_lossage ("invalid %%S value");
18743 uval = INTVAL (x);
18745 if (uval & 1) /* Clear Left */
18747 #if HOST_BITS_PER_WIDE_INT > 64
18748 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18749 #endif
18750 i = 64;
18752 else /* Clear Right */
18754 uval = ~uval;
18755 #if HOST_BITS_PER_WIDE_INT > 64
18756 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18757 #endif
18758 i = 63;
18760 while (uval != 0)
18761 --i, uval >>= 1;
18762 gcc_assert (i >= 0);
18763 fprintf (file, "%d", i);
18764 return;
18766 case 't':
18767 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18768 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18770 /* Bit 3 is OV bit. */
18771 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18773 /* If we want bit 31, write a shift count of zero, not 32. */
18774 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18775 return;
18777 case 'T':
18778 /* Print the symbolic name of a branch target register. */
18779 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18780 && REGNO (x) != CTR_REGNO))
18781 output_operand_lossage ("invalid %%T value");
18782 else if (REGNO (x) == LR_REGNO)
18783 fputs ("lr", file);
18784 else
18785 fputs ("ctr", file);
18786 return;
18788 case 'u':
18789 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18790 for use in unsigned operand. */
18791 if (! INT_P (x))
18793 output_operand_lossage ("invalid %%u value");
18794 return;
18797 uval = INTVAL (x);
18798 if ((uval & 0xffff) == 0)
18799 uval >>= 16;
18801 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18802 return;
18804 case 'v':
18805 /* High-order 16 bits of constant for use in signed operand. */
18806 if (! INT_P (x))
18807 output_operand_lossage ("invalid %%v value");
18808 else
18809 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18810 (INTVAL (x) >> 16) & 0xffff);
18811 return;
18813 case 'U':
18814 /* Print `u' if this has an auto-increment or auto-decrement. */
18815 if (MEM_P (x)
18816 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18817 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18818 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18819 putc ('u', file);
18820 return;
18822 case 'V':
18823 /* Print the trap code for this operand. */
18824 switch (GET_CODE (x))
18826 case EQ:
18827 fputs ("eq", file); /* 4 */
18828 break;
18829 case NE:
18830 fputs ("ne", file); /* 24 */
18831 break;
18832 case LT:
18833 fputs ("lt", file); /* 16 */
18834 break;
18835 case LE:
18836 fputs ("le", file); /* 20 */
18837 break;
18838 case GT:
18839 fputs ("gt", file); /* 8 */
18840 break;
18841 case GE:
18842 fputs ("ge", file); /* 12 */
18843 break;
18844 case LTU:
18845 fputs ("llt", file); /* 2 */
18846 break;
18847 case LEU:
18848 fputs ("lle", file); /* 6 */
18849 break;
18850 case GTU:
18851 fputs ("lgt", file); /* 1 */
18852 break;
18853 case GEU:
18854 fputs ("lge", file); /* 5 */
18855 break;
18856 default:
18857 gcc_unreachable ();
18859 break;
18861 case 'w':
18862 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18863 normally. */
18864 if (INT_P (x))
18865 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18866 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18867 else
18868 print_operand (file, x, 0);
18869 return;
18871 case 'W':
18872 /* MB value for a PowerPC64 rldic operand. */
18873 i = clz_hwi (INTVAL (x));
18875 fprintf (file, "%d", i);
18876 return;
18878 case 'x':
18879 /* X is a FPR or Altivec register used in a VSX context. */
18880 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18881 output_operand_lossage ("invalid %%x value");
18882 else
18884 int reg = REGNO (x);
18885 int vsx_reg = (FP_REGNO_P (reg)
18886 ? reg - 32
18887 : reg - FIRST_ALTIVEC_REGNO + 32);
18889 #ifdef TARGET_REGNAMES
18890 if (TARGET_REGNAMES)
18891 fprintf (file, "%%vs%d", vsx_reg);
18892 else
18893 #endif
18894 fprintf (file, "%d", vsx_reg);
18896 return;
18898 case 'X':
18899 if (MEM_P (x)
18900 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18901 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18902 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18903 putc ('x', file);
18904 return;
18906 case 'Y':
18907 /* Like 'L', for the third word of TImode/PTImode.  */
18908 if (REG_P (x))
18909 fputs (reg_names[REGNO (x) + 2], file);
18910 else if (MEM_P (x))
18912 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18913 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18914 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18915 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18916 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18917 else
18918 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18919 if (small_data_operand (x, GET_MODE (x)))
18920 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18921 reg_names[SMALL_DATA_REG]);
18923 return;
18925 case 'z':
18926 /* X is a SYMBOL_REF. Write out the name preceded by a
18927 period and without any trailing data in brackets. Used for function
18928 names. If we are configured for System V (or the embedded ABI) on
18929 the PowerPC, do not emit the period, since those systems do not use
18930 TOCs and the like. */
18931 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18933 /* For macho, check to see if we need a stub. */
18934 if (TARGET_MACHO)
18936 const char *name = XSTR (x, 0);
18937 #if TARGET_MACHO
18938 if (darwin_emit_branch_islands
18939 && MACHOPIC_INDIRECT
18940 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18941 name = machopic_indirection_name (x, /*stub_p=*/true);
18942 #endif
18943 assemble_name (file, name);
18945 else if (!DOT_SYMBOLS)
18946 assemble_name (file, XSTR (x, 0));
18947 else
18948 rs6000_output_function_entry (file, XSTR (x, 0));
18949 return;
18951 case 'Z':
18952 /* Like 'L', for last word of TImode/PTImode. */
18953 if (REG_P (x))
18954 fputs (reg_names[REGNO (x) + 3], file);
18955 else if (MEM_P (x))
18957 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18958 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18959 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18960 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18961 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18962 else
18963 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18964 if (small_data_operand (x, GET_MODE (x)))
18965 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18966 reg_names[SMALL_DATA_REG]);
18968 return;
18970 /* Print AltiVec or SPE memory operand. */
18971 case 'y':
18973 rtx tmp;
18975 gcc_assert (MEM_P (x));
18977 tmp = XEXP (x, 0);
18979 /* Ugly hack because %y is overloaded. */
18980 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18981 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18982 || GET_MODE (x) == TFmode
18983 || GET_MODE (x) == TImode
18984 || GET_MODE (x) == PTImode))
18986 /* Handle [reg]. */
18987 if (REG_P (tmp))
18989 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18990 break;
18992 /* Handle [reg+UIMM]. */
18993 else if (GET_CODE (tmp) == PLUS &&
18994 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18996 int x;
18998 gcc_assert (REG_P (XEXP (tmp, 0)));
19000 x = INTVAL (XEXP (tmp, 1));
19001 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
19002 break;
19005 /* Fall through. Must be [reg+reg]. */
19007 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
19008 && GET_CODE (tmp) == AND
19009 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
19010 && INTVAL (XEXP (tmp, 1)) == -16)
19011 tmp = XEXP (tmp, 0);
19012 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
19013 && GET_CODE (tmp) == PRE_MODIFY)
19014 tmp = XEXP (tmp, 1);
19015 if (REG_P (tmp))
19016 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
19017 else
19019 if (GET_CODE (tmp) != PLUS
19020 || !REG_P (XEXP (tmp, 0))
19021 || !REG_P (XEXP (tmp, 1)))
19023 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
19024 break;
19027 if (REGNO (XEXP (tmp, 0)) == 0)
19028 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
19029 reg_names[ REGNO (XEXP (tmp, 0)) ]);
19030 else
19031 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
19032 reg_names[ REGNO (XEXP (tmp, 1)) ]);
19034 break;
19037 case 0:
19038 if (REG_P (x))
19039 fprintf (file, "%s", reg_names[REGNO (x)]);
19040 else if (MEM_P (x))
19042 /* We need to handle PRE_INC and PRE_DEC here, since we need to
19043 know the width from the mode. */
19044 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
19045 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
19046 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
19047 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
19048 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
19049 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
19050 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19051 output_address (XEXP (XEXP (x, 0), 1));
19052 else
19053 output_address (XEXP (x, 0));
19055 else
19057 if (toc_relative_expr_p (x, false))
19058 /* This hack along with a corresponding hack in
19059 rs6000_output_addr_const_extra arranges to output addends
19060 where the assembler expects to find them. eg.
19061 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
19062 without this hack would be output as "x@toc+4". We
19063 want "x+4@toc". */
19064 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19065 else
19066 output_addr_const (file, x);
19068 return;
19070 case '&':
19071 if (const char *name = get_some_local_dynamic_name ())
19072 assemble_name (file, name);
19073 else
19074 output_operand_lossage ("'%%&' used without any "
19075 "local dynamic TLS references");
19076 return;
19078 default:
19079 output_operand_lossage ("invalid %%xn code");
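/* Usage sketch: these codes let one template cover several encodings.
   The movsi alternative "lwz%U1%X1 %0,%1", for example, prints plain
   "lwz" for an offset address, "lwzu" when %U1 sees an auto-update
   address, and "lwzx"/"lwzux" when %X1 recognizes an indexed one, while
   "%L1" would name the second register of a multi-word operand.  */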
19083 /* Print the address of an operand. */
19085 void
19086 print_operand_address (FILE *file, rtx x)
19088 if (REG_P (x))
19089 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
19090 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
19091 || GET_CODE (x) == LABEL_REF)
19093 output_addr_const (file, x);
19094 if (small_data_operand (x, GET_MODE (x)))
19095 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19096 reg_names[SMALL_DATA_REG]);
19097 else
19098 gcc_assert (!TARGET_TOC);
19100 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19101 && REG_P (XEXP (x, 1)))
19103 if (REGNO (XEXP (x, 0)) == 0)
19104 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
19105 reg_names[ REGNO (XEXP (x, 0)) ]);
19106 else
19107 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
19108 reg_names[ REGNO (XEXP (x, 1)) ]);
19110 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19111 && GET_CODE (XEXP (x, 1)) == CONST_INT)
19112 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
19113 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
19114 #if TARGET_MACHO
19115 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19116 && CONSTANT_P (XEXP (x, 1)))
19118 fprintf (file, "lo16(");
19119 output_addr_const (file, XEXP (x, 1));
19120 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19122 #endif
19123 #if TARGET_ELF
19124 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19125 && CONSTANT_P (XEXP (x, 1)))
19127 output_addr_const (file, XEXP (x, 1));
19128 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19130 #endif
19131 else if (toc_relative_expr_p (x, false))
19133 /* This hack along with a corresponding hack in
19134 rs6000_output_addr_const_extra arranges to output addends
19135 where the assembler expects to find them. eg.
19136 (lo_sum (reg 9)
19137 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
19138 without this hack would be output as "x@toc+8@l(9)". We
19139 want "x+8@toc@l(9)". */
19140 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19141 if (GET_CODE (x) == LO_SUM)
19142 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
19143 else
19144 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
19146 else
19147 gcc_unreachable ();
19150 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
19152 static bool
19153 rs6000_output_addr_const_extra (FILE *file, rtx x)
19155 if (GET_CODE (x) == UNSPEC)
19156 switch (XINT (x, 1))
19158 case UNSPEC_TOCREL:
19159 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19160 && REG_P (XVECEXP (x, 0, 1))
19161 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19162 output_addr_const (file, XVECEXP (x, 0, 0));
19163 if (x == tocrel_base && tocrel_offset != const0_rtx)
19165 if (INTVAL (tocrel_offset) >= 0)
19166 fprintf (file, "+");
19167 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19169 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19171 putc ('-', file);
19172 assemble_name (file, toc_label_name);
19174 else if (TARGET_ELF)
19175 fputs ("@toc", file);
19176 return true;
19178 #if TARGET_MACHO
19179 case UNSPEC_MACHOPIC_OFFSET:
19180 output_addr_const (file, XVECEXP (x, 0, 0));
19181 putc ('-', file);
19182 machopic_output_function_base_name (file);
19183 return true;
19184 #endif
19186 return false;
19189 /* Target hook for assembling integer objects. The PowerPC version has
19190 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19191 is defined. It also needs to handle DI-mode objects on 64-bit
19192 targets. */
19194 static bool
19195 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19197 #ifdef RELOCATABLE_NEEDS_FIXUP
19198 /* Special handling for SI values. */
19199 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19201 static int recurse = 0;
19203 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19204 the .fixup section. Since the TOC section is already relocated, we
19205 don't need to mark it here. We used to skip the text section, but it
19206 should never be valid for relocated addresses to be placed in the text
19207 section. */
19208 if (TARGET_RELOCATABLE
19209 && in_section != toc_section
19210 && !recurse
19211 && !CONST_SCALAR_INT_P (x)
19212 && CONSTANT_P (x))
19214 char buf[256];
19216 recurse = 1;
19217 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19218 fixuplabelno++;
19219 ASM_OUTPUT_LABEL (asm_out_file, buf);
19220 fprintf (asm_out_file, "\t.long\t(");
19221 output_addr_const (asm_out_file, x);
19222 fprintf (asm_out_file, ")@fixup\n");
19223 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19224 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19225 fprintf (asm_out_file, "\t.long\t");
19226 assemble_name (asm_out_file, buf);
19227 fprintf (asm_out_file, "\n\t.previous\n");
19228 recurse = 0;
19229 return true;
19231 /* Remove initial .'s to turn a -mcall-aixdesc function
19232 address into the address of the descriptor, not the function
19233 itself. */
19234 else if (GET_CODE (x) == SYMBOL_REF
19235 && XSTR (x, 0)[0] == '.'
19236 && DEFAULT_ABI == ABI_AIX)
19238 const char *name = XSTR (x, 0);
19239 while (*name == '.')
19240 name++;
19242 fprintf (asm_out_file, "\t.long\t%s\n", name);
19243 return true;
19246 #endif /* RELOCATABLE_NEEDS_FIXUP */
19247 return default_assemble_integer (x, size, aligned_p);
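/* Illustrative -mrelocatable output for an address constant X (sketch;
   the exact label spelling comes from ASM_GENERATE_INTERNAL_LABEL):

       .LCP0:
               .long   (X)@fixup
               .section ".fixup","aw"
               .align  2
               .long   .LCP0
               .previous

   so whatever relocates the image can patch each recorded word.  */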
19250 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19251 /* Emit an assembler directive to set symbol visibility for DECL to
19252 VISIBILITY_TYPE. */
19254 static void
19255 rs6000_assemble_visibility (tree decl, int vis)
19257 if (TARGET_XCOFF)
19258 return;
19260 /* Functions need to have their entry point symbol visibility set as
19261 well as their descriptor symbol visibility. */
19262 if (DEFAULT_ABI == ABI_AIX
19263 && DOT_SYMBOLS
19264 && TREE_CODE (decl) == FUNCTION_DECL)
19266 static const char * const visibility_types[] = {
19267 NULL, "internal", "hidden", "protected"
19270 const char *name, *type;
19272 name = ((* targetm.strip_name_encoding)
19273 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19274 type = visibility_types[vis];
19276 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19277 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19279 else
19280 default_assemble_visibility (decl, vis);
19282 #endif
19284 enum rtx_code
19285 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19287   /* Reversal of FP compares needs care -- an ordered compare
19288 becomes an unordered compare and vice versa. */
19289 if (mode == CCFPmode
19290 && (!flag_finite_math_only
19291 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19292 || code == UNEQ || code == LTGT))
19293 return reverse_condition_maybe_unordered (code);
19294 else
19295 return reverse_condition (code);
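/* Example: reversing GE on a CCFPmode compare yields UNLT rather than
   LT, so a NaN operand still takes the correct arm; the plain reversal
   is used for integer compares, or under flag_finite_math_only when the
   code has no unordered variant.  */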
19298 /* Generate a compare for CODE. Return a brand-new rtx that
19299 represents the result of the compare. */
19301 static rtx
19302 rs6000_generate_compare (rtx cmp, machine_mode mode)
19304 machine_mode comp_mode;
19305 rtx compare_result;
19306 enum rtx_code code = GET_CODE (cmp);
19307 rtx op0 = XEXP (cmp, 0);
19308 rtx op1 = XEXP (cmp, 1);
19310 if (FLOAT_MODE_P (mode))
19311 comp_mode = CCFPmode;
19312 else if (code == GTU || code == LTU
19313 || code == GEU || code == LEU)
19314 comp_mode = CCUNSmode;
19315 else if ((code == EQ || code == NE)
19316 && unsigned_reg_p (op0)
19317 && (unsigned_reg_p (op1)
19318 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19319     /* These are unsigned values; perhaps there will be a later
19320 ordering compare that can be shared with this one. */
19321 comp_mode = CCUNSmode;
19322 else
19323 comp_mode = CCmode;
19325 /* If we have an unsigned compare, make sure we don't have a signed value as
19326 an immediate. */
19327 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19328 && INTVAL (op1) < 0)
19330 op0 = copy_rtx_if_shared (op0);
19331 op1 = force_reg (GET_MODE (op0), op1);
19332 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19335 /* First, the compare. */
19336 compare_result = gen_reg_rtx (comp_mode);
19338 /* E500 FP compare instructions on the GPRs. Yuck! */
19339 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19340 && FLOAT_MODE_P (mode))
19342 rtx cmp, or_result, compare_result2;
19343 machine_mode op_mode = GET_MODE (op0);
19344 bool reverse_p;
19346 if (op_mode == VOIDmode)
19347 op_mode = GET_MODE (op1);
19349 /* First reverse the condition codes that aren't directly supported. */
19350 switch (code)
19352 case NE:
19353 case UNLT:
19354 case UNLE:
19355 case UNGT:
19356 case UNGE:
19357 code = reverse_condition_maybe_unordered (code);
19358 reverse_p = true;
19359 break;
19361 case EQ:
19362 case LT:
19363 case LE:
19364 case GT:
19365 case GE:
19366 reverse_p = false;
19367 break;
19369 default:
19370 gcc_unreachable ();
19373 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19374 This explains the following mess. */
19376 switch (code)
19378 case EQ:
19379 switch (op_mode)
19381 case SFmode:
19382 cmp = (flag_finite_math_only && !flag_trapping_math)
19383 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19384 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19385 break;
19387 case DFmode:
19388 cmp = (flag_finite_math_only && !flag_trapping_math)
19389 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19390 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19391 break;
19393 case TFmode:
19394 cmp = (flag_finite_math_only && !flag_trapping_math)
19395 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19396 : gen_cmptfeq_gpr (compare_result, op0, op1);
19397 break;
19399 default:
19400 gcc_unreachable ();
19402 break;
19404 case GT:
19405 case GE:
19406 switch (op_mode)
19408 case SFmode:
19409 cmp = (flag_finite_math_only && !flag_trapping_math)
19410 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19411 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19412 break;
19414 case DFmode:
19415 cmp = (flag_finite_math_only && !flag_trapping_math)
19416 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19417 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19418 break;
19420 case TFmode:
19421 cmp = (flag_finite_math_only && !flag_trapping_math)
19422 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19423 : gen_cmptfgt_gpr (compare_result, op0, op1);
19424 break;
19426 default:
19427 gcc_unreachable ();
19429 break;
19431 case LT:
19432 case LE:
19433 switch (op_mode)
19435 case SFmode:
19436 cmp = (flag_finite_math_only && !flag_trapping_math)
19437 ? gen_tstsflt_gpr (compare_result, op0, op1)
19438 : gen_cmpsflt_gpr (compare_result, op0, op1);
19439 break;
19441 case DFmode:
19442 cmp = (flag_finite_math_only && !flag_trapping_math)
19443 ? gen_tstdflt_gpr (compare_result, op0, op1)
19444 : gen_cmpdflt_gpr (compare_result, op0, op1);
19445 break;
19447 case TFmode:
19448 cmp = (flag_finite_math_only && !flag_trapping_math)
19449 ? gen_tsttflt_gpr (compare_result, op0, op1)
19450 : gen_cmptflt_gpr (compare_result, op0, op1);
19451 break;
19453 default:
19454 gcc_unreachable ();
19456 break;
19458 default:
19459 gcc_unreachable ();
19462 /* Synthesize LE and GE from LT/GT || EQ. */
19463 if (code == LE || code == GE)
19465 emit_insn (cmp);
19467 compare_result2 = gen_reg_rtx (CCFPmode);
19469 /* Do the EQ. */
19470 switch (op_mode)
19472 case SFmode:
19473 cmp = (flag_finite_math_only && !flag_trapping_math)
19474 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19475 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19476 break;
19478 case DFmode:
19479 cmp = (flag_finite_math_only && !flag_trapping_math)
19480 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19481 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19482 break;
19484 case TFmode:
19485 cmp = (flag_finite_math_only && !flag_trapping_math)
19486 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19487 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19488 break;
19490 default:
19491 gcc_unreachable ();
19494 emit_insn (cmp);
19496 /* OR them together. */
19497 or_result = gen_reg_rtx (CCFPmode);
19498 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19499 compare_result2);
19500 compare_result = or_result;
19503 code = reverse_p ? NE : EQ;
19505 emit_insn (cmp);
19507 else
19509 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19510 CLOBBERs to match cmptf_internal2 pattern. */
19511 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19512 && GET_MODE (op0) == TFmode
19513 && !TARGET_IEEEQUAD
19514 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19515 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19516 gen_rtvec (10,
19517 gen_rtx_SET (VOIDmode,
19518 compare_result,
19519 gen_rtx_COMPARE (comp_mode, op0, op1)),
19520 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19521 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19522 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19523 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19524 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19525 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19526 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19527 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19528 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19529 else if (GET_CODE (op1) == UNSPEC
19530 && XINT (op1, 1) == UNSPEC_SP_TEST)
19532 rtx op1b = XVECEXP (op1, 0, 0);
19533 comp_mode = CCEQmode;
19534 compare_result = gen_reg_rtx (CCEQmode);
19535 if (TARGET_64BIT)
19536 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19537 else
19538 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19540 else
19541 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19542 gen_rtx_COMPARE (comp_mode, op0, op1)));
19545 /* Some kinds of FP comparisons need an OR operation;
19546 under flag_finite_math_only we don't bother. */
19547 if (FLOAT_MODE_P (mode)
19548 && !flag_finite_math_only
19549 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19550 && (code == LE || code == GE
19551 || code == UNEQ || code == LTGT
19552 || code == UNGT || code == UNLT))
19554 enum rtx_code or1, or2;
19555 rtx or1_rtx, or2_rtx, compare2_rtx;
19556 rtx or_result = gen_reg_rtx (CCEQmode);
19558 switch (code)
19560 case LE: or1 = LT; or2 = EQ; break;
19561 case GE: or1 = GT; or2 = EQ; break;
19562 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19563 case LTGT: or1 = LT; or2 = GT; break;
19564 case UNGT: or1 = UNORDERED; or2 = GT; break;
19565 case UNLT: or1 = UNORDERED; or2 = LT; break;
19566 default: gcc_unreachable ();
19568 validate_condition_mode (or1, comp_mode);
19569 validate_condition_mode (or2, comp_mode);
19570 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19571 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19572 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19573 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19574 const_true_rtx);
19575 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19577 compare_result = or_result;
19578 code = EQ;
19581 validate_condition_mode (code, GET_MODE (compare_result));
19583 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
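/* Worked example (sketch): a floating-point "a <= b" without
   -ffinite-math-only is not a single CR bit, so the CCEQ block above
   IORs the LT and EQ bits of the compare result, roughly:

       fcmpu cr0,f1,f2    # sets LT/GT/EQ/UN in cr0
       cror  2,0,2        # cr0.EQ = cr0.LT | cr0.EQ (bit numbers illustrative)

   after which the caller only ever tests EQ on the CCEQ register.  */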
19587 /* Emit the RTL for an sISEL pattern. */
19589 void
19590 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19592 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19595 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
19596 can be used as that dest register. Return the dest register. */
19599 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
19601 if (op2 == const0_rtx)
19602 return op1;
19604 if (GET_CODE (scratch) == SCRATCH)
19605 scratch = gen_reg_rtx (mode);
19607 if (logical_operand (op2, mode))
19608 emit_insn (gen_rtx_SET (VOIDmode, scratch, gen_rtx_XOR (mode, op1, op2)));
19609 else
19610 emit_insn (gen_rtx_SET (VOIDmode, scratch,
19611 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
19613 return scratch;
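/* Sketch of what this expands to: when OP2 passes logical_operand the
   equality test becomes an XOR (e.g. an xori against a 16-bit
   immediate), which leaves zero exactly on equality; otherwise OP2 is
   negated and added, since op1 - op2 == 0 iff op1 == op2.  Either way
   the caller compares SCRATCH against zero.  */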
19616 void
19617 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19619 rtx condition_rtx;
19620 machine_mode op_mode;
19621 enum rtx_code cond_code;
19622 rtx result = operands[0];
19624 condition_rtx = rs6000_generate_compare (operands[1], mode);
19625 cond_code = GET_CODE (condition_rtx);
19627 if (FLOAT_MODE_P (mode)
19628 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19630 rtx t;
19632 PUT_MODE (condition_rtx, SImode);
19633 t = XEXP (condition_rtx, 0);
19635 gcc_assert (cond_code == NE || cond_code == EQ);
19637 if (cond_code == NE)
19638 emit_insn (gen_e500_flip_gt_bit (t, t));
19640 emit_insn (gen_move_from_CR_gt_bit (result, t));
19641 return;
19644 if (cond_code == NE
19645 || cond_code == GE || cond_code == LE
19646 || cond_code == GEU || cond_code == LEU
19647 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19649 rtx not_result = gen_reg_rtx (CCEQmode);
19650 rtx not_op, rev_cond_rtx;
19651 machine_mode cc_mode;
19653 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19655 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19656 SImode, XEXP (condition_rtx, 0), const0_rtx);
19657 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19658 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19659 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19662 op_mode = GET_MODE (XEXP (operands[1], 0));
19663 if (op_mode == VOIDmode)
19664 op_mode = GET_MODE (XEXP (operands[1], 1));
19666 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19668 PUT_MODE (condition_rtx, DImode);
19669 convert_move (result, condition_rtx, 0);
19671 else
19673 PUT_MODE (condition_rtx, SImode);
19674 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19678 /* Emit a conditional branch testing OPERANDS[0], to the label in OPERANDS[3].  */
19680 void
19681 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19683 rtx condition_rtx, loc_ref;
19685 condition_rtx = rs6000_generate_compare (operands[0], mode);
19686 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19687 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19688 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19689 loc_ref, pc_rtx)));
19692 /* Return the string to output a conditional branch to LABEL, which is
19693 the operand template of the label, or NULL if the branch is really a
19694 conditional return.
19696 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19697 condition code register and its mode specifies what kind of
19698 comparison we made.
19700 REVERSED is nonzero if we should reverse the sense of the comparison.
19702 INSN is the insn. */
19704 char *
19705 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19707 static char string[64];
19708 enum rtx_code code = GET_CODE (op);
19709 rtx cc_reg = XEXP (op, 0);
19710 machine_mode mode = GET_MODE (cc_reg);
19711 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19712 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19713 int really_reversed = reversed ^ need_longbranch;
19714 char *s = string;
19715 const char *ccode;
19716 const char *pred;
19717 rtx note;
19719 validate_condition_mode (code, mode);
19721 /* Work out which way this really branches. We could use
19722 reverse_condition_maybe_unordered here always but this
19723 makes the resulting assembler clearer. */
19724 if (really_reversed)
19726      /* Reversal of FP compares needs care -- an ordered compare
19727 becomes an unordered compare and vice versa. */
19728 if (mode == CCFPmode)
19729 code = reverse_condition_maybe_unordered (code);
19730 else
19731 code = reverse_condition (code);
19734 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19736 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19737 to the GT bit. */
19738 switch (code)
19740 case EQ:
19741 /* Opposite of GT. */
19742 code = GT;
19743 break;
19745 case NE:
19746 code = UNLE;
19747 break;
19749 default:
19750 gcc_unreachable ();
19754 switch (code)
19756 /* Not all of these are actually distinct opcodes, but
19757 we distinguish them for clarity of the resulting assembler. */
19758 case NE: case LTGT:
19759 ccode = "ne"; break;
19760 case EQ: case UNEQ:
19761 ccode = "eq"; break;
19762 case GE: case GEU:
19763 ccode = "ge"; break;
19764 case GT: case GTU: case UNGT:
19765 ccode = "gt"; break;
19766 case LE: case LEU:
19767 ccode = "le"; break;
19768 case LT: case LTU: case UNLT:
19769 ccode = "lt"; break;
19770 case UNORDERED: ccode = "un"; break;
19771 case ORDERED: ccode = "nu"; break;
19772 case UNGE: ccode = "nl"; break;
19773 case UNLE: ccode = "ng"; break;
19774 default:
19775 gcc_unreachable ();
19778 /* Maybe we have a guess as to how likely the branch is. */
19779 pred = "";
19780 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19781 if (note != NULL_RTX)
19783 /* PROB is the difference from 50%. */
19784 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19786 /* Only hint for highly probable/improbable branches on newer
19787 cpus as static prediction overrides processor dynamic
19788 prediction. For older cpus we may as well always hint, but
19789 assume not taken for branches that are very close to 50% as a
19790 mispredicted taken branch is more expensive than a
19791 mispredicted not-taken branch. */
19792 if (rs6000_always_hint
19793 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19794 && br_prob_note_reliable_p (note)))
19796 if (abs (prob) > REG_BR_PROB_BASE / 20
19797 && ((prob > 0) ^ need_longbranch))
19798 pred = "+";
19799 else
19800 pred = "-";
19804 if (label == NULL)
19805 s += sprintf (s, "b%slr%s ", ccode, pred);
19806 else
19807 s += sprintf (s, "b%s%s ", ccode, pred);
19809 /* We need to escape any '%' characters in the reg_names string.
19810 Assume they'd only be the first character.... */
19811 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19812 *s++ = '%';
19813 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19815 if (label != NULL)
19817 /* If the branch distance was too far, we may have to use an
19818 unconditional branch to go the distance. */
19819 if (need_longbranch)
19820 s += sprintf (s, ",$+8\n\tb %s", label);
19821 else
19822 s += sprintf (s, ",%s", label);
19825 return string;
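/* Example outputs (sketch; CR fields print as bare numbers unless
   -mregnames renames them): a likely-taken EQ test on cr4 gives
   "beq+ 4,L42", a conditional return gives "beqlr 4", and an
   out-of-range target inverts the sense, giving "bne 4,$+8" followed by
   "b L42".  */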
19828 /* Return the string to flip the GT bit on a CR. */
19829 char *
19830 output_e500_flip_gt_bit (rtx dst, rtx src)
19832 static char string[64];
19833 int a, b;
19835 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19836 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19838 /* GT bit. */
19839 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19840 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19842 sprintf (string, "crnot %d,%d", a, b);
19843 return string;
19846 /* Return insn for VSX or Altivec comparisons. */
19848 static rtx
19849 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19851 rtx mask;
19852 machine_mode mode = GET_MODE (op0);
19854 switch (code)
19856 default:
19857 break;
19859 case GE:
19860 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19861 return NULL_RTX;
19863 case EQ:
19864 case GT:
19865 case GTU:
19866 case ORDERED:
19867 case UNORDERED:
19868 case UNEQ:
19869 case LTGT:
19870 mask = gen_reg_rtx (mode);
19871 emit_insn (gen_rtx_SET (VOIDmode,
19872 mask,
19873 gen_rtx_fmt_ee (code, mode, op0, op1)));
19874 return mask;
19877 return NULL_RTX;

/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
      break;
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ.  */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
      break;
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
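
/* For example (illustrative only): a V4SI "a <= b" has no direct AltiVec
   instruction, so the code above synthesizes it as LT OR EQ, where LT in
   turn becomes GT with the operands swapped, i.e. roughly "vcmpgtsw"
   on the swapped operands combined with "vcmpequw" by "vor"; likewise
   "a != b" becomes the one's complement of the "vcmpequw" mask.  */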

/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  rtx tmp;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
	 specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

      /* Mark unsigned tests with CCUNSmode.  */
    case GTU:
    case GEU:
    case LTU:
    case LEU:
      cc_mode = CCUNSmode;
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (invert_move)
    {
      tmp = op_true;
      op_true = op_false;
      op_false = tmp;
    }

  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
			  CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (VOIDmode,
			  dest,
			  gen_rtx_IF_THEN_ELSE (dest_mode,
						cond2,
						op_true,
						op_false)));
  return 1;
}
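
/* The IF_THEN_ELSE of the mask against zero emitted above is intended to
   match the vector select patterns in the machine description, so the
   final instruction is typically a vsel/xxsel driven by the comparison
   mask (an assumption about the altivec.md/vsx.md patterns, noted here
   for reference).  */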

/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */

int
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  REAL_VALUE_TYPE c1;
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
	 op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return 0;
  if (GET_MODE (true_cond) != result_mode)
    return 0;
  if (GET_MODE (false_cond) != result_mode)
    return 0;

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return 0;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return 0;
    }
  else if (TARGET_HARD_FLOAT && !TARGET_FPRS
	   && SCALAR_FLOAT_MODE_P (compare_mode))
    return 0;

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return 0;

  /* Eliminate half of the comparisons by switching operands, this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero,
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return 0;

  if (GET_CODE (op1) == CONST_DOUBLE)
    REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
	  || (! rtx_equal_p (op0, true_cond)
	      && ! rtx_equal_p (op1, true_cond))))
    return 0;

  /* At this point we know we can use fsel.  */

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
	code = LE;
	temp = true_cond;
	true_cond = false_cond;
	false_cond = temp;
	break;
      case UNGE:
	code = GE;
	break;
      case UNEQ:
	code = EQ;
	break;
      default:
	break;
      }

  /* Now, reduce everything down to a GE.  */
  switch (code)
    {
    case GE:
      break;

    case LE:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_NEG (compare_mode,
					   gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest,
			  gen_rtx_IF_THEN_ELSE (result_mode,
						gen_rtx_GE (VOIDmode,
							    op0, op1),
						true_cond, false_cond)));
  return 1;
}
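
/* As a sketch of the result (illustrative register names only): a double
   "x >= y ? a : b" reduced to a GE against zero comes out as a subtract
   feeding fsel, e.g. "fsub f0,fx,fy" followed by "fsel fd,f0,fa,fb",
   since fsel selects its second value operand when the first operand
   is >= 0.0.  */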

/* Same as above, but for ints (isel).  */

static int
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return 0;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      {
	std::swap (false_cond, true_cond);
	PUT_CODE (condition_rtx, reverse_condition (cond_code));
      }
      break;
    }

  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return 1;
}

const char *
output_isel (rtx *operands)
{
  enum rtx_code code;

  code = GET_CODE (operands[1]);

  if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
    {
      gcc_assert (GET_CODE (operands[2]) == REG
		  && GET_CODE (operands[3]) == REG);
      PUT_CODE (operands[1], reverse_condition (code));
      return "isel %0,%3,%2,%j1";
    }

  return "isel %0,%2,%3,%j1";
}
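
/* Example (hypothetical operands): for "r3 = (cr0 lt) ? r4 : r5" the
   template prints roughly "isel 3,4,5,0".  For a GE test the condition
   is reversed and the two value operands are swapped instead, since
   isel itself can only test a single CR bit being set.  */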

void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
    {
      emit_insn (gen_rtx_SET (VOIDmode,
			      dest,
			      gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op0, op1, mode, 0);
  else
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
}

/* A subroutine of the atomic operation splitters.  Jump to LABEL if
   COND is true.  Mark the jump as unlikely to be taken.  */

static void
emit_unlikely_jump (rtx cond, rtx label)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
  rtx x;

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
  x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
  add_int_reg_note (x, REG_BR_PROB, very_unlikely);
}

/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  rtx (*fn) (rtx, rtx) = NULL;

  switch (mode)
    {
    case QImode:
      fn = gen_load_lockedqi;
      break;
    case HImode:
      fn = gen_load_lockedhi;
      break;
    case SImode:
      if (GET_MODE (mem) == QImode)
	fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
	fn = gen_load_lockedhi_si;
      else
	fn = gen_load_lockedsi;
      break;
    case DImode:
      fn = gen_load_lockeddi;
      break;
    case TImode:
      fn = gen_load_lockedti;
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (reg, mem));
}

/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  rtx (*fn) (rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case QImode:
      fn = gen_store_conditionalqi;
      break;
    case HImode:
      fn = gen_store_conditionalhi;
      break;
    case SImode:
      fn = gen_store_conditionalsi;
      break;
    case DImode:
      fn = gen_store_conditionaldi;
      break;
    case TImode:
      fn = gen_store_conditionalti;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit sync before stwcx. to address PPC405 Erratum.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());

  emit_insn (fn (res, mem, val));
}
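
/* Together, emit_load_locked and emit_store_conditional produce the
   classic reservation loop; for a word the generated code is roughly
   (sketch only):

	.L1:	lwarx  rT,0,rA		# load and reserve
		...			# compute new value in rN
		stwcx. rN,0,rA		# store iff reservation still held
		bne-   0,.L1		# retry if the reservation was lost  */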

/* Expand barriers before and after a load_locked/store_cond sequence.  */

static rtx
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
  rtx addr = XEXP (mem, 0);
  int strict_p = (reload_in_progress || reload_completed);

  if (!legitimate_indirect_address_p (addr, strict_p)
      && !legitimate_indexed_address_p (addr, strict_p))
    {
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    }

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
      break;
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      break;
    default:
      gcc_unreachable ();
    }
  return mem;
}

static void
rs6000_post_atomic_barrier (enum memmodel model)
{
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_RELEASE:
      break;
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_isync ());
      break;
    default:
      gcc_unreachable ();
    }
}

/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
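
/* Worked example (assumed big-endian, QImode at an address A with
   A & 3 == 2): the aligned word is A & -4; the rlwinm above computes
   (A << 3) & 0x18 == 16, which the XOR with 0x18 converts to 8, the
   shift needed to place byte 2 within the word on big-endian; the
   insertion mask is then 0xff << 8.  On little-endian the XOR is
   skipped and the rlwinm result is used directly.  */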

/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (VOIDmode, x,
			  gen_rtx_AND (SImode,
				       gen_rtx_NOT (SImode, mask),
				       oldval)));

  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}

/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}

/* Expand an atomic compare and swap operation.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = (enum memmodel) INTVAL (operands[6]);
  mod_f = (enum memmodel) INTVAL (operands[7]);
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position within the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, retval, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
    }

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  if (mod_f != MEMMODEL_RELAXED)
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (mod_f == MEMMODEL_RELAXED)
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
}
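
/* For reference, a strong SImode __atomic_compare_exchange with SEQ_CST
   ordering expands along these lines (sketch only, register names
   illustrative):

	hwsync
   .L1:	lwarx   rR,0,rM		# load current value, take reservation
	cmpw    0,rR,rO		# compare against expected OLDVAL
	bne-    0,.L2		# mismatch: fail
	stwcx.  rN,0,rM		# try to store NEWVAL
	bne-    0,.L1		# reservation lost: retry
   .L2:	isync

   leaving CR0 set to EQ on success, as noted above.  */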

/* Expand an atomic exchange operation.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = (enum memmodel) INTVAL (operands[3]);
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position within the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}

/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = (enum memmodel) INTVAL (model_rtx);
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position within the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (VOIDmode, val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    {
      after = expand_simple_binop (mode, code, before, val,
				   after, 1, OPTAB_LIB_WIDEN);
    }

  x = after;
  if (mask)
    {
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
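
/* E.g. a word-sized __atomic_fetch_add with SEQ_CST ordering follows the
   same pattern (sketch only, register names illustrative):

	hwsync
   .L1:	lwarx   rB,0,rM		# BEFORE value
	add     rA,rB,rV	# AFTER value
	stwcx.  rA,0,rM
	bne-    0,.L1
	isync  */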

/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs[reg][mode];
  if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else if (TARGET_E500_DOUBLE && mode == TFmode)
    reg_mode = DFmode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
	}

      return;
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      for (i = nregs - 1; i >= 0; i--)
	emit_insn (gen_rtx_SET (VOIDmode,
				simplify_gen_subreg (reg_mode, dst, mode,
						     i * reg_mode_size),
				simplify_gen_subreg (reg_mode, src, mode,
						     i * reg_mode_size)));
    }
  else
    {
      int i;
      int j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (VOIDmode, ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (VOIDmode, basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg,
						       delta_rtx, nsrc)
				: gen_movsi_update (breg, breg,
						    delta_rtx, nsrc))
			     : gen_movdi_di_update (breg, breg,
						    delta_rtx, nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (VOIDmode,
					      gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (VOIDmode, basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg,
						   offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
	}

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  emit_insn (gen_rtx_SET (VOIDmode,
				  simplify_gen_subreg (reg_mode, dst, mode,
						       j * reg_mode_size),
				  simplify_gen_subreg (reg_mode, src, mode,
						       j * reg_mode_size)));
	}
      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}

/* This page contains routines that are used to determine what the
   function prologue and epilogue code will do and write them out.  */

static inline bool
save_reg_p (int r)
{
  return !call_used_regs[r] && df_regs_ever_live_p (r);
}

/* Return the first fixed-point register that is required to be
   saved.  32 if none.  */

static int
first_reg_to_save (void)
{
  int first_reg;

  /* Find lowest numbered live register.  */
  for (first_reg = 13; first_reg <= 31; first_reg++)
    if (save_reg_p (first_reg))
      break;

  if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
      && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
	  || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
	  || (TARGET_TOC && TARGET_MINIMAL_TOC))
      && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;

#if TARGET_MACHO
  if (flag_pic
      && crtl->uses_pic_offset_table
      && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
    return RS6000_PIC_OFFSET_TABLE_REGNUM;
#endif

  return first_reg;
}

/* Similar, for FP regs.  */

static int
first_fp_reg_to_save (void)
{
  int first_reg;

  /* Find lowest numbered live register.  */
  for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
    if (save_reg_p (first_reg))
      break;

  return first_reg;
}

/* Similar, for AltiVec regs.  */

static int
first_altivec_reg_to_save (void)
{
  int i;

  /* Stack frame remains as is unless we are in AltiVec ABI.  */
  if (! TARGET_ALTIVEC_ABI)
    return LAST_ALTIVEC_REGNO + 1;

  /* On Darwin, the unwind routines are compiled without
     TARGET_ALTIVEC, and use save_world to save/restore the
     altivec registers when necessary.  */
  if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
      && ! TARGET_ALTIVEC)
    return FIRST_ALTIVEC_REGNO + 20;

  /* Find lowest numbered live register.  */
  for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
    if (save_reg_p (i))
      break;

  return i;
}

/* Return a 32-bit mask of the AltiVec registers we need to set in
   VRSAVE.  Bit n of the return value is 1 if Vn is live.  The MSB in
   the 32-bit word is 0.  */

static unsigned int
compute_vrsave_mask (void)
{
  unsigned int i, mask = 0;

  /* On Darwin, the unwind routines are compiled without
     TARGET_ALTIVEC, and use save_world to save/restore the
     call-saved altivec registers when necessary.  */
  if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
      && ! TARGET_ALTIVEC)
    mask |= 0xFFF;

  /* First, find out if we use _any_ altivec registers.  */
  for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
    if (df_regs_ever_live_p (i))
      mask |= ALTIVEC_REG_BIT (i);

  if (mask == 0)
    return mask;

  /* Next, remove the argument registers from the set.  These must
     be in the VRSAVE mask set by the caller, so we don't need to add
     them in again.  More importantly, the mask we compute here is
     used to generate CLOBBERs in the set_vrsave insn, and we do not
     wish the argument registers to die.  */
  for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
    mask &= ~ALTIVEC_REG_BIT (i);

  /* Similarly, remove the return value from the set.  */
  {
    bool yes = false;
    diddle_return_value (is_altivec_return_reg, &yes);
    if (yes)
      mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
  }

  return mask;
}

/* For a very restricted set of circumstances, we can cut down the
   size of prologues/epilogues by calling our own save/restore-the-world
   routines.  */

static void
compute_save_world_info (rs6000_stack_t *info_ptr)
{
  info_ptr->world_save_p = 1;
  info_ptr->world_save_p
    = (WORLD_SAVE_P (info_ptr)
       && DEFAULT_ABI == ABI_DARWIN
       && !cfun->has_nonlocal_label
       && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
       && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
       && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
       && info_ptr->cr_save_p);

  /* This will not work in conjunction with sibcalls.  Make sure there
     are none.  (This check is expensive, but seldom executed.)  */
  if (WORLD_SAVE_P (info_ptr))
    {
      rtx_insn *insn;
      for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
	if (CALL_P (insn) && SIBLING_CALL_P (insn))
	  {
	    info_ptr->world_save_p = 0;
	    break;
	  }
    }

  if (WORLD_SAVE_P (info_ptr))
    {
      /* Even if we're not touching VRsave, make sure there's room on the
	 stack for it, if it looks like we're calling SAVE_WORLD, which
	 will attempt to save it.  */
      info_ptr->vrsave_size = 4;

      /* If we are going to save the world, we need to save the link
	 register too.  */
      info_ptr->lr_save_p = 1;

      /* "Save" the VRsave register too if we're saving the world.  */
      if (info_ptr->vrsave_mask == 0)
	info_ptr->vrsave_mask = compute_vrsave_mask ();

      /* Because the Darwin register save/restore routines only handle
	 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
	 check.  */
      gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
		  && (info_ptr->first_altivec_reg_save
		      >= FIRST_SAVED_ALTIVEC_REGNO));
    }
  return;
}

static void
is_altivec_return_reg (rtx reg, void *xyes)
{
  bool *yes = (bool *) xyes;
  if (REGNO (reg) == ALTIVEC_ARG_RETURN)
    *yes = true;
}

/* Look for user-defined global regs in the range FIRST to LAST-1.
   We should not restore these, and so cannot use lmw or out-of-line
   restore functions if there are any.  We also can't save them
   (well, emit frame notes for them), because frame unwinding during
   exception handling will restore saved registers.  */

static bool
global_regs_p (unsigned first, unsigned last)
{
  while (first < last)
    if (global_regs[first++])
      return true;
  return false;
}

/* Determine the strategy for saving/restoring registers.  */

enum {
  SAVRES_MULTIPLE = 0x1,
  SAVE_INLINE_FPRS = 0x2,
  SAVE_INLINE_GPRS = 0x4,
  REST_INLINE_FPRS = 0x8,
  REST_INLINE_GPRS = 0x10,
  SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
  SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
  REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
  SAVE_INLINE_VRS = 0x100,
  REST_INLINE_VRS = 0x200
};

static int
rs6000_savres_strategy (rs6000_stack_t *info,
			bool using_static_chain_p)
{
  int strategy = 0;
  bool lr_save_p;

  if (TARGET_MULTIPLE
      && !TARGET_POWERPC64
      && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
      && info->first_gp_reg_save < 31
      && !global_regs_p (info->first_gp_reg_save, 32))
    strategy |= SAVRES_MULTIPLE;

  if (crtl->calls_eh_return
      || cfun->machine->ra_need_lr)
    strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
		 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
		 | SAVE_INLINE_VRS | REST_INLINE_VRS);

  if (info->first_fp_reg_save == 64
      /* The out-of-line FP routines use double-precision stores;
	 we can't use those routines if we don't have such stores.  */
      || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
      || global_regs_p (info->first_fp_reg_save, 64))
    strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;

  if (info->first_gp_reg_save == 32
      || (!(strategy & SAVRES_MULTIPLE)
	  && global_regs_p (info->first_gp_reg_save, 32)))
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
      || global_regs_p (info->first_altivec_reg_save,
			LAST_ALTIVEC_REGNO + 1))
    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

  /* Define cutoff for using out-of-line functions to save registers.  */
  if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
    {
      if (!optimize_size)
	{
	  strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
	  strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	  strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
	}
      else
	{
	  /* Prefer out-of-line restore if it will exit.  */
	  if (info->first_fp_reg_save > 61)
	    strategy |= SAVE_INLINE_FPRS;
	  if (info->first_gp_reg_save > 29)
	    {
	      if (info->first_fp_reg_save == 64)
		strategy |= SAVE_INLINE_GPRS;
	      else
		strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	    }
	  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
	    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      if (info->first_fp_reg_save > 60)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      if (info->first_gp_reg_save > 29)
	strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
    }
  else
    {
      gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      if (info->first_fp_reg_save > 61)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
    }

  /* Don't bother to try to save things out-of-line if r11 is occupied
     by the static chain.  It would require too much fiddling and the
     static chain is rarely used anyway.  FPRs are saved w.r.t the stack
     pointer on Darwin, and AIX uses r1 or r12.  */
  if (using_static_chain_p
      && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
    strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
		 | SAVE_INLINE_GPRS
		 | SAVE_INLINE_VRS | REST_INLINE_VRS);

  /* We can only use the out-of-line routines to restore if we've
     saved all the registers from first_fp_reg_save in the prologue.
     Otherwise, we risk loading garbage.  */
  if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
    {
      int i;

      for (i = info->first_fp_reg_save; i < 64; i++)
	if (!save_reg_p (i))
	  {
	    strategy |= REST_INLINE_FPRS;
	    break;
	  }
    }

  /* If we are going to use store multiple, then don't even bother
     with the out-of-line routines, since the store-multiple
     instruction will always be smaller.  */
  if ((strategy & SAVRES_MULTIPLE))
    strategy |= SAVE_INLINE_GPRS;

  /* info->lr_save_p isn't yet set if the only reason lr needs to be
     saved is an out-of-line save or restore.  Set up the value for
     the next test (excluding out-of-line gpr restore).  */
  lr_save_p = (info->lr_save_p
	       || !(strategy & SAVE_INLINE_GPRS)
	       || !(strategy & SAVE_INLINE_FPRS)
	       || !(strategy & SAVE_INLINE_VRS)
	       || !(strategy & REST_INLINE_FPRS)
	       || !(strategy & REST_INLINE_VRS));

  /* The situation is more complicated with load multiple.  We'd
     prefer to use the out-of-line routines for restores, since the
     "exit" out-of-line routines can handle the restore of LR and the
     frame teardown.  However it doesn't make sense to use the
     out-of-line routine if that is the only reason we'd need to save
     LR, and we can't use the "exit" out-of-line gpr restore if we
     have saved some fprs; in those cases it is advantageous to use
     load multiple when available.  */
  if ((strategy & SAVRES_MULTIPLE)
      && (!lr_save_p
	  || info->first_fp_reg_save != 64))
    strategy |= REST_INLINE_GPRS;

  /* Saving CR interferes with the exit routines used on the SPE, so
     just punt here.  */
  if (TARGET_SPE_ABI
      && info->spe_64bit_regs_used
      && info->cr_save_p)
    strategy |= REST_INLINE_GPRS;

  /* We can only use load multiple or the out-of-line routines to
     restore if we've used store multiple or out-of-line routines
     in the prologue, i.e. if we've saved all the registers from
     first_gp_reg_save.  Otherwise, we risk loading garbage.  */
  if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
      == SAVE_INLINE_GPRS)
    {
      int i;

      for (i = info->first_gp_reg_save; i < 32; i++)
	if (!save_reg_p (i))
	  {
	    strategy |= REST_INLINE_GPRS;
	    break;
	  }
    }

  if (TARGET_ELF && TARGET_64BIT)
    {
      if (!(strategy & SAVE_INLINE_FPRS))
	strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
      else if (!(strategy & SAVE_INLINE_GPRS)
	       && info->first_fp_reg_save == 64)
	strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
    }
  else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
    strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;

  if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
    strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;

  return strategy;
}

/* Calculate the stack information for the current function.  This is
   complicated by having two separate calling sequences, the AIX calling
   sequence and the V.4 calling sequence.

   AIX (and Darwin/Mac OS X) stack frames look like:
							32-bit  64-bit
	SP----> +---------------------------------------+
		| back chain to caller                  | 0       0
		+---------------------------------------+
		| saved CR                              | 4       8 (8-11)
		+---------------------------------------+
		| saved LR                              | 8       16
		+---------------------------------------+
		| reserved for compilers                | 12      24
		+---------------------------------------+
		| reserved for binders                  | 16      32
		+---------------------------------------+
		| saved TOC pointer                     | 20      40
		+---------------------------------------+
		| Parameter save area (P)               | 24      48
		+---------------------------------------+
		| Alloca space (A)                      | 24+P    etc.
		+---------------------------------------+
		| Local variable space (L)              | 24+P+A
		+---------------------------------------+
		| Float/int conversion temporary (X)    | 24+P+A+L
		+---------------------------------------+
		| Save area for AltiVec registers (W)   | 24+P+A+L+X
		+---------------------------------------+
		| AltiVec alignment padding (Y)         | 24+P+A+L+X+W
		+---------------------------------------+
		| Save area for VRSAVE register (Z)     | 24+P+A+L+X+W+Y
		+---------------------------------------+
		| Save area for GP registers (G)        | 24+P+A+L+X+W+Y+Z
		+---------------------------------------+
		| Save area for FP registers (F)        | 24+P+A+L+X+W+Y+Z+G
		+---------------------------------------+
	old SP->| back chain to caller's caller         |
		+---------------------------------------+

   The required alignment for AIX configurations is two words (i.e., 8
   or 16 bytes).

   The ELFv2 ABI is a variant of the AIX ABI.  Stack frames look like:

	SP----> +---------------------------------------+
		| Back chain to caller                  | 0
		+---------------------------------------+
		| Save area for CR                      | 8
		+---------------------------------------+
		| Saved LR                              | 16
		+---------------------------------------+
		| Saved TOC pointer                     | 24
		+---------------------------------------+
		| Parameter save area (P)               | 32
		+---------------------------------------+
		| Alloca space (A)                      | 32+P
		+---------------------------------------+
		| Local variable space (L)              | 32+P+A
		+---------------------------------------+
		| Save area for AltiVec registers (W)   | 32+P+A+L
		+---------------------------------------+
		| AltiVec alignment padding (Y)         | 32+P+A+L+W
		+---------------------------------------+
		| Save area for GP registers (G)        | 32+P+A+L+W+Y
		+---------------------------------------+
		| Save area for FP registers (F)        | 32+P+A+L+W+Y+G
		+---------------------------------------+
	old SP->| back chain to caller's caller         | 32+P+A+L+W+Y+G+F
		+---------------------------------------+

   V.4 stack frames look like:

	SP----> +---------------------------------------+
		| back chain to caller                  | 0
		+---------------------------------------+
		| caller's saved LR                     | 4
		+---------------------------------------+
		| Parameter save area (P)               | 8
		+---------------------------------------+
		| Alloca space (A)                      | 8+P
		+---------------------------------------+
		| Varargs save area (V)                 | 8+P+A
		+---------------------------------------+
		| Local variable space (L)              | 8+P+A+V
		+---------------------------------------+
		| Float/int conversion temporary (X)    | 8+P+A+V+L
		+---------------------------------------+
		| Save area for AltiVec registers (W)   | 8+P+A+V+L+X
		+---------------------------------------+
		| AltiVec alignment padding (Y)         | 8+P+A+V+L+X+W
		+---------------------------------------+
		| Save area for VRSAVE register (Z)     | 8+P+A+V+L+X+W+Y
		+---------------------------------------+
		| SPE: area for 64-bit GP registers     |
		+---------------------------------------+
		| SPE alignment padding                 |
		+---------------------------------------+
		| saved CR (C)                          | 8+P+A+V+L+X+W+Y+Z
		+---------------------------------------+
		| Save area for GP registers (G)        | 8+P+A+V+L+X+W+Y+Z+C
		+---------------------------------------+
		| Save area for FP registers (F)        | 8+P+A+V+L+X+W+Y+Z+C+G
		+---------------------------------------+
	old SP->| back chain to caller's caller         |
		+---------------------------------------+

   The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
   given.  (But note below and in sysv4.h that we require only 8 and
   may round up the size of our stack frame anyway.  The historical
   reason is early versions of powerpc-linux which didn't properly
   align the stack at program startup.  A happy side-effect is that
   -mno-eabi libraries can be used with -meabi programs.)

   The EABI configuration defaults to the V.4 layout.  However,
   the stack alignment requirements may differ.  If -mno-eabi is not
   given, the required stack alignment is 8 bytes; if -mno-eabi is
   given, the required alignment is 16 bytes.  (But see V.4 comment
   above.)  */

#ifndef ABI_STACK_BOUNDARY
#define ABI_STACK_BOUNDARY STACK_BOUNDARY
#endif
21659 static rs6000_stack_t *
21660 rs6000_stack_info (void)
21662 /* We should never be called for thunks, we are not set up for that. */
21663 gcc_assert (!cfun->is_thunk);
21665 rs6000_stack_t *info_ptr = &stack_info;
21666 int reg_size = TARGET_32BIT ? 4 : 8;
21667 int ehrd_size;
21668 int ehcr_size;
21669 int save_align;
21670 int first_gp;
21671 HOST_WIDE_INT non_fixed_size;
21672 bool using_static_chain_p;
21674 if (reload_completed && info_ptr->reload_completed)
21675 return info_ptr;
21677 memset (info_ptr, 0, sizeof (*info_ptr));
21678 info_ptr->reload_completed = reload_completed;
21680 if (TARGET_SPE)
21682 /* Cache value so we don't rescan instruction chain over and over. */
21683 if (cfun->machine->insn_chain_scanned_p == 0)
21684 cfun->machine->insn_chain_scanned_p
21685 = spe_func_has_64bit_regs_p () + 1;
21686 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21689 /* Select which calling sequence. */
21690 info_ptr->abi = DEFAULT_ABI;
21692 /* Calculate which registers need to be saved & save area size. */
21693 info_ptr->first_gp_reg_save = first_reg_to_save ();
21694 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21695 even if it currently looks like we won't. Reload may need it to
21696 get at a constant; if so, it will have already created a constant
21697 pool entry for it. */
21698 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21699 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21700 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21701 && crtl->uses_const_pool
21702 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21703 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21704 else
21705 first_gp = info_ptr->first_gp_reg_save;
21707 info_ptr->gp_size = reg_size * (32 - first_gp);
21709 /* For the SPE, we have an additional upper 32-bits on each GPR.
21710 Ideally we should save the entire 64-bits only when the upper
21711 half is used in SIMD instructions. Since we only record
21712 registers live (not the size they are used in), this proves
21713 difficult because we'd have to traverse the instruction chain at
21714 the right time, taking reload into account. This is a real pain,
21715 so we opt to save the GPRs in 64-bits always if but one register
21716 gets used in 64-bits. Otherwise, all the registers in the frame
21717 get saved in 32-bits.
21719 So... since when we save all GPRs (except the SP) in 64-bits, the
21720 traditional GP save area will be empty. */
21721 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21722 info_ptr->gp_size = 0;
21724 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21725 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21727 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21728 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21729 - info_ptr->first_altivec_reg_save);
21731 /* Does this function call anything? */
21732 info_ptr->calls_p = (! crtl->is_leaf
21733 || cfun->machine->ra_needs_full_frame);
21735 /* Determine if we need to save the condition code registers. */
21736 if (df_regs_ever_live_p (CR2_REGNO)
21737 || df_regs_ever_live_p (CR3_REGNO)
21738 || df_regs_ever_live_p (CR4_REGNO))
21740 info_ptr->cr_save_p = 1;
21741 if (DEFAULT_ABI == ABI_V4)
21742 info_ptr->cr_size = reg_size;
21745 /* If the current function calls __builtin_eh_return, then we need
21746 to allocate stack space for registers that will hold data for
21747 the exception handler. */
21748 if (crtl->calls_eh_return)
21750 unsigned int i;
21751 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21752 continue;
21754 /* SPE saves EH registers in 64-bits. */
21755 ehrd_size = i * (TARGET_SPE_ABI
21756 && info_ptr->spe_64bit_regs_used != 0
21757 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21759 else
21760 ehrd_size = 0;
21762 /* In the ELFv2 ABI, we also need to allocate space for separate
21763 CR field save areas if the function calls __builtin_eh_return. */
21764 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21766 /* This hard-codes that we have three call-saved CR fields. */
21767 ehcr_size = 3 * reg_size;
21768 /* We do *not* use the regular CR save mechanism. */
21769 info_ptr->cr_save_p = 0;
21771 else
21772 ehcr_size = 0;
21774 /* Determine various sizes. */
21775 info_ptr->reg_size = reg_size;
21776 info_ptr->fixed_size = RS6000_SAVE_AREA;
21777 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21778 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21779 TARGET_ALTIVEC ? 16 : 8);
21780 if (FRAME_GROWS_DOWNWARD)
21781 info_ptr->vars_size
21782 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21783 + info_ptr->parm_size,
21784 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21785 - (info_ptr->fixed_size + info_ptr->vars_size
21786 + info_ptr->parm_size);
21788 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21789 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21790 else
21791 info_ptr->spe_gp_size = 0;
21793 if (TARGET_ALTIVEC_ABI)
21794 info_ptr->vrsave_mask = compute_vrsave_mask ();
21795 else
21796 info_ptr->vrsave_mask = 0;
21798 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21799 info_ptr->vrsave_size = 4;
21800 else
21801 info_ptr->vrsave_size = 0;
21803 compute_save_world_info (info_ptr);
21805 /* Calculate the offsets. */
21806 switch (DEFAULT_ABI)
21808 case ABI_NONE:
21809 default:
21810 gcc_unreachable ();
21812 case ABI_AIX:
21813 case ABI_ELFv2:
21814 case ABI_DARWIN:
21815 info_ptr->fp_save_offset = - info_ptr->fp_size;
21816 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21818 if (TARGET_ALTIVEC_ABI)
21820 info_ptr->vrsave_save_offset
21821 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21823 /* Align stack so vector save area is on a quadword boundary.
21824 The padding goes above the vectors. */
21825 if (info_ptr->altivec_size != 0)
21826 info_ptr->altivec_padding_size
21827 = info_ptr->vrsave_save_offset & 0xF;
21828 else
21829 info_ptr->altivec_padding_size = 0;
21831 info_ptr->altivec_save_offset
21832 = info_ptr->vrsave_save_offset
21833 - info_ptr->altivec_padding_size
21834 - info_ptr->altivec_size;
21835 gcc_assert (info_ptr->altivec_size == 0
21836 || info_ptr->altivec_save_offset % 16 == 0);
21838 /* Adjust for AltiVec case. */
21839 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21841 else
21842 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21844 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21845 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21846 info_ptr->lr_save_offset = 2*reg_size;
21847 break;
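/* Sketch of the resulting AIX/ELFv2/Darwin layout (offsets are
   relative to the frame top; sizes hypothetical): the FPR save area
   sits highest at -fp_size, the GPRs below it, then VRSAVE, the
   quadword-aligned AltiVec area, and the EH data/CR-field areas;
   CR and LR live in the fixed area at reg_size and 2*reg_size.  */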
21849 case ABI_V4:
21850 info_ptr->fp_save_offset = - info_ptr->fp_size;
21851 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21852 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21854 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21856 /* Align stack so SPE GPR save area is aligned on a
21857 double-word boundary. */
21858 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21859 info_ptr->spe_padding_size
21860 = 8 - (-info_ptr->cr_save_offset % 8);
21861 else
21862 info_ptr->spe_padding_size = 0;
21864 info_ptr->spe_gp_save_offset
21865 = info_ptr->cr_save_offset
21866 - info_ptr->spe_padding_size
21867 - info_ptr->spe_gp_size;
21869 /* Adjust for SPE case. */
21870 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21872 else if (TARGET_ALTIVEC_ABI)
21874 info_ptr->vrsave_save_offset
21875 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21877 /* Align stack so vector save area is on a quadword boundary. */
21878 if (info_ptr->altivec_size != 0)
21879 info_ptr->altivec_padding_size
21880 = 16 - (-info_ptr->vrsave_save_offset % 16);
21881 else
21882 info_ptr->altivec_padding_size = 0;
21884 info_ptr->altivec_save_offset
21885 = info_ptr->vrsave_save_offset
21886 - info_ptr->altivec_padding_size
21887 - info_ptr->altivec_size;
21889 /* Adjust for AltiVec case. */
21890 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21892 else
21893 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21894 info_ptr->ehrd_offset -= ehrd_size;
21895 info_ptr->lr_save_offset = reg_size;
21896 break;
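/* Sketch of the V.4 layout, by contrast: FPRs, GPRs and CR all hang
   below the frame top, followed by the SPE or AltiVec area (aligned
   with explicit padding) and the EH data registers; LR is saved at
   reg_size in the fixed area.  */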
21899 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21900 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21901 + info_ptr->gp_size
21902 + info_ptr->altivec_size
21903 + info_ptr->altivec_padding_size
21904 + info_ptr->spe_gp_size
21905 + info_ptr->spe_padding_size
21906 + ehrd_size
21907 + ehcr_size
21908 + info_ptr->cr_size
21909 + info_ptr->vrsave_size,
21910 save_align);
21912 non_fixed_size = (info_ptr->vars_size
21913 + info_ptr->parm_size
21914 + info_ptr->save_size);
21916 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21917 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
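/* Hypothetical numbers, to make the arithmetic concrete: on 32-bit
   V.4 with gp_size == 12, cr_size == 4 and no FP or vector state,
   save_size == RS6000_ALIGN (16, 8) == 16; with vars_size == 40 and
   parm_size == 8, non_fixed_size == 64 and total_size is 64 plus
   fixed_size, rounded up to ABI_STACK_BOUNDARY / BITS_PER_UNIT.  */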
21919 /* Determine if we need to save the link register. */
21920 if (info_ptr->calls_p
21921 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21922 && crtl->profile
21923 && !TARGET_PROFILE_KERNEL)
21924 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21925 #ifdef TARGET_RELOCATABLE
21926 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21927 #endif
21928 || rs6000_ra_ever_killed ())
21929 info_ptr->lr_save_p = 1;
21931 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21932 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21933 && call_used_regs[STATIC_CHAIN_REGNUM]);
21934 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21935 using_static_chain_p);
21937 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21938 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21939 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21940 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21941 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21942 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21943 info_ptr->lr_save_p = 1;
21945 if (info_ptr->lr_save_p)
21946 df_set_regs_ever_live (LR_REGNO, true);
21948 /* Determine if we need to allocate any stack frame:
21950 For AIX we need to push the stack if a frame pointer is needed
21951 (because the stack might be dynamically adjusted), if we are
21952 debugging, if we make calls, or if the sum of fp_save, gp_save,
21953 and local variables is more than the space needed to save all
21954 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21955 + 18*8 = 288 (GPR13 reserved).
21957 For V.4 we don't have the stack cushion that AIX uses, but assume
21958 that the debugger can handle stackless frames. */
21960 if (info_ptr->calls_p)
21961 info_ptr->push_p = 1;
21963 else if (DEFAULT_ABI == ABI_V4)
21964 info_ptr->push_p = non_fixed_size != 0;
21966 else if (frame_pointer_needed)
21967 info_ptr->push_p = 1;
21969 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21970 info_ptr->push_p = 1;
21972 else
21973 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21975 /* Zero offsets if we're not saving those registers. */
21976 if (info_ptr->fp_size == 0)
21977 info_ptr->fp_save_offset = 0;
21979 if (info_ptr->gp_size == 0)
21980 info_ptr->gp_save_offset = 0;
21982 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21983 info_ptr->altivec_save_offset = 0;
21985 /* Zero VRSAVE offset if not saved and restored. */
21986 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21987 info_ptr->vrsave_save_offset = 0;
21989 if (! TARGET_SPE_ABI
21990 || info_ptr->spe_64bit_regs_used == 0
21991 || info_ptr->spe_gp_size == 0)
21992 info_ptr->spe_gp_save_offset = 0;
21994 if (! info_ptr->lr_save_p)
21995 info_ptr->lr_save_offset = 0;
21997 if (! info_ptr->cr_save_p)
21998 info_ptr->cr_save_offset = 0;
22000 return info_ptr;
22003 /* Return true if the current function uses any GPRs in 64-bit SIMD
22004 mode. */
22006 static bool
22007 spe_func_has_64bit_regs_p (void)
22009 rtx_insn *insns, *insn;
22011 /* Functions that save and restore all the call-saved registers will
22012 need to save/restore the registers in 64-bits. */
22013 if (crtl->calls_eh_return
22014 || cfun->calls_setjmp
22015 || crtl->has_nonlocal_goto)
22016 return true;
22018 insns = get_insns ();
22020 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
22022 if (INSN_P (insn))
22024 rtx i;
22026 /* FIXME: This should be implemented with attributes, e.g.
22028 (set_attr "spe64" "true") ... then
22029 if (get_spe64 (insn)) return true;
22031 That is the only reliable way to do the check below. */
22033 i = PATTERN (insn);
22034 if (GET_CODE (i) == SET)
22036 machine_mode mode = GET_MODE (SET_SRC (i));
22038 if (SPE_VECTOR_MODE (mode))
22039 return true;
22040 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
22041 return true;
22046 return false;
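/* As a hypothetical example of what the scan above catches: an insn
   whose pattern is
   (set (reg:V2SI 3) (plus:V2SI (reg:V2SI 4) (reg:V2SI 5)))
   has an SPE vector mode on its SET_SRC, so we return true and the
   prologue will save the GPRs in full 64-bit width.  */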
22049 static void
22050 debug_stack_info (rs6000_stack_t *info)
22052 const char *abi_string;
22054 if (! info)
22055 info = rs6000_stack_info ();
22057 fprintf (stderr, "\nStack information for function %s:\n",
22058 ((current_function_decl && DECL_NAME (current_function_decl))
22059 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
22060 : "<unknown>"));
22062 switch (info->abi)
22064 default: abi_string = "Unknown"; break;
22065 case ABI_NONE: abi_string = "NONE"; break;
22066 case ABI_AIX: abi_string = "AIX"; break;
22067 case ABI_ELFv2: abi_string = "ELFv2"; break;
22068 case ABI_DARWIN: abi_string = "Darwin"; break;
22069 case ABI_V4: abi_string = "V.4"; break;
22072 fprintf (stderr, "\tABI = %5s\n", abi_string);
22074 if (TARGET_ALTIVEC_ABI)
22075 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
22077 if (TARGET_SPE_ABI)
22078 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
22080 if (info->first_gp_reg_save != 32)
22081 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
22083 if (info->first_fp_reg_save != 64)
22084 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
22086 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
22087 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
22088 info->first_altivec_reg_save);
22090 if (info->lr_save_p)
22091 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
22093 if (info->cr_save_p)
22094 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
22096 if (info->vrsave_mask)
22097 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
22099 if (info->push_p)
22100 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
22102 if (info->calls_p)
22103 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
22105 if (info->gp_save_offset)
22106 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
22108 if (info->fp_save_offset)
22109 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
22111 if (info->altivec_save_offset)
22112 fprintf (stderr, "\taltivec_save_offset = %5d\n",
22113 info->altivec_save_offset);
22115 if (info->spe_gp_save_offset)
22116 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
22117 info->spe_gp_save_offset);
22119 if (info->vrsave_save_offset)
22120 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
22121 info->vrsave_save_offset);
22123 if (info->lr_save_offset)
22124 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
22126 if (info->cr_save_offset)
22127 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
22129 if (info->varargs_save_offset)
22130 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
22132 if (info->total_size)
22133 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
22134 info->total_size);
22136 if (info->vars_size)
22137 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
22138 info->vars_size);
22140 if (info->parm_size)
22141 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
22143 if (info->fixed_size)
22144 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
22146 if (info->gp_size)
22147 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
22149 if (info->spe_gp_size)
22150 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
22152 if (info->fp_size)
22153 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
22155 if (info->altivec_size)
22156 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
22158 if (info->vrsave_size)
22159 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22161 if (info->altivec_padding_size)
22162 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22163 info->altivec_padding_size);
22165 if (info->spe_padding_size)
22166 fprintf (stderr, "\tspe_padding_size = %5d\n",
22167 info->spe_padding_size);
22169 if (info->cr_size)
22170 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22172 if (info->save_size)
22173 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22175 if (info->reg_size != 4)
22176 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22178 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22180 fprintf (stderr, "\n");
22183 rtx
22184 rs6000_return_addr (int count, rtx frame)
22186 /* Currently we don't optimize very well between prologue and body
22187 code, and for PIC code the result can actually be quite bad, so
22188 don't try to be too clever here. */
22189 if (count != 0
22190 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22192 cfun->machine->ra_needs_full_frame = 1;
22194 return
22195 gen_rtx_MEM
22196 (Pmode,
22197 memory_address
22198 (Pmode,
22199 plus_constant (Pmode,
22200 copy_to_reg
22201 (gen_rtx_MEM (Pmode,
22202 memory_address (Pmode, frame))),
22203 RETURN_ADDRESS_OFFSET)));
22206 cfun->machine->ra_need_lr = 1;
22207 return get_hard_reg_initial_val (Pmode, LR_REGNO);
22210 /* Say whether a function is a candidate for sibcall handling or not. */
22212 static bool
22213 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22215 tree fntype;
22217 if (decl)
22218 fntype = TREE_TYPE (decl);
22219 else
22220 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22222 /* We can't do it if the called function has more vector parameters
22223 than the current function; there's nowhere to put the VRsave code. */
22224 if (TARGET_ALTIVEC_ABI
22225 && TARGET_ALTIVEC_VRSAVE
22226 && !(decl && decl == current_function_decl))
22228 function_args_iterator args_iter;
22229 tree type;
22230 int nvreg = 0;
22232 /* Functions with vector parameters are required to have a
22233 prototype, so the argument type info must be available
22234 here. */
22235 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
22236 if (TREE_CODE (type) == VECTOR_TYPE
22237 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22238 nvreg++;
22240 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
22241 if (TREE_CODE (type) == VECTOR_TYPE
22242 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22243 nvreg--;
22245 if (nvreg > 0)
22246 return false;
22249 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22250 functions, because the callee may have a different TOC pointer from
22251 the caller and there's no way to ensure we restore the TOC when
22252 we return. With the secure-plt SYSV ABI we can't make non-local
22253 calls under -fpic/-fPIC because the PLT call stubs use r30. */
22254 if (DEFAULT_ABI == ABI_DARWIN
22255 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22256 && decl
22257 && !DECL_EXTERNAL (decl)
22258 && (*targetm.binds_local_p) (decl))
22259 || (DEFAULT_ABI == ABI_V4
22260 && (!TARGET_SECURE_PLT
22261 || !flag_pic
22262 || (decl
22263 && (*targetm.binds_local_p) (decl)))))
22265 tree attr_list = TYPE_ATTRIBUTES (fntype);
22267 if (!lookup_attribute ("longcall", attr_list)
22268 || lookup_attribute ("shortcall", attr_list))
22269 return true;
22272 return false;
22275 static int
22276 rs6000_ra_ever_killed (void)
22278 rtx_insn *top;
22279 rtx reg;
22280 rtx_insn *insn;
22282 if (cfun->is_thunk)
22283 return 0;
22285 if (cfun->machine->lr_save_state)
22286 return cfun->machine->lr_save_state - 1;
22288 /* regs_ever_live has LR marked as used if any sibcalls are present,
22289 but this should not force saving and restoring in the
22290 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22291 clobbers LR, so that is inappropriate. */
22293 /* Also, the prologue can generate a store into LR that
22294 doesn't really count, like this:
22296 move LR->R0
22297 bcl to set PIC register
22298 move LR->R31
22299 move R0->LR
22301 When we're called from the epilogue, we need to avoid counting
22302 this as a store. */
22304 push_topmost_sequence ();
22305 top = get_insns ();
22306 pop_topmost_sequence ();
22307 reg = gen_rtx_REG (Pmode, LR_REGNO);
22309 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22311 if (INSN_P (insn))
22313 if (CALL_P (insn))
22315 if (!SIBLING_CALL_P (insn))
22316 return 1;
22318 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22319 return 1;
22320 else if (set_of (reg, insn) != NULL_RTX
22321 && !prologue_epilogue_contains (insn))
22322 return 1;
22325 return 0;
22328 /* Emit instructions needed to load the TOC register.
22329 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
22330 and there is a constant pool, or for SVR4 -fpic. */
22332 void
22333 rs6000_emit_load_toc_table (int fromprolog)
22335 rtx dest;
22336 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22338 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22340 char buf[30];
22341 rtx lab, tmp1, tmp2, got;
22343 lab = gen_label_rtx ();
22344 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22345 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22346 if (flag_pic == 2)
22347 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22348 else
22349 got = rs6000_got_sym ();
22350 tmp1 = tmp2 = dest;
22351 if (!fromprolog)
22353 tmp1 = gen_reg_rtx (Pmode);
22354 tmp2 = gen_reg_rtx (Pmode);
22356 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22357 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22358 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22359 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22361 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22363 emit_insn (gen_load_toc_v4_pic_si ());
22364 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22366 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22368 char buf[30];
22369 rtx temp0 = (fromprolog
22370 ? gen_rtx_REG (Pmode, 0)
22371 : gen_reg_rtx (Pmode));
22373 if (fromprolog)
22375 rtx symF, symL;
22377 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22378 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22380 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22381 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22383 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22384 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22385 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22387 else
22389 rtx tocsym, lab;
22391 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22392 lab = gen_label_rtx ();
22393 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22394 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22395 if (TARGET_LINK_STACK)
22396 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22397 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22399 emit_insn (gen_addsi3 (dest, temp0, dest));
22401 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22403 /* This is for AIX code running in non-PIC ELF32. */
22404 char buf[30];
22405 rtx realsym;
22406 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22407 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22409 emit_insn (gen_elf_high (dest, realsym));
22410 emit_insn (gen_elf_low (dest, dest, realsym));
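/* The two insns above expand to roughly this @ha/@l pair (assembly
   sketch; exact label spelling approximate):
   lis DEST,.LCTOC1@ha
   la DEST,.LCTOC1@l(DEST)
   materializing the TOC anchor address without a GOT load.  */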
22412 else
22414 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22416 if (TARGET_32BIT)
22417 emit_insn (gen_load_toc_aix_si (dest));
22418 else
22419 emit_insn (gen_load_toc_aix_di (dest));
22423 /* Emit instructions to restore the link register after determining where
22424 its value has been stored. */
22426 void
22427 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22429 rs6000_stack_t *info = rs6000_stack_info ();
22430 rtx operands[2];
22432 operands[0] = source;
22433 operands[1] = scratch;
22435 if (info->lr_save_p)
22437 rtx frame_rtx = stack_pointer_rtx;
22438 HOST_WIDE_INT sp_offset = 0;
22439 rtx tmp;
22441 if (frame_pointer_needed
22442 || cfun->calls_alloca
22443 || info->total_size > 32767)
22445 tmp = gen_frame_mem (Pmode, frame_rtx);
22446 emit_move_insn (operands[1], tmp);
22447 frame_rtx = operands[1];
22449 else if (info->push_p)
22450 sp_offset = info->total_size;
22452 tmp = plus_constant (Pmode, frame_rtx,
22453 info->lr_save_offset + sp_offset);
22454 tmp = gen_frame_mem (Pmode, tmp);
22455 emit_move_insn (tmp, operands[0]);
22457 else
22458 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22460 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22461 state of lr_save_p so any change from here on would be a bug. In
22462 particular, stop rs6000_ra_ever_killed from considering the SET
22463 of lr we may have added just above. */
22464 cfun->machine->lr_save_state = info->lr_save_p + 1;
22467 static GTY(()) alias_set_type set = -1;
22469 alias_set_type
22470 get_TOC_alias_set (void)
22472 if (set == -1)
22473 set = new_alias_set ();
22474 return set;
22477 /* This returns nonzero if the current function uses the TOC. This is
22478 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22479 is generated by the ABI_V4 load_toc_* patterns. */
22480 #if TARGET_ELF
22481 static int
22482 uses_TOC (void)
22484 rtx_insn *insn;
22486 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22487 if (INSN_P (insn))
22489 rtx pat = PATTERN (insn);
22490 int i;
22492 if (GET_CODE (pat) == PARALLEL)
22493 for (i = 0; i < XVECLEN (pat, 0); i++)
22495 rtx sub = XVECEXP (pat, 0, i);
22496 if (GET_CODE (sub) == USE)
22498 sub = XEXP (sub, 0);
22499 if (GET_CODE (sub) == UNSPEC
22500 && XINT (sub, 1) == UNSPEC_TOC)
22501 return 1;
22505 return 0;
22507 #endif
22509 rtx
22510 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22512 rtx tocrel, tocreg, hi;
22514 if (TARGET_DEBUG_ADDR)
22516 if (GET_CODE (symbol) == SYMBOL_REF)
22517 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22518 XSTR (symbol, 0));
22519 else
22521 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22522 GET_RTX_NAME (GET_CODE (symbol)));
22523 debug_rtx (symbol);
22527 if (!can_create_pseudo_p ())
22528 df_set_regs_ever_live (TOC_REGISTER, true);
22530 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22531 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22532 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22533 return tocrel;
22535 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22536 if (largetoc_reg != NULL)
22538 emit_move_insn (largetoc_reg, hi);
22539 hi = largetoc_reg;
22541 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
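/* For reference, the shapes produced: with -mcmodel=small (or while
   pseudos are still available) the reference is simply
   (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL)
   whereas for the medium/large code models it becomes
   (lo_sum (high (unspec ...)) (unspec ...)),
   with the HIGH half optionally moved into LARGETOC_REG first.  */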
22544 /* Issue assembly directives that create a reference to the given DWARF
22545 FRAME_TABLE_LABEL from the current function section. */
22546 void
22547 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22549 fprintf (asm_out_file, "\t.ref %s\n",
22550 (* targetm.strip_name_encoding) (frame_table_label));
22553 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22554 and the change to the stack pointer. */
22556 static void
22557 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22559 rtvec p;
22560 int i;
22561 rtx regs[3];
22563 i = 0;
22564 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22565 if (hard_frame_needed)
22566 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22567 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22568 || (hard_frame_needed
22569 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22570 regs[i++] = fp;
22572 p = rtvec_alloc (i);
22573 while (--i >= 0)
22575 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22576 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22579 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22582 /* Emit the correct code for allocating stack space, as insns.
22583 If COPY_REG, make sure a copy of the old frame is left there.
22584 The generated code may use hard register 0 as a temporary. */
22586 static void
22587 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22589 rtx_insn *insn;
22590 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22591 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22592 rtx todec = gen_int_mode (-size, Pmode);
22593 rtx par, set, mem;
22595 if (INTVAL (todec) != -size)
22597 warning (0, "stack frame too large");
22598 emit_insn (gen_trap ());
22599 return;
22602 if (crtl->limit_stack)
22604 if (REG_P (stack_limit_rtx)
22605 && REGNO (stack_limit_rtx) > 1
22606 && REGNO (stack_limit_rtx) <= 31)
22608 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22609 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22610 const0_rtx));
22612 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22613 && TARGET_32BIT
22614 && DEFAULT_ABI == ABI_V4)
22616 rtx toload = gen_rtx_CONST (VOIDmode,
22617 gen_rtx_PLUS (Pmode,
22618 stack_limit_rtx,
22619 GEN_INT (size)));
22621 emit_insn (gen_elf_high (tmp_reg, toload));
22622 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22623 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22624 const0_rtx));
22626 else
22627 warning (0, "stack limit expression is not supported");
22630 if (copy_reg)
22632 if (copy_off != 0)
22633 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22634 else
22635 emit_move_insn (copy_reg, stack_reg);
22638 if (size > 32767)
22640 /* Need a note here so that try_split doesn't get confused. */
22641 if (get_last_insn () == NULL_RTX)
22642 emit_note (NOTE_INSN_DELETED);
22643 insn = emit_move_insn (tmp_reg, todec);
22644 try_split (PATTERN (insn), insn, 0);
22645 todec = tmp_reg;
22648 insn = emit_insn (TARGET_32BIT
22649 ? gen_movsi_update_stack (stack_reg, stack_reg,
22650 todec, stack_reg)
22651 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22652 todec, stack_reg));
22653 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22654 it now and set the alias set/attributes. The above gen_*_update
22655 calls will generate a PARALLEL with the MEM set being the first
22656 operation. */
22657 par = PATTERN (insn);
22658 gcc_assert (GET_CODE (par) == PARALLEL);
22659 set = XVECEXP (par, 0, 0);
22660 gcc_assert (GET_CODE (set) == SET);
22661 mem = SET_DEST (set);
22662 gcc_assert (MEM_P (mem));
22663 MEM_NOTRAP_P (mem) = 1;
22664 set_mem_alias_set (mem, get_frame_alias_set ());
22666 RTX_FRAME_RELATED_P (insn) = 1;
22667 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22668 gen_rtx_SET (VOIDmode, stack_reg,
22669 gen_rtx_PLUS (Pmode, stack_reg,
22670 GEN_INT (-size))));
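/* For a small frame this amounts to a single store-with-update,
   e.g. (size hypothetical) "stwu r1,-64(r1)" on 32-bit or
   "stdu r1,-128(r1)" on 64-bit; frames over 32767 bytes first
   materialize -size in r0 and use the indexed update form.  */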
22673 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22675 #if PROBE_INTERVAL > 32768
22676 #error Cannot use indexed addressing mode for stack probing
22677 #endif
22679 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22680 inclusive. These are offsets from the current stack pointer. */
22682 static void
22683 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22685 /* See if we have a constant small number of probes to generate. If so,
22686 that's the easy case. */
22687 if (first + size <= 32768)
22689 HOST_WIDE_INT i;
22691 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22692 it exceeds SIZE. If only one probe is needed, this will not
22693 generate any code. Then probe at FIRST + SIZE. */
22694 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22695 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22696 -(first + i)));
22698 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22699 -(first + size)));
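/* Worked example, assuming PROBE_INTERVAL == 4096: with first ==
   16384 and size == 12000 this branch applies (16384 + 12000 <=
   32768) and emits probes at sp-20480, sp-24576 and finally
   sp-28384, touching every page of the new allocation.  */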
22702 /* Otherwise, do the same as above, but in a loop. Note that we must be
22703 extra careful with variables wrapping around because we might be at
22704 the very top (or the very bottom) of the address space and we have
22705 to be able to handle this case properly; in particular, we use an
22706 equality test for the loop condition. */
22707 else
22709 HOST_WIDE_INT rounded_size;
22710 rtx r12 = gen_rtx_REG (Pmode, 12);
22711 rtx r0 = gen_rtx_REG (Pmode, 0);
22713 /* Sanity check for the addressing mode we're going to use. */
22714 gcc_assert (first <= 32768);
22716 /* Step 1: round SIZE to the previous multiple of the interval. */
22718 rounded_size = size & -PROBE_INTERVAL;
22721 /* Step 2: compute initial and final value of the loop counter. */
22723 /* TEST_ADDR = SP + FIRST. */
22724 emit_insn (gen_rtx_SET (VOIDmode, r12,
22725 plus_constant (Pmode, stack_pointer_rtx,
22726 -first)));
22728 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22729 if (rounded_size > 32768)
22731 emit_move_insn (r0, GEN_INT (-rounded_size));
22732 emit_insn (gen_rtx_SET (VOIDmode, r0,
22733 gen_rtx_PLUS (Pmode, r12, r0)));
22735 else
22736 emit_insn (gen_rtx_SET (VOIDmode, r0,
22737 plus_constant (Pmode, r12, -rounded_size)));
22740 /* Step 3: the loop
22742 while (TEST_ADDR != LAST_ADDR)
22744 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22745 probe at TEST_ADDR
22748 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22749 until it is equal to ROUNDED_SIZE. */
22751 if (TARGET_64BIT)
22752 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22753 else
22754 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22757 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22758 that SIZE is equal to ROUNDED_SIZE. */
22760 if (size != rounded_size)
22761 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22765 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22766 absolute addresses. */
22768 const char *
22769 output_probe_stack_range (rtx reg1, rtx reg2)
22771 static int labelno = 0;
22772 char loop_lab[32], end_lab[32];
22773 rtx xops[2];
22775 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22776 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22778 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22780 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22781 xops[0] = reg1;
22782 xops[1] = reg2;
22783 if (TARGET_64BIT)
22784 output_asm_insn ("cmpd 0,%0,%1", xops);
22785 else
22786 output_asm_insn ("cmpw 0,%0,%1", xops);
22788 fputs ("\tbeq 0,", asm_out_file);
22789 assemble_name_raw (asm_out_file, end_lab);
22790 fputc ('\n', asm_out_file);
22792 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22793 xops[1] = GEN_INT (-PROBE_INTERVAL);
22794 output_asm_insn ("addi %0,%0,%1", xops);
22796 /* Probe at TEST_ADDR and branch. */
22797 xops[1] = gen_rtx_REG (Pmode, 0);
22798 output_asm_insn ("stw %1,0(%0)", xops);
22799 fprintf (asm_out_file, "\tb ");
22800 assemble_name_raw (asm_out_file, loop_lab);
22801 fputc ('\n', asm_out_file);
22803 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22805 return "";
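/* Schematically, the loop emitted above looks like this on a 64-bit
   target (assuming PROBE_INTERVAL == 4096; labels are the internal
   LPSRL/LPSRE labels):
   .LPSRL0: cmpd 0,r12,r0
   beq 0,.LPSRE0
   addi r12,r12,-4096
   stw r0,0(r12)
   b .LPSRL0
   .LPSRE0:
   A store of any width suffices to touch the guard page.  */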
22808 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22809 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22810 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22811 deduce these equivalences by itself so we didn't need to hold
22812 its hand so much. Don't be tempted to always supply d2_f_d_e with
22813 the actual CFA register, i.e. r31 when we are using a hard frame
22814 pointer. That fails when saving regs off r1, and sched moves the
22815 r31 setup past the reg saves. */
22817 static rtx
22818 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22819 rtx reg2, rtx rreg)
22821 rtx real, temp;
22823 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22825 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22826 int i;
22828 gcc_checking_assert (val == 0);
22829 real = PATTERN (insn);
22830 if (GET_CODE (real) == PARALLEL)
22831 for (i = 0; i < XVECLEN (real, 0); i++)
22832 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22834 rtx set = XVECEXP (real, 0, i);
22836 RTX_FRAME_RELATED_P (set) = 1;
22838 RTX_FRAME_RELATED_P (insn) = 1;
22839 return insn;
22842 /* copy_rtx will not make unique copies of registers, so we need to
22843 ensure we don't have unwanted sharing here. */
22844 if (reg == reg2)
22845 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22847 if (reg == rreg)
22848 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22850 real = copy_rtx (PATTERN (insn));
22852 if (reg2 != NULL_RTX)
22853 real = replace_rtx (real, reg2, rreg);
22855 if (REGNO (reg) == STACK_POINTER_REGNUM)
22856 gcc_checking_assert (val == 0);
22857 else
22858 real = replace_rtx (real, reg,
22859 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22860 STACK_POINTER_REGNUM),
22861 GEN_INT (val)));
22863 /* We expect that 'real' is either a SET or a PARALLEL containing
22864 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22865 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22867 if (GET_CODE (real) == SET)
22869 rtx set = real;
22871 temp = simplify_rtx (SET_SRC (set));
22872 if (temp)
22873 SET_SRC (set) = temp;
22874 temp = simplify_rtx (SET_DEST (set));
22875 if (temp)
22876 SET_DEST (set) = temp;
22877 if (GET_CODE (SET_DEST (set)) == MEM)
22879 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22880 if (temp)
22881 XEXP (SET_DEST (set), 0) = temp;
22884 else
22886 int i;
22888 gcc_assert (GET_CODE (real) == PARALLEL);
22889 for (i = 0; i < XVECLEN (real, 0); i++)
22890 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22892 rtx set = XVECEXP (real, 0, i);
22894 temp = simplify_rtx (SET_SRC (set));
22895 if (temp)
22896 SET_SRC (set) = temp;
22897 temp = simplify_rtx (SET_DEST (set));
22898 if (temp)
22899 SET_DEST (set) = temp;
22900 if (GET_CODE (SET_DEST (set)) == MEM)
22902 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22903 if (temp)
22904 XEXP (SET_DEST (set), 0) = temp;
22906 RTX_FRAME_RELATED_P (set) = 1;
22910 RTX_FRAME_RELATED_P (insn) = 1;
22911 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22913 return insn;
22916 /* Returns an insn that has a vrsave set operation with the
22917 appropriate CLOBBERs. */
22919 static rtx
22920 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22922 int nclobs, i;
22923 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22924 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22926 clobs[0]
22927 = gen_rtx_SET (VOIDmode,
22928 vrsave,
22929 gen_rtx_UNSPEC_VOLATILE (SImode,
22930 gen_rtvec (2, reg, vrsave),
22931 UNSPECV_SET_VRSAVE));
22933 nclobs = 1;
22935 /* We need to clobber the registers in the mask so the scheduler
22936 does not move sets to VRSAVE before sets of AltiVec registers.
22938 However, if the function receives nonlocal gotos, reload will set
22939 all call saved registers live. We will end up with:
22941 (set (reg 999) (mem))
22942 (parallel [ (set (reg vrsave) (unspec blah))
22943 (clobber (reg 999))])
22945 The clobber will cause the store into reg 999 to be dead, and
22946 flow will attempt to delete an epilogue insn. In this case, we
22947 need an unspec use/set of the register. */
22949 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22950 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22952 if (!epiloguep || call_used_regs [i])
22953 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22954 gen_rtx_REG (V4SImode, i));
22955 else
22957 rtx reg = gen_rtx_REG (V4SImode, i);
22959 clobs[nclobs++]
22960 = gen_rtx_SET (VOIDmode,
22961 reg,
22962 gen_rtx_UNSPEC (V4SImode,
22963 gen_rtvec (1, reg), 27));
22967 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22969 for (i = 0; i < nclobs; ++i)
22970 XVECEXP (insn, 0, i) = clobs[i];
22972 return insn;
22975 static rtx
22976 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22978 rtx addr, mem;
22980 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22981 mem = gen_frame_mem (GET_MODE (reg), addr);
22982 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22985 static rtx
22986 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22988 return gen_frame_set (reg, frame_reg, offset, false);
22991 static rtx
22992 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22994 return gen_frame_set (reg, frame_reg, offset, true);
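/* For instance (register and offset hypothetical),
   gen_frame_store (gen_rtx_REG (DImode, 0), sp_reg, 16) yields
   (set (mem:DI (plus (reg 1) (const_int 16))) (reg:DI 0))
   with the MEM already carrying the frame alias set.  */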
22997 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22998 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
23000 static rtx
23001 emit_frame_save (rtx frame_reg, machine_mode mode,
23002 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
23004 rtx reg, insn;
23006 /* Some cases that need register indexed addressing. */
23007 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
23008 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23009 || (TARGET_E500_DOUBLE && mode == DFmode)
23010 || (TARGET_SPE_ABI
23011 && SPE_VECTOR_MODE (mode)
23012 && !SPE_CONST_OFFSET_OK (offset))));
23014 reg = gen_rtx_REG (mode, regno);
23015 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
23016 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
23017 NULL_RTX, NULL_RTX);
23020 /* Emit an offset memory reference suitable for a frame store, while
23021 converting to a valid addressing mode. */
23023 static rtx
23024 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
23026 rtx int_rtx, offset_rtx;
23028 int_rtx = GEN_INT (offset);
23030 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
23031 || (TARGET_E500_DOUBLE && mode == DFmode))
23033 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
23034 emit_move_insn (offset_rtx, int_rtx);
23036 else
23037 offset_rtx = int_rtx;
23039 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
23042 #ifndef TARGET_FIX_AND_CONTINUE
23043 #define TARGET_FIX_AND_CONTINUE 0
23044 #endif
23046 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest of these. */
23047 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
23048 #define LAST_SAVRES_REGISTER 31
23049 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
23051 enum {
23052 SAVRES_LR = 0x1,
23053 SAVRES_SAVE = 0x2,
23054 SAVRES_REG = 0x0c,
23055 SAVRES_GPR = 0,
23056 SAVRES_FPR = 4,
23057 SAVRES_VR = 8
23060 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
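/* Decoding example: sel == (SAVRES_SAVE | SAVRES_FPR | SAVRES_LR)
   == 0x7 means "save FPRs out of line, handling LR too"; the
   SAVRES_REG field (0x0c) selects GPR/FPR/VR while the low bits
   carry the save-vs-restore and LR flags.  */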
23062 /* Temporary holding space for an out-of-line register save/restore
23063 routine name. */
23064 static char savres_routine_name[30];
23066 /* Return the name for an out-of-line register save/restore routine.
23067 SEL encodes which registers we handle; see the SAVRES_* flags above. */
23069 static char *
23070 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
23072 const char *prefix = "";
23073 const char *suffix = "";
23075 /* Different targets are supposed to define
23076 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
23077 routine name could be defined with:
23079 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
23081 This is a nice idea in theory, but in practice, things are
23082 complicated in several ways:
23084 - ELF targets have save/restore routines for GPRs.
23086 - SPE targets use different prefixes for 32/64-bit registers, and
23087 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
23089 - PPC64 ELF targets have routines for save/restore of GPRs that
23090 differ in what they do with the link register, so having a set
23091 prefix doesn't work. (We only use one of the save routines at
23092 the moment, though.)
23094 - PPC32 elf targets have "exit" versions of the restore routines
23095 that restore the link register and can save some extra space.
23096 These require an extra suffix. (There are also "tail" versions
23097 of the restore routines and "GOT" versions of the save routines,
23098 but we don't generate those at present. Same problems apply,
23099 though.)
23101 We deal with all this by synthesizing our own prefix/suffix and
23102 using that for the simple sprintf call shown above. */
23103 if (TARGET_SPE)
23105 /* No floating point saves on the SPE. */
23106 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
23108 if ((sel & SAVRES_SAVE))
23109 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
23110 else
23111 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
23113 if ((sel & SAVRES_LR))
23114 suffix = "_x";
23116 else if (DEFAULT_ABI == ABI_V4)
23118 if (TARGET_64BIT)
23119 goto aix_names;
23121 if ((sel & SAVRES_REG) == SAVRES_GPR)
23122 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
23123 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23124 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
23125 else if ((sel & SAVRES_REG) == SAVRES_VR)
23126 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23127 else
23128 abort ();
23130 if ((sel & SAVRES_LR))
23131 suffix = "_x";
23133 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23135 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
23136 /* No out-of-line save/restore routines for GPRs on AIX. */
23137 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
23138 #endif
23140 aix_names:
23141 if ((sel & SAVRES_REG) == SAVRES_GPR)
23142 prefix = ((sel & SAVRES_SAVE)
23143 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
23144 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
23145 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23147 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23148 if ((sel & SAVRES_LR))
23149 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
23150 else
23151 #endif
23153 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23154 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23157 else if ((sel & SAVRES_REG) == SAVRES_VR)
23158 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23159 else
23160 abort ();
23163 if (DEFAULT_ABI == ABI_DARWIN)
23165 /* The Darwin approach is (slightly) different, in order to be
23166 compatible with code generated by the system toolchain. There is a
23167 single symbol for the start of the save sequence, and the code here
23168 embeds an offset into that code on the basis of the first register
23169 to be saved. */
23170 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23171 if ((sel & SAVRES_REG) == SAVRES_GPR)
23172 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23173 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23174 (regno - 13) * 4, prefix, regno);
23175 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23176 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23177 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23178 else if ((sel & SAVRES_REG) == SAVRES_VR)
23179 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23180 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23181 else
23182 abort ();
23184 else
23185 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23187 return savres_routine_name;
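/* Sample results, for concreteness: on 32-bit SVR4 a GPR save
   starting at r29 yields "_savegpr_29", and the LR-restoring exit
   variant of the restore is "_restgpr_29_x"; on AIX/ELFv2 the GPR
   routines are "_savegpr0_29" or "_savegpr1_29" depending on
   whether they handle LR.  */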
23190 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23191 SEL encodes which registers we handle; see the SAVRES_* flags. */
23193 static rtx
23194 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23196 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23197 ? info->first_gp_reg_save
23198 : (sel & SAVRES_REG) == SAVRES_FPR
23199 ? info->first_fp_reg_save - 32
23200 : (sel & SAVRES_REG) == SAVRES_VR
23201 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23202 : -1);
23203 rtx sym;
23204 int select = sel;
23206 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23207 versions of the gpr routines. */
23208 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23209 && info->spe_64bit_regs_used)
23210 select ^= SAVRES_FPR ^ SAVRES_GPR;
23212 /* Don't generate bogus routine names. */
23213 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23214 && regno <= LAST_SAVRES_REGISTER
23215 && select >= 0 && select <= 12);
23217 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23219 if (sym == NULL)
23221 char *name;
23223 name = rs6000_savres_routine_name (info, regno, sel);
23225 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23226 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23227 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23230 return sym;
23233 /* Emit a sequence of insns, including a stack tie if needed, for
23234 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23235 reset the stack pointer, but move the base of the frame into
23236 reg UPDT_REGNO for use by out-of-line register restore routines. */
23238 static rtx
23239 rs6000_emit_stack_reset (rs6000_stack_t *info,
23240 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23241 unsigned updt_regno)
23243 rtx updt_reg_rtx;
23245 /* This blockage is needed so that sched doesn't decide to move
23246 the sp change before the register restores. */
23247 if (DEFAULT_ABI == ABI_V4
23248 || (TARGET_SPE_ABI
23249 && info->spe_64bit_regs_used != 0
23250 && info->first_gp_reg_save != 32))
23251 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23253 /* If we are restoring registers out-of-line, we will be using the
23254 "exit" variants of the restore routines, which will reset the
23255 stack for us. But we do need to point updt_reg into the
23256 right place for those routines. */
23257 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23259 if (frame_off != 0)
23260 return emit_insn (gen_add3_insn (updt_reg_rtx,
23261 frame_reg_rtx, GEN_INT (frame_off)));
23262 else if (REGNO (frame_reg_rtx) != updt_regno)
23263 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23265 return NULL_RTX;
23268 /* Return the register number used as a pointer by out-of-line
23269 save/restore functions. */
23271 static inline unsigned
23272 ptr_regno_for_savres (int sel)
23274 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23275 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23276 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
23279 /* Construct a parallel rtx describing the effect of a call to an
23280 out-of-line register save/restore routine, and emit the insn
23281 or jump_insn as appropriate. */
23283 static rtx
23284 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23285 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23286 machine_mode reg_mode, int sel)
23288 int i;
23289 int offset, start_reg, end_reg, n_regs, use_reg;
23290 int reg_size = GET_MODE_SIZE (reg_mode);
23291 rtx sym;
23292 rtvec p;
23293 rtx par, insn;
23295 offset = 0;
23296 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23297 ? info->first_gp_reg_save
23298 : (sel & SAVRES_REG) == SAVRES_FPR
23299 ? info->first_fp_reg_save
23300 : (sel & SAVRES_REG) == SAVRES_VR
23301 ? info->first_altivec_reg_save
23302 : -1);
23303 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23304 ? 32
23305 : (sel & SAVRES_REG) == SAVRES_FPR
23306 ? 64
23307 : (sel & SAVRES_REG) == SAVRES_VR
23308 ? LAST_ALTIVEC_REGNO + 1
23309 : -1);
23310 n_regs = end_reg - start_reg;
23311 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23312 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23313 + n_regs);
23315 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23316 RTVEC_ELT (p, offset++) = ret_rtx;
23318 RTVEC_ELT (p, offset++)
23319 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23321 sym = rs6000_savres_routine_sym (info, sel);
23322 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23324 use_reg = ptr_regno_for_savres (sel);
23325 if ((sel & SAVRES_REG) == SAVRES_VR)
23327 /* Vector regs are saved/restored using [reg+reg] addressing. */
23328 RTVEC_ELT (p, offset++)
23329 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23330 RTVEC_ELT (p, offset++)
23331 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23333 else
23334 RTVEC_ELT (p, offset++)
23335 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23337 for (i = 0; i < end_reg - start_reg; i++)
23338 RTVEC_ELT (p, i + offset)
23339 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23340 frame_reg_rtx, save_area_offset + reg_size * i,
23341 (sel & SAVRES_SAVE) != 0);
23343 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23344 RTVEC_ELT (p, i + offset)
23345 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23347 par = gen_rtx_PARALLEL (VOIDmode, p);
23349 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23351 insn = emit_jump_insn (par);
23352 JUMP_LABEL (insn) = ret_rtx;
23354 else
23355 insn = emit_insn (par);
23356 return insn;
23359 /* Emit code to store CR fields that need to be saved into REG. */
23361 static void
23362 rs6000_emit_move_from_cr (rtx reg)
23364 /* Only the ELFv2 ABI allows saving a subset of the CR fields. */
23365 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23367 int i, cr_reg[8], count = 0;
23369 /* Collect CR fields that must be saved. */
23370 for (i = 0; i < 8; i++)
23371 if (save_reg_p (CR0_REGNO + i))
23372 cr_reg[count++] = i;
23374 /* If it's just a single one, use mfcrf. */
23375 if (count == 1)
23377 rtvec p = rtvec_alloc (1);
23378 rtvec r = rtvec_alloc (2);
23379 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23380 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23381 RTVEC_ELT (p, 0)
23382 = gen_rtx_SET (VOIDmode, reg,
23383 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23385 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23386 return;
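/* Worked example of the mask above: if only CR2 needs saving,
   cr_reg[0] == 2 and the FXM operand is 1 << (7 - 2) == 0x20,
   which is exactly the mfcrf field mask selecting CR field 2.  */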
23389 /* ??? It might be better to handle the count == 2 or 3 cases here
23390 as well, using logical operations to combine the values. */
23393 emit_insn (gen_movesi_from_cr (reg));
23396 /* Determine whether the gp REG is really used. */
23398 static bool
23399 rs6000_reg_live_or_pic_offset_p (int reg)
23401 /* If the function calls eh_return, treat all the registers that
23402 would otherwise be checked for liveness as used. This is required
23403 for the PIC offset register with -mminimal-toc on AIX, as it is
23404 advertised as "fixed" for register allocation purposes in this case. */
23406 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23407 && (!call_used_regs[reg]
23408 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23409 && !TARGET_SINGLE_PIC_BASE
23410 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23411 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23412 && !TARGET_SINGLE_PIC_BASE
23413 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23414 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23417 /* Emit function prologue as insns. */
23419 void
23420 rs6000_emit_prologue (void)
23422 rs6000_stack_t *info = rs6000_stack_info ();
23423 machine_mode reg_mode = Pmode;
23424 int reg_size = TARGET_32BIT ? 4 : 8;
23425 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23426 rtx frame_reg_rtx = sp_reg_rtx;
23427 unsigned int cr_save_regno;
23428 rtx cr_save_rtx = NULL_RTX;
23429 rtx insn;
23430 int strategy;
23431 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23432 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23433 && call_used_regs[STATIC_CHAIN_REGNUM]);
23434 /* Offset to top of frame for frame_reg and sp respectively. */
23435 HOST_WIDE_INT frame_off = 0;
23436 HOST_WIDE_INT sp_off = 0;
23438 #ifdef ENABLE_CHECKING
23439 /* Track and check usage of r0, r11, r12. */
23440 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23441 #define START_USE(R) do \
23443 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23444 reg_inuse |= 1 << (R); \
23445 } while (0)
23446 #define END_USE(R) do \
23448 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23449 reg_inuse &= ~(1 << (R)); \
23450 } while (0)
23451 #define NOT_INUSE(R) do \
23453 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23454 } while (0)
23455 #else
23456 #define START_USE(R) do {} while (0)
23457 #define END_USE(R) do {} while (0)
23458 #define NOT_INUSE(R) do {} while (0)
23459 #endif
23461 if (DEFAULT_ABI == ABI_ELFv2)
23463 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23465 /* With -mminimal-toc we may generate an extra use of r2 below. */
23466 if (!TARGET_SINGLE_PIC_BASE
23467 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23468 cfun->machine->r2_setup_needed = true;
23472 if (flag_stack_usage_info)
23473 current_function_static_stack_size = info->total_size;
23475 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23477 HOST_WIDE_INT size = info->total_size;
23479 if (crtl->is_leaf && !cfun->calls_alloca)
23481 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23482 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23483 size - STACK_CHECK_PROTECT);
23485 else if (size > 0)
23486 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23489 if (TARGET_FIX_AND_CONTINUE)
23491 /* gdb on darwin arranges to forward a function from the old
23492 address by modifying the first 5 instructions of the function
23493 to branch to the overriding function. This is necessary to
23494 permit function pointers that point to the old function to
23495 actually forward to the new function. */
23496 emit_insn (gen_nop ());
23497 emit_insn (gen_nop ());
23498 emit_insn (gen_nop ());
23499 emit_insn (gen_nop ());
23500 emit_insn (gen_nop ());
23503 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23505 reg_mode = V2SImode;
23506 reg_size = 8;
23509 /* Handle world saves specially here. */
23510 if (WORLD_SAVE_P (info))
23512 int i, j, sz;
23513 rtx treg;
23514 rtvec p;
23515 rtx reg0;
23517 /* save_world expects lr in r0. */
23518 reg0 = gen_rtx_REG (Pmode, 0);
23519 if (info->lr_save_p)
23521 insn = emit_move_insn (reg0,
23522 gen_rtx_REG (Pmode, LR_REGNO));
23523 RTX_FRAME_RELATED_P (insn) = 1;
23526 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23527 assumptions about the offsets of various bits of the stack
23528 frame. */
23529 gcc_assert (info->gp_save_offset == -220
23530 && info->fp_save_offset == -144
23531 && info->lr_save_offset == 8
23532 && info->cr_save_offset == 4
23533 && info->push_p
23534 && info->lr_save_p
23535 && (!crtl->calls_eh_return
23536 || info->ehrd_offset == -432)
23537 && info->vrsave_save_offset == -224
23538 && info->altivec_save_offset == -416);
23540 treg = gen_rtx_REG (SImode, 11);
23541 emit_move_insn (treg, GEN_INT (-info->total_size));
23543 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23544 in R11. It also clobbers R12, so beware! */
23546 /* Preserve CR2 for save_world prologues. */
23547 sz = 5;
23548 sz += 32 - info->first_gp_reg_save;
23549 sz += 64 - info->first_fp_reg_save;
23550 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23551 p = rtvec_alloc (sz);
23552 j = 0;
23553 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23554 gen_rtx_REG (SImode,
23555 LR_REGNO));
23556 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23557 gen_rtx_SYMBOL_REF (Pmode,
23558 "*save_world"));
23559 /* We do floats first so that the instruction pattern matches
23560 properly. */
23561 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23562 RTVEC_ELT (p, j++)
23563 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23564 ? DFmode : SFmode,
23565 info->first_fp_reg_save + i),
23566 frame_reg_rtx,
23567 info->fp_save_offset + frame_off + 8 * i);
23568 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23569 RTVEC_ELT (p, j++)
23570 = gen_frame_store (gen_rtx_REG (V4SImode,
23571 info->first_altivec_reg_save + i),
23572 frame_reg_rtx,
23573 info->altivec_save_offset + frame_off + 16 * i);
23574 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23575 RTVEC_ELT (p, j++)
23576 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23577 frame_reg_rtx,
23578 info->gp_save_offset + frame_off + reg_size * i);
23580 /* CR register traditionally saved as CR2. */
23581 RTVEC_ELT (p, j++)
23582 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23583 frame_reg_rtx, info->cr_save_offset + frame_off);
23584 /* Explain about use of R0. */
23585 if (info->lr_save_p)
23586 RTVEC_ELT (p, j++)
23587 = gen_frame_store (reg0,
23588 frame_reg_rtx, info->lr_save_offset + frame_off);
23589 /* Explain what happens to the stack pointer. */
23591 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23592 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23595 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23596 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23597 treg, GEN_INT (-info->total_size));
23598 sp_off = frame_off = info->total_size;
23601 strategy = info->savres_strategy;
23603 /* For V.4, update stack before we do any saving and set back pointer. */
23604 if (! WORLD_SAVE_P (info)
23605 && info->push_p
23606 && (DEFAULT_ABI == ABI_V4
23607 || crtl->calls_eh_return))
23609 bool need_r11 = (TARGET_SPE
23610 ? (!(strategy & SAVE_INLINE_GPRS)
23611 && info->spe_64bit_regs_used == 0)
23612 : (!(strategy & SAVE_INLINE_FPRS)
23613 || !(strategy & SAVE_INLINE_GPRS)
23614 || !(strategy & SAVE_INLINE_VRS)));
23615 int ptr_regno = -1;
23616 rtx ptr_reg = NULL_RTX;
23617 int ptr_off = 0;
23619 if (info->total_size < 32767)
23620 frame_off = info->total_size;
23621 else if (need_r11)
23622 ptr_regno = 11;
23623 else if (info->cr_save_p
23624 || info->lr_save_p
23625 || info->first_fp_reg_save < 64
23626 || info->first_gp_reg_save < 32
23627 || info->altivec_size != 0
23628 || info->vrsave_mask != 0
23629 || crtl->calls_eh_return)
23630 ptr_regno = 12;
23631 else
23633 /* The prologue won't be saving any regs so there is no need
23634 to set up a frame register to access any frame save area.
23635 We also won't be using frame_off anywhere below, but set
23636 the correct value anyway to protect against future
23637 changes to this function. */
23638 frame_off = info->total_size;
23640 if (ptr_regno != -1)
23642 /* Set up the frame offset to that needed by the first
23643 out-of-line save function. */
23644 START_USE (ptr_regno);
23645 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23646 frame_reg_rtx = ptr_reg;
23647 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23648 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23649 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23650 ptr_off = info->gp_save_offset + info->gp_size;
23651 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23652 ptr_off = info->altivec_save_offset + info->altivec_size;
23653 frame_off = -ptr_off;
23655 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23656 sp_off = info->total_size;
23657 if (frame_reg_rtx != sp_reg_rtx)
23658 rs6000_emit_stack_tie (frame_reg_rtx, false);
23661 /* If we use the link register, get it into r0. */
23662 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23664 rtx addr, reg, mem;
23666 reg = gen_rtx_REG (Pmode, 0);
23667 START_USE (0);
23668 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23669 RTX_FRAME_RELATED_P (insn) = 1;
23671 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23672 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23674 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23675 GEN_INT (info->lr_save_offset + frame_off));
23676 mem = gen_rtx_MEM (Pmode, addr);
23677 /* This mem should not be in rs6000_sr_alias_set, because of
23678 __builtin_return_address. */
23680 insn = emit_move_insn (mem, reg);
23681 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23682 NULL_RTX, NULL_RTX);
23683 END_USE (0);
23687 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23688 r12 will be needed by out-of-line gpr restore. */
23689 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23690 && !(strategy & (SAVE_INLINE_GPRS
23691 | SAVE_NOINLINE_GPRS_SAVES_LR))
23692 ? 11 : 12);
23693 if (!WORLD_SAVE_P (info)
23694 && info->cr_save_p
23695 && REGNO (frame_reg_rtx) != cr_save_regno
23696 && !(using_static_chain_p && cr_save_regno == 11))
23698 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23699 START_USE (cr_save_regno);
23700 rs6000_emit_move_from_cr (cr_save_rtx);
23703 /* Do any required saving of fpr's. If only one or two to save, do
23704 it ourselves. Otherwise, call function. */
23705 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23707 int i;
23708 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23709 if (save_reg_p (info->first_fp_reg_save + i))
23710 emit_frame_save (frame_reg_rtx,
23711 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23712 ? DFmode : SFmode),
23713 info->first_fp_reg_save + i,
23714 info->fp_save_offset + frame_off + 8 * i,
23715 sp_off - frame_off);
23717 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23719 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23720 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23721 unsigned ptr_regno = ptr_regno_for_savres (sel);
23722 rtx ptr_reg = frame_reg_rtx;
23724 if (REGNO (frame_reg_rtx) == ptr_regno)
23725 gcc_checking_assert (frame_off == 0);
23726 else
23728 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23729 NOT_INUSE (ptr_regno);
23730 emit_insn (gen_add3_insn (ptr_reg,
23731 frame_reg_rtx, GEN_INT (frame_off)));
23733 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23734 info->fp_save_offset,
23735 info->lr_save_offset,
23736 DFmode, sel);
23737 rs6000_frame_related (insn, ptr_reg, sp_off,
23738 NULL_RTX, NULL_RTX);
23739 if (lr)
23740 END_USE (0);
23743 /* Save GPRs. This is done as a PARALLEL if we are using
23744 the store-multiple instructions. */
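/* (SAVRES_MULTIPLE stands for a single stmw covering
   first_gp_reg_save..r31; that strategy is only selected for
   32-bit code.)  */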
23745 if (!WORLD_SAVE_P (info)
23746 && TARGET_SPE_ABI
23747 && info->spe_64bit_regs_used != 0
23748 && info->first_gp_reg_save != 32)
23750 int i;
23751 rtx spe_save_area_ptr;
23752 HOST_WIDE_INT save_off;
23753 int ool_adjust = 0;
23755 /* Determine whether we can address all of the registers that need
23756 to be saved with an offset from frame_reg_rtx that fits in
23757 the small const field for SPE memory instructions. */
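/* evldd/evstdd encode only a 5-bit offset scaled by 8, so at most
   248 bytes from the base register; that is the limit checked by
   SPE_CONST_OFFSET_OK.  */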
23758 int spe_regs_addressable
23759 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23760 + reg_size * (32 - info->first_gp_reg_save - 1))
23761 && (strategy & SAVE_INLINE_GPRS));
23763 if (spe_regs_addressable)
23765 spe_save_area_ptr = frame_reg_rtx;
23766 save_off = frame_off;
23768 else
23770 /* Make r11 point to the start of the SPE save area. We need
23771 to be careful here if r11 is holding the static chain. If
23772 it is, then temporarily save it in r0. */
23773 HOST_WIDE_INT offset;
23775 if (!(strategy & SAVE_INLINE_GPRS))
23776 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23777 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23778 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23779 save_off = frame_off - offset;
23781 if (using_static_chain_p)
23783 rtx r0 = gen_rtx_REG (Pmode, 0);
23785 START_USE (0);
23786 gcc_assert (info->first_gp_reg_save > 11);
23788 emit_move_insn (r0, spe_save_area_ptr);
23790 else if (REGNO (frame_reg_rtx) != 11)
23791 START_USE (11);
23793 emit_insn (gen_addsi3 (spe_save_area_ptr,
23794 frame_reg_rtx, GEN_INT (offset)));
23795 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23796 frame_off = -info->spe_gp_save_offset + ool_adjust;
23799 if ((strategy & SAVE_INLINE_GPRS))
23801 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23802 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23803 emit_frame_save (spe_save_area_ptr, reg_mode,
23804 info->first_gp_reg_save + i,
23805 (info->spe_gp_save_offset + save_off
23806 + reg_size * i),
23807 sp_off - save_off);
23809 else
23811 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23812 info->spe_gp_save_offset + save_off,
23813 0, reg_mode,
23814 SAVRES_SAVE | SAVRES_GPR);
23816 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23817 NULL_RTX, NULL_RTX);
23820 /* Move the static chain pointer back. */
23821 if (!spe_regs_addressable)
23823 if (using_static_chain_p)
23825 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23826 END_USE (0);
23828 else if (REGNO (frame_reg_rtx) != 11)
23829 END_USE (11);
23832 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23834 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23835 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23836 unsigned ptr_regno = ptr_regno_for_savres (sel);
23837 rtx ptr_reg = frame_reg_rtx;
23838 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23839 int end_save = info->gp_save_offset + info->gp_size;
23840 int ptr_off;
23842 if (!ptr_set_up)
23843 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23845 /* Need to adjust r11 (r12) if we saved any FPRs. */
23846 if (end_save + frame_off != 0)
23848 rtx offset = GEN_INT (end_save + frame_off);
23850 if (ptr_set_up)
23851 frame_off = -end_save;
23852 else
23853 NOT_INUSE (ptr_regno);
23854 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23856 else if (!ptr_set_up)
23858 NOT_INUSE (ptr_regno);
23859 emit_move_insn (ptr_reg, frame_reg_rtx);
23861 ptr_off = -end_save;
23862 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23863 info->gp_save_offset + ptr_off,
23864 info->lr_save_offset + ptr_off,
23865 reg_mode, sel);
23866 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23867 NULL_RTX, NULL_RTX);
23868 if (lr)
23869 END_USE (0);
23871 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23873 rtvec p;
23874 int i;
23875 p = rtvec_alloc (32 - info->first_gp_reg_save);
23876 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23877 RTVEC_ELT (p, i)
23878 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23879 frame_reg_rtx,
23880 info->gp_save_offset + frame_off + reg_size * i);
23881 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23882 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23883 NULL_RTX, NULL_RTX);
23885 else if (!WORLD_SAVE_P (info))
23887 int i;
23888 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23889 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23890 emit_frame_save (frame_reg_rtx, reg_mode,
23891 info->first_gp_reg_save + i,
23892 info->gp_save_offset + frame_off + reg_size * i,
23893 sp_off - frame_off);
23896 if (crtl->calls_eh_return)
23898 unsigned int i;
23899 rtvec p;
23901 for (i = 0; ; ++i)
23903 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23904 if (regno == INVALID_REGNUM)
23905 break;
23908 p = rtvec_alloc (i);
23910 for (i = 0; ; ++i)
23912 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23913 if (regno == INVALID_REGNUM)
23914 break;
23916 insn
23917 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23918 sp_reg_rtx,
23919 info->ehrd_offset + sp_off + reg_size * (int) i);
23920 RTVEC_ELT (p, i) = insn;
23921 RTX_FRAME_RELATED_P (insn) = 1;
23924 insn = emit_insn (gen_blockage ());
23925 RTX_FRAME_RELATED_P (insn) = 1;
23926 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23929 /* In the AIX ABI we need to make sure r2 is really saved. */
23930 if (TARGET_AIX && crtl->calls_eh_return)
23932 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23933 rtx save_insn, join_insn, note;
23934 long toc_restore_insn;
23936 tmp_reg = gen_rtx_REG (Pmode, 11);
23937 tmp_reg_si = gen_rtx_REG (SImode, 11);
23938 if (using_static_chain_p)
23940 START_USE (0);
23941 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23943 else
23944 START_USE (11);
23945 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23946 /* Peek at instruction to which this function returns. If it's
23947 restoring r2, then we know we've already saved r2. We can't
23948 unconditionally save r2 because the value we have will already
23949 be updated if we arrived at this function via a plt call or
23950 toc adjusting stub. */
23951 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23952 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23953 + RS6000_TOC_SAVE_SLOT);
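/* 0x80410000 is the encoding of "lwz r2,0(r1)" and 0xE8410000 that
   of "ld r2,0(r1)"; adding RS6000_TOC_SAVE_SLOT fills in the
   displacement, giving exactly the insn a toc adjusting stub places
   at the return address.  */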
23954 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23955 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23956 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23957 validate_condition_mode (EQ, CCUNSmode);
23958 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23959 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23960 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23961 toc_save_done = gen_label_rtx ();
23962 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23963 gen_rtx_EQ (VOIDmode, compare_result,
23964 const0_rtx),
23965 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23966 pc_rtx);
23967 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23968 JUMP_LABEL (jump) = toc_save_done;
23969 LABEL_NUSES (toc_save_done) += 1;
23971 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23972 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23973 sp_off - frame_off);
23975 emit_label (toc_save_done);
23977 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
23978 have a CFG that has different saves along different paths.
23979 Move the note to a dummy blockage insn, which describes that
23980 R2 is unconditionally saved after the label. */
23981 /* ??? An alternate representation might be a special insn pattern
23982 containing both the branch and the store. That might give the
23983 code that minimizes the number of DW_CFA_advance opcodes more
23984 freedom in placing the annotations. */
23985 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23986 if (note)
23987 remove_note (save_insn, note);
23988 else
23989 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23990 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23991 RTX_FRAME_RELATED_P (save_insn) = 0;
23993 join_insn = emit_insn (gen_blockage ());
23994 REG_NOTES (join_insn) = note;
23995 RTX_FRAME_RELATED_P (join_insn) = 1;
23997 if (using_static_chain_p)
23999 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
24000 END_USE (0);
24002 else
24003 END_USE (11);
24006 /* Save CR if we use any that must be preserved. */
24007 if (!WORLD_SAVE_P (info) && info->cr_save_p)
24009 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
24010 GEN_INT (info->cr_save_offset + frame_off));
24011 rtx mem = gen_frame_mem (SImode, addr);
24013 /* If we didn't copy cr before, do so now using r0. */
24014 if (cr_save_rtx == NULL_RTX)
24016 START_USE (0);
24017 cr_save_rtx = gen_rtx_REG (SImode, 0);
24018 rs6000_emit_move_from_cr (cr_save_rtx);
24021 /* Saving CR requires a two-instruction sequence: one instruction
24022 to move the CR to a general-purpose register, and a second
24023 instruction that stores the GPR to memory.
24025 We do not emit any DWARF CFI records for the first of these,
24026 because we cannot properly represent the fact that CR is saved in
24027 a register. One reason is that we cannot express that multiple
24028 CR fields are saved; another reason is that on 64-bit, the size
24029 of the CR register in DWARF (4 bytes) differs from the size of
24030 a general-purpose register.
24032 This means if any intervening instruction were to clobber one of
24033 the call-saved CR fields, we'd have incorrect CFI. To prevent
24034 this from happening, we mark the store to memory as a use of
24035 those CR fields, which prevents any such instruction from being
24036 scheduled in between the two instructions. */
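/* So the store below is one PARALLEL: the SET of the stack slot from
   the GPR plus a USE of each live CR field; the move from CR into
   that GPR was emitted separately, above.  */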
24037 rtx crsave_v[9];
24038 int n_crsave = 0;
24039 int i;
24041 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
24042 for (i = 0; i < 8; i++)
24043 if (save_reg_p (CR0_REGNO + i))
24044 crsave_v[n_crsave++]
24045 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24047 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
24048 gen_rtvec_v (n_crsave, crsave_v)));
24049 END_USE (REGNO (cr_save_rtx));
24051 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
24052 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
24053 so we need to construct a frame expression manually. */
24054 RTX_FRAME_RELATED_P (insn) = 1;
24056 /* Update address to be stack-pointer relative, like
24057 rs6000_frame_related would do. */
24058 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
24059 GEN_INT (info->cr_save_offset + sp_off));
24060 mem = gen_frame_mem (SImode, addr);
24062 if (DEFAULT_ABI == ABI_ELFv2)
24064 /* In the ELFv2 ABI we generate separate CFI records for each
24065 CR field that was actually saved. They all point to the
24066 same 32-bit stack slot. */
24067 rtx crframe[8];
24068 int n_crframe = 0;
24070 for (i = 0; i < 8; i++)
24071 if (save_reg_p (CR0_REGNO + i))
24073 crframe[n_crframe]
24074 = gen_rtx_SET (VOIDmode, mem,
24075 gen_rtx_REG (SImode, CR0_REGNO + i));
24077 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
24078 n_crframe++;
24081 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24082 gen_rtx_PARALLEL (VOIDmode,
24083 gen_rtvec_v (n_crframe, crframe)));
24085 else
24087 /* In other ABIs, by convention, we use a single CR regnum to
24088 represent the fact that all call-saved CR fields are saved.
24089 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
24090 rtx set = gen_rtx_SET (VOIDmode, mem,
24091 gen_rtx_REG (SImode, CR2_REGNO));
24092 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
24096 /* In the ELFv2 ABI we need to save all call-saved CR fields into
24097 *separate* slots if the routine calls __builtin_eh_return, so
24098 that they can be independently restored by the unwinder. */
24099 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24101 int i, cr_off = info->ehcr_offset;
24102 rtx crsave;
24104 /* ??? We might get better performance by using multiple mfocrf
24105 instructions. */
24106 crsave = gen_rtx_REG (SImode, 0);
24107 emit_insn (gen_movesi_from_cr (crsave));
24109 for (i = 0; i < 8; i++)
24110 if (!call_used_regs[CR0_REGNO + i])
24112 rtvec p = rtvec_alloc (2);
24113 RTVEC_ELT (p, 0)
24114 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
24115 RTVEC_ELT (p, 1)
24116 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24118 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24120 RTX_FRAME_RELATED_P (insn) = 1;
24121 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24122 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
24123 sp_reg_rtx, cr_off + sp_off));
24125 cr_off += reg_size;
24129 /* Update stack and set back pointer unless this is V.4,
24130 for which it was done previously. */
24131 if (!WORLD_SAVE_P (info) && info->push_p
24132 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
24134 rtx ptr_reg = NULL;
24135 int ptr_off = 0;
24137 /* If saving altivec regs we need to be able to address all save
24138 locations using a 16-bit offset. */
24139 if ((strategy & SAVE_INLINE_VRS) == 0
24140 || (info->altivec_size != 0
24141 && (info->altivec_save_offset + info->altivec_size - 16
24142 + info->total_size - frame_off) > 32767)
24143 || (info->vrsave_size != 0
24144 && (info->vrsave_save_offset
24145 + info->total_size - frame_off) > 32767))
24147 int sel = SAVRES_SAVE | SAVRES_VR;
24148 unsigned ptr_regno = ptr_regno_for_savres (sel);
24150 if (using_static_chain_p
24151 && ptr_regno == STATIC_CHAIN_REGNUM)
24152 ptr_regno = 12;
24153 if (REGNO (frame_reg_rtx) != ptr_regno)
24154 START_USE (ptr_regno);
24155 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24156 frame_reg_rtx = ptr_reg;
24157 ptr_off = info->altivec_save_offset + info->altivec_size;
24158 frame_off = -ptr_off;
24160 else if (REGNO (frame_reg_rtx) == 1)
24161 frame_off = info->total_size;
24162 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
24163 sp_off = info->total_size;
24164 if (frame_reg_rtx != sp_reg_rtx)
24165 rs6000_emit_stack_tie (frame_reg_rtx, false);
24168 /* Set frame pointer, if needed. */
24169 if (frame_pointer_needed)
24171 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24172 sp_reg_rtx);
24173 RTX_FRAME_RELATED_P (insn) = 1;
24176 /* Save AltiVec registers if needed. Save here because the red zone does
24177 not always include AltiVec registers. */
24178 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24179 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24181 int end_save = info->altivec_save_offset + info->altivec_size;
24182 int ptr_off;
24183 /* Oddly, the vector save/restore functions point r0 at the end
24184 of the save area, then use r11 or r12 to load offsets for
24185 [reg+reg] addressing. */
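/* Hence r0 is pointed at the *end* of the save area below and the
   offsets passed to rs6000_emit_savres_rtx come out negative.  */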
24186 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24187 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24188 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24190 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24191 NOT_INUSE (0);
24192 if (end_save + frame_off != 0)
24194 rtx offset = GEN_INT (end_save + frame_off);
24196 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24198 else
24199 emit_move_insn (ptr_reg, frame_reg_rtx);
24201 ptr_off = -end_save;
24202 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24203 info->altivec_save_offset + ptr_off,
24204 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24205 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24206 NULL_RTX, NULL_RTX);
24207 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24209 /* The oddity mentioned above clobbered our frame reg. */
24210 emit_move_insn (frame_reg_rtx, ptr_reg);
24211 frame_off = ptr_off;
24214 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24215 && info->altivec_size != 0)
24217 int i;
24219 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24220 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24222 rtx areg, savereg, mem;
24223 int offset;
24225 offset = (info->altivec_save_offset + frame_off
24226 + 16 * (i - info->first_altivec_reg_save));
24228 savereg = gen_rtx_REG (V4SImode, i);
24230 NOT_INUSE (0);
24231 areg = gen_rtx_REG (Pmode, 0);
24232 emit_move_insn (areg, GEN_INT (offset));
24234 /* AltiVec addressing mode is [reg+reg]. */
24235 mem = gen_frame_mem (V4SImode,
24236 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24238 /* Rather than emitting a generic move, force use of the stvx
24239 instruction, which we always want. In particular we don't
24240 want xxpermdi/stxvd2x for little endian. */
24241 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
24243 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24244 areg, GEN_INT (offset));
24248 /* VRSAVE is a bit vector representing which AltiVec registers
24249 are used. The OS uses this to determine which vector
24250 registers to save on a context switch. We need to save
24251 VRSAVE on the stack frame, add whatever AltiVec registers we
24252 used in this function, and do the corresponding magic in the
24253 epilogue. */
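/* Roughly:
	mfspr	rN,VRSAVE	(get_vrsave on Darwin)
	stw	rN,vrsave_save_offset(frame)
	ori	rN,rN,vrsave_mask
	mtspr	VRSAVE,rN  */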
24255 if (!WORLD_SAVE_P (info)
24256 && TARGET_ALTIVEC
24257 && TARGET_ALTIVEC_VRSAVE
24258 && info->vrsave_mask != 0)
24260 rtx reg, vrsave;
24261 int offset;
24262 int save_regno;
24264 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24265 be using r12 as frame_reg_rtx and r11 as the static chain
24266 pointer for nested functions. */
24267 save_regno = 12;
24268 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24269 && !using_static_chain_p)
24270 save_regno = 11;
24271 else if (REGNO (frame_reg_rtx) == 12)
24273 save_regno = 11;
24274 if (using_static_chain_p)
24275 save_regno = 0;
24278 NOT_INUSE (save_regno);
24279 reg = gen_rtx_REG (SImode, save_regno);
24280 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24281 if (TARGET_MACHO)
24282 emit_insn (gen_get_vrsave_internal (reg));
24283 else
24284 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24286 /* Save VRSAVE. */
24287 offset = info->vrsave_save_offset + frame_off;
24288 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24290 /* Include the registers in the mask. */
24291 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24293 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24296 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24297 if (!TARGET_SINGLE_PIC_BASE
24298 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24299 || (DEFAULT_ABI == ABI_V4
24300 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24301 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24303 /* If emit_load_toc_table will use the link register, we need to save
24304 it. We use R12 for this purpose because emit_load_toc_table
24305 can use register 0. This allows us to use a plain 'blr' to return
24306 from the procedure more often. */
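/* Schematically:
	mflr	r12
	...TOC setup (may clobber LR, e.g. via a bcl)...
	mtlr	r12  */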
24307 int save_LR_around_toc_setup = (TARGET_ELF
24308 && DEFAULT_ABI == ABI_V4
24309 && flag_pic
24310 && ! info->lr_save_p
24311 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24312 if (save_LR_around_toc_setup)
24314 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24315 rtx tmp = gen_rtx_REG (Pmode, 12);
24317 insn = emit_move_insn (tmp, lr);
24318 RTX_FRAME_RELATED_P (insn) = 1;
24320 rs6000_emit_load_toc_table (TRUE);
24322 insn = emit_move_insn (lr, tmp);
24323 add_reg_note (insn, REG_CFA_RESTORE, lr);
24324 RTX_FRAME_RELATED_P (insn) = 1;
24326 else
24327 rs6000_emit_load_toc_table (TRUE);
24330 #if TARGET_MACHO
24331 if (!TARGET_SINGLE_PIC_BASE
24332 && DEFAULT_ABI == ABI_DARWIN
24333 && flag_pic && crtl->uses_pic_offset_table)
24335 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24336 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24338 /* Save and restore LR locally around this call (in R0). */
24339 if (!info->lr_save_p)
24340 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24342 emit_insn (gen_load_macho_picbase (src));
24344 emit_move_insn (gen_rtx_REG (Pmode,
24345 RS6000_PIC_OFFSET_TABLE_REGNUM),
24346 lr);
24348 if (!info->lr_save_p)
24349 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24351 #endif
24353 /* If we need to, save the TOC register after doing the stack setup.
24354 Do not emit eh frame info for this save. The unwinder wants info,
24355 conceptually attached to instructions in this function, about
24356 register values in the caller of this function. This R2 may have
24357 already been changed from the value in the caller.
24358 We don't attempt to write accurate DWARF EH frame info for R2
24359 because code emitted by gcc for a (non-pointer) function call
24360 doesn't save and restore R2. Instead, R2 is managed out-of-line
24361 by a linker generated plt call stub when the function resides in
24362 a shared library. This behaviour is costly to describe in DWARF,
24363 both in terms of the size of DWARF info and the time taken in the
24364 unwinder to interpret it. R2 changes, apart from the
24365 calls_eh_return case earlier in this function, are handled by
24366 linux-unwind.h frob_update_context. */
24367 if (rs6000_save_toc_in_prologue_p ())
24369 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24370 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24374 /* Output .extern statements for the save/restore routines we use. */
24376 static void
24377 rs6000_output_savres_externs (FILE *file)
24379 rs6000_stack_t *info = rs6000_stack_info ();
24381 if (TARGET_DEBUG_STACK)
24382 debug_stack_info (info);
24384 /* Write .extern for any function we will call to save and restore
24385 fp values. */
24386 if (info->first_fp_reg_save < 64
24387 && !TARGET_MACHO
24388 && !TARGET_ELF)
24390 char *name;
24391 int regno = info->first_fp_reg_save - 32;
24393 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24395 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24396 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24397 name = rs6000_savres_routine_name (info, regno, sel);
24398 fprintf (file, "\t.extern %s\n", name);
24400 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24402 bool lr = (info->savres_strategy
24403 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24404 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24405 name = rs6000_savres_routine_name (info, regno, sel);
24406 fprintf (file, "\t.extern %s\n", name);
24411 /* Write function prologue. */
24413 static void
24414 rs6000_output_function_prologue (FILE *file,
24415 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24417 if (!cfun->is_thunk)
24418 rs6000_output_savres_externs (file);
24420 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24421 immediately after the global entry point label. */
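/* Per the ELFv2 ABI, r12 holds the global entry point address on
   entry, so the addis/addi pair derives the TOC pointer from it;
   .localentry then records the distance from the global to the
   local entry point.  */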
24422 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24424 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24426 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24427 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24429 fputs ("\t.localentry\t", file);
24430 assemble_name (file, name);
24431 fputs (",.-", file);
24432 assemble_name (file, name);
24433 fputs ("\n", file);
24436 /* Output -mprofile-kernel code. This needs to be done here instead of
24437 in output_function_profiler since it must go after the ELFv2 ABI
24438 local entry point. */
24439 if (TARGET_PROFILE_KERNEL && crtl->profile)
24441 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24442 gcc_assert (!TARGET_32BIT);
24444 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24446 /* In the ELFv2 ABI we have no compiler stack word. It must be
24447 the responsibility of _mcount to preserve the static chain
24448 register if required. */
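/* (24(r1) is the doubleword reserved for the compiler in the
   ELFv1/AIX frame header, so it is free for stashing the static
   chain across the _mcount call.)  */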
24449 if (DEFAULT_ABI != ABI_ELFv2
24450 && cfun->static_chain_decl != NULL)
24452 asm_fprintf (file, "\tstd %s,24(%s)\n",
24453 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24454 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24455 asm_fprintf (file, "\tld %s,24(%s)\n",
24456 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24458 else
24459 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24462 rs6000_pic_labelno++;
24465 /* Non-zero if vmx regs are restored before the frame pop, zero if
24466 we restore after the pop when possible. */
24467 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24469 /* Restoring cr is a two-step process: loading a reg from the frame
24470 save, then moving the reg to cr. For ABI_V4 we must let the
24471 unwinder know that the stack location is no longer valid at or
24472 before the stack deallocation, but we can't emit a cfa_restore for
24473 cr at the stack deallocation like we do for other registers.
24474 The trouble is that it is possible for the move to cr to be
24475 scheduled after the stack deallocation. So say exactly where cr
24476 is located on each of the two insns. */
24478 static rtx
24479 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24481 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24482 rtx reg = gen_rtx_REG (SImode, regno);
24483 rtx_insn *insn = emit_move_insn (reg, mem);
24485 if (!exit_func && DEFAULT_ABI == ABI_V4)
24487 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24488 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24490 add_reg_note (insn, REG_CFA_REGISTER, set);
24491 RTX_FRAME_RELATED_P (insn) = 1;
24493 return reg;
24496 /* Reload CR from REG. */
24498 static void
24499 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24501 int count = 0;
24502 int i;
24504 if (using_mfcr_multiple)
24506 for (i = 0; i < 8; i++)
24507 if (save_reg_p (CR0_REGNO + i))
24508 count++;
24509 gcc_assert (count);
24512 if (using_mfcr_multiple && count > 1)
24514 rtx_insn *insn;
24515 rtvec p;
24516 int ndx;
24518 p = rtvec_alloc (count);
24520 ndx = 0;
24521 for (i = 0; i < 8; i++)
24522 if (save_reg_p (CR0_REGNO + i))
24524 rtvec r = rtvec_alloc (2);
24525 RTVEC_ELT (r, 0) = reg;
24526 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24527 RTVEC_ELT (p, ndx) =
24528 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24529 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24530 ndx++;
24532 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24533 gcc_assert (ndx == count);
24535 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24536 CR field separately. */
24537 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24539 for (i = 0; i < 8; i++)
24540 if (save_reg_p (CR0_REGNO + i))
24541 add_reg_note (insn, REG_CFA_RESTORE,
24542 gen_rtx_REG (SImode, CR0_REGNO + i));
24544 RTX_FRAME_RELATED_P (insn) = 1;
24547 else
24548 for (i = 0; i < 8; i++)
24549 if (save_reg_p (CR0_REGNO + i))
24551 rtx insn = emit_insn (gen_movsi_to_cr_one
24552 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24554 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24555 CR field separately, attached to the insn that in fact
24556 restores this particular CR field. */
24557 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24559 add_reg_note (insn, REG_CFA_RESTORE,
24560 gen_rtx_REG (SImode, CR0_REGNO + i));
24562 RTX_FRAME_RELATED_P (insn) = 1;
24566 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24567 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24568 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24570 rtx_insn *insn = get_last_insn ();
24571 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24573 add_reg_note (insn, REG_CFA_RESTORE, cr);
24574 RTX_FRAME_RELATED_P (insn) = 1;
24578 /* Like cr, the move to lr instruction can be scheduled after the
24579 stack deallocation, but unlike cr, its stack frame save is still
24580 valid. So we only need to emit the cfa_restore on the correct
24581 instruction. */
24583 static void
24584 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24586 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24587 rtx reg = gen_rtx_REG (Pmode, regno);
24589 emit_move_insn (reg, mem);
24592 static void
24593 restore_saved_lr (int regno, bool exit_func)
24595 rtx reg = gen_rtx_REG (Pmode, regno);
24596 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24597 rtx_insn *insn = emit_move_insn (lr, reg);
24599 if (!exit_func && flag_shrink_wrap)
24601 add_reg_note (insn, REG_CFA_RESTORE, lr);
24602 RTX_FRAME_RELATED_P (insn) = 1;
24606 static rtx
24607 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24609 if (DEFAULT_ABI == ABI_ELFv2)
24611 int i;
24612 for (i = 0; i < 8; i++)
24613 if (save_reg_p (CR0_REGNO + i))
24615 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24616 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24617 cfa_restores);
24620 else if (info->cr_save_p)
24621 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24622 gen_rtx_REG (SImode, CR2_REGNO),
24623 cfa_restores);
24625 if (info->lr_save_p)
24626 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24627 gen_rtx_REG (Pmode, LR_REGNO),
24628 cfa_restores);
24629 return cfa_restores;
24632 /* Return true if OFFSET from stack pointer can be clobbered by signals.
24633 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288
24634 bytes below the stack pointer that are not clobbered by signals. */
24636 static inline bool
24637 offset_below_red_zone_p (HOST_WIDE_INT offset)
24639 return offset < (DEFAULT_ABI == ABI_V4
24640 ? 0
24641 : TARGET_32BIT ? -220 : -288);
24644 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24646 static void
24647 emit_cfa_restores (rtx cfa_restores)
24649 rtx_insn *insn = get_last_insn ();
24650 rtx *loc = &REG_NOTES (insn);
24652 while (*loc)
24653 loc = &XEXP (*loc, 1);
24654 *loc = cfa_restores;
24655 RTX_FRAME_RELATED_P (insn) = 1;
24658 /* Emit function epilogue as insns. */
24660 void
24661 rs6000_emit_epilogue (int sibcall)
24663 rs6000_stack_t *info;
24664 int restoring_GPRs_inline;
24665 int restoring_FPRs_inline;
24666 int using_load_multiple;
24667 int using_mtcr_multiple;
24668 int use_backchain_to_restore_sp;
24669 int restore_lr;
24670 int strategy;
24671 HOST_WIDE_INT frame_off = 0;
24672 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24673 rtx frame_reg_rtx = sp_reg_rtx;
24674 rtx cfa_restores = NULL_RTX;
24675 rtx insn;
24676 rtx cr_save_reg = NULL_RTX;
24677 machine_mode reg_mode = Pmode;
24678 int reg_size = TARGET_32BIT ? 4 : 8;
24679 int i;
24680 bool exit_func;
24681 unsigned ptr_regno;
24683 info = rs6000_stack_info ();
24685 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24687 reg_mode = V2SImode;
24688 reg_size = 8;
24691 strategy = info->savres_strategy;
24692 using_load_multiple = strategy & SAVRES_MULTIPLE;
24693 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24694 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24695 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24696 || rs6000_cpu == PROCESSOR_PPC603
24697 || rs6000_cpu == PROCESSOR_PPC750
24698 || optimize_size);
24699 /* Restore via the backchain when we have a large frame, since this
24700 is more efficient than an addis, addi pair. The second condition
24701 here will not trigger at the moment; we don't actually need a
24702 frame pointer for alloca, but the generic parts of the compiler
24703 give us one anyway. */
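/* (The backchain word sits at 0(r1), so one load recovers the old
   stack pointer directly.)  */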
24704 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24705 || (cfun->calls_alloca
24706 && !frame_pointer_needed));
24707 restore_lr = (info->lr_save_p
24708 && (restoring_FPRs_inline
24709 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24710 && (restoring_GPRs_inline
24711 || info->first_fp_reg_save < 64));
24713 if (WORLD_SAVE_P (info))
24715 int i, j;
24716 char rname[30];
24717 const char *alloc_rname;
24718 rtvec p;
24720 /* eh_rest_world_r10 will return to the location saved in the LR
24721 stack slot (which is not likely to be our caller).
24722 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24723 rest_world is similar, except any R10 parameter is ignored.
24724 The exception-handling stuff that was here in 2.95 is no
24725 longer necessary. */
24727 p = rtvec_alloc (9
24728 + 1
24729 + 32 - info->first_gp_reg_save
24730 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24731 + 63 + 1 - info->first_fp_reg_save);
24733 strcpy (rname, ((crtl->calls_eh_return) ?
24734 "*eh_rest_world_r10" : "*rest_world"));
24735 alloc_rname = ggc_strdup (rname);
24737 j = 0;
24738 RTVEC_ELT (p, j++) = ret_rtx;
24739 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24740 gen_rtx_REG (Pmode,
24741 LR_REGNO));
24742 RTVEC_ELT (p, j++)
24743 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24744 /* The instruction pattern requires a clobber here;
24745 it is shared with the restVEC helper. */
24746 RTVEC_ELT (p, j++)
24747 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24750 /* CR register traditionally saved as CR2. */
24751 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24752 RTVEC_ELT (p, j++)
24753 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24754 if (flag_shrink_wrap)
24756 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24757 gen_rtx_REG (Pmode, LR_REGNO),
24758 cfa_restores);
24759 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24763 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24765 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24766 RTVEC_ELT (p, j++)
24767 = gen_frame_load (reg,
24768 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24769 if (flag_shrink_wrap)
24770 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24772 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24774 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24775 RTVEC_ELT (p, j++)
24776 = gen_frame_load (reg,
24777 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24778 if (flag_shrink_wrap)
24779 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24781 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24783 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24784 ? DFmode : SFmode),
24785 info->first_fp_reg_save + i);
24786 RTVEC_ELT (p, j++)
24787 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24788 if (flag_shrink_wrap)
24789 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24791 RTVEC_ELT (p, j++)
24792 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24793 RTVEC_ELT (p, j++)
24794 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24795 RTVEC_ELT (p, j++)
24796 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24797 RTVEC_ELT (p, j++)
24798 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24799 RTVEC_ELT (p, j++)
24800 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24801 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24803 if (flag_shrink_wrap)
24805 REG_NOTES (insn) = cfa_restores;
24806 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24807 RTX_FRAME_RELATED_P (insn) = 1;
24809 return;
24812 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24813 if (info->push_p)
24814 frame_off = info->total_size;
24816 /* Restore AltiVec registers if we must do so before adjusting the
24817 stack. */
24818 if (TARGET_ALTIVEC_ABI
24819 && info->altivec_size != 0
24820 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24821 || (DEFAULT_ABI != ABI_V4
24822 && offset_below_red_zone_p (info->altivec_save_offset))))
24824 int i;
24825 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24827 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24828 if (use_backchain_to_restore_sp)
24830 int frame_regno = 11;
24832 if ((strategy & REST_INLINE_VRS) == 0)
24834 /* Of r11 and r12, select the one not clobbered by an
24835 out-of-line restore function for the frame register. */
24836 frame_regno = 11 + 12 - scratch_regno;
24838 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24839 emit_move_insn (frame_reg_rtx,
24840 gen_rtx_MEM (Pmode, sp_reg_rtx));
24841 frame_off = 0;
24843 else if (frame_pointer_needed)
24844 frame_reg_rtx = hard_frame_pointer_rtx;
24846 if ((strategy & REST_INLINE_VRS) == 0)
24848 int end_save = info->altivec_save_offset + info->altivec_size;
24849 int ptr_off;
24850 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24851 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24853 if (end_save + frame_off != 0)
24855 rtx offset = GEN_INT (end_save + frame_off);
24857 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24859 else
24860 emit_move_insn (ptr_reg, frame_reg_rtx);
24862 ptr_off = -end_save;
24863 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24864 info->altivec_save_offset + ptr_off,
24865 0, V4SImode, SAVRES_VR);
24867 else
24869 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24870 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24872 rtx addr, areg, mem, reg;
24874 areg = gen_rtx_REG (Pmode, 0);
24875 emit_move_insn
24876 (areg, GEN_INT (info->altivec_save_offset
24877 + frame_off
24878 + 16 * (i - info->first_altivec_reg_save)));
24880 /* AltiVec addressing mode is [reg+reg]. */
24881 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24882 mem = gen_frame_mem (V4SImode, addr);
24884 reg = gen_rtx_REG (V4SImode, i);
24885 /* Rather than emitting a generic move, force use of the
24886 lvx instruction, which we always want. In particular
24887 we don't want lxvd2x/xxpermdi for little endian. */
24888 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
24892 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24893 if (((strategy & REST_INLINE_VRS) == 0
24894 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24895 && (flag_shrink_wrap
24896 || (offset_below_red_zone_p
24897 (info->altivec_save_offset
24898 + 16 * (i - info->first_altivec_reg_save)))))
24900 rtx reg = gen_rtx_REG (V4SImode, i);
24901 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24905 /* Restore VRSAVE if we must do so before adjusting the stack. */
24906 if (TARGET_ALTIVEC
24907 && TARGET_ALTIVEC_VRSAVE
24908 && info->vrsave_mask != 0
24909 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24910 || (DEFAULT_ABI != ABI_V4
24911 && offset_below_red_zone_p (info->vrsave_save_offset))))
24913 rtx reg;
24915 if (frame_reg_rtx == sp_reg_rtx)
24917 if (use_backchain_to_restore_sp)
24919 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24920 emit_move_insn (frame_reg_rtx,
24921 gen_rtx_MEM (Pmode, sp_reg_rtx));
24922 frame_off = 0;
24924 else if (frame_pointer_needed)
24925 frame_reg_rtx = hard_frame_pointer_rtx;
24928 reg = gen_rtx_REG (SImode, 12);
24929 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24930 info->vrsave_save_offset + frame_off));
24932 emit_insn (generate_set_vrsave (reg, info, 1));
24935 insn = NULL_RTX;
24936 /* If we have a large stack frame, restore the old stack pointer
24937 using the backchain. */
24938 if (use_backchain_to_restore_sp)
24940 if (frame_reg_rtx == sp_reg_rtx)
24942 /* Under V.4, don't reset the stack pointer until after we're done
24943 loading the saved registers. */
24944 if (DEFAULT_ABI == ABI_V4)
24945 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24947 insn = emit_move_insn (frame_reg_rtx,
24948 gen_rtx_MEM (Pmode, sp_reg_rtx));
24949 frame_off = 0;
24951 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24952 && DEFAULT_ABI == ABI_V4)
24953 /* frame_reg_rtx has been set up by the altivec restore. */
24955 else
24957 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24958 frame_reg_rtx = sp_reg_rtx;
24961 /* If we have a frame pointer, we can restore the old stack pointer
24962 from it. */
24963 else if (frame_pointer_needed)
24965 frame_reg_rtx = sp_reg_rtx;
24966 if (DEFAULT_ABI == ABI_V4)
24967 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24968 /* Prevent reordering memory accesses against stack pointer restore. */
24969 else if (cfun->calls_alloca
24970 || offset_below_red_zone_p (-info->total_size))
24971 rs6000_emit_stack_tie (frame_reg_rtx, true);
24973 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24974 GEN_INT (info->total_size)));
24975 frame_off = 0;
24977 else if (info->push_p
24978 && DEFAULT_ABI != ABI_V4
24979 && !crtl->calls_eh_return)
24981 /* Prevent reordering memory accesses against stack pointer restore. */
24982 if (cfun->calls_alloca
24983 || offset_below_red_zone_p (-info->total_size))
24984 rs6000_emit_stack_tie (frame_reg_rtx, false);
24985 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24986 GEN_INT (info->total_size)));
24987 frame_off = 0;
24989 if (insn && frame_reg_rtx == sp_reg_rtx)
24991 if (cfa_restores)
24993 REG_NOTES (insn) = cfa_restores;
24994 cfa_restores = NULL_RTX;
24996 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24997 RTX_FRAME_RELATED_P (insn) = 1;
25000 /* Restore AltiVec registers if we have not done so already. */
25001 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25002 && TARGET_ALTIVEC_ABI
25003 && info->altivec_size != 0
25004 && (DEFAULT_ABI == ABI_V4
25005 || !offset_below_red_zone_p (info->altivec_save_offset)))
25007 int i;
25009 if ((strategy & REST_INLINE_VRS) == 0)
25011 int end_save = info->altivec_save_offset + info->altivec_size;
25012 int ptr_off;
25013 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
25014 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
25015 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
25017 if (end_save + frame_off != 0)
25019 rtx offset = GEN_INT (end_save + frame_off);
25021 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25023 else
25024 emit_move_insn (ptr_reg, frame_reg_rtx);
25026 ptr_off = -end_save;
25027 insn = rs6000_emit_savres_rtx (info, scratch_reg,
25028 info->altivec_save_offset + ptr_off,
25029 0, V4SImode, SAVRES_VR);
25030 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
25032 /* Frame reg was clobbered by out-of-line save. Restore it
25033 from ptr_reg, and if we are calling out-of-line gpr or
25034 fpr restore, set up the correct pointer and offset. */
25035 unsigned newptr_regno = 1;
25036 if (!restoring_GPRs_inline)
25038 bool lr = info->gp_save_offset + info->gp_size == 0;
25039 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25040 newptr_regno = ptr_regno_for_savres (sel);
25041 end_save = info->gp_save_offset + info->gp_size;
25043 else if (!restoring_FPRs_inline)
25045 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
25046 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25047 newptr_regno = ptr_regno_for_savres (sel);
25048 end_save = info->fp_save_offset + info->fp_size;
25051 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
25052 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
25054 if (end_save + ptr_off != 0)
25056 rtx offset = GEN_INT (end_save + ptr_off);
25058 frame_off = -end_save;
25059 if (TARGET_32BIT)
25060 emit_insn (gen_addsi3_carry (frame_reg_rtx,
25061 ptr_reg, offset));
25062 else
25063 emit_insn (gen_adddi3_carry (frame_reg_rtx,
25064 ptr_reg, offset));
25066 else
25068 frame_off = ptr_off;
25069 emit_move_insn (frame_reg_rtx, ptr_reg);
25073 else
25075 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25076 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25078 rtx addr, areg, mem, reg;
25080 areg = gen_rtx_REG (Pmode, 0);
25081 emit_move_insn
25082 (areg, GEN_INT (info->altivec_save_offset
25083 + frame_off
25084 + 16 * (i - info->first_altivec_reg_save)));
25086 /* AltiVec addressing mode is [reg+reg]. */
25087 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
25088 mem = gen_frame_mem (V4SImode, addr);
25090 reg = gen_rtx_REG (V4SImode, i);
25091 /* Rather than emitting a generic move, force use of the
25092 lvx instruction, which we always want. In particular
25093 we don't want lxvd2x/xxpermdi for little endian. */
25094 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
25098 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25099 if (((strategy & REST_INLINE_VRS) == 0
25100 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25101 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25103 rtx reg = gen_rtx_REG (V4SImode, i);
25104 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25108 /* Restore VRSAVE if we have not done so already. */
25109 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25110 && TARGET_ALTIVEC
25111 && TARGET_ALTIVEC_VRSAVE
25112 && info->vrsave_mask != 0
25113 && (DEFAULT_ABI == ABI_V4
25114 || !offset_below_red_zone_p (info->vrsave_save_offset)))
25116 rtx reg;
25118 reg = gen_rtx_REG (SImode, 12);
25119 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25120 info->vrsave_save_offset + frame_off));
25122 emit_insn (generate_set_vrsave (reg, info, 1));
25125 /* If we exit by an out-of-line restore function on ABI_V4 then that
25126 function will deallocate the stack, so we don't need to worry
25127 about the unwinder restoring cr from an invalid stack frame
25128 location. */
25129 exit_func = (!restoring_FPRs_inline
25130 || (!restoring_GPRs_inline
25131 && info->first_fp_reg_save == 64));
25133 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
25134 *separate* slots if the routine calls __builtin_eh_return, so
25135 that they can be independently restored by the unwinder. */
25136 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25138 int i, cr_off = info->ehcr_offset;
25140 for (i = 0; i < 8; i++)
25141 if (!call_used_regs[CR0_REGNO + i])
25143 rtx reg = gen_rtx_REG (SImode, 0);
25144 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25145 cr_off + frame_off));
25147 insn = emit_insn (gen_movsi_to_cr_one
25148 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25150 if (!exit_func && flag_shrink_wrap)
25152 add_reg_note (insn, REG_CFA_RESTORE,
25153 gen_rtx_REG (SImode, CR0_REGNO + i));
25155 RTX_FRAME_RELATED_P (insn) = 1;
25158 cr_off += reg_size;
25162 /* Get the old lr if we saved it. If we are restoring registers
25163 out-of-line, then the out-of-line routines can do this for us. */
25164 if (restore_lr && restoring_GPRs_inline)
25165 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25167 /* Get the old cr if we saved it. */
25168 if (info->cr_save_p)
25170 unsigned cr_save_regno = 12;
25172 if (!restoring_GPRs_inline)
25174 /* Ensure we don't use the register used by the out-of-line
25175 gpr register restore below. */
25176 bool lr = info->gp_save_offset + info->gp_size == 0;
25177 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25178 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25180 if (gpr_ptr_regno == 12)
25181 cr_save_regno = 11;
25182 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25184 else if (REGNO (frame_reg_rtx) == 12)
25185 cr_save_regno = 11;
25187 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25188 info->cr_save_offset + frame_off,
25189 exit_func);
25192 /* Set LR here to try to overlap restores below. */
25193 if (restore_lr && restoring_GPRs_inline)
25194 restore_saved_lr (0, exit_func);
25196 /* Load exception handler data registers, if needed. */
25197 if (crtl->calls_eh_return)
25199 unsigned int i, regno;
25201 if (TARGET_AIX)
25203 rtx reg = gen_rtx_REG (reg_mode, 2);
25204 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25205 frame_off + RS6000_TOC_SAVE_SLOT));
25208 for (i = 0; ; ++i)
25210 rtx mem;
25212 regno = EH_RETURN_DATA_REGNO (i);
25213 if (regno == INVALID_REGNUM)
25214 break;
25216 /* Note: possible use of r0 here to address SPE regs. */
25217 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25218 info->ehrd_offset + frame_off
25219 + reg_size * (int) i);
25221 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25225 /* Restore GPRs. This is done as a PARALLEL if we are using
25226 the load-multiple instructions. */
25227 if (TARGET_SPE_ABI
25228 && info->spe_64bit_regs_used
25229 && info->first_gp_reg_save != 32)
25231 /* Determine whether we can address all of the registers that need
25232 to be saved with an offset from frame_reg_rtx that fits in
25233 the small const field for SPE memory instructions. */
25234 int spe_regs_addressable
25235 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25236 + reg_size * (32 - info->first_gp_reg_save - 1))
25237 && restoring_GPRs_inline);
25239 if (!spe_regs_addressable)
25241 int ool_adjust = 0;
25242 rtx old_frame_reg_rtx = frame_reg_rtx;
25243 /* Make r11 point to the start of the SPE save area. We worried about
25244 not clobbering it when we were saving registers in the prologue.
25245 There's no need to worry here because the static chain is passed
25246 anew to every function. */
25248 if (!restoring_GPRs_inline)
25249 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25250 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25251 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25252 GEN_INT (info->spe_gp_save_offset
25253 + frame_off
25254 - ool_adjust)));
25255 /* Keep the invariant that frame_reg_rtx + frame_off points
25256 at the top of the stack frame. */
25257 frame_off = -info->spe_gp_save_offset + ool_adjust;
25260 if (restoring_GPRs_inline)
25262 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25264 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25265 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25267 rtx offset, addr, mem, reg;
25269 /* We're doing all this to ensure that the immediate offset
25270 fits into the immediate field of 'evldd'. */
25271 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25273 offset = GEN_INT (spe_offset + reg_size * i);
25274 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25275 mem = gen_rtx_MEM (V2SImode, addr);
25276 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25278 emit_move_insn (reg, mem);
25281 else
25282 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25283 info->spe_gp_save_offset + frame_off,
25284 info->lr_save_offset + frame_off,
25285 reg_mode,
25286 SAVRES_GPR | SAVRES_LR);
25288 else if (!restoring_GPRs_inline)
25290 /* We are jumping to an out-of-line function. */
25291 rtx ptr_reg;
25292 int end_save = info->gp_save_offset + info->gp_size;
25293 bool can_use_exit = end_save == 0;
25294 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25295 int ptr_off;
25297 /* Emit stack reset code if we need it. */
25298 ptr_regno = ptr_regno_for_savres (sel);
25299 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25300 if (can_use_exit)
25301 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25302 else if (end_save + frame_off != 0)
25303 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25304 GEN_INT (end_save + frame_off)));
25305 else if (REGNO (frame_reg_rtx) != ptr_regno)
25306 emit_move_insn (ptr_reg, frame_reg_rtx);
25307 if (REGNO (frame_reg_rtx) == ptr_regno)
25308 frame_off = -end_save;
25310 if (can_use_exit && info->cr_save_p)
25311 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25313 ptr_off = -end_save;
25314 rs6000_emit_savres_rtx (info, ptr_reg,
25315 info->gp_save_offset + ptr_off,
25316 info->lr_save_offset + ptr_off,
25317 reg_mode, sel);
25319 else if (using_load_multiple)
25321 rtvec p;
25322 p = rtvec_alloc (32 - info->first_gp_reg_save);
25323 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25324 RTVEC_ELT (p, i)
25325 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25326 frame_reg_rtx,
25327 info->gp_save_offset + frame_off + reg_size * i);
25328 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25330 else
25332 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25333 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25334 emit_insn (gen_frame_load
25335 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25336 frame_reg_rtx,
25337 info->gp_save_offset + frame_off + reg_size * i));
25340 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25342 /* If the frame pointer was used then we can't delay emitting
25343 a REG_CFA_DEF_CFA note. This must happen on the insn that
25344 restores the frame pointer, r31. We may have already emitted
25345 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25346 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25347 be harmless if emitted. */
25348 if (frame_pointer_needed)
25350 insn = get_last_insn ();
25351 add_reg_note (insn, REG_CFA_DEF_CFA,
25352 plus_constant (Pmode, frame_reg_rtx, frame_off));
25353 RTX_FRAME_RELATED_P (insn) = 1;
25356 /* Set up cfa_restores. We always need these when
25357 shrink-wrapping. If not shrink-wrapping then we only need
25358 the cfa_restore when the stack location is no longer valid.
25359 The cfa_restores must be emitted on or before the insn that
25360 invalidates the stack, and of course must not be emitted
25361 before the insn that actually does the restore. The latter
25362 is why it is a bad idea to emit the cfa_restores as a group
25363 on the last instruction here that actually does a restore:
25364 That insn may be reordered with respect to others doing
25365 restores. */
25366 if (flag_shrink_wrap
25367 && !restoring_GPRs_inline
25368 && info->first_fp_reg_save == 64)
25369 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25371 for (i = info->first_gp_reg_save; i < 32; i++)
25372 if (!restoring_GPRs_inline
25373 || using_load_multiple
25374 || rs6000_reg_live_or_pic_offset_p (i))
25376 rtx reg = gen_rtx_REG (reg_mode, i);
25378 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25382 if (!restoring_GPRs_inline
25383 && info->first_fp_reg_save == 64)
25385 /* We are jumping to an out-of-line function. */
25386 if (cfa_restores)
25387 emit_cfa_restores (cfa_restores);
25388 return;
25391 if (restore_lr && !restoring_GPRs_inline)
25393 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25394 restore_saved_lr (0, exit_func);
25397 /* Restore fpr's if we need to do it without calling a function. */
25398 if (restoring_FPRs_inline)
25399 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25400 if (save_reg_p (info->first_fp_reg_save + i))
25402 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25403 ? DFmode : SFmode),
25404 info->first_fp_reg_save + i);
25405 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25406 info->fp_save_offset + frame_off + 8 * i));
25407 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25408 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25411 /* If we saved the CR, restore it here (just the fields that were used). */
25412 if (info->cr_save_p)
25413 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25415 /* If this is V.4, unwind the stack pointer after all of the loads
25416 have been done, or set up r11 if we are restoring fp out of line. */
25417 ptr_regno = 1;
25418 if (!restoring_FPRs_inline)
25420 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25421 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25422 ptr_regno = ptr_regno_for_savres (sel);
25425 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25426 if (REGNO (frame_reg_rtx) == ptr_regno)
25427 frame_off = 0;
25429 if (insn && restoring_FPRs_inline)
25431 if (cfa_restores)
25433 REG_NOTES (insn) = cfa_restores;
25434 cfa_restores = NULL_RTX;
25436 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25437 RTX_FRAME_RELATED_P (insn) = 1;
25440 if (crtl->calls_eh_return)
25442 rtx sa = EH_RETURN_STACKADJ_RTX;
25443 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25446 if (!sibcall)
25448 rtvec p;
25449 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25450 if (! restoring_FPRs_inline)
25452 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25453 RTVEC_ELT (p, 0) = ret_rtx;
25455 else
25457 if (cfa_restores)
25459 /* We can't hang the cfa_restores off a simple return,
25460 since the shrink-wrap code sometimes uses an existing
25461 return. This means there might be a path from
25462 pre-prologue code to this return, and dwarf2cfi code
25463 wants the eh_frame unwinder state to be the same on
25464 all paths to any point. So we need to emit the
25465 cfa_restores before the return. For -m64 we really
25466 don't need epilogue cfa_restores at all, except for
25467 this irritating dwarf2cfi with shrink-wrap
25468 requirement; the stack red-zone means eh_frame info
25469 from the prologue telling the unwinder to restore
25470 from the stack is perfectly good right to the end of
25471 the function. */
25472 emit_insn (gen_blockage ());
25473 emit_cfa_restores (cfa_restores);
25474 cfa_restores = NULL_RTX;
25476 p = rtvec_alloc (2);
25477 RTVEC_ELT (p, 0) = simple_return_rtx;
25480 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25481 ? gen_rtx_USE (VOIDmode,
25482 gen_rtx_REG (Pmode, LR_REGNO))
25483 : gen_rtx_CLOBBER (VOIDmode,
25484 gen_rtx_REG (Pmode, LR_REGNO)));
25486 /* If we have to restore more than two FP registers, branch to the
25487 restore function. It will return to our caller. */
25488 if (! restoring_FPRs_inline)
25490 int i;
25491 int reg;
25492 rtx sym;
25494 if (flag_shrink_wrap)
25495 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25497 sym = rs6000_savres_routine_sym (info,
25498 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25499 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25500 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
25501 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25503 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25505 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25507 RTVEC_ELT (p, i + 4)
25508 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25509 if (flag_shrink_wrap)
25510 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25511 cfa_restores);
25515 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25518 if (cfa_restores)
25520 if (sibcall)
25521 /* Ensure the cfa_restores are hung off an insn that won't
25522 be reordered above other restores. */
25523 emit_insn (gen_blockage ());
25525 emit_cfa_restores (cfa_restores);
25529 /* Write function epilogue. */
25531 static void
25532 rs6000_output_function_epilogue (FILE *file,
25533 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25535 #if TARGET_MACHO
25536 macho_branch_islands ();
25537 /* Mach-O doesn't support labels at the end of objects, so if
25538 it looks like we might want one, insert a NOP. */
25540 rtx_insn *insn = get_last_insn ();
25541 rtx_insn *deleted_debug_label = NULL;
25542 while (insn
25543 && NOTE_P (insn)
25544 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25546 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25547 notes only, instead set their CODE_LABEL_NUMBER to -1,
25548 otherwise there would be code generation differences
25549 in between -g and -g0. */
25550 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25551 deleted_debug_label = insn;
25552 insn = PREV_INSN (insn);
25554 if (insn
25555 && (LABEL_P (insn)
25556 || (NOTE_P (insn)
25557 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25558 fputs ("\tnop\n", file);
25559 else if (deleted_debug_label)
25560 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25561 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25562 CODE_LABEL_NUMBER (insn) = -1;
25564 #endif
25566 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25567 on its format.
25569 We don't output a traceback table if -finhibit-size-directive was
25570 used. The documentation for -finhibit-size-directive reads
25571 ``don't output a @code{.size} assembler directive, or anything
25572 else that would cause trouble if the function is split in the
25573 middle, and the two halves are placed at locations far apart in
25574 memory.'' The traceback table has this property, since it
25575 includes the offset from the start of the function to the
25576 traceback table itself.
25578 System V.4 PowerPC (and the embedded ABI derived from it) uses a
25579 different traceback table. */
25580 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25581 && ! flag_inhibit_size_directive
25582 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25584 const char *fname = NULL;
25585 const char *language_string = lang_hooks.name;
25586 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25587 int i;
25588 int optional_tbtab;
25589 rs6000_stack_t *info = rs6000_stack_info ();
25591 if (rs6000_traceback == traceback_full)
25592 optional_tbtab = 1;
25593 else if (rs6000_traceback == traceback_part)
25594 optional_tbtab = 0;
25595 else
25596 optional_tbtab = !optimize_size && !TARGET_ELF;
25598 if (optional_tbtab)
25600 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25601 while (*fname == '.') /* V.4 encodes . in the name */
25602 fname++;
25604 /* Need label immediately before tbtab, so we can compute
25605 its offset from the function start. */
25606 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25607 ASM_OUTPUT_LABEL (file, fname);
25610 /* The .tbtab pseudo-op can only be used for the first eight
25611 expressions, since it can't handle the possibly variable
25612 length fields that follow. However, if you omit the optional
25613 fields, the assembler outputs zeros for all optional fields
25614 anyway, giving each variable-length field its minimum length
25615 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25616 pseudo-op at all. */
25618 /* An all-zero word flags the start of the tbtab, for debuggers
25619 that have to find it by searching forward from the entry
25620 point or from the current pc. */
25621 fputs ("\t.long 0\n", file);
25623 /* Tbtab format type. Use format type 0. */
25624 fputs ("\t.byte 0,", file);
25626 /* Language type. Unfortunately, there does not seem to be any
25627 official way to discover the language being compiled, so we
25628 use language_string.
25629 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25630 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25631 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
25632 either, so for now use 0. */
25633 if (lang_GNU_C ()
25634 || ! strcmp (language_string, "GNU GIMPLE")
25635 || ! strcmp (language_string, "GNU Go")
25636 || ! strcmp (language_string, "libgccjit"))
25637 i = 0;
25638 else if (! strcmp (language_string, "GNU F77")
25639 || lang_GNU_Fortran ())
25640 i = 1;
25641 else if (! strcmp (language_string, "GNU Pascal"))
25642 i = 2;
25643 else if (! strcmp (language_string, "GNU Ada"))
25644 i = 3;
25645 else if (lang_GNU_CXX ()
25646 || ! strcmp (language_string, "GNU Objective-C++"))
25647 i = 9;
25648 else if (! strcmp (language_string, "GNU Java"))
25649 i = 13;
25650 else if (! strcmp (language_string, "GNU Objective-C"))
25651 i = 14;
25652 else
25653 gcc_unreachable ();
25654 fprintf (file, "%d,", i);
25656 /* 8 single bit fields: global linkage (not set for C extern linkage,
25657 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25658 from start of procedure stored in tbtab, internal function, function
25659 has controlled storage, function has no toc, function uses fp,
25660 function logs/aborts fp operations. */
25661 /* Assume that fp operations are used if any fp reg must be saved. */
25662 fprintf (file, "%d,",
25663 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25665 /* 6 bitfields: function is interrupt handler, name present in
25666 proc table, function calls alloca, on condition directives
25667 (controls stack walks, 3 bits), saves condition reg, saves
25668 link reg. */
25669 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25670 set up as a frame pointer, even when there is no alloca call. */
25671 fprintf (file, "%d,",
25672 ((optional_tbtab << 6)
25673 | ((optional_tbtab & frame_pointer_needed) << 5)
25674 | (info->cr_save_p << 1)
25675 | (info->lr_save_p)));
25677 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25678 (6 bits). */
25679 fprintf (file, "%d,",
25680 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25682 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25683 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25685 if (optional_tbtab)
25687 /* Compute the parameter info from the function decl argument
25688 list. */
25689 tree decl;
25690 int next_parm_info_bit = 31;
25692 for (decl = DECL_ARGUMENTS (current_function_decl);
25693 decl; decl = DECL_CHAIN (decl))
25695 rtx parameter = DECL_INCOMING_RTL (decl);
25696 machine_mode mode = GET_MODE (parameter);
25698 if (GET_CODE (parameter) == REG)
25700 if (SCALAR_FLOAT_MODE_P (mode))
25702 int bits;
25704 float_parms++;
25706 switch (mode)
25708 case SFmode:
25709 case SDmode:
25710 bits = 0x2;
25711 break;
25713 case DFmode:
25714 case DDmode:
25715 case TFmode:
25716 case TDmode:
25717 bits = 0x3;
25718 break;
25720 default:
25721 gcc_unreachable ();
25724 /* If only one bit will fit, don't OR in this entry. */
25725 if (next_parm_info_bit > 0)
25726 parm_info |= (bits << (next_parm_info_bit - 1));
25727 next_parm_info_bit -= 2;
25729 else
25731 fixed_parms += ((GET_MODE_SIZE (mode)
25732 + (UNITS_PER_WORD - 1))
25733 / UNITS_PER_WORD);
25734 next_parm_info_bit -= 1;
25740 /* Number of fixed point parameters. */
25741 /* This is actually the number of words of fixed point parameters; thus
25742 an 8-byte struct counts as 2, and thus the maximum value is 8. */
25743 fprintf (file, "%d,", fixed_parms);
25745 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25746 all on stack. */
25747 /* This is actually the number of fp registers that hold parameters;
25748 and thus the maximum value is 13. */
25749 /* Set parameters on stack bit if parameters are not in their original
25750 registers, regardless of whether they are on the stack? Xlc
25751 seems to set the bit when not optimizing. */
25752 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25754 if (! optional_tbtab)
25755 return;
25757 /* Optional fields follow. Some are variable length. */
25759 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25760 11 double float. */
25761 /* There is an entry for each parameter in a register, in the order that
25762 they occur in the parameter list. Any intervening arguments on the
25763 stack are ignored. If the list overflows a long (max possible length
25764 34 bits) then completely leave off all elements that don't fit. */
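/* Worked example of the packing described above (hypothetical signature):
   for f (int i, double d, float g) passed entirely in registers, the
   entries 0 (fixed), 11 (double) and 10 (single) are packed from bit 31
   downward, so parm_info = (0x3 << 29) | (0x2 << 27) = 0x70000000.  */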
25765 /* Only emit this long if there was at least one parameter. */
25766 if (fixed_parms || float_parms)
25767 fprintf (file, "\t.long %d\n", parm_info);
25769 /* Offset from start of code to tb table. */
25770 fputs ("\t.long ", file);
25771 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25772 RS6000_OUTPUT_BASENAME (file, fname);
25773 putc ('-', file);
25774 rs6000_output_function_entry (file, fname);
25775 putc ('\n', file);
25777 /* Interrupt handler mask. */
25778 /* Omit this long, since we never set the interrupt handler bit
25779 above. */
25781 /* Number of CTL (controlled storage) anchors. */
25782 /* Omit this long, since the has_ctl bit is never set above. */
25784 /* Displacement into stack of each CTL anchor. */
25785 /* Omit this list of longs, because there are no CTL anchors. */
25787 /* Length of function name. */
25788 if (*fname == '*')
25789 ++fname;
25790 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25792 /* Function name. */
25793 assemble_string (fname, strlen (fname));
25795 /* Register for alloca automatic storage; this is always reg 31.
25796 Only emit this if the alloca bit was set above. */
25797 if (frame_pointer_needed)
25798 fputs ("\t.byte 31\n", file);
25800 fputs ("\t.align 2\n", file);
25804 /* A C compound statement that outputs the assembler code for a thunk
25805 function, used to implement C++ virtual function calls with
25806 multiple inheritance. The thunk acts as a wrapper around a virtual
25807 function, adjusting the implicit object parameter before handing
25808 control off to the real function.
25810 First, emit code to add the integer DELTA to the location that
25811 contains the incoming first argument. Assume that this argument
25812 contains a pointer, and is the one used to pass the `this' pointer
25813 in C++. This is the incoming argument *before* the function
25814 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25815 values of all other incoming arguments.
25817 After the addition, emit code to jump to FUNCTION, which is a
25818 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25819 not touch the return address. Hence returning from FUNCTION will
25820 return to whoever called the current `thunk'.
25822 The effect must be as if FUNCTION had been called directly with the
25823 adjusted first argument. This macro is responsible for emitting
25824 all of the code for a thunk function; output_function_prologue()
25825 and output_function_epilogue() are not invoked.
25827 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25828 been extracted from it.) It might possibly be useful on some
25829 targets, but probably not.
25831 If you do not define this macro, the target-independent code in the
25832 C++ frontend will generate a less efficient heavyweight thunk that
25833 calls FUNCTION instead of jumping to it. The generic approach does
25834 not support varargs. */
25836 static void
25837 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25838 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25839 tree function)
25841 rtx this_rtx, funexp;
25842 rtx_insn *insn;
25844 reload_completed = 1;
25845 epilogue_completed = 1;
25847 /* Mark the end of the (empty) prologue. */
25848 emit_note (NOTE_INSN_PROLOGUE_END);
25850 /* Find the "this" pointer. If the function returns a structure,
25851 the structure return pointer is in r3. */
25852 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25853 this_rtx = gen_rtx_REG (Pmode, 4);
25854 else
25855 this_rtx = gen_rtx_REG (Pmode, 3);
25857 /* Apply the constant offset, if required. */
25858 if (delta)
25859 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25861 /* Apply the offset from the vtable, if required. */
25862 if (vcall_offset)
25864 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25865 rtx tmp = gen_rtx_REG (Pmode, 12);
25867 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25868 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25870 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25871 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25873 else
25875 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25877 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25879 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25882 /* Generate a tail call to the target function. */
25883 if (!TREE_USED (function))
25885 assemble_external (function);
25886 TREE_USED (function) = 1;
25888 funexp = XEXP (DECL_RTL (function), 0);
25889 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25891 #if TARGET_MACHO
25892 if (MACHOPIC_INDIRECT)
25893 funexp = machopic_indirect_call_target (funexp);
25894 #endif
25896 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25897 generate sibcall RTL explicitly. */
25898 insn = emit_call_insn (
25899 gen_rtx_PARALLEL (VOIDmode,
25900 gen_rtvec (4,
25901 gen_rtx_CALL (VOIDmode,
25902 funexp, const0_rtx),
25903 gen_rtx_USE (VOIDmode, const0_rtx),
25904 gen_rtx_USE (VOIDmode,
25905 gen_rtx_REG (SImode,
25906 LR_REGNO)),
25907 simple_return_rtx)));
25908 SIBLING_CALL_P (insn) = 1;
25909 emit_barrier ();
25911 /* Ensure we have a global entry point for the thunk. ??? We could
25912 avoid that if the target routine doesn't need a global entry point,
25913 but we do not know whether this is the case at this point. */
25914 if (DEFAULT_ABI == ABI_ELFv2)
25915 cfun->machine->r2_setup_needed = true;
25917 /* Run just enough of rest_of_compilation to get the insns emitted.
25918 There's not really enough bulk here to make other passes such as
25919 instruction scheduling worthwhile. Note that use_thunk calls
25920 assemble_start_function and assemble_end_function. */
25921 insn = get_insns ();
25922 shorten_branches (insn);
25923 final_start_function (insn, file, 1);
25924 final (insn, file, 1);
25925 final_end_function ();
25927 reload_completed = 0;
25928 epilogue_completed = 0;
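/* A hedged sketch (editor's illustration, not emitted verbatim by the
   code above) of what a thunk with a small DELTA and no VCALL_OFFSET
   might assemble to; exact registers and mnemonics depend on the ABI:

       addi 3,3,DELTA        # adjust the incoming `this' pointer in r3
       b    function         # tail-call the real function  */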
25931 /* A quick summary of the various types of 'constant-pool tables'
25932 under PowerPC:
25934 Target Flags Name One table per
25935 AIX (none) AIX TOC object file
25936 AIX -mfull-toc AIX TOC object file
25937 AIX -mminimal-toc AIX minimal TOC translation unit
25938 SVR4/EABI (none) SVR4 SDATA object file
25939 SVR4/EABI -fpic SVR4 pic object file
25940 SVR4/EABI -fPIC SVR4 PIC translation unit
25941 SVR4/EABI -mrelocatable EABI TOC function
25942 SVR4/EABI -maix AIX TOC object file
25943 SVR4/EABI -maix -mminimal-toc
25944 AIX minimal TOC translation unit
25946 Name Reg. Set by entries contains:
25947 made by addrs? fp? sum?
25949 AIX TOC 2 crt0 as Y option option
25950 AIX minimal TOC 30 prolog gcc Y Y option
25951 SVR4 SDATA 13 crt0 gcc N Y N
25952 SVR4 pic 30 prolog ld Y not yet N
25953 SVR4 PIC 30 prolog gcc Y option option
25954 EABI TOC 30 prolog gcc Y option option
25958 /* Hash functions for the hash table. */
25960 static unsigned
25961 rs6000_hash_constant (rtx k)
25963 enum rtx_code code = GET_CODE (k);
25964 machine_mode mode = GET_MODE (k);
25965 unsigned result = (code << 3) ^ mode;
25966 const char *format;
25967 int flen, fidx;
25969 format = GET_RTX_FORMAT (code);
25970 flen = strlen (format);
25971 fidx = 0;
25973 switch (code)
25975 case LABEL_REF:
25976 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25978 case CONST_WIDE_INT:
25980 int i;
25981 flen = CONST_WIDE_INT_NUNITS (k);
25982 for (i = 0; i < flen; i++)
25983 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25984 return result;
25987 case CONST_DOUBLE:
25988 if (mode != VOIDmode)
25989 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25990 flen = 2;
25991 break;
25993 case CODE_LABEL:
25994 fidx = 3;
25995 break;
25997 default:
25998 break;
26001 for (; fidx < flen; fidx++)
26002 switch (format[fidx])
26004 case 's':
26006 unsigned i, len;
26007 const char *str = XSTR (k, fidx);
26008 len = strlen (str);
26009 result = result * 613 + len;
26010 for (i = 0; i < len; i++)
26011 result = result * 613 + (unsigned) str[i];
26012 break;
26014 case 'u':
26015 case 'e':
26016 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
26017 break;
26018 case 'i':
26019 case 'n':
26020 result = result * 613 + (unsigned) XINT (k, fidx);
26021 break;
26022 case 'w':
26023 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
26024 result = result * 613 + (unsigned) XWINT (k, fidx);
26025 else
26027 size_t i;
26028 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
26029 result = result * 613 + (unsigned) (XWINT (k, fidx)
26030 >> CHAR_BIT * i);
26032 break;
26033 case '0':
26034 break;
26035 default:
26036 gcc_unreachable ();
26039 return result;
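/* The hash above mixes successive fields with the multiplicative
   constants 613 and 1231.  A minimal standalone sketch of the same
   scheme over a byte string (an editor's illustration only; the helper
   name is made up and nothing in the compiler uses it): */
#if 0
static unsigned
mix_string_613 (const char *s)
{
  unsigned h = 0;
  while (*s)
    h = h * 613 + (unsigned char) *s++;  /* same update as the 's' case */
  return h;
}
#endif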
26042 hashval_t
26043 toc_hasher::hash (toc_hash_struct *thc)
26045 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
26048 /* Compare H1 and H2 for equivalence. */
26050 bool
26051 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
26053 rtx r1 = h1->key;
26054 rtx r2 = h2->key;
26056 if (h1->key_mode != h2->key_mode)
26057 return 0;
26059 return rtx_equal_p (r1, r2);
26062 /* These are the names given by the C++ front-end to vtables, and
26063 vtable-like objects. Ideally, this logic should not be here;
26064 instead, there should be some programmatic way of inquiring as
26065 to whether or not an object is a vtable. */
26067 #define VTABLE_NAME_P(NAME) \
26068 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
26069 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
26070 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
26071 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
26072 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
26074 #ifdef NO_DOLLAR_IN_LABEL
26075 /* Return a GGC-allocated character string translating dollar signs in
26076 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
26078 const char *
26079 rs6000_xcoff_strip_dollar (const char *name)
26081 char *strip, *p;
26082 const char *q;
26083 size_t len;
26085 q = (const char *) strchr (name, '$');
26087 if (q == 0 || q == name)
26088 return name;
26090 len = strlen (name);
26091 strip = XALLOCAVEC (char, len + 1);
26092 strcpy (strip, name);
26093 p = strip + (q - name);
26094 while (p)
26096 *p = '_';
26097 p = strchr (p + 1, '$');
26100 return ggc_alloc_string (strip, len);
26102 #endif
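/* Usage sketch (hypothetical input): rs6000_xcoff_strip_dollar ("a$b$c")
   yields "a_b_c"; a name without '$', or one whose first character is
   '$', is returned unchanged.  */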
26104 void
26105 rs6000_output_symbol_ref (FILE *file, rtx x)
26107 /* Currently C++ TOC references to vtables can be emitted before it
26108 is decided whether the vtable is public or private. If this is
26109 the case, then the linker will eventually complain that there is
26110 a reference to an unknown section. Thus, for vtables only,
26111 we emit the TOC reference to reference the symbol and not the
26112 section. */
26113 const char *name = XSTR (x, 0);
26115 tree decl = SYMBOL_REF_DECL (x);
26116 if (decl /* sync condition with assemble_external () */
26117 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
26118 && (TREE_CODE (decl) == VAR_DECL
26119 || TREE_CODE (decl) == FUNCTION_DECL)
26120 && name[strlen (name) - 1] != ']')
26122 name = concat (name,
26123 (TREE_CODE (decl) == FUNCTION_DECL
26124 ? "[DS]" : "[UA]"),
26125 NULL);
26126 XSTR (x, 0) = name;
26129 if (VTABLE_NAME_P (name))
26131 RS6000_OUTPUT_BASENAME (file, name);
26133 else
26134 assemble_name (file, name);
26137 /* Output a TOC entry. We derive the entry name from what is being
26138 written. */
26140 void
26141 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
26143 char buf[256];
26144 const char *name = buf;
26145 rtx base = x;
26146 HOST_WIDE_INT offset = 0;
26148 gcc_assert (!TARGET_NO_TOC);
26150 /* When the linker won't eliminate them, don't output duplicate
26151 TOC entries (this happens on AIX if there is any kind of TOC,
26152 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
26153 CODE_LABELs. */
26154 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
26156 struct toc_hash_struct *h;
26158 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
26159 time because GGC is not initialized at that point. */
26160 if (toc_hash_table == NULL)
26161 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
26163 h = ggc_alloc<toc_hash_struct> ();
26164 h->key = x;
26165 h->key_mode = mode;
26166 h->labelno = labelno;
26168 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
26169 if (*found == NULL)
26170 *found = h;
26171 else /* This is indeed a duplicate.
26172 Set this label equal to that label. */
26174 fputs ("\t.set ", file);
26175 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26176 fprintf (file, "%d,", labelno);
26177 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26178 fprintf (file, "%d\n", ((*found)->labelno));
26180 #ifdef HAVE_AS_TLS
26181 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
26182 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
26183 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
26185 fputs ("\t.set ", file);
26186 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26187 fprintf (file, "%d,", labelno);
26188 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26189 fprintf (file, "%d\n", ((*found)->labelno));
26191 #endif
26192 return;
26196 /* If we're going to put a double constant in the TOC, make sure it's
26197 aligned properly when strict alignment is on. */
26198 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
26199 && STRICT_ALIGNMENT
26200 && GET_MODE_BITSIZE (mode) >= 64
26201 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
26202 ASM_OUTPUT_ALIGN (file, 3);
26205 (*targetm.asm_out.internal_label) (file, "LC", labelno);
26207 /* Handle FP constants specially. Note that if we have a minimal
26208 TOC, things we put here aren't actually in the TOC, so we can allow
26209 FP constants. */
26210 if (GET_CODE (x) == CONST_DOUBLE
26211 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
26213 REAL_VALUE_TYPE rv;
26214 long k[4];
26216 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26217 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26218 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26219 else
26220 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26222 if (TARGET_64BIT)
26224 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26225 fputs (DOUBLE_INT_ASM_OP, file);
26226 else
26227 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26228 k[0] & 0xffffffff, k[1] & 0xffffffff,
26229 k[2] & 0xffffffff, k[3] & 0xffffffff);
26230 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26231 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26232 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26233 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26234 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26235 return;
26237 else
26239 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26240 fputs ("\t.long ", file);
26241 else
26242 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26243 k[0] & 0xffffffff, k[1] & 0xffffffff,
26244 k[2] & 0xffffffff, k[3] & 0xffffffff);
26245 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26246 k[0] & 0xffffffff, k[1] & 0xffffffff,
26247 k[2] & 0xffffffff, k[3] & 0xffffffff);
26248 return;
26251 else if (GET_CODE (x) == CONST_DOUBLE
26252 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26254 REAL_VALUE_TYPE rv;
26255 long k[2];
26257 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26259 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26260 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26261 else
26262 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26264 if (TARGET_64BIT)
26266 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26267 fputs (DOUBLE_INT_ASM_OP, file);
26268 else
26269 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26270 k[0] & 0xffffffff, k[1] & 0xffffffff);
26271 fprintf (file, "0x%lx%08lx\n",
26272 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26273 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26274 return;
26276 else
26278 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26279 fputs ("\t.long ", file);
26280 else
26281 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26282 k[0] & 0xffffffff, k[1] & 0xffffffff);
26283 fprintf (file, "0x%lx,0x%lx\n",
26284 k[0] & 0xffffffff, k[1] & 0xffffffff);
26285 return;
26288 else if (GET_CODE (x) == CONST_DOUBLE
26289 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26291 REAL_VALUE_TYPE rv;
26292 long l;
26294 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26295 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26296 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26297 else
26298 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26300 if (TARGET_64BIT)
26302 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26303 fputs (DOUBLE_INT_ASM_OP, file);
26304 else
26305 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26306 if (WORDS_BIG_ENDIAN)
26307 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26308 else
26309 fprintf (file, "0x%lx\n", l & 0xffffffff);
26310 return;
26312 else
26314 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26315 fputs ("\t.long ", file);
26316 else
26317 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26318 fprintf (file, "0x%lx\n", l & 0xffffffff);
26319 return;
26322 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26324 unsigned HOST_WIDE_INT low;
26325 HOST_WIDE_INT high;
26327 low = INTVAL (x) & 0xffffffff;
26328 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26330 /* TOC entries are always Pmode-sized, so on big-endian targets
26331 smaller integer constants in the TOC need to be padded.
26332 (This is still a win over putting the constants in
26333 a separate constant pool, because then we'd have
26334 to have both a TOC entry _and_ the actual constant.)
26336 For a 32-bit target, CONST_INT values are loaded and shifted
26337 entirely within `low' and can be stored in one TOC entry. */
26339 /* It would be easy to make this work, but it doesn't now. */
26340 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26342 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26344 low |= high << 32;
26345 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26346 high = (HOST_WIDE_INT) low >> 32;
26347 low &= 0xffffffff;
26350 if (TARGET_64BIT)
26352 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26353 fputs (DOUBLE_INT_ASM_OP, file);
26354 else
26355 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26356 (long) high & 0xffffffff, (long) low & 0xffffffff);
26357 fprintf (file, "0x%lx%08lx\n",
26358 (long) high & 0xffffffff, (long) low & 0xffffffff);
26359 return;
26361 else
26363 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26365 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26366 fputs ("\t.long ", file);
26367 else
26368 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26369 (long) high & 0xffffffff, (long) low & 0xffffffff);
26370 fprintf (file, "0x%lx,0x%lx\n",
26371 (long) high & 0xffffffff, (long) low & 0xffffffff);
26373 else
26375 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26376 fputs ("\t.long ", file);
26377 else
26378 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26379 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26381 return;
26385 if (GET_CODE (x) == CONST)
26387 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26388 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26390 base = XEXP (XEXP (x, 0), 0);
26391 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26394 switch (GET_CODE (base))
26396 case SYMBOL_REF:
26397 name = XSTR (base, 0);
26398 break;
26400 case LABEL_REF:
26401 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26402 CODE_LABEL_NUMBER (XEXP (base, 0)));
26403 break;
26405 case CODE_LABEL:
26406 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26407 break;
26409 default:
26410 gcc_unreachable ();
26413 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26414 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26415 else
26417 fputs ("\t.tc ", file);
26418 RS6000_OUTPUT_BASENAME (file, name);
26420 if (offset < 0)
26421 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26422 else if (offset)
26423 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26425 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26426 after other TOC symbols, reducing overflow of small TOC access
26427 to [TC] symbols. */
26428 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26429 ? "[TE]," : "[TC],", file);
26432 /* Currently C++ TOC references to vtables can be emitted before it
26433 is decided whether the vtable is public or private. If this is
26434 the case, then the linker will eventually complain that there is
26435 a TOC reference to an unknown section. Thus, for vtables only,
26436 we emit the TOC reference to reference the symbol and not the
26437 section. */
26438 if (VTABLE_NAME_P (name))
26440 RS6000_OUTPUT_BASENAME (file, name);
26441 if (offset < 0)
26442 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26443 else if (offset > 0)
26444 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26446 else
26447 output_addr_const (file, x);
26449 #if HAVE_AS_TLS
26450 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26451 && SYMBOL_REF_TLS_MODEL (base) != 0)
26453 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26454 fputs ("@le", file);
26455 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26456 fputs ("@ie", file);
26457 /* Use global-dynamic for local-dynamic. */
26458 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26459 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26461 putc ('\n', file);
26462 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26463 fputs ("\t.tc .", file);
26464 RS6000_OUTPUT_BASENAME (file, name);
26465 fputs ("[TC],", file);
26466 output_addr_const (file, x);
26467 fputs ("@m", file);
26470 #endif
26472 putc ('\n', file);
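/* Illustrative only (exact label prefixes come from the target macros):
   on AIX a TOC entry for the symbol `foo' at label number 3 comes out
   roughly as

       LC..3:
               .tc foo[TC],foo

   while a constant already present in toc_hash_table instead produces a
   `.set LC..3,LC..1' aliasing the new label to the existing one.  */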
26475 /* Output an assembler pseudo-op to write an ASCII string of N characters
26476 starting at P to FILE.
26478 On the RS/6000, we have to do this using the .byte operation and
26479 write out special characters outside the quoted string.
26480 Also, the assembler is broken; very long strings are truncated,
26481 so we must artificially break them up early. */
26483 void
26484 output_ascii (FILE *file, const char *p, int n)
26486 char c;
26487 int i, count_string;
26488 const char *for_string = "\t.byte \"";
26489 const char *for_decimal = "\t.byte ";
26490 const char *to_close = NULL;
26492 count_string = 0;
26493 for (i = 0; i < n; i++)
26495 c = *p++;
26496 if (c >= ' ' && c < 0177)
26498 if (for_string)
26499 fputs (for_string, file);
26500 putc (c, file);
26502 /* Write two quotes to get one. */
26503 if (c == '"')
26505 putc (c, file);
26506 ++count_string;
26509 for_string = NULL;
26510 for_decimal = "\"\n\t.byte ";
26511 to_close = "\"\n";
26512 ++count_string;
26514 if (count_string >= 512)
26516 fputs (to_close, file);
26518 for_string = "\t.byte \"";
26519 for_decimal = "\t.byte ";
26520 to_close = NULL;
26521 count_string = 0;
26524 else
26526 if (for_decimal)
26527 fputs (for_decimal, file);
26528 fprintf (file, "%d", c);
26530 for_string = "\n\t.byte \"";
26531 for_decimal = ", ";
26532 to_close = "\n";
26533 count_string = 0;
26537 /* Now close the string if we have written one. Then end the line. */
26538 if (to_close)
26539 fputs (to_close, file);
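/* A worked example: output_ascii (file, "ab\"\n", 4) emits

       .byte "ab"""
       .byte 10

   since a quote inside the string is doubled and the non-printable
   newline falls back to a decimal .byte item.  */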
26542 /* Generate a unique section name for FILENAME for a section type
26543 represented by SECTION_DESC. Output goes into BUF.
26545 SECTION_DESC can be any string, as long as it is different for each
26546 possible section type.
26548 We name the section in the same manner as xlc. The name begins with an
26549 underscore followed by the filename (after stripping any leading directory
26550 names) with the last period replaced by the string SECTION_DESC. If
26551 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26552 the name. */
26554 void
26555 rs6000_gen_section_name (char **buf, const char *filename,
26556 const char *section_desc)
26558 const char *q, *after_last_slash, *last_period = 0;
26559 char *p;
26560 int len;
26562 after_last_slash = filename;
26563 for (q = filename; *q; q++)
26565 if (*q == '/')
26566 after_last_slash = q + 1;
26567 else if (*q == '.')
26568 last_period = q;
26571 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26572 *buf = (char *) xmalloc (len);
26574 p = *buf;
26575 *p++ = '_';
26577 for (q = after_last_slash; *q; q++)
26579 if (q == last_period)
26581 strcpy (p, section_desc);
26582 p += strlen (section_desc);
26583 break;
26586 else if (ISALNUM (*q))
26587 *p++ = *q;
26590 if (last_period == 0)
26591 strcpy (p, section_desc);
26592 else
26593 *p = '\0';
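/* Example (hypothetical descriptor): for FILENAME "dir/foo.c" and
   SECTION_DESC "_bss_" the generated name is "_foo_bss_"; the leading
   directory and everything from the last period onward are dropped,
   and the descriptor takes their place.  */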
26596 /* Emit profile function. */
26598 void
26599 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26601 /* Non-standard profiling for kernels, which just saves LR then calls
26602 _mcount without worrying about arg saves. The idea is to change
26603 the function prologue as little as possible as it isn't easy to
26604 account for arg save/restore code added just for _mcount. */
26605 if (TARGET_PROFILE_KERNEL)
26606 return;
26608 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26610 #ifndef NO_PROFILE_COUNTERS
26611 # define NO_PROFILE_COUNTERS 0
26612 #endif
26613 if (NO_PROFILE_COUNTERS)
26614 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26615 LCT_NORMAL, VOIDmode, 0);
26616 else
26618 char buf[30];
26619 const char *label_name;
26620 rtx fun;
26622 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26623 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26624 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26626 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26627 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26630 else if (DEFAULT_ABI == ABI_DARWIN)
26632 const char *mcount_name = RS6000_MCOUNT;
26633 int caller_addr_regno = LR_REGNO;
26635 /* Be conservative and always set this, at least for now. */
26636 crtl->uses_pic_offset_table = 1;
26638 #if TARGET_MACHO
26639 /* For PIC code, set up a stub and collect the caller's address
26640 from r0, which is where the prologue puts it. */
26641 if (MACHOPIC_INDIRECT
26642 && crtl->uses_pic_offset_table)
26643 caller_addr_regno = 0;
26644 #endif
26645 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26646 LCT_NORMAL, VOIDmode, 1,
26647 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26651 /* Write function profiler code. */
26653 void
26654 output_function_profiler (FILE *file, int labelno)
26656 char buf[100];
26658 switch (DEFAULT_ABI)
26660 default:
26661 gcc_unreachable ();
26663 case ABI_V4:
26664 if (!TARGET_32BIT)
26666 warning (0, "no profiling of 64-bit code for this ABI");
26667 return;
26669 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26670 fprintf (file, "\tmflr %s\n", reg_names[0]);
26671 if (NO_PROFILE_COUNTERS)
26673 asm_fprintf (file, "\tstw %s,4(%s)\n",
26674 reg_names[0], reg_names[1]);
26676 else if (TARGET_SECURE_PLT && flag_pic)
26678 if (TARGET_LINK_STACK)
26680 char name[32];
26681 get_ppc476_thunk_name (name);
26682 asm_fprintf (file, "\tbl %s\n", name);
26684 else
26685 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26686 asm_fprintf (file, "\tstw %s,4(%s)\n",
26687 reg_names[0], reg_names[1]);
26688 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26689 asm_fprintf (file, "\taddis %s,%s,",
26690 reg_names[12], reg_names[12]);
26691 assemble_name (file, buf);
26692 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26693 assemble_name (file, buf);
26694 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26696 else if (flag_pic == 1)
26698 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26699 asm_fprintf (file, "\tstw %s,4(%s)\n",
26700 reg_names[0], reg_names[1]);
26701 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26702 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26703 assemble_name (file, buf);
26704 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26706 else if (flag_pic > 1)
26708 asm_fprintf (file, "\tstw %s,4(%s)\n",
26709 reg_names[0], reg_names[1]);
26710 /* Now, we need to get the address of the label. */
26711 if (TARGET_LINK_STACK)
26713 char name[32];
26714 get_ppc476_thunk_name (name);
26715 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26716 assemble_name (file, buf);
26717 fputs ("-.\n1:", file);
26718 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26719 asm_fprintf (file, "\taddi %s,%s,4\n",
26720 reg_names[11], reg_names[11]);
26722 else
26724 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26725 assemble_name (file, buf);
26726 fputs ("-.\n1:", file);
26727 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26729 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26730 reg_names[0], reg_names[11]);
26731 asm_fprintf (file, "\tadd %s,%s,%s\n",
26732 reg_names[0], reg_names[0], reg_names[11]);
26734 else
26736 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26737 assemble_name (file, buf);
26738 fputs ("@ha\n", file);
26739 asm_fprintf (file, "\tstw %s,4(%s)\n",
26740 reg_names[0], reg_names[1]);
26741 asm_fprintf (file, "\tla %s,", reg_names[0]);
26742 assemble_name (file, buf);
26743 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26746 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26747 fprintf (file, "\tbl %s%s\n",
26748 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26749 break;
26751 case ABI_AIX:
26752 case ABI_ELFv2:
26753 case ABI_DARWIN:
26754 /* Don't do anything, done in output_profile_hook (). */
26755 break;
26761 /* The following variable holds the last issued insn. */
26763 static rtx last_scheduled_insn;
26765 /* The following variable helps to balance issuing of load and
26766 store instructions. */
26768 static int load_store_pendulum;
26770 /* Power4 load update and store update instructions are cracked into a
26771 load or store and an integer insn which are executed in the same cycle.
26772 Branches have their own dispatch slot which does not count against the
26773 GCC issue rate, but it changes the program flow so there are no other
26774 instructions to issue in this cycle. */
26776 static int
26777 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26779 last_scheduled_insn = insn;
26780 if (GET_CODE (PATTERN (insn)) == USE
26781 || GET_CODE (PATTERN (insn)) == CLOBBER)
26783 cached_can_issue_more = more;
26784 return cached_can_issue_more;
26787 if (insn_terminates_group_p (insn, current_group))
26789 cached_can_issue_more = 0;
26790 return cached_can_issue_more;
26793 /* If the insn is not recognized, it has no reservation; leave MORE unchanged. */
26794 if (recog_memoized (insn) < 0)
26795 return more;
26797 if (rs6000_sched_groups)
26799 if (is_microcoded_insn (insn))
26800 cached_can_issue_more = 0;
26801 else if (is_cracked_insn (insn))
26802 cached_can_issue_more = more > 2 ? more - 2 : 0;
26803 else
26804 cached_can_issue_more = more - 1;
26806 return cached_can_issue_more;
26809 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26810 return 0;
26812 cached_can_issue_more = more - 1;
26813 return cached_can_issue_more;
26816 static int
26817 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26819 int r = rs6000_variable_issue_1 (insn, more);
26820 if (verbose)
26821 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26822 return r;
26825 /* Adjust the cost of a scheduling dependency. Return the new cost of
26826 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26828 static int
26829 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26831 enum attr_type attr_type;
26833 if (! recog_memoized (insn))
26834 return 0;
26836 switch (REG_NOTE_KIND (link))
26838 case REG_DEP_TRUE:
26840 /* Data dependency; DEP_INSN writes a register that INSN reads
26841 some cycles later. */
26843 /* Separate a load from a narrower, dependent store. */
26844 if (rs6000_sched_groups
26845 && GET_CODE (PATTERN (insn)) == SET
26846 && GET_CODE (PATTERN (dep_insn)) == SET
26847 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26848 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26849 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26850 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26851 return cost + 14;
26853 attr_type = get_attr_type (insn);
26855 switch (attr_type)
26857 case TYPE_JMPREG:
26858 /* Tell the first scheduling pass about the latency between
26859 a mtctr and bctr (and mtlr and br/blr). The first
26860 scheduling pass will not know about this latency since
26861 the mtctr instruction, which has the latency associated
26862 to it, will be generated by reload. */
26863 return 4;
26864 case TYPE_BRANCH:
26865 /* Leave some extra cycles between a compare and its
26866 dependent branch, to inhibit expensive mispredicts. */
26867 if ((rs6000_cpu_attr == CPU_PPC603
26868 || rs6000_cpu_attr == CPU_PPC604
26869 || rs6000_cpu_attr == CPU_PPC604E
26870 || rs6000_cpu_attr == CPU_PPC620
26871 || rs6000_cpu_attr == CPU_PPC630
26872 || rs6000_cpu_attr == CPU_PPC750
26873 || rs6000_cpu_attr == CPU_PPC7400
26874 || rs6000_cpu_attr == CPU_PPC7450
26875 || rs6000_cpu_attr == CPU_PPCE5500
26876 || rs6000_cpu_attr == CPU_PPCE6500
26877 || rs6000_cpu_attr == CPU_POWER4
26878 || rs6000_cpu_attr == CPU_POWER5
26879 || rs6000_cpu_attr == CPU_POWER7
26880 || rs6000_cpu_attr == CPU_POWER8
26881 || rs6000_cpu_attr == CPU_CELL)
26882 && recog_memoized (dep_insn)
26883 && (INSN_CODE (dep_insn) >= 0))
26885 switch (get_attr_type (dep_insn))
26887 case TYPE_CMP:
26888 case TYPE_FPCOMPARE:
26889 case TYPE_CR_LOGICAL:
26890 case TYPE_DELAYED_CR:
26891 return cost + 2;
26892 case TYPE_EXTS:
26893 case TYPE_MUL:
26894 if (get_attr_dot (dep_insn) == DOT_YES)
26895 return cost + 2;
26896 else
26897 break;
26898 case TYPE_SHIFT:
26899 if (get_attr_dot (dep_insn) == DOT_YES
26900 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26901 return cost + 2;
26902 else
26903 break;
26904 default:
26905 break;
26907 break;
26909 case TYPE_STORE:
26910 case TYPE_FPSTORE:
26911 if ((rs6000_cpu == PROCESSOR_POWER6)
26912 && recog_memoized (dep_insn)
26913 && (INSN_CODE (dep_insn) >= 0))
26916 if (GET_CODE (PATTERN (insn)) != SET)
26917 /* If this happens, we have to extend this to schedule
26918 optimally. Return default for now. */
26919 return cost;
26921 /* Adjust the cost for the case where the value written
26922 by a fixed point operation is used as the address
26923 gen value on a store. */
26924 switch (get_attr_type (dep_insn))
26926 case TYPE_LOAD:
26927 case TYPE_CNTLZ:
26929 if (! store_data_bypass_p (dep_insn, insn))
26930 return get_attr_sign_extend (dep_insn)
26931 == SIGN_EXTEND_YES ? 6 : 4;
26932 break;
26934 case TYPE_SHIFT:
26936 if (! store_data_bypass_p (dep_insn, insn))
26937 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26938 6 : 3;
26939 break;
26941 case TYPE_INTEGER:
26942 case TYPE_ADD:
26943 case TYPE_LOGICAL:
26944 case TYPE_EXTS:
26945 case TYPE_INSERT:
26947 if (! store_data_bypass_p (dep_insn, insn))
26948 return 3;
26949 break;
26951 case TYPE_STORE:
26952 case TYPE_FPLOAD:
26953 case TYPE_FPSTORE:
26955 if (get_attr_update (dep_insn) == UPDATE_YES
26956 && ! store_data_bypass_p (dep_insn, insn))
26957 return 3;
26958 break;
26960 case TYPE_MUL:
26962 if (! store_data_bypass_p (dep_insn, insn))
26963 return 17;
26964 break;
26966 case TYPE_DIV:
26968 if (! store_data_bypass_p (dep_insn, insn))
26969 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26970 break;
26972 default:
26973 break;
26976 break;
26978 case TYPE_LOAD:
26979 if ((rs6000_cpu == PROCESSOR_POWER6)
26980 && recog_memoized (dep_insn)
26981 && (INSN_CODE (dep_insn) >= 0))
26984 /* Adjust the cost for the case where the value written
26985 by a fixed point instruction is used within the address
26986 gen portion of a subsequent load(u)(x). */
26987 switch (get_attr_type (dep_insn))
26989 case TYPE_LOAD:
26990 case TYPE_CNTLZ:
26992 if (set_to_load_agen (dep_insn, insn))
26993 return get_attr_sign_extend (dep_insn)
26994 == SIGN_EXTEND_YES ? 6 : 4;
26995 break;
26997 case TYPE_SHIFT:
26999 if (set_to_load_agen (dep_insn, insn))
27000 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
27001 6 : 3;
27002 break;
27004 case TYPE_INTEGER:
27005 case TYPE_ADD:
27006 case TYPE_LOGICAL:
27007 case TYPE_EXTS:
27008 case TYPE_INSERT:
27010 if (set_to_load_agen (dep_insn, insn))
27011 return 3;
27012 break;
27014 case TYPE_STORE:
27015 case TYPE_FPLOAD:
27016 case TYPE_FPSTORE:
27018 if (get_attr_update (dep_insn) == UPDATE_YES
27019 && set_to_load_agen (dep_insn, insn))
27020 return 3;
27021 break;
27023 case TYPE_MUL:
27025 if (set_to_load_agen (dep_insn, insn))
27026 return 17;
27027 break;
27029 case TYPE_DIV:
27031 if (set_to_load_agen (dep_insn, insn))
27032 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
27033 break;
27035 default:
27036 break;
27039 break;
27041 case TYPE_FPLOAD:
27042 if ((rs6000_cpu == PROCESSOR_POWER6)
27043 && get_attr_update (insn) == UPDATE_NO
27044 && recog_memoized (dep_insn)
27045 && (INSN_CODE (dep_insn) >= 0)
27046 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
27047 return 2;
27049 default:
27050 break;
27053 /* Fall out to return default cost. */
27055 break;
27057 case REG_DEP_OUTPUT:
27058 /* Output dependency; DEP_INSN writes a register that INSN writes some
27059 cycles later. */
27060 if ((rs6000_cpu == PROCESSOR_POWER6)
27061 && recog_memoized (dep_insn)
27062 && (INSN_CODE (dep_insn) >= 0))
27064 attr_type = get_attr_type (insn);
27066 switch (attr_type)
27068 case TYPE_FP:
27069 if (get_attr_type (dep_insn) == TYPE_FP)
27070 return 1;
27071 break;
27072 case TYPE_FPLOAD:
27073 if (get_attr_update (insn) == UPDATE_NO
27074 && get_attr_type (dep_insn) == TYPE_MFFGPR)
27075 return 2;
27076 break;
27077 default:
27078 break;
27081 case REG_DEP_ANTI:
27082 /* Anti dependency; DEP_INSN reads a register that INSN writes some
27083 cycles later. */
27084 return 0;
27086 default:
27087 gcc_unreachable ();
27090 return cost;
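/* For instance, on POWER6 a store whose address register is produced by
   an immediately preceding multiply (TYPE_MUL) that cannot use the
   store-data bypass has its dependence cost raised to 17 cycles by the
   TYPE_STORE case above.  */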
27093 /* Debug version of rs6000_adjust_cost. */
27095 static int
27096 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
27097 int cost)
27099 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
27101 if (ret != cost)
27103 const char *dep;
27105 switch (REG_NOTE_KIND (link))
27107 default: dep = "unknown depencency"; break;
27108 case REG_DEP_TRUE: dep = "data dependency"; break;
27109 case REG_DEP_OUTPUT: dep = "output dependency"; break;
27110 case REG_DEP_ANTI: dep = "anti depencency"; break;
27113 fprintf (stderr,
27114 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
27115 "%s, insn:\n", ret, cost, dep);
27117 debug_rtx (insn);
27120 return ret;
27123 /* Return true if INSN is microcoded, false otherwise. */
27126 static bool
27127 is_microcoded_insn (rtx_insn *insn)
27129 if (!insn || !NONDEBUG_INSN_P (insn)
27130 || GET_CODE (PATTERN (insn)) == USE
27131 || GET_CODE (PATTERN (insn)) == CLOBBER)
27132 return false;
27134 if (rs6000_cpu_attr == CPU_CELL)
27135 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
27137 if (rs6000_sched_groups
27138 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27140 enum attr_type type = get_attr_type (insn);
27141 if ((type == TYPE_LOAD
27142 && get_attr_update (insn) == UPDATE_YES
27143 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
27144 || ((type == TYPE_LOAD || type == TYPE_STORE)
27145 && get_attr_update (insn) == UPDATE_YES
27146 && get_attr_indexed (insn) == INDEXED_YES)
27147 || type == TYPE_MFCR)
27148 return true;
27151 return false;
27154 /* The function returns true if INSN is cracked into 2 instructions
27155 by the processor (and therefore occupies 2 issue slots). */
27157 static bool
27158 is_cracked_insn (rtx_insn *insn)
27160 if (!insn || !NONDEBUG_INSN_P (insn)
27161 || GET_CODE (PATTERN (insn)) == USE
27162 || GET_CODE (PATTERN (insn)) == CLOBBER)
27163 return false;
27165 if (rs6000_sched_groups
27166 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27168 enum attr_type type = get_attr_type (insn);
27169 if ((type == TYPE_LOAD
27170 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27171 && get_attr_update (insn) == UPDATE_NO)
27172 || (type == TYPE_LOAD
27173 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
27174 && get_attr_update (insn) == UPDATE_YES
27175 && get_attr_indexed (insn) == INDEXED_NO)
27176 || (type == TYPE_STORE
27177 && get_attr_update (insn) == UPDATE_YES
27178 && get_attr_indexed (insn) == INDEXED_NO)
27179 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
27180 && get_attr_update (insn) == UPDATE_YES)
27181 || type == TYPE_DELAYED_CR
27182 || (type == TYPE_EXTS
27183 && get_attr_dot (insn) == DOT_YES)
27184 || (type == TYPE_SHIFT
27185 && get_attr_dot (insn) == DOT_YES
27186 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
27187 || (type == TYPE_MUL
27188 && get_attr_dot (insn) == DOT_YES)
27189 || type == TYPE_DIV
27190 || (type == TYPE_INSERT
27191 && get_attr_size (insn) == SIZE_32))
27192 return true;
27195 return false;
27198 /* The function returns true if INSN can be issued only from
27199 the branch slot. */
27201 static bool
27202 is_branch_slot_insn (rtx_insn *insn)
27204 if (!insn || !NONDEBUG_INSN_P (insn)
27205 || GET_CODE (PATTERN (insn)) == USE
27206 || GET_CODE (PATTERN (insn)) == CLOBBER)
27207 return false;
27209 if (rs6000_sched_groups)
27211 enum attr_type type = get_attr_type (insn);
27212 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27213 return true;
27214 return false;
27217 return false;
27220 /* Return true if OUT_INSN sets a value that is used in the
27221 address generation computation of IN_INSN. */
27222 static bool
27223 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27225 rtx out_set, in_set;
27227 /* For performance reasons, only handle the simple case where
27228 both loads are a single_set. */
27229 out_set = single_set (out_insn);
27230 if (out_set)
27232 in_set = single_set (in_insn);
27233 if (in_set)
27234 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27237 return false;
27240 /* Try to determine base/offset/size parts of the given MEM.
27241 Return true if successful, false if the values couldn't all
27242 be determined.
27244 This function only looks for REG or REG+CONST address forms.
27245 REG+REG address form will return false. */
27247 static bool
27248 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27249 HOST_WIDE_INT *size)
27251 rtx addr_rtx;
27252 if (MEM_SIZE_KNOWN_P (mem))
27253 *size = MEM_SIZE (mem);
27254 else
27255 return false;
27257 addr_rtx = (XEXP (mem, 0));
27258 if (GET_CODE (addr_rtx) == PRE_MODIFY)
27259 addr_rtx = XEXP (addr_rtx, 1);
27261 *offset = 0;
27262 while (GET_CODE (addr_rtx) == PLUS
27263 && CONST_INT_P (XEXP (addr_rtx, 1)))
27265 *offset += INTVAL (XEXP (addr_rtx, 1));
27266 addr_rtx = XEXP (addr_rtx, 0);
27268 if (!REG_P (addr_rtx))
27269 return false;
27271 *base = addr_rtx;
27272 return true;
27275 /* Return true if the target storage location of MEM1 is adjacent
27276 to the target storage location of MEM2. */
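/* For example, a 4-byte access at r9+0 and a 4-byte access at r9+4
   share a base register and satisfy off1 + size1 == off2, so they
   are adjacent. */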
27279 static bool
27280 adjacent_mem_locations (rtx mem1, rtx mem2)
27282 rtx reg1, reg2;
27283 HOST_WIDE_INT off1, size1, off2, size2;
27285 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27286 && get_memref_parts (mem2, &reg2, &off2, &size2))
27287 return ((REGNO (reg1) == REGNO (reg2))
27288 && ((off1 + size1 == off2)
27289 || (off2 + size2 == off1)));
27291 return false;
27294 /* This function returns true if it can be determined that the two MEM
27295 locations overlap by at least 1 byte based on base reg/offset/size. */
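/* For example, an 8-byte access at r9+0 overlaps a 4-byte access at
   r9+4, since off1 <= off2 and off1 + size1 > off2. */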
27297 static bool
27298 mem_locations_overlap (rtx mem1, rtx mem2)
27300 rtx reg1, reg2;
27301 HOST_WIDE_INT off1, size1, off2, size2;
27303 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27304 && get_memref_parts (mem2, &reg2, &off2, &size2))
27305 return ((REGNO (reg1) == REGNO (reg2))
27306 && (((off1 <= off2) && (off1 + size1 > off2))
27307 || ((off2 <= off1) && (off2 + size2 > off1))));
27309 return false;
27312 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
27313 Increase the priority to execute INSN earlier, reduce the
27314 priority to execute INSN later. */
27318 static int
27319 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27321 rtx load_mem, str_mem;
27322 /* On machines (like the 750) which have asymmetric integer units,
27323 where one integer unit can do multiply and divides and the other
27324 can't, reduce the priority of multiply/divide so it is scheduled
27325 before other integer operations. */
27327 #if 0
27328 if (! INSN_P (insn))
27329 return priority;
27331 if (GET_CODE (PATTERN (insn)) == USE)
27332 return priority;
27334 switch (rs6000_cpu_attr) {
27335 case CPU_PPC750:
27336 switch (get_attr_type (insn))
27338 default:
27339 break;
27341 case TYPE_MUL:
27342 case TYPE_DIV:
27343 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27344 priority, priority);
27345 if (priority >= 0 && priority < 0x01000000)
27346 priority >>= 3;
27347 break;
27350 #endif
27352 if (insn_must_be_first_in_group (insn)
27353 && reload_completed
27354 && current_sched_info->sched_max_insns_priority
27355 && rs6000_sched_restricted_insns_priority)
27358 /* Prioritize insns that can be dispatched only in the first
27359 dispatch slot. */
27360 if (rs6000_sched_restricted_insns_priority == 1)
27361 /* Attach highest priority to insn. This means that in
27362 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27363 precede 'priority' (critical path) considerations. */
27364 return current_sched_info->sched_max_insns_priority;
27365 else if (rs6000_sched_restricted_insns_priority == 2)
27366 /* Increase priority of insn by a minimal amount. This means that in
27367 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27368 considerations precede dispatch-slot restriction considerations. */
27369 return (priority + 1);
27372 if (rs6000_cpu == PROCESSOR_POWER6
27373 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27374 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27375 /* Attach highest priority to insn if the scheduler has just issued two
27376 stores and this instruction is a load, or two loads and this instruction
27377 is a store. Power6 wants loads and stores scheduled alternately
27378 when possible */
27379 return current_sched_info->sched_max_insns_priority;
27381 return priority;
27384 /* Return true if the instruction is nonpipelined on the Cell. */
27385 static bool
27386 is_nonpipeline_insn (rtx_insn *insn)
27388 enum attr_type type;
27389 if (!insn || !NONDEBUG_INSN_P (insn)
27390 || GET_CODE (PATTERN (insn)) == USE
27391 || GET_CODE (PATTERN (insn)) == CLOBBER)
27392 return false;
27394 type = get_attr_type (insn);
27395 if (type == TYPE_MUL
27396 || type == TYPE_DIV
27397 || type == TYPE_SDIV
27398 || type == TYPE_DDIV
27399 || type == TYPE_SSQRT
27400 || type == TYPE_DSQRT
27401 || type == TYPE_MFCR
27402 || type == TYPE_MFCRF
27403 || type == TYPE_MFJMPR)
27405 return true;
27407 return false;
27411 /* Return how many instructions the machine can issue per cycle. */
27413 static int
27414 rs6000_issue_rate (void)
27416 /* Unless scheduling for register pressure, use issue rate of 1 for
27417 first scheduling pass to decrease degradation. */
27418 if (!reload_completed && !flag_sched_pressure)
27419 return 1;
27421 switch (rs6000_cpu_attr) {
27422 case CPU_RS64A:
27423 case CPU_PPC601: /* ? */
27424 case CPU_PPC7450:
27425 return 3;
27426 case CPU_PPC440:
27427 case CPU_PPC603:
27428 case CPU_PPC750:
27429 case CPU_PPC7400:
27430 case CPU_PPC8540:
27431 case CPU_PPC8548:
27432 case CPU_CELL:
27433 case CPU_PPCE300C2:
27434 case CPU_PPCE300C3:
27435 case CPU_PPCE500MC:
27436 case CPU_PPCE500MC64:
27437 case CPU_PPCE5500:
27438 case CPU_PPCE6500:
27439 case CPU_TITAN:
27440 return 2;
27441 case CPU_PPC476:
27442 case CPU_PPC604:
27443 case CPU_PPC604E:
27444 case CPU_PPC620:
27445 case CPU_PPC630:
27446 return 4;
27447 case CPU_POWER4:
27448 case CPU_POWER5:
27449 case CPU_POWER6:
27450 case CPU_POWER7:
27451 return 5;
27452 case CPU_POWER8:
27453 return 7;
27454 default:
27455 return 1;
27459 /* Return how many instructions to look ahead for better insn
27460 scheduling. */
27462 static int
27463 rs6000_use_sched_lookahead (void)
27465 switch (rs6000_cpu_attr)
27467 case CPU_PPC8540:
27468 case CPU_PPC8548:
27469 return 4;
27471 case CPU_CELL:
27472 return (reload_completed ? 8 : 0);
27474 default:
27475 return 0;
27479 /* We are choosing insn from the ready queue. Return zero if INSN can be
27480 chosen. */
27481 static int
27482 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27484 if (ready_index == 0)
27485 return 0;
27487 if (rs6000_cpu_attr != CPU_CELL)
27488 return 0;
27490 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27492 if (!reload_completed
27493 || is_nonpipeline_insn (insn)
27494 || is_microcoded_insn (insn))
27495 return 1;
27497 return 0;
27500 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27501 and return true. */
27503 static bool
27504 find_mem_ref (rtx pat, rtx *mem_ref)
27506 const char * fmt;
27507 int i, j;
27509 /* stack_tie does not produce any real memory traffic. */
27510 if (tie_operand (pat, VOIDmode))
27511 return false;
27513 if (GET_CODE (pat) == MEM)
27515 *mem_ref = pat;
27516 return true;
27519 /* Recursively process the pattern. */
27520 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27522 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27524 if (fmt[i] == 'e')
27526 if (find_mem_ref (XEXP (pat, i), mem_ref))
27527 return true;
27529 else if (fmt[i] == 'E')
27530 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27532 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27533 return true;
27537 return false;
27540 /* Determine if PAT is a PATTERN of a load insn. */
27542 static bool
27543 is_load_insn1 (rtx pat, rtx *load_mem)
27545 if (!pat)
27546 return false;
27548 if (GET_CODE (pat) == SET)
27549 return find_mem_ref (SET_SRC (pat), load_mem);
27551 if (GET_CODE (pat) == PARALLEL)
27553 int i;
27555 for (i = 0; i < XVECLEN (pat, 0); i++)
27556 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27557 return true;
27560 return false;
27563 /* Determine if INSN loads from memory. */
27565 static bool
27566 is_load_insn (rtx insn, rtx *load_mem)
27568 if (!insn || !INSN_P (insn))
27569 return false;
27571 if (CALL_P (insn))
27572 return false;
27574 return is_load_insn1 (PATTERN (insn), load_mem);
27577 /* Determine if PAT is a PATTERN of a store insn. */
27579 static bool
27580 is_store_insn1 (rtx pat, rtx *str_mem)
27582 if (!pat)
27583 return false;
27585 if (GET_CODE (pat) == SET)
27586 return find_mem_ref (SET_DEST (pat), str_mem);
27588 if (GET_CODE (pat) == PARALLEL)
27590 int i;
27592 for (i = 0; i < XVECLEN (pat, 0); i++)
27593 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27594 return true;
27597 return false;
27600 /* Determine if INSN stores to memory. */
27602 static bool
27603 is_store_insn (rtx insn, rtx *str_mem)
27605 if (!insn || !INSN_P (insn))
27606 return false;
27608 return is_store_insn1 (PATTERN (insn), str_mem);
27611 /* Returns whether the dependence between INSN and NEXT is considered
27612 costly by the given target. */
27614 static bool
27615 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27617 rtx insn;
27618 rtx next;
27619 rtx load_mem, str_mem;
27621 /* If the flag is not enabled, no dependence is considered costly;
27622 allow all dependent insns in the same group.
27623 This is the most aggressive option. */
27624 if (rs6000_sched_costly_dep == no_dep_costly)
27625 return false;
27627 /* If the flag is set to 1, a dependence is always considered costly;
27628 do not allow dependent instructions in the same group.
27629 This is the most conservative option. */
27630 if (rs6000_sched_costly_dep == all_deps_costly)
27631 return true;
27633 insn = DEP_PRO (dep);
27634 next = DEP_CON (dep);
27636 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27637 && is_load_insn (next, &load_mem)
27638 && is_store_insn (insn, &str_mem))
27639 /* Prevent load after store in the same group. */
27640 return true;
27642 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27643 && is_load_insn (next, &load_mem)
27644 && is_store_insn (insn, &str_mem)
27645 && DEP_TYPE (dep) == REG_DEP_TRUE
27646 && mem_locations_overlap(str_mem, load_mem))
27647 /* Prevent load after store in the same group if it is a true
27648 dependence. */
27649 return true;
27651 /* The flag is set to X; dependences with latency >= X are considered costly,
27652 and will not be scheduled in the same group. */
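/* For example, if rs6000_sched_costly_dep is 3, a dependence of cost 4
   at distance 1 is costly (4 - 1 >= 3), while the same dependence at
   distance 2 is not (4 - 2 < 3). */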
27653 if (rs6000_sched_costly_dep <= max_dep_latency
27654 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27655 return true;
27657 return false;
27660 /* Return the next insn after INSN that is found before TAIL is reached,
27661 skipping any "non-active" insns, i.e. insns that will not actually occupy
27662 an issue slot. Return NULL_RTX if such an insn is not found. */
27664 static rtx_insn *
27665 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27667 if (insn == NULL_RTX || insn == tail)
27668 return NULL;
27670 while (1)
27672 insn = NEXT_INSN (insn);
27673 if (insn == NULL_RTX || insn == tail)
27674 return NULL;
27676 if (CALL_P (insn)
27677 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27678 || (NONJUMP_INSN_P (insn)
27679 && GET_CODE (PATTERN (insn)) != USE
27680 && GET_CODE (PATTERN (insn)) != CLOBBER
27681 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27682 break;
27684 return insn;
27687 /* We are about to begin issuing insns for this clock cycle. */
27689 static int
27690 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27691 rtx_insn **ready ATTRIBUTE_UNUSED,
27692 int *pn_ready ATTRIBUTE_UNUSED,
27693 int clock_var ATTRIBUTE_UNUSED)
27695 int n_ready = *pn_ready;
27697 if (sched_verbose)
27698 fprintf (dump, "// rs6000_sched_reorder :\n");
27700 /* Reorder the ready list if the next insn to be issued
27701 is a nonpipelined insn. */
27702 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27704 if (is_nonpipeline_insn (ready[n_ready - 1])
27705 && (recog_memoized (ready[n_ready - 2]) > 0))
27706 /* Simply swap first two insns. */
27707 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27710 if (rs6000_cpu == PROCESSOR_POWER6)
27711 load_store_pendulum = 0;
27713 return rs6000_issue_rate ();
27716 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27718 static int
27719 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27720 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27722 if (sched_verbose)
27723 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27725 /* For Power6, we need to handle some special cases to try to keep the
27726 store queue from overflowing and triggering expensive flushes.
27728 This code monitors how load and store instructions are being issued
27729 and skews the ready list one way or the other to increase the likelihood
27730 that a desired instruction is issued at the proper time.
27732 To do this, we maintain a "load_store_pendulum"
27733 that tracks the current state of load/store issue:
27735 - If the pendulum is at zero, then no loads or stores have been
27736 issued in the current cycle so we do nothing.
27738 - If the pendulum is 1, then a single load has been issued in this
27739 cycle and we attempt to locate another load in the ready list to
27740 issue with it.
27742 - If the pendulum is -2, then two stores have already been
27743 issued in this cycle, so we increase the priority of the first load
27744 in the ready list to increase its likelihood of being chosen first
27745 in the next cycle.
27747 - If the pendulum is -1, then a single store has been issued in this
27748 cycle and we attempt to locate another store in the ready list to
27749 issue with it, preferring a store to an adjacent memory location to
27750 facilitate store pairing in the store queue.
27752 - If the pendulum is 2, then two loads have already been
27753 issued in this cycle, so we increase the priority of the first store
27754 in the ready list to increase its likelihood of being chosen first
27755 in the next cycle.
27757 - If the pendulum < -2 or > 2, then do nothing.
27759 Note: This code covers the most common scenarios. There exist
27760 non-load/store instructions which make use of the LSU and which
27761 would need to be accounted for to strictly model the behavior
27762 of the machine. Those instructions are currently unaccounted
27763 for, to help minimize the compile-time overhead of this code. */
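/* For example, after two stores issue in one cycle the pendulum sits
   at -2, so the code below bumps the priority of the first load on
   the ready list, making a load more likely to issue next cycle. */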
27765 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27767 int pos;
27768 int i;
27769 rtx_insn *tmp;
27770 rtx load_mem, str_mem;
27772 if (is_store_insn (last_scheduled_insn, &str_mem))
27773 /* Issuing a store, swing the load_store_pendulum to the left */
27774 load_store_pendulum--;
27775 else if (is_load_insn (last_scheduled_insn, &load_mem))
27776 /* Issuing a load, swing the load_store_pendulum to the right */
27777 load_store_pendulum++;
27778 else
27779 return cached_can_issue_more;
27781 /* If the pendulum is balanced, or there is only one instruction on
27782 the ready list, then all is well, so return. */
27783 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27784 return cached_can_issue_more;
27786 if (load_store_pendulum == 1)
27788 /* A load has been issued in this cycle. Scan the ready list
27789 for another load to issue with it */
27790 pos = *pn_ready - 1;
27792 while (pos >= 0)
27794 if (is_load_insn (ready[pos], &load_mem))
27796 /* Found a load. Move it to the head of the ready list,
27797 and adjust its priority so that it is more likely to
27798 stay there. */
27799 tmp = ready[pos];
27800 for (i = pos; i < *pn_ready - 1; i++)
27801 ready[i] = ready[i + 1];
27802 ready[*pn_ready - 1] = tmp;
27804 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27805 INSN_PRIORITY (tmp)++;
27806 break;
27808 pos--;
27811 else if (load_store_pendulum == -2)
27813 /* Two stores have been issued in this cycle. Increase the
27814 priority of the first load in the ready list to favor it for
27815 issuing in the next cycle. */
27816 pos = *pn_ready - 1;
27818 while (pos >= 0)
27820 if (is_load_insn (ready[pos], &load_mem)
27821 && !sel_sched_p ()
27822 && INSN_PRIORITY_KNOWN (ready[pos]))
27824 INSN_PRIORITY (ready[pos])++;
27826 /* Adjust the pendulum to account for the fact that a load
27827 was found and increased in priority. This is to prevent
27828 increasing the priority of multiple loads. */
27829 load_store_pendulum--;
27831 break;
27833 pos--;
27836 else if (load_store_pendulum == -1)
27838 /* A store has been issued in this cycle. Scan the ready list for
27839 another store to issue with it, preferring a store to an adjacent
27840 memory location */
27841 int first_store_pos = -1;
27843 pos = *pn_ready - 1;
27845 while (pos >= 0)
27847 if (is_store_insn (ready[pos], &str_mem))
27849 rtx str_mem2;
27850 /* Maintain the index of the first store found on the
27851 list */
27852 if (first_store_pos == -1)
27853 first_store_pos = pos;
27855 if (is_store_insn (last_scheduled_insn, &str_mem2)
27856 && adjacent_mem_locations (str_mem, str_mem2))
27858 /* Found an adjacent store. Move it to the head of the
27859 ready list, and adjust its priority so that it is
27860 more likely to stay there. */
27861 tmp = ready[pos];
27862 for (i = pos; i < *pn_ready - 1; i++)
27863 ready[i] = ready[i + 1];
27864 ready[*pn_ready - 1] = tmp;
27866 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27867 INSN_PRIORITY (tmp)++;
27869 first_store_pos = -1;
27871 break;
27874 pos--;
27877 if (first_store_pos >= 0)
27879 /* An adjacent store wasn't found, but a non-adjacent store was,
27880 so move the non-adjacent store to the front of the ready
27881 list, and adjust its priority so that it is more likely to
27882 stay there. */
27883 tmp = ready[first_store_pos];
27884 for (i = first_store_pos; i < *pn_ready - 1; i++)
27885 ready[i] = ready[i + 1];
27886 ready[*pn_ready - 1] = tmp;
27887 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27888 INSN_PRIORITY (tmp)++;
27891 else if (load_store_pendulum == 2)
27893 /* Two loads have been issued in this cycle. Increase the priority
27894 of the first store in the ready list to favor it for issuing in
27895 the next cycle. */
27896 pos = *pn_ready - 1;
27898 while (pos >= 0)
27900 if (is_store_insn (ready[pos], &str_mem)
27901 && !sel_sched_p ()
27902 && INSN_PRIORITY_KNOWN (ready[pos]))
27904 INSN_PRIORITY (ready[pos])++;
27906 /* Adjust the pendulum to account for the fact that a store
27907 was found and increased in priority. This is to prevent
27908 increasing the priority of multiple stores. */
27909 load_store_pendulum++;
27911 break;
27913 pos--;
27918 return cached_can_issue_more;
27921 /* Return whether the presence of INSN causes a dispatch group termination
27922 of group WHICH_GROUP.
27924 If WHICH_GROUP == current_group, this function will return true if INSN
27925 causes the termination of the current group (i.e., the dispatch group to
27926 which INSN belongs). This means that INSN will be the last insn in the
27927 group it belongs to.
27929 If WHICH_GROUP == previous_group, this function will return true if INSN
27930 causes the termination of the previous group (i.e., the dispatch group that
27931 precedes the group to which INSN belongs). This means that INSN will be
27932 the first insn in the group it belongs to. */
27934 static bool
27935 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27937 bool first, last;
27939 if (! insn)
27940 return false;
27942 first = insn_must_be_first_in_group (insn);
27943 last = insn_must_be_last_in_group (insn);
27945 if (first && last)
27946 return true;
27948 if (which_group == current_group)
27949 return last;
27950 else if (which_group == previous_group)
27951 return first;
27953 return false;
27957 static bool
27958 insn_must_be_first_in_group (rtx_insn *insn)
27960 enum attr_type type;
27962 if (!insn
27963 || NOTE_P (insn)
27964 || DEBUG_INSN_P (insn)
27965 || GET_CODE (PATTERN (insn)) == USE
27966 || GET_CODE (PATTERN (insn)) == CLOBBER)
27967 return false;
27969 switch (rs6000_cpu)
27971 case PROCESSOR_POWER5:
27972 if (is_cracked_insn (insn))
27973 return true;
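/* FALLTHRU: the POWER4 check below applies to POWER5 as well. */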
27974 case PROCESSOR_POWER4:
27975 if (is_microcoded_insn (insn))
27976 return true;
27978 if (!rs6000_sched_groups)
27979 return false;
27981 type = get_attr_type (insn);
27983 switch (type)
27985 case TYPE_MFCR:
27986 case TYPE_MFCRF:
27987 case TYPE_MTCR:
27988 case TYPE_DELAYED_CR:
27989 case TYPE_CR_LOGICAL:
27990 case TYPE_MTJMPR:
27991 case TYPE_MFJMPR:
27992 case TYPE_DIV:
27993 case TYPE_LOAD_L:
27994 case TYPE_STORE_C:
27995 case TYPE_ISYNC:
27996 case TYPE_SYNC:
27997 return true;
27998 default:
27999 break;
28001 break;
28002 case PROCESSOR_POWER6:
28003 type = get_attr_type (insn);
28005 switch (type)
28007 case TYPE_EXTS:
28008 case TYPE_CNTLZ:
28009 case TYPE_TRAP:
28010 case TYPE_MUL:
28011 case TYPE_INSERT:
28012 case TYPE_FPCOMPARE:
28013 case TYPE_MFCR:
28014 case TYPE_MTCR:
28015 case TYPE_MFJMPR:
28016 case TYPE_MTJMPR:
28017 case TYPE_ISYNC:
28018 case TYPE_SYNC:
28019 case TYPE_LOAD_L:
28020 case TYPE_STORE_C:
28021 return true;
28022 case TYPE_SHIFT:
28023 if (get_attr_dot (insn) == DOT_NO
28024 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28025 return true;
28026 else
28027 break;
28028 case TYPE_DIV:
28029 if (get_attr_size (insn) == SIZE_32)
28030 return true;
28031 else
28032 break;
28033 case TYPE_LOAD:
28034 case TYPE_STORE:
28035 case TYPE_FPLOAD:
28036 case TYPE_FPSTORE:
28037 if (get_attr_update (insn) == UPDATE_YES)
28038 return true;
28039 else
28040 break;
28041 default:
28042 break;
28044 break;
28045 case PROCESSOR_POWER7:
28046 type = get_attr_type (insn);
28048 switch (type)
28050 case TYPE_CR_LOGICAL:
28051 case TYPE_MFCR:
28052 case TYPE_MFCRF:
28053 case TYPE_MTCR:
28054 case TYPE_DIV:
28055 case TYPE_ISYNC:
28056 case TYPE_LOAD_L:
28057 case TYPE_STORE_C:
28058 case TYPE_MFJMPR:
28059 case TYPE_MTJMPR:
28060 return true;
28061 case TYPE_MUL:
28062 case TYPE_SHIFT:
28063 case TYPE_EXTS:
28064 if (get_attr_dot (insn) == DOT_YES)
28065 return true;
28066 else
28067 break;
28068 case TYPE_LOAD:
28069 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28070 || get_attr_update (insn) == UPDATE_YES)
28071 return true;
28072 else
28073 break;
28074 case TYPE_STORE:
28075 case TYPE_FPLOAD:
28076 case TYPE_FPSTORE:
28077 if (get_attr_update (insn) == UPDATE_YES)
28078 return true;
28079 else
28080 break;
28081 default:
28082 break;
28084 break;
28085 case PROCESSOR_POWER8:
28086 type = get_attr_type (insn);
28088 switch (type)
28090 case TYPE_CR_LOGICAL:
28091 case TYPE_DELAYED_CR:
28092 case TYPE_MFCR:
28093 case TYPE_MFCRF:
28094 case TYPE_MTCR:
28095 case TYPE_SYNC:
28096 case TYPE_ISYNC:
28097 case TYPE_LOAD_L:
28098 case TYPE_STORE_C:
28099 case TYPE_VECSTORE:
28100 case TYPE_MFJMPR:
28101 case TYPE_MTJMPR:
28102 return true;
28103 case TYPE_SHIFT:
28104 case TYPE_EXTS:
28105 case TYPE_MUL:
28106 if (get_attr_dot (insn) == DOT_YES)
28107 return true;
28108 else
28109 break;
28110 case TYPE_LOAD:
28111 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28112 || get_attr_update (insn) == UPDATE_YES)
28113 return true;
28114 else
28115 break;
28116 case TYPE_STORE:
28117 if (get_attr_update (insn) == UPDATE_YES
28118 && get_attr_indexed (insn) == INDEXED_YES)
28119 return true;
28120 else
28121 break;
28122 default:
28123 break;
28125 break;
28126 default:
28127 break;
28130 return false;
28133 static bool
28134 insn_must_be_last_in_group (rtx_insn *insn)
28136 enum attr_type type;
28138 if (!insn
28139 || NOTE_P (insn)
28140 || DEBUG_INSN_P (insn)
28141 || GET_CODE (PATTERN (insn)) == USE
28142 || GET_CODE (PATTERN (insn)) == CLOBBER)
28143 return false;
28145 switch (rs6000_cpu) {
28146 case PROCESSOR_POWER4:
28147 case PROCESSOR_POWER5:
28148 if (is_microcoded_insn (insn))
28149 return true;
28151 if (is_branch_slot_insn (insn))
28152 return true;
28154 break;
28155 case PROCESSOR_POWER6:
28156 type = get_attr_type (insn);
28158 switch (type)
28160 case TYPE_EXTS:
28161 case TYPE_CNTLZ:
28162 case TYPE_TRAP:
28163 case TYPE_MUL:
28164 case TYPE_FPCOMPARE:
28165 case TYPE_MFCR:
28166 case TYPE_MTCR:
28167 case TYPE_MFJMPR:
28168 case TYPE_MTJMPR:
28169 case TYPE_ISYNC:
28170 case TYPE_SYNC:
28171 case TYPE_LOAD_L:
28172 case TYPE_STORE_C:
28173 return true;
28174 case TYPE_SHIFT:
28175 if (get_attr_dot (insn) == DOT_NO
28176 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28177 return true;
28178 else
28179 break;
28180 case TYPE_DIV:
28181 if (get_attr_size (insn) == SIZE_32)
28182 return true;
28183 else
28184 break;
28185 default:
28186 break;
28188 break;
28189 case PROCESSOR_POWER7:
28190 type = get_attr_type (insn);
28192 switch (type)
28194 case TYPE_ISYNC:
28195 case TYPE_SYNC:
28196 case TYPE_LOAD_L:
28197 case TYPE_STORE_C:
28198 return true;
28199 case TYPE_LOAD:
28200 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28201 && get_attr_update (insn) == UPDATE_YES)
28202 return true;
28203 else
28204 break;
28205 case TYPE_STORE:
28206 if (get_attr_update (insn) == UPDATE_YES
28207 && get_attr_indexed (insn) == INDEXED_YES)
28208 return true;
28209 else
28210 break;
28211 default:
28212 break;
28214 break;
28215 case PROCESSOR_POWER8:
28216 type = get_attr_type (insn);
28218 switch (type)
28220 case TYPE_MFCR:
28221 case TYPE_MTCR:
28222 case TYPE_ISYNC:
28223 case TYPE_SYNC:
28224 case TYPE_LOAD_L:
28225 case TYPE_STORE_C:
28226 return true;
28227 case TYPE_LOAD:
28228 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28229 && get_attr_update (insn) == UPDATE_YES)
28230 return true;
28231 else
28232 break;
28233 case TYPE_STORE:
28234 if (get_attr_update (insn) == UPDATE_YES
28235 && get_attr_indexed (insn) == INDEXED_YES)
28236 return true;
28237 else
28238 break;
28239 default:
28240 break;
28242 break;
28243 default:
28244 break;
28247 return false;
28250 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28251 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28253 static bool
28254 is_costly_group (rtx *group_insns, rtx next_insn)
28256 int i;
28257 int issue_rate = rs6000_issue_rate ();
28259 for (i = 0; i < issue_rate; i++)
28261 sd_iterator_def sd_it;
28262 dep_t dep;
28263 rtx insn = group_insns[i];
28265 if (!insn)
28266 continue;
28268 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28270 rtx next = DEP_CON (dep);
28272 if (next == next_insn
28273 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28274 return true;
28278 return false;
28281 /* Utility of the function redefine_groups.
28282 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28283 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28284 to keep it "far" (in a separate group) from GROUP_INSNS, following
28285 one of the following schemes, depending on the value of the flag
28286 -minsert-sched-nops = X:
28287 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28288 in order to force NEXT_INSN into a separate group.
28289 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28290 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28291 insertion (has a group just ended, how many vacant issue slots remain in the
28292 last group, and how many dispatch groups were encountered so far). */
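/* For example, under scheme (1) on POWER4 (issue rate 5), if three
   issue slots remain and NEXT_INSN is not a branch, two nops are
   emitted; only a branch could fill the remaining branch slot, so
   NEXT_INSN is forced into a new group. */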
28294 static int
28295 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28296 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28297 int *group_count)
28299 rtx nop;
28300 bool force;
28301 int issue_rate = rs6000_issue_rate ();
28302 bool end = *group_end;
28303 int i;
28305 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28306 return can_issue_more;
28308 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28309 return can_issue_more;
28311 force = is_costly_group (group_insns, next_insn);
28312 if (!force)
28313 return can_issue_more;
28315 if (sched_verbose > 6)
28316 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28317 *group_count, can_issue_more);
28319 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28321 if (*group_end)
28322 can_issue_more = 0;
28324 /* Since only a branch can be issued in the last issue_slot, it is
28325 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28326 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28327 in this case the last nop will start a new group and the branch
28328 will be forced to the new group. */
28329 if (can_issue_more && !is_branch_slot_insn (next_insn))
28330 can_issue_more--;
28332 /* Do we have a special group ending nop? */
28333 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28334 || rs6000_cpu_attr == CPU_POWER8)
28336 nop = gen_group_ending_nop ();
28337 emit_insn_before (nop, next_insn);
28338 can_issue_more = 0;
28340 else
28341 while (can_issue_more > 0)
28343 nop = gen_nop ();
28344 emit_insn_before (nop, next_insn);
28345 can_issue_more--;
28348 *group_end = true;
28349 return 0;
28352 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28354 int n_nops = rs6000_sched_insert_nops;
28356 /* Nops can't be issued from the branch slot, so the effective
28357 issue_rate for nops is 'issue_rate - 1'. */
28358 if (can_issue_more == 0)
28359 can_issue_more = issue_rate;
28360 can_issue_more--;
28361 if (can_issue_more == 0)
28363 can_issue_more = issue_rate - 1;
28364 (*group_count)++;
28365 end = true;
28366 for (i = 0; i < issue_rate; i++)
28368 group_insns[i] = 0;
28372 while (n_nops > 0)
28374 nop = gen_nop ();
28375 emit_insn_before (nop, next_insn);
28376 if (can_issue_more == issue_rate - 1) /* new group begins */
28377 end = false;
28378 can_issue_more--;
28379 if (can_issue_more == 0)
28381 can_issue_more = issue_rate - 1;
28382 (*group_count)++;
28383 end = true;
28384 for (i = 0; i < issue_rate; i++)
28386 group_insns[i] = 0;
28389 n_nops--;
28392 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28393 can_issue_more++;
28395 /* Is next_insn going to start a new group? */
28396 *group_end
28397 = (end
28398 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28399 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28400 || (can_issue_more < issue_rate &&
28401 insn_terminates_group_p (next_insn, previous_group)));
28402 if (*group_end && end)
28403 (*group_count)--;
28405 if (sched_verbose > 6)
28406 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28407 *group_count, can_issue_more);
28408 return can_issue_more;
28411 return can_issue_more;
28414 /* This function tries to synch the dispatch groups that the compiler "sees"
28415 with the dispatch groups that the processor dispatcher is expected to
28416 form in practice. It tries to achieve this synchronization by forcing the
28417 estimated processor grouping on the compiler (as opposed to the function
28418 'pad_groups' which tries to force the scheduler's grouping on the processor).
28420 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28421 examines the (estimated) dispatch groups that will be formed by the processor
28422 dispatcher. It marks these group boundaries to reflect the estimated
28423 processor grouping, overriding the grouping that the scheduler had marked.
28424 Depending on the value of the flag '-minsert-sched-nops' this function can
28425 force certain insns into separate groups or force a certain distance between
28426 them by inserting nops, for example, if there exists a "costly dependence"
28427 between the insns.
28429 The function estimates the group boundaries that the processor will form as
28430 follows: It keeps track of how many vacant issue slots are available after
28431 each insn. A subsequent insn will start a new group if one of the following
28432 4 cases applies:
28433 - no more vacant issue slots remain in the current dispatch group.
28434 - only the last issue slot, which is the branch slot, is vacant, but the next
28435 insn is not a branch.
28436 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28437 which means that a cracked insn (which occupies two issue slots) can't be
28438 issued in this group.
28439 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28440 start a new group. */
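/* For example, on POWER4 (issue rate 5), after four insns have been
   placed only the branch slot remains vacant; if the next insn is not
   a branch, the second case above applies and it starts a new group. */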
28442 static int
28443 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28444 rtx_insn *tail)
28446 rtx_insn *insn, *next_insn;
28447 int issue_rate;
28448 int can_issue_more;
28449 int slot, i;
28450 bool group_end;
28451 int group_count = 0;
28452 rtx *group_insns;
28454 /* Initialize. */
28455 issue_rate = rs6000_issue_rate ();
28456 group_insns = XALLOCAVEC (rtx, issue_rate);
28457 for (i = 0; i < issue_rate; i++)
28459 group_insns[i] = 0;
28461 can_issue_more = issue_rate;
28462 slot = 0;
28463 insn = get_next_active_insn (prev_head_insn, tail);
28464 group_end = false;
28466 while (insn != NULL_RTX)
28468 slot = (issue_rate - can_issue_more);
28469 group_insns[slot] = insn;
28470 can_issue_more =
28471 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28472 if (insn_terminates_group_p (insn, current_group))
28473 can_issue_more = 0;
28475 next_insn = get_next_active_insn (insn, tail);
28476 if (next_insn == NULL_RTX)
28477 return group_count + 1;
28479 /* Is next_insn going to start a new group? */
28480 group_end
28481 = (can_issue_more == 0
28482 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28483 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28484 || (can_issue_more < issue_rate &&
28485 insn_terminates_group_p (next_insn, previous_group)));
28487 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28488 next_insn, &group_end, can_issue_more,
28489 &group_count);
28491 if (group_end)
28493 group_count++;
28494 can_issue_more = 0;
28495 for (i = 0; i < issue_rate; i++)
28497 group_insns[i] = 0;
28501 if (GET_MODE (next_insn) == TImode && can_issue_more)
28502 PUT_MODE (next_insn, VOIDmode);
28503 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28504 PUT_MODE (next_insn, TImode);
28506 insn = next_insn;
28507 if (can_issue_more == 0)
28508 can_issue_more = issue_rate;
28509 } /* while */
28511 return group_count;
28514 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28515 dispatch group boundaries that the scheduler had marked. Pad with nops
28516 any dispatch groups which have vacant issue slots, in order to force the
28517 scheduler's grouping on the processor dispatcher. The function
28518 returns the number of dispatch groups found. */
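/* For example, if a group boundary was marked while three issue slots
   were still vacant and the next insn is not a branch, two nops are
   emitted; the branch slot is left empty since only a branch could
   occupy it. */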
28520 static int
28521 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28522 rtx_insn *tail)
28524 rtx_insn *insn, *next_insn;
28525 rtx nop;
28526 int issue_rate;
28527 int can_issue_more;
28528 int group_end;
28529 int group_count = 0;
28531 /* Initialize issue_rate. */
28532 issue_rate = rs6000_issue_rate ();
28533 can_issue_more = issue_rate;
28535 insn = get_next_active_insn (prev_head_insn, tail);
28536 next_insn = get_next_active_insn (insn, tail);
28538 while (insn != NULL_RTX)
28540 can_issue_more =
28541 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28543 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28545 if (next_insn == NULL_RTX)
28546 break;
28548 if (group_end)
28550 /* If the scheduler had marked group termination at this location
28551 (between insn and next_insn), and neither insn nor next_insn will
28552 force group termination, pad the group with nops to force group
28553 termination. */
28554 if (can_issue_more
28555 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28556 && !insn_terminates_group_p (insn, current_group)
28557 && !insn_terminates_group_p (next_insn, previous_group))
28559 if (!is_branch_slot_insn (next_insn))
28560 can_issue_more--;
28562 while (can_issue_more)
28564 nop = gen_nop ();
28565 emit_insn_before (nop, next_insn);
28566 can_issue_more--;
28570 can_issue_more = issue_rate;
28571 group_count++;
28574 insn = next_insn;
28575 next_insn = get_next_active_insn (insn, tail);
28578 return group_count;
28581 /* We're beginning a new block. Initialize data structures as necessary. */
28583 static void
28584 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28585 int sched_verbose ATTRIBUTE_UNUSED,
28586 int max_ready ATTRIBUTE_UNUSED)
28588 last_scheduled_insn = NULL_RTX;
28589 load_store_pendulum = 0;
28592 /* The following function is called at the end of scheduling BB.
28593 After reload, it inserts nops to enforce insn group bundling. */
28595 static void
28596 rs6000_sched_finish (FILE *dump, int sched_verbose)
28598 int n_groups;
28600 if (sched_verbose)
28601 fprintf (dump, "=== Finishing schedule.\n");
28603 if (reload_completed && rs6000_sched_groups)
28605 /* Do not run sched_finish hook when selective scheduling enabled. */
28606 if (sel_sched_p ())
28607 return;
28609 if (rs6000_sched_insert_nops == sched_finish_none)
28610 return;
28612 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28613 n_groups = pad_groups (dump, sched_verbose,
28614 current_sched_info->prev_head,
28615 current_sched_info->next_tail);
28616 else
28617 n_groups = redefine_groups (dump, sched_verbose,
28618 current_sched_info->prev_head,
28619 current_sched_info->next_tail);
28621 if (sched_verbose >= 6)
28623 fprintf (dump, "ngroups = %d\n", n_groups);
28624 print_rtl (dump, current_sched_info->prev_head);
28625 fprintf (dump, "Done finish_sched\n");
28630 struct _rs6000_sched_context
28632 short cached_can_issue_more;
28633 rtx last_scheduled_insn;
28634 int load_store_pendulum;
28637 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28638 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28640 /* Allocate storage for a new scheduling context. */
28641 static void *
28642 rs6000_alloc_sched_context (void)
28644 return xmalloc (sizeof (rs6000_sched_context_def));
28647 /* If CLEAN_P is true, initialize _SC with clean data;
28648 otherwise initialize it from the global context. */
28649 static void
28650 rs6000_init_sched_context (void *_sc, bool clean_p)
28652 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28654 if (clean_p)
28656 sc->cached_can_issue_more = 0;
28657 sc->last_scheduled_insn = NULL_RTX;
28658 sc->load_store_pendulum = 0;
28660 else
28662 sc->cached_can_issue_more = cached_can_issue_more;
28663 sc->last_scheduled_insn = last_scheduled_insn;
28664 sc->load_store_pendulum = load_store_pendulum;
28668 /* Sets the global scheduling context to the one pointed to by _SC. */
28669 static void
28670 rs6000_set_sched_context (void *_sc)
28672 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28674 gcc_assert (sc != NULL);
28676 cached_can_issue_more = sc->cached_can_issue_more;
28677 last_scheduled_insn = sc->last_scheduled_insn;
28678 load_store_pendulum = sc->load_store_pendulum;
28681 /* Free _SC. */
28682 static void
28683 rs6000_free_sched_context (void *_sc)
28685 gcc_assert (_sc != NULL);
28687 free (_sc);
28691 /* Length in units of the trampoline for entering a nested function. */
28693 int
28694 rs6000_trampoline_size (void)
28696 int ret = 0;
28698 switch (DEFAULT_ABI)
28700 default:
28701 gcc_unreachable ();
28703 case ABI_AIX:
28704 ret = (TARGET_32BIT) ? 12 : 24;
28705 break;
28707 case ABI_ELFv2:
28708 gcc_assert (!TARGET_32BIT);
28709 ret = 32;
28710 break;
28712 case ABI_DARWIN:
28713 case ABI_V4:
28714 ret = (TARGET_32BIT) ? 40 : 48;
28715 break;
28718 return ret;
28721 /* Emit RTL insns to initialize the variable parts of a trampoline.
28722 FNADDR is an RTX for the address of the function's pure code.
28723 CXT is an RTX for the static chain value for the function. */
28725 static void
28726 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28728 int regsize = (TARGET_32BIT) ? 4 : 8;
28729 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28730 rtx ctx_reg = force_reg (Pmode, cxt);
28731 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28733 switch (DEFAULT_ABI)
28735 default:
28736 gcc_unreachable ();
28738 /* Under AIX, just build the 3-word function descriptor. */
28739 case ABI_AIX:
28741 rtx fnmem, fn_reg, toc_reg;
28743 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28744 error ("You cannot take the address of a nested function if you use "
28745 "the -mno-pointers-to-nested-functions option.");
28747 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28748 fn_reg = gen_reg_rtx (Pmode);
28749 toc_reg = gen_reg_rtx (Pmode);
28751 /* Macro to shorten the code expansions below. */
28752 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28754 m_tramp = replace_equiv_address (m_tramp, addr);
28756 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28757 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28758 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28759 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28760 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28762 # undef MEM_PLUS
28764 break;
28766 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28767 case ABI_ELFv2:
28768 case ABI_DARWIN:
28769 case ABI_V4:
28770 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28771 LCT_NORMAL, VOIDmode, 4,
28772 addr, Pmode,
28773 GEN_INT (rs6000_trampoline_size ()), SImode,
28774 fnaddr, Pmode,
28775 ctx_reg, Pmode);
28776 break;
28781 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28782 identifier as an argument, so the front end shouldn't look it up. */
28784 static bool
28785 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28787 return is_attribute_p ("altivec", attr_id);
28790 /* Handle the "altivec" attribute. The attribute may have
28791 arguments as follows:
28793 __attribute__((altivec(vector__)))
28794 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28795 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28797 and may appear more than once (e.g., 'vector bool char') in a
28798 given declaration. */
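/* For example,
   typedef int vi __attribute__ ((altivec (vector__)));
   is mapped to the V4SI vector type by the 'v' case below, while
   typedef unsigned int vb __attribute__ ((altivec (bool__)));
   is mapped to bool_V4SI_type_node by the 'b' case. */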
28800 static tree
28801 rs6000_handle_altivec_attribute (tree *node,
28802 tree name ATTRIBUTE_UNUSED,
28803 tree args,
28804 int flags ATTRIBUTE_UNUSED,
28805 bool *no_add_attrs)
28807 tree type = *node, result = NULL_TREE;
28808 machine_mode mode;
28809 int unsigned_p;
28810 char altivec_type
28811 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28812 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28813 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28814 : '?');
28816 while (POINTER_TYPE_P (type)
28817 || TREE_CODE (type) == FUNCTION_TYPE
28818 || TREE_CODE (type) == METHOD_TYPE
28819 || TREE_CODE (type) == ARRAY_TYPE)
28820 type = TREE_TYPE (type);
28822 mode = TYPE_MODE (type);
28824 /* Check for invalid AltiVec type qualifiers. */
28825 if (type == long_double_type_node)
28826 error ("use of %<long double%> in AltiVec types is invalid");
28827 else if (type == boolean_type_node)
28828 error ("use of boolean types in AltiVec types is invalid");
28829 else if (TREE_CODE (type) == COMPLEX_TYPE)
28830 error ("use of %<complex%> in AltiVec types is invalid");
28831 else if (DECIMAL_FLOAT_MODE_P (mode))
28832 error ("use of decimal floating point types in AltiVec types is invalid");
28833 else if (!TARGET_VSX)
28835 if (type == long_unsigned_type_node || type == long_integer_type_node)
28837 if (TARGET_64BIT)
28838 error ("use of %<long%> in AltiVec types is invalid for "
28839 "64-bit code without -mvsx");
28840 else if (rs6000_warn_altivec_long)
28841 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28842 "use %<int%>");
28844 else if (type == long_long_unsigned_type_node
28845 || type == long_long_integer_type_node)
28846 error ("use of %<long long%> in AltiVec types is invalid without "
28847 "-mvsx");
28848 else if (type == double_type_node)
28849 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28852 switch (altivec_type)
28854 case 'v':
28855 unsigned_p = TYPE_UNSIGNED (type);
28856 switch (mode)
28858 case TImode:
28859 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28860 break;
28861 case DImode:
28862 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28863 break;
28864 case SImode:
28865 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28866 break;
28867 case HImode:
28868 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28869 break;
28870 case QImode:
28871 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28872 break;
28873 case SFmode: result = V4SF_type_node; break;
28874 case DFmode: result = V2DF_type_node; break;
28875 /* If the user says 'vector int bool', we may be handed the 'bool'
28876 attribute _before_ the 'vector' attribute, and so select the
28877 proper type in the 'b' case below. */
28878 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28879 case V2DImode: case V2DFmode:
28880 result = type;
28881 default: break;
28883 break;
28884 case 'b':
28885 switch (mode)
28887 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28888 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28889 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28890 case QImode: case V16QImode: result = bool_V16QI_type_node;
28891 default: break;
28893 break;
28894 case 'p':
28895 switch (mode)
28897 case V8HImode: result = pixel_V8HI_type_node;
28898 default: break;
28900 default: break;
28903 /* Propagate qualifiers attached to the element type
28904 onto the vector type. */
28905 if (result && result != type && TYPE_QUALS (type))
28906 result = build_qualified_type (result, TYPE_QUALS (type));
28908 *no_add_attrs = true; /* No need to hang on to the attribute. */
28910 if (result)
28911 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28913 return NULL_TREE;
28916 /* AltiVec defines four built-in scalar types that serve as vector
28917 elements; we must teach the compiler how to mangle them. */
28919 static const char *
28920 rs6000_mangle_type (const_tree type)
28922 type = TYPE_MAIN_VARIANT (type);
28924 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28925 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28926 return NULL;
28928 if (type == bool_char_type_node) return "U6__boolc";
28929 if (type == bool_short_type_node) return "U6__bools";
28930 if (type == pixel_type_node) return "u7__pixel";
28931 if (type == bool_int_type_node) return "U6__booli";
28932 if (type == bool_long_type_node) return "U6__booll";
28934 /* Mangle IBM extended float long double as `g' (__float128) on
28935 powerpc*-linux where long-double-64 previously was the default. */
28936 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28937 && TARGET_ELF
28938 && TARGET_LONG_DOUBLE_128
28939 && !TARGET_IEEEQUAD)
28940 return "g";
28942 /* For all other types, use normal C++ mangling. */
28943 return NULL;
28946 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28947 struct attribute_spec.handler. */
28949 static tree
28950 rs6000_handle_longcall_attribute (tree *node, tree name,
28951 tree args ATTRIBUTE_UNUSED,
28952 int flags ATTRIBUTE_UNUSED,
28953 bool *no_add_attrs)
28955 if (TREE_CODE (*node) != FUNCTION_TYPE
28956 && TREE_CODE (*node) != FIELD_DECL
28957 && TREE_CODE (*node) != TYPE_DECL)
28959 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28960 name);
28961 *no_add_attrs = true;
28964 return NULL_TREE;
28967 /* Set longcall attributes on all functions declared when
28968 rs6000_default_long_calls is true. */
28969 static void
28970 rs6000_set_default_type_attributes (tree type)
28972 if (rs6000_default_long_calls
28973 && (TREE_CODE (type) == FUNCTION_TYPE
28974 || TREE_CODE (type) == METHOD_TYPE))
28975 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28976 NULL_TREE,
28977 TYPE_ATTRIBUTES (type));
28979 #if TARGET_MACHO
28980 darwin_set_default_type_attributes (type);
28981 #endif
28984 /* Return a reference suitable for calling a function with the
28985 longcall attribute. */
28987 rtx
28988 rs6000_longcall_ref (rtx call_ref)
28990 const char *call_name;
28991 tree node;
28993 if (GET_CODE (call_ref) != SYMBOL_REF)
28994 return call_ref;
28996 /* System V adds '.' to the internal name, so skip any leading dots. */
28997 call_name = XSTR (call_ref, 0);
28998 if (*call_name == '.')
29000 while (*call_name == '.')
29001 call_name++;
29003 node = get_identifier (call_name);
29004 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
29007 return force_reg (Pmode, call_ref);
29010 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
29011 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
29012 #endif
29014 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29015 struct attribute_spec.handler. */
29016 static tree
29017 rs6000_handle_struct_attribute (tree *node, tree name,
29018 tree args ATTRIBUTE_UNUSED,
29019 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29021 tree *type = NULL;
29022 if (DECL_P (*node))
29024 if (TREE_CODE (*node) == TYPE_DECL)
29025 type = &TREE_TYPE (*node);
29027 else
29028 type = node;
29030 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29031 || TREE_CODE (*type) == UNION_TYPE)))
29033 warning (OPT_Wattributes, "%qE attribute ignored", name);
29034 *no_add_attrs = true;
29037 else if ((is_attribute_p ("ms_struct", name)
29038 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29039 || ((is_attribute_p ("gcc_struct", name)
29040 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29042 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29043 name);
29044 *no_add_attrs = true;
29047 return NULL_TREE;
29050 static bool
29051 rs6000_ms_bitfield_layout_p (const_tree record_type)
29053 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
29054 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29055 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
29058 #ifdef USING_ELFOS_H
29060 /* A get_unnamed_section callback, used for switching to toc_section. */
29062 static void
29063 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29065 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29066 && TARGET_MINIMAL_TOC
29067 && !TARGET_RELOCATABLE)
29069 if (!toc_initialized)
29071 toc_initialized = 1;
29072 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29073 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
29074 fprintf (asm_out_file, "\t.tc ");
29075 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
29076 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29077 fprintf (asm_out_file, "\n");
29079 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29080 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29081 fprintf (asm_out_file, " = .+32768\n");
29083 else
29084 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29086 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29087 && !TARGET_RELOCATABLE)
29088 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29089 else
29091 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29092 if (!toc_initialized)
29094 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29095 fprintf (asm_out_file, " = .+32768\n");
29096 toc_initialized = 1;
29101 /* Implement TARGET_ASM_INIT_SECTIONS. */
29103 static void
29104 rs6000_elf_asm_init_sections (void)
29106 toc_section
29107 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
29109 sdata2_section
29110 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
29111 SDATA2_SECTION_ASM_OP);
29114 /* Implement TARGET_SELECT_RTX_SECTION. */
29116 static section *
29117 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
29118 unsigned HOST_WIDE_INT align)
29120 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29121 return toc_section;
29122 else
29123 return default_elf_select_rtx_section (mode, x, align);
29126 /* For a SYMBOL_REF, set generic flags and then perform some
29127 target-specific processing.
29129 When the AIX ABI is requested on a non-AIX system, replace the
29130 function name with the real name (with a leading .) rather than the
29131 function descriptor name. This saves a lot of overriding code to
29132 read the prefixes. */
29134 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
29135 static void
29136 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
29138 default_encode_section_info (decl, rtl, first);
29140 if (first
29141 && TREE_CODE (decl) == FUNCTION_DECL
29142 && !TARGET_AIX
29143 && DEFAULT_ABI == ABI_AIX)
29145 rtx sym_ref = XEXP (rtl, 0);
29146 size_t len = strlen (XSTR (sym_ref, 0));
29147 char *str = XALLOCAVEC (char, len + 2);
29148 str[0] = '.';
29149 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
29150 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
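/* Return true if SECTION names TEMPL itself or a subsection of it,
   i.e. SECTION is TEMPL or TEMPL followed by '.'; e.g. ".sdata"
   matches ".sdata" and ".sdata.foo" but not ".sdata2". */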
29154 static inline bool
29155 compare_section_name (const char *section, const char *templ)
29157 int len;
29159 len = strlen (templ);
29160 return (strncmp (section, templ, len) == 0
29161 && (section[len] == 0 || section[len] == '.'));
29164 bool
29165 rs6000_elf_in_small_data_p (const_tree decl)
29167 if (rs6000_sdata == SDATA_NONE)
29168 return false;
29170 /* We want to merge strings, so we never consider them small data. */
29171 if (TREE_CODE (decl) == STRING_CST)
29172 return false;
29174 /* Functions are never in the small data area. */
29175 if (TREE_CODE (decl) == FUNCTION_DECL)
29176 return false;
29178 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
29180 const char *section = DECL_SECTION_NAME (decl);
29181 if (compare_section_name (section, ".sdata")
29182 || compare_section_name (section, ".sdata2")
29183 || compare_section_name (section, ".gnu.linkonce.s")
29184 || compare_section_name (section, ".sbss")
29185 || compare_section_name (section, ".sbss2")
29186 || compare_section_name (section, ".gnu.linkonce.sb")
29187 || strcmp (section, ".PPC.EMB.sdata0") == 0
29188 || strcmp (section, ".PPC.EMB.sbss0") == 0)
29189 return true;
29191 else
29193 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
29195 if (size > 0
29196 && size <= g_switch_value
29197 /* If it's not public, and we're not going to reference it there,
29198 there's no need to put it in the small data section. */
29199 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29200 return true;
29203 return false;
29206 #endif /* USING_ELFOS_H */
29208 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29210 static bool
29211 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29213 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29216 /* Do not place thread-local symbols refs in the object blocks. */
29218 static bool
29219 rs6000_use_blocks_for_decl_p (const_tree decl)
29221 return !DECL_THREAD_LOCAL_P (decl);
29224 /* Return a REG that occurs in ADDR with coefficient 1.
29225 ADDR can be effectively incremented by incrementing REG.
29227 r0 is special and we must not select it as an address
29228 register by this routine since our caller will try to
29229 increment the returned register via an "la" instruction. */
29231 rtx
29232 find_addr_reg (rtx addr)
29234 while (GET_CODE (addr) == PLUS)
29236 if (GET_CODE (XEXP (addr, 0)) == REG
29237 && REGNO (XEXP (addr, 0)) != 0)
29238 addr = XEXP (addr, 0);
29239 else if (GET_CODE (XEXP (addr, 1)) == REG
29240 && REGNO (XEXP (addr, 1)) != 0)
29241 addr = XEXP (addr, 1);
29242 else if (CONSTANT_P (XEXP (addr, 0)))
29243 addr = XEXP (addr, 1);
29244 else if (CONSTANT_P (XEXP (addr, 1)))
29245 addr = XEXP (addr, 0);
29246 else
29247 gcc_unreachable ();
29249 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29250 return addr;
29253 void
29254 rs6000_fatal_bad_address (rtx op)
29256 fatal_insn ("bad address", op);
29259 #if TARGET_MACHO
29261 typedef struct branch_island_d {
29262 tree function_name;
29263 tree label_name;
29264 int line_number;
29265 } branch_island;
29268 static vec<branch_island, va_gc> *branch_islands;
29270 /* Remember to generate a branch island for far calls to the given
29271 function. */
29273 static void
29274 add_compiler_branch_island (tree label_name, tree function_name,
29275 int line_number)
29277 branch_island bi = {function_name, label_name, line_number};
29278 vec_safe_push (branch_islands, bi);
29281 /* Generate far-jump branch islands for everything recorded in
29282 branch_islands. Invoked immediately after the last instruction of
29283 the epilogue has been emitted; the branch islands must be appended
29284 to, and contiguous with, the function body. Mach-O stubs are
29285 generated in machopic_output_stub(). */
29287 static void
29288 macho_branch_islands (void)
29290 char tmp_buf[512];
29292 while (!vec_safe_is_empty (branch_islands))
29294 branch_island *bi = &branch_islands->last ();
29295 const char *label = IDENTIFIER_POINTER (bi->label_name);
29296 const char *name = IDENTIFIER_POINTER (bi->function_name);
29297 char name_buf[512];
29298 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29299 if (name[0] == '*' || name[0] == '&')
29300 strcpy (name_buf, name+1);
29301 else
29303 name_buf[0] = '_';
29304 strcpy (name_buf+1, name);
29306 strcpy (tmp_buf, "\n");
29307 strcat (tmp_buf, label);
29308 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29309 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29310 dbxout_stabd (N_SLINE, bi->line_number);
29311 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29312 if (flag_pic)
29314 if (TARGET_LINK_STACK)
29316 char name[32];
29317 get_ppc476_thunk_name (name);
29318 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29319 strcat (tmp_buf, name);
29320 strcat (tmp_buf, "\n");
29321 strcat (tmp_buf, label);
29322 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29324 else
29326 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29327 strcat (tmp_buf, label);
29328 strcat (tmp_buf, "_pic\n");
29329 strcat (tmp_buf, label);
29330 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29333 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29334 strcat (tmp_buf, name_buf);
29335 strcat (tmp_buf, " - ");
29336 strcat (tmp_buf, label);
29337 strcat (tmp_buf, "_pic)\n");
29339 strcat (tmp_buf, "\tmtlr r0\n");
29341 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29342 strcat (tmp_buf, name_buf);
29343 strcat (tmp_buf, " - ");
29344 strcat (tmp_buf, label);
29345 strcat (tmp_buf, "_pic)\n");
29347 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29349 else
29351 strcat (tmp_buf, ":\nlis r12,hi16(");
29352 strcat (tmp_buf, name_buf);
29353 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29354 strcat (tmp_buf, name_buf);
29355 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29357 output_asm_insn (tmp_buf, 0);
29358 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29359 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29360 dbxout_stabd (N_SLINE, bi->line_number);
29361 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29362 branch_islands->pop ();
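/* For illustration, with a hypothetical island label "L42" targeting
   "_foo", the PIC (non-link-stack) path above assembles to:

	L42:
		mflr r0
		bcl 20,31,L42_pic
	L42_pic:
		mflr r11
		addis r11,r11,ha16(_foo - L42_pic)
		mtlr r0
		addi r12,r11,lo16(_foo - L42_pic)
		mtctr r12
		bctr

   i.e. it materializes the target address PC-relatively in r11/r12,
   restores the saved LR, and makes the far jump through CTR.  */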
29366 /* NO_PREVIOUS_DEF checks whether the function name is already
29367 recorded in the branch_islands vector. */
29369 static int
29370 no_previous_def (tree function_name)
29372 branch_island *bi;
29373 unsigned ix;
29375 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29376 if (function_name == bi->function_name)
29377 return 0;
29378 return 1;
29381 /* GET_PREV_LABEL gets the label name from the previous definition of
29382 the function. */
29384 static tree
29385 get_prev_label (tree function_name)
29387 branch_island *bi;
29388 unsigned ix;
29390 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29391 if (function_name == bi->function_name)
29392 return bi->label_name;
29393 return NULL_TREE;
29396 /* INSN is either a function call or a millicode call. It may have an
29397 unconditional jump in its delay slot.
29399 OPERANDS[DEST_OPERAND_NUMBER] is the routine we are calling. */
29401 char *
29402 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29403 int cookie_operand_number)
29405 static char buf[256];
29406 if (darwin_emit_branch_islands
29407 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29408 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29410 tree labelname;
29411 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29413 if (no_previous_def (funname))
29415 rtx label_rtx = gen_label_rtx ();
29416 char *label_buf, temp_buf[256];
29417 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29418 CODE_LABEL_NUMBER (label_rtx));
29419 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29420 labelname = get_identifier (label_buf);
29421 add_compiler_branch_island (labelname, funname, insn_line (insn));
29423 else
29424 labelname = get_prev_label (funname);
29426 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29427 instruction will reach 'foo', otherwise link as 'bl L42'".
29428 "L42" should be a 'branch island', that will do a far jump to
29429 'foo'. Branch islands are generated in
29430 macho_branch_islands(). */
29431 sprintf (buf, "jbsr %%z%d,%.246s",
29432 dest_operand_number, IDENTIFIER_POINTER (labelname));
29434 else
29435 sprintf (buf, "bl %%z%d", dest_operand_number);
29436 return buf;
29439 /* Generate PIC and indirect symbol stubs. */
29441 void
29442 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29444 unsigned int length;
29445 char *symbol_name, *lazy_ptr_name;
29446 char *local_label_0;
29447 static int label = 0;
29449 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29450 symb = (*targetm.strip_name_encoding) (symb);
29453 length = strlen (symb);
29454 symbol_name = XALLOCAVEC (char, length + 32);
29455 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29457 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29458 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29460 if (flag_pic == 2)
29461 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29462 else
29463 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29465 if (flag_pic == 2)
29467 fprintf (file, "\t.align 5\n");
29469 fprintf (file, "%s:\n", stub);
29470 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29472 label++;
29473 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29474 sprintf (local_label_0, "\"L%011d$spb\"", label);
29476 fprintf (file, "\tmflr r0\n");
29477 if (TARGET_LINK_STACK)
29479 char name[32];
29480 get_ppc476_thunk_name (name);
29481 fprintf (file, "\tbl %s\n", name);
29482 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29484 else
29486 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29487 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29489 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29490 lazy_ptr_name, local_label_0);
29491 fprintf (file, "\tmtlr r0\n");
29492 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29493 (TARGET_64BIT ? "ldu" : "lwzu"),
29494 lazy_ptr_name, local_label_0);
29495 fprintf (file, "\tmtctr r12\n");
29496 fprintf (file, "\tbctr\n");
29498 else
29500 fprintf (file, "\t.align 4\n");
29502 fprintf (file, "%s:\n", stub);
29503 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29505 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29506 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29507 (TARGET_64BIT ? "ldu" : "lwzu"),
29508 lazy_ptr_name);
29509 fprintf (file, "\tmtctr r12\n");
29510 fprintf (file, "\tbctr\n");
29513 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29514 fprintf (file, "%s:\n", lazy_ptr_name);
29515 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29516 fprintf (file, "%sdyld_stub_binding_helper\n",
29517 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29520 /* Legitimize PIC addresses. If the address is already
29521 position-independent, we return ORIG. Newly generated
29522 position-independent addresses go into a reg. This is REG if
29523 nonzero; otherwise we allocate registers as necessary. */
29525 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
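/* The unsigned comparison above is the standard wraparound test for a
   signed 16-bit immediate: adding 0x8000 maps [-0x8000, 0x7fff] onto
   [0, 0xffff], so every out-of-range value compares >= 0x10000.  */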
29528 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29529 rtx reg)
29531 rtx base, offset;
29533 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29534 reg = gen_reg_rtx (Pmode);
29536 if (GET_CODE (orig) == CONST)
29538 rtx reg_temp;
29540 if (GET_CODE (XEXP (orig, 0)) == PLUS
29541 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29542 return orig;
29544 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29546 /* Use a different reg for the intermediate value, as
29547 it will be marked UNCHANGING. */
29548 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29549 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29550 Pmode, reg_temp);
29551 offset =
29552 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29553 Pmode, reg);
29555 if (GET_CODE (offset) == CONST_INT)
29557 if (SMALL_INT (offset))
29558 return plus_constant (Pmode, base, INTVAL (offset));
29559 else if (! reload_in_progress && ! reload_completed)
29560 offset = force_reg (Pmode, offset);
29561 else
29563 rtx mem = force_const_mem (Pmode, orig);
29564 return machopic_legitimize_pic_address (mem, Pmode, reg);
29567 return gen_rtx_PLUS (Pmode, base, offset);
29570 /* Fall back on generic machopic code. */
29571 return machopic_legitimize_pic_address (orig, mode, reg);
29574 /* Output a .machine directive for the Darwin assembler, and call
29575 the generic start_file routine. */
29577 static void
29578 rs6000_darwin_file_start (void)
29580 static const struct
29582 const char *arg;
29583 const char *name;
29584 HOST_WIDE_INT if_set;
29585 } mapping[] = {
29586 { "ppc64", "ppc64", MASK_64BIT },
29587 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29588 { "power4", "ppc970", 0 },
29589 { "G5", "ppc970", 0 },
29590 { "7450", "ppc7450", 0 },
29591 { "7400", "ppc7400", MASK_ALTIVEC },
29592 { "G4", "ppc7400", 0 },
29593 { "750", "ppc750", 0 },
29594 { "740", "ppc750", 0 },
29595 { "G3", "ppc750", 0 },
29596 { "604e", "ppc604e", 0 },
29597 { "604", "ppc604", 0 },
29598 { "603e", "ppc603", 0 },
29599 { "603", "ppc603", 0 },
29600 { "601", "ppc601", 0 },
29601 { NULL, "ppc", 0 } };
29602 const char *cpu_id = "";
29603 size_t i;
29605 rs6000_file_start ();
29606 darwin_file_start ();
29608 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29610 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29611 cpu_id = rs6000_default_cpu;
29613 if (global_options_set.x_rs6000_cpu_index)
29614 cpu_id = processor_target_table[rs6000_cpu_index].name;
29616 /* Look through the mapping array. Pick the first entry whose ARG
29617 matches the -mcpu argument, whose IF_SET has a bit that is also set
29618 in the target flags, or whose ARG is NULL (the default entry). */
29620 i = 0;
29621 while (mapping[i].arg != NULL
29622 && strcmp (mapping[i].arg, cpu_id) != 0
29623 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29624 i++;
29626 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
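/* For example (derived from the table above): -mcpu=G4 emits
   ".machine ppc7400", -mcpu=power4 emits ".machine ppc970", and a
   64-bit compile with no recognized CPU stops at the MASK_64BIT row
   and emits ".machine ppc64".  */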
29629 #endif /* TARGET_MACHO */
29631 #if TARGET_ELF
29632 static int
29633 rs6000_elf_reloc_rw_mask (void)
29635 if (flag_pic)
29636 return 3;
29637 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29638 return 2;
29639 else
29640 return 0;
29643 /* Record an element in the table of global constructors. SYMBOL is
29644 a SYMBOL_REF of the function to be called; PRIORITY is a number
29645 between 0 and MAX_INIT_PRIORITY.
29647 This differs from default_named_section_asm_out_constructor in
29648 that we have special handling for -mrelocatable. */
29650 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29651 static void
29652 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29654 const char *section = ".ctors";
29655 char buf[16];
29657 if (priority != DEFAULT_INIT_PRIORITY)
29659 sprintf (buf, ".ctors.%.5u",
29660 /* Invert the numbering so the linker puts us in the proper
29661 order; constructors are run from right to left, and the
29662 linker sorts in increasing order. */
29663 MAX_INIT_PRIORITY - priority);
29664 section = buf;
29667 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29668 assemble_align (POINTER_SIZE);
29670 if (TARGET_RELOCATABLE)
29672 fputs ("\t.long (", asm_out_file);
29673 output_addr_const (asm_out_file, symbol);
29674 fputs (")@fixup\n", asm_out_file);
29676 else
29677 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
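/* Worked example of the inversion above, assuming MAX_INIT_PRIORITY
   is 65535: a priority-100 constructor lands in ".ctors.65435" and a
   priority-65000 one in ".ctors.00535".  The linker sorts 00535 first
   (leftmost), and since .ctors entries run right to left, the
   priority-100 constructor executes first, as required.  */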
29680 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29681 static void
29682 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29684 const char *section = ".dtors";
29685 char buf[16];
29687 if (priority != DEFAULT_INIT_PRIORITY)
29689 sprintf (buf, ".dtors.%.5u",
29690 /* Invert the numbering so the linker puts us in the proper
29691 order; constructors are run from right to left, and the
29692 linker sorts in increasing order. */
29693 MAX_INIT_PRIORITY - priority);
29694 section = buf;
29697 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29698 assemble_align (POINTER_SIZE);
29700 if (TARGET_RELOCATABLE)
29702 fputs ("\t.long (", asm_out_file);
29703 output_addr_const (asm_out_file, symbol);
29704 fputs (")@fixup\n", asm_out_file);
29706 else
29707 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29710 void
29711 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29713 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29715 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29716 ASM_OUTPUT_LABEL (file, name);
29717 fputs (DOUBLE_INT_ASM_OP, file);
29718 rs6000_output_function_entry (file, name);
29719 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29720 if (DOT_SYMBOLS)
29722 fputs ("\t.size\t", file);
29723 assemble_name (file, name);
29724 fputs (",24\n\t.type\t.", file);
29725 assemble_name (file, name);
29726 fputs (",@function\n", file);
29727 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29729 fputs ("\t.globl\t.", file);
29730 assemble_name (file, name);
29731 putc ('\n', file);
29734 else
29735 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29736 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29737 rs6000_output_function_entry (file, name);
29738 fputs (":\n", file);
29739 return;
29742 if (TARGET_RELOCATABLE
29743 && !TARGET_SECURE_PLT
29744 && (get_pool_size () != 0 || crtl->profile)
29745 && uses_TOC ())
29747 char buf[256];
29749 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29751 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29752 fprintf (file, "\t.long ");
29753 assemble_name (file, buf);
29754 putc ('-', file);
29755 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29756 assemble_name (file, buf);
29757 putc ('\n', file);
29760 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29761 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29763 if (DEFAULT_ABI == ABI_AIX)
29765 const char *desc_name, *orig_name;
29767 orig_name = (*targetm.strip_name_encoding) (name);
29768 desc_name = orig_name;
29769 while (*desc_name == '.')
29770 desc_name++;
29772 if (TREE_PUBLIC (decl))
29773 fprintf (file, "\t.globl %s\n", desc_name);
29775 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29776 fprintf (file, "%s:\n", desc_name);
29777 fprintf (file, "\t.long %s\n", orig_name);
29778 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29779 fputs ("\t.long 0\n", file);
29780 fprintf (file, "\t.previous\n");
29782 ASM_OUTPUT_LABEL (file, name);
29785 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29786 static void
29787 rs6000_elf_file_end (void)
29789 #ifdef HAVE_AS_GNU_ATTRIBUTE
29790 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29792 if (rs6000_passes_float)
29793 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29794 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29795 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29796 : 2));
29797 if (rs6000_passes_vector)
29798 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29799 (TARGET_ALTIVEC_ABI ? 2
29800 : TARGET_SPE_ABI ? 3
29801 : 1));
29802 if (rs6000_returns_struct)
29803 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29804 aix_struct_return ? 2 : 1);
29806 #endif
29807 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29808 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29809 file_end_indicate_exec_stack ();
29810 #endif
29812 #endif
29814 #if TARGET_XCOFF
29815 static void
29816 rs6000_xcoff_asm_output_anchor (rtx symbol)
29818 char buffer[100];
29820 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29821 SYMBOL_REF_BLOCK_OFFSET (symbol));
29822 fprintf (asm_out_file, "%s", SET_ASM_OP);
29823 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29824 fprintf (asm_out_file, ",");
29825 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29826 fprintf (asm_out_file, "\n");
29829 static void
29830 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29832 fputs (GLOBAL_ASM_OP, stream);
29833 RS6000_OUTPUT_BASENAME (stream, name);
29834 putc ('\n', stream);
29837 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
29838 points to the section string variable. */
29840 static void
29841 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29843 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29844 *(const char *const *) directive,
29845 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29848 /* Likewise for read-write sections. */
29850 static void
29851 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29853 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29854 *(const char *const *) directive,
29855 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29858 static void
29859 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29861 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29862 *(const char *const *) directive,
29863 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29866 /* A get_unnamed_section callback, used for switching to toc_section. */
29868 static void
29869 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29871 if (TARGET_MINIMAL_TOC)
29873 /* toc_section is always selected at least once from
29874 rs6000_xcoff_file_start, so this is guaranteed to be
29875 defined exactly once in each file. */
29876 if (!toc_initialized)
29878 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29879 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29880 toc_initialized = 1;
29882 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29883 (TARGET_32BIT ? "" : ",3"));
29885 else
29886 fputs ("\t.toc\n", asm_out_file);
29889 /* Implement TARGET_ASM_INIT_SECTIONS. */
29891 static void
29892 rs6000_xcoff_asm_init_sections (void)
29894 read_only_data_section
29895 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29896 &xcoff_read_only_section_name);
29898 private_data_section
29899 = get_unnamed_section (SECTION_WRITE,
29900 rs6000_xcoff_output_readwrite_section_asm_op,
29901 &xcoff_private_data_section_name);
29903 tls_data_section
29904 = get_unnamed_section (SECTION_TLS,
29905 rs6000_xcoff_output_tls_section_asm_op,
29906 &xcoff_tls_data_section_name);
29908 tls_private_data_section
29909 = get_unnamed_section (SECTION_TLS,
29910 rs6000_xcoff_output_tls_section_asm_op,
29911 &xcoff_private_data_section_name);
29913 read_only_private_data_section
29914 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29915 &xcoff_private_data_section_name);
29917 toc_section
29918 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29920 readonly_data_section = read_only_data_section;
29921 exception_section = data_section;
29924 static int
29925 rs6000_xcoff_reloc_rw_mask (void)
29927 return 3;
29930 static void
29931 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29932 tree decl ATTRIBUTE_UNUSED)
29934 int smclass;
29935 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29937 if (flags & SECTION_CODE)
29938 smclass = 0;
29939 else if (flags & SECTION_TLS)
29940 smclass = 3;
29941 else if (flags & SECTION_WRITE)
29942 smclass = 2;
29943 else
29944 smclass = 1;
29946 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29947 (flags & SECTION_CODE) ? "." : "",
29948 name, suffix[smclass], flags & SECTION_ENTSIZE);
29951 #define IN_NAMED_SECTION(DECL) \
29952 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29953 && DECL_SECTION_NAME (DECL) != NULL)
29955 static section *
29956 rs6000_xcoff_select_section (tree decl, int reloc,
29957 unsigned HOST_WIDE_INT align)
29959 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29960 a named section. */
29961 if (align > BIGGEST_ALIGNMENT)
29963 resolve_unique_section (decl, reloc, true);
29964 if (IN_NAMED_SECTION (decl))
29965 return get_named_section (decl, NULL, reloc);
29968 if (decl_readonly_section (decl, reloc))
29970 if (TREE_PUBLIC (decl))
29971 return read_only_data_section;
29972 else
29973 return read_only_private_data_section;
29975 else
29977 #if HAVE_AS_TLS
29978 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29980 if (TREE_PUBLIC (decl))
29981 return tls_data_section;
29982 else if (bss_initializer_p (decl))
29984 /* Convert to COMMON to emit in BSS. */
29985 DECL_COMMON (decl) = 1;
29986 return tls_comm_section;
29988 else
29989 return tls_private_data_section;
29991 else
29992 #endif
29993 if (TREE_PUBLIC (decl))
29994 return data_section;
29995 else
29996 return private_data_section;
30000 static void
30001 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
30003 const char *name;
30005 /* Use select_section for private data and uninitialized data with
30006 alignment <= BIGGEST_ALIGNMENT. */
30007 if (!TREE_PUBLIC (decl)
30008 || DECL_COMMON (decl)
30009 || (DECL_INITIAL (decl) == NULL_TREE
30010 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
30011 || DECL_INITIAL (decl) == error_mark_node
30012 || (flag_zero_initialized_in_bss
30013 && initializer_zerop (DECL_INITIAL (decl))))
30014 return;
30016 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
30017 name = (*targetm.strip_name_encoding) (name);
30018 set_decl_section_name (decl, name);
30021 /* Select section for constant in constant pool.
30023 On RS/6000, all constants are in the private read-only data area.
30024 However, if this is being placed in the TOC it must be output as a
30025 toc entry. */
30027 static section *
30028 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
30029 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
30031 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
30032 return toc_section;
30033 else
30034 return read_only_private_data_section;
30037 /* Remove any trailing [DS] or the like from the symbol name. */
30039 static const char *
30040 rs6000_xcoff_strip_name_encoding (const char *name)
30042 size_t len;
30043 if (*name == '*')
30044 name++;
30045 len = strlen (name);
30046 if (name[len - 1] == ']')
30047 return ggc_alloc_string (name, len - 4);
30048 else
30049 return name;
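/* Note: the "len - 4" assumes the bracketed suffix is exactly four
   characters, i.e. a two-letter storage-mapping class such as [DS],
   [RW] or [RO]; e.g. "FOO[DS]" is returned as "FOO".  */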
30052 /* Section attributes. AIX is always PIC. */
30054 static unsigned int
30055 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
30057 unsigned int align;
30058 unsigned int flags = default_section_type_flags (decl, name, reloc);
30060 /* Align to at least UNIT size. */
30061 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
30062 align = MIN_UNITS_PER_WORD;
30063 else
30064 /* Increase alignment of large objects if not already stricter. */
30065 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
30066 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
30067 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
30069 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
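/* Illustrative example: a 32-byte object with DECL_ALIGN of 128 bits
   gets align == MAX (16, 8) == 16, so exact_log2 (16) == 4 is stored
   in the SECTION_ENTSIZE bits and later reappears as the alignment
   operand of the .csect directive in rs6000_xcoff_asm_named_section.  */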
30072 /* Output at beginning of assembler file.
30074 Initialize the section names for the RS/6000 at this point.
30076 Specify filename, including full path, to assembler.
30078 We want to go into the TOC section so at least one .toc will be emitted.
30079 Also, in order to output proper .bs/.es pairs, we need at least one static
30080 [RW] section emitted.
30082 Finally, declare mcount when profiling to make the assembler happy. */
30084 static void
30085 rs6000_xcoff_file_start (void)
30087 rs6000_gen_section_name (&xcoff_bss_section_name,
30088 main_input_filename, ".bss_");
30089 rs6000_gen_section_name (&xcoff_private_data_section_name,
30090 main_input_filename, ".rw_");
30091 rs6000_gen_section_name (&xcoff_read_only_section_name,
30092 main_input_filename, ".ro_");
30093 rs6000_gen_section_name (&xcoff_tls_data_section_name,
30094 main_input_filename, ".tls_");
30095 rs6000_gen_section_name (&xcoff_tbss_section_name,
30096 main_input_filename, ".tbss_[UL]");
30098 fputs ("\t.file\t", asm_out_file);
30099 output_quoted_string (asm_out_file, main_input_filename);
30100 fputc ('\n', asm_out_file);
30101 if (write_symbols != NO_DEBUG)
30102 switch_to_section (private_data_section);
30103 switch_to_section (text_section);
30104 if (profile_flag)
30105 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
30106 rs6000_file_start ();
30109 /* Output at end of assembler file.
30110 On the RS/6000, referencing data should automatically pull in text. */
30112 static void
30113 rs6000_xcoff_file_end (void)
30115 switch_to_section (text_section);
30116 fputs ("_section_.text:\n", asm_out_file);
30117 switch_to_section (data_section);
30118 fputs (TARGET_32BIT
30119 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
30120 asm_out_file);
30123 struct declare_alias_data
30125 FILE *file;
30126 bool function_descriptor;
30129 /* Declare alias N. A helper callback for call_for_symbol_and_aliases. */
30131 static bool
30132 rs6000_declare_alias (struct symtab_node *n, void *d)
30134 struct declare_alias_data *data = (struct declare_alias_data *)d;
30135 /* Main symbol is output specially, because varasm machinery does part of
30136 the job for us - we do not need to declare .globl/.lglobl and such. */
30137 if (!n->alias || n->weakref)
30138 return false;
30140 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
30141 return false;
30143 /* Prevent assemble_alias from trying to use .set pseudo operation
30144 that does not behave as expected by the middle-end. */
30145 TREE_ASM_WRITTEN (n->decl) = true;
30147 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
30148 char *buffer = (char *) alloca (strlen (name) + 2);
30149 char *p;
30150 int dollar_inside = 0;
30152 strcpy (buffer, name);
30153 p = strchr (buffer, '$');
30154 while (p) {
30155 *p = '_';
30156 dollar_inside++;
30157 p = strchr (p + 1, '$');
30159 if (TREE_PUBLIC (n->decl))
30161 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
30163 if (dollar_inside) {
30164 if (data->function_descriptor)
30165 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30166 else
30167 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30169 if (data->function_descriptor)
30170 fputs ("\t.globl .", data->file);
30171 else
30172 fputs ("\t.globl ", data->file);
30173 RS6000_OUTPUT_BASENAME (data->file, buffer);
30174 putc ('\n', data->file);
30176 #ifdef ASM_WEAKEN_DECL
30177 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
30178 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
30179 #endif
30181 else
30183 if (dollar_inside)
30185 if (data->function_descriptor)
30186 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30187 else
30188 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30190 if (data->function_descriptor)
30191 fputs ("\t.lglobl .", data->file);
30192 else
30193 fputs ("\t.lglobl ", data->file);
30194 RS6000_OUTPUT_BASENAME (data->file, buffer);
30195 putc ('\n', data->file);
30197 if (data->function_descriptor)
30198 fputs (".", data->file);
30199 RS6000_OUTPUT_BASENAME (data->file, buffer);
30200 fputs (":\n", data->file);
30201 return false;
30204 /* This function produces the initial definition of a function name.
30205 On the RS/6000, we need to place an extra '.' in the function name and
30206 output the function descriptor.
30207 Dollar signs are converted to underscores.
30209 The csect for the function will have already been created when
30210 text_section was selected. We do have to go back to that csect, however.
30212 The third and fourth parameters to the .function pseudo-op (16 and 044)
30213 are placeholders which no longer have any use.
30215 Because AIX assembler's .set command has unexpected semantics, we output
30216 all aliases as alternative labels in front of the definition. */
30218 void
30219 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30221 char *buffer = (char *) alloca (strlen (name) + 1);
30222 char *p;
30223 int dollar_inside = 0;
30224 struct declare_alias_data data = {file, false};
30226 strcpy (buffer, name);
30227 p = strchr (buffer, '$');
30228 while (p) {
30229 *p = '_';
30230 dollar_inside++;
30231 p = strchr (p + 1, '$');
30233 if (TREE_PUBLIC (decl))
30235 if (!RS6000_WEAK || !DECL_WEAK (decl))
30237 if (dollar_inside) {
30238 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30239 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30241 fputs ("\t.globl .", file);
30242 RS6000_OUTPUT_BASENAME (file, buffer);
30243 putc ('\n', file);
30246 else
30248 if (dollar_inside) {
30249 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30250 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30252 fputs ("\t.lglobl .", file);
30253 RS6000_OUTPUT_BASENAME (file, buffer);
30254 putc ('\n', file);
30256 fputs ("\t.csect ", file);
30257 RS6000_OUTPUT_BASENAME (file, buffer);
30258 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30259 RS6000_OUTPUT_BASENAME (file, buffer);
30260 fputs (":\n", file);
30261 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30262 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30263 RS6000_OUTPUT_BASENAME (file, buffer);
30264 fputs (", TOC[tc0], 0\n", file);
30265 in_section = NULL;
30266 switch_to_section (function_section (decl));
30267 putc ('.', file);
30268 RS6000_OUTPUT_BASENAME (file, buffer);
30269 fputs (":\n", file);
30270 data.function_descriptor = true;
30271 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30272 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30273 xcoffout_declare_function (file, decl, buffer);
30274 return;
30277 /* This function produces the initial definition of an object (variable) name.
30278 Because AIX assembler's .set command has unexpected semantics, we output
30279 all aliases as alternative labels in front of the definition. */
30281 void
30282 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30284 struct declare_alias_data data = {file, false};
30285 RS6000_OUTPUT_BASENAME (file, name);
30286 fputs (":\n", file);
30287 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30290 #ifdef HAVE_AS_TLS
30291 static void
30292 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30294 rtx symbol;
30295 int flags;
30297 default_encode_section_info (decl, rtl, first);
30299 /* Careful not to prod global register variables. */
30300 if (!MEM_P (rtl))
30301 return;
30302 symbol = XEXP (rtl, 0);
30303 if (GET_CODE (symbol) != SYMBOL_REF)
30304 return;
30306 flags = SYMBOL_REF_FLAGS (symbol);
30308 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30309 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30311 SYMBOL_REF_FLAGS (symbol) = flags;
30313 #endif /* HAVE_AS_TLS */
30314 #endif /* TARGET_XCOFF */
30316 /* Compute a (partial) cost for rtx X. Return true if the complete
30317 cost has been computed, and false if subexpressions should be
30318 scanned. In either case, *TOTAL contains the cost result. */
30320 static bool
30321 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30322 int *total, bool speed)
30324 machine_mode mode = GET_MODE (x);
30326 switch (code)
30328 /* On the RS/6000, if it is valid in the insn, it is free. */
30329 case CONST_INT:
30330 if (((outer_code == SET
30331 || outer_code == PLUS
30332 || outer_code == MINUS)
30333 && (satisfies_constraint_I (x)
30334 || satisfies_constraint_L (x)))
30335 || (outer_code == AND
30336 && (satisfies_constraint_K (x)
30337 || (mode == SImode
30338 ? satisfies_constraint_L (x)
30339 : satisfies_constraint_J (x))
30340 || mask_operand (x, mode)
30341 || (mode == DImode
30342 && mask64_operand (x, DImode))))
30343 || ((outer_code == IOR || outer_code == XOR)
30344 && (satisfies_constraint_K (x)
30345 || (mode == SImode
30346 ? satisfies_constraint_L (x)
30347 : satisfies_constraint_J (x))))
30348 || outer_code == ASHIFT
30349 || outer_code == ASHIFTRT
30350 || outer_code == LSHIFTRT
30351 || outer_code == ROTATE
30352 || outer_code == ROTATERT
30353 || outer_code == ZERO_EXTRACT
30354 || (outer_code == MULT
30355 && satisfies_constraint_I (x))
30356 || ((outer_code == DIV || outer_code == UDIV
30357 || outer_code == MOD || outer_code == UMOD)
30358 && exact_log2 (INTVAL (x)) >= 0)
30359 || (outer_code == COMPARE
30360 && (satisfies_constraint_I (x)
30361 || satisfies_constraint_K (x)))
30362 || ((outer_code == EQ || outer_code == NE)
30363 && (satisfies_constraint_I (x)
30364 || satisfies_constraint_K (x)
30365 || (mode == SImode
30366 ? satisfies_constraint_L (x)
30367 : satisfies_constraint_J (x))))
30368 || (outer_code == GTU
30369 && satisfies_constraint_I (x))
30370 || (outer_code == LTU
30371 && satisfies_constraint_P (x)))
30373 *total = 0;
30374 return true;
30376 else if ((outer_code == PLUS
30377 && reg_or_add_cint_operand (x, VOIDmode))
30378 || (outer_code == MINUS
30379 && reg_or_sub_cint_operand (x, VOIDmode))
30380 || ((outer_code == SET
30381 || outer_code == IOR
30382 || outer_code == XOR)
30383 && (INTVAL (x)
30384 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30386 *total = COSTS_N_INSNS (1);
30387 return true;
30389 /* FALLTHRU */
30391 case CONST_DOUBLE:
30392 case CONST_WIDE_INT:
30393 case CONST:
30394 case HIGH:
30395 case SYMBOL_REF:
30396 case MEM:
30397 /* When optimizing for size, MEM should be slightly more expensive
30398 than generating the address, e.g., (plus (reg) (const)).
30399 L1 cache latency is about two instructions. */
30400 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30401 return true;
30403 case LABEL_REF:
30404 *total = 0;
30405 return true;
30407 case PLUS:
30408 case MINUS:
30409 if (FLOAT_MODE_P (mode))
30410 *total = rs6000_cost->fp;
30411 else
30412 *total = COSTS_N_INSNS (1);
30413 return false;
30415 case MULT:
30416 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30417 && satisfies_constraint_I (XEXP (x, 1)))
30419 if (INTVAL (XEXP (x, 1)) >= -256
30420 && INTVAL (XEXP (x, 1)) <= 255)
30421 *total = rs6000_cost->mulsi_const9;
30422 else
30423 *total = rs6000_cost->mulsi_const;
30425 else if (mode == SFmode)
30426 *total = rs6000_cost->fp;
30427 else if (FLOAT_MODE_P (mode))
30428 *total = rs6000_cost->dmul;
30429 else if (mode == DImode)
30430 *total = rs6000_cost->muldi;
30431 else
30432 *total = rs6000_cost->mulsi;
30433 return false;
30435 case FMA:
30436 if (mode == SFmode)
30437 *total = rs6000_cost->fp;
30438 else
30439 *total = rs6000_cost->dmul;
30440 break;
30442 case DIV:
30443 case MOD:
30444 if (FLOAT_MODE_P (mode))
30446 *total = mode == DFmode ? rs6000_cost->ddiv
30447 : rs6000_cost->sdiv;
30448 return false;
30450 /* FALLTHRU */
30452 case UDIV:
30453 case UMOD:
30454 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30455 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30457 if (code == DIV || code == MOD)
30458 /* Shift, addze */
30459 *total = COSTS_N_INSNS (2);
30460 else
30461 /* Shift */
30462 *total = COSTS_N_INSNS (1);
30464 else
30466 if (GET_MODE (XEXP (x, 1)) == DImode)
30467 *total = rs6000_cost->divdi;
30468 else
30469 *total = rs6000_cost->divsi;
30471 /* Add in shift and subtract for MOD. */
30472 if (code == MOD || code == UMOD)
30473 *total += COSTS_N_INSNS (2);
30474 return false;
30476 case CTZ:
30477 case FFS:
30478 *total = COSTS_N_INSNS (4);
30479 return false;
30481 case POPCOUNT:
30482 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30483 return false;
30485 case PARITY:
30486 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30487 return false;
30489 case NOT:
30490 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30492 *total = 0;
30493 return false;
30495 /* FALLTHRU */
30497 case AND:
30498 case CLZ:
30499 case IOR:
30500 case XOR:
30501 case ZERO_EXTRACT:
30502 *total = COSTS_N_INSNS (1);
30503 return false;
30505 case ASHIFT:
30506 case ASHIFTRT:
30507 case LSHIFTRT:
30508 case ROTATE:
30509 case ROTATERT:
30510 /* Handle mul_highpart. */
30511 if (outer_code == TRUNCATE
30512 && GET_CODE (XEXP (x, 0)) == MULT)
30514 if (mode == DImode)
30515 *total = rs6000_cost->muldi;
30516 else
30517 *total = rs6000_cost->mulsi;
30518 return true;
30520 else if (outer_code == AND)
30521 *total = 0;
30522 else
30523 *total = COSTS_N_INSNS (1);
30524 return false;
30526 case SIGN_EXTEND:
30527 case ZERO_EXTEND:
30528 if (GET_CODE (XEXP (x, 0)) == MEM)
30529 *total = 0;
30530 else
30531 *total = COSTS_N_INSNS (1);
30532 return false;
30534 case COMPARE:
30535 case NEG:
30536 case ABS:
30537 if (!FLOAT_MODE_P (mode))
30539 *total = COSTS_N_INSNS (1);
30540 return false;
30542 /* FALLTHRU */
30544 case FLOAT:
30545 case UNSIGNED_FLOAT:
30546 case FIX:
30547 case UNSIGNED_FIX:
30548 case FLOAT_TRUNCATE:
30549 *total = rs6000_cost->fp;
30550 return false;
30552 case FLOAT_EXTEND:
30553 if (mode == DFmode)
30554 *total = rs6000_cost->sfdf_convert;
30555 else
30556 *total = rs6000_cost->fp;
30557 return false;
30559 case UNSPEC:
30560 switch (XINT (x, 1))
30562 case UNSPEC_FRSP:
30563 *total = rs6000_cost->fp;
30564 return true;
30566 default:
30567 break;
30569 break;
30571 case CALL:
30572 case IF_THEN_ELSE:
30573 if (!speed)
30575 *total = COSTS_N_INSNS (1);
30576 return true;
30578 else if (FLOAT_MODE_P (mode)
30579 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30581 *total = rs6000_cost->fp;
30582 return false;
30584 break;
30586 case NE:
30587 case EQ:
30588 case GTU:
30589 case LTU:
30590 /* Carry bit requires mode == Pmode.
30591 NEG or PLUS is already counted, so only add one. */
30592 if (mode == Pmode
30593 && (outer_code == NEG || outer_code == PLUS))
30595 *total = COSTS_N_INSNS (1);
30596 return true;
30598 if (outer_code == SET)
30600 if (XEXP (x, 1) == const0_rtx)
30602 if (TARGET_ISEL && !TARGET_MFCRF)
30603 *total = COSTS_N_INSNS (8);
30604 else
30605 *total = COSTS_N_INSNS (2);
30606 return true;
30608 else if (mode == Pmode)
30610 *total = COSTS_N_INSNS (3);
30611 return false;
30614 /* FALLTHRU */
30616 case GT:
30617 case LT:
30618 case UNORDERED:
30619 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30621 if (TARGET_ISEL && !TARGET_MFCRF)
30622 *total = COSTS_N_INSNS (8);
30623 else
30624 *total = COSTS_N_INSNS (2);
30625 return true;
30627 /* CC COMPARE. */
30628 if (outer_code == COMPARE)
30630 *total = 0;
30631 return true;
30633 break;
30635 default:
30636 break;
30639 return false;
30642 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30644 static bool
30645 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30646 bool speed)
30648 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30650 fprintf (stderr,
30651 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30652 "opno = %d, total = %d, speed = %s, x:\n",
30653 ret ? "complete" : "scan inner",
30654 GET_RTX_NAME (code),
30655 GET_RTX_NAME (outer_code),
30656 opno,
30657 *total,
30658 speed ? "true" : "false");
30660 debug_rtx (x);
30662 return ret;
30665 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30667 static int
30668 rs6000_debug_address_cost (rtx x, machine_mode mode,
30669 addr_space_t as, bool speed)
30671 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30673 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30674 ret, speed ? "true" : "false");
30675 debug_rtx (x);
30677 return ret;
30681 /* A C expression returning the cost of moving data from a register of class
30682 FROM to one of class TO. */
30684 static int
30685 rs6000_register_move_cost (machine_mode mode,
30686 reg_class_t from, reg_class_t to)
30688 int ret;
30690 if (TARGET_DEBUG_COST)
30691 dbg_cost_ctrl++;
30693 /* Moves from/to GENERAL_REGS. */
30694 if (reg_classes_intersect_p (to, GENERAL_REGS)
30695 || reg_classes_intersect_p (from, GENERAL_REGS))
30697 reg_class_t rclass = from;
30699 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30700 rclass = to;
30702 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30703 ret = (rs6000_memory_move_cost (mode, rclass, false)
30704 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30706 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30707 shift. */
30708 else if (rclass == CR_REGS)
30709 ret = 4;
30711 /* For those processors that have slow LR/CTR moves, make them more
30712 expensive than memory in order to bias spills to memory. */
30713 else if ((rs6000_cpu == PROCESSOR_POWER6
30714 || rs6000_cpu == PROCESSOR_POWER7
30715 || rs6000_cpu == PROCESSOR_POWER8)
30716 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30717 ret = 6 * hard_regno_nregs[0][mode];
30719 else
30720 /* A move will cost one instruction per GPR moved. */
30721 ret = 2 * hard_regno_nregs[0][mode];
30724 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30725 else if (VECTOR_MEM_VSX_P (mode)
30726 && reg_classes_intersect_p (to, VSX_REGS)
30727 && reg_classes_intersect_p (from, VSX_REGS))
30728 ret = 2 * hard_regno_nregs[32][mode];
30730 /* Moving between two similar registers is just one instruction. */
30731 else if (reg_classes_intersect_p (to, from))
30732 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30734 /* Everything else has to go through GENERAL_REGS. */
30735 else
30736 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30737 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30739 if (TARGET_DEBUG_COST)
30741 if (dbg_cost_ctrl == 1)
30742 fprintf (stderr,
30743 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30744 ret, GET_MODE_NAME (mode), reg_class_names[from],
30745 reg_class_names[to]);
30746 dbg_cost_ctrl--;
30749 return ret;
30752 /* A C expression returning the cost of moving data of MODE from a register to
30753 or from memory. */
30755 static int
30756 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30757 bool in ATTRIBUTE_UNUSED)
30759 int ret;
30761 if (TARGET_DEBUG_COST)
30762 dbg_cost_ctrl++;
30764 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30765 ret = 4 * hard_regno_nregs[0][mode];
30766 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30767 || reg_classes_intersect_p (rclass, VSX_REGS)))
30768 ret = 4 * hard_regno_nregs[32][mode];
30769 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30770 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30771 else
30772 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30774 if (TARGET_DEBUG_COST)
30776 if (dbg_cost_ctrl == 1)
30777 fprintf (stderr,
30778 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30779 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30780 dbg_cost_ctrl--;
30783 return ret;
30786 /* Returns a code for a target-specific builtin that implements
30787 reciprocal of the function, or NULL_TREE if not available. */
30789 static tree
30790 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30791 bool sqrt ATTRIBUTE_UNUSED)
30793 if (optimize_insn_for_size_p ())
30794 return NULL_TREE;
30796 if (md_fn)
30797 switch (fn)
30799 case VSX_BUILTIN_XVSQRTDP:
30800 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30801 return NULL_TREE;
30803 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30805 case VSX_BUILTIN_XVSQRTSP:
30806 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30807 return NULL_TREE;
30809 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30811 default:
30812 return NULL_TREE;
30815 else
30816 switch (fn)
30818 case BUILT_IN_SQRT:
30819 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30820 return NULL_TREE;
30822 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30824 case BUILT_IN_SQRTF:
30825 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30826 return NULL_TREE;
30828 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30830 default:
30831 return NULL_TREE;
30835 /* Load up a constant. If the mode is a vector mode, splat the value across
30836 all of the vector elements. */
30838 static rtx
30839 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30841 rtx reg;
30843 if (mode == SFmode || mode == DFmode)
30845 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30846 reg = force_reg (mode, d);
30848 else if (mode == V4SFmode)
30850 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30851 rtvec v = gen_rtvec (4, d, d, d, d);
30852 reg = gen_reg_rtx (mode);
30853 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30855 else if (mode == V2DFmode)
30857 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30858 rtvec v = gen_rtvec (2, d, d);
30859 reg = gen_reg_rtx (mode);
30860 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30862 else
30863 gcc_unreachable ();
30865 return reg;
30868 /* Generate an FMA instruction. */
30870 static void
30871 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30873 machine_mode mode = GET_MODE (target);
30874 rtx dst;
30876 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30877 gcc_assert (dst != NULL);
30879 if (dst != target)
30880 emit_move_insn (target, dst);
30883 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30885 static void
30886 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30888 machine_mode mode = GET_MODE (target);
30889 rtx dst;
30891 /* Altivec does not support fms directly;
30892 generate in terms of fma in that case. */
30893 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30894 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30895 else
30897 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30898 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30900 gcc_assert (dst != NULL);
30902 if (dst != target)
30903 emit_move_insn (target, dst);
30906 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30908 static void
30909 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30911 machine_mode mode = GET_MODE (dst);
30912 rtx r;
30914 /* This is a tad more complicated, since the fnma_optab is for
30915 a different expression: fma(-m1, m2, a), which is the same
30916 thing except in the case of signed zeros.
30918 Fortunately we know that if FMA is supported that FNMSUB is
30919 also supported in the ISA. Just expand it directly. */
30921 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30923 r = gen_rtx_NEG (mode, a);
30924 r = gen_rtx_FMA (mode, m1, m2, r);
30925 r = gen_rtx_NEG (mode, r);
30926 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30929 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30930 add a reg_note saying that this was a division. Support both scalar and
30931 vector divide. Assumes no trapping math and finite arguments. */
30933 void
30934 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30936 machine_mode mode = GET_MODE (dst);
30937 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30938 int i;
30940 /* Low precision estimates guarantee 5 bits of accuracy. High
30941 precision estimates guarantee 14 bits of accuracy. SFmode
30942 requires 23 bits of accuracy. DFmode requires 52 bits of
30943 accuracy. Each pass at least doubles the accuracy, leading
30944 to the following. */
30945 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30946 if (mode == DFmode || mode == V2DFmode)
30947 passes++;
30949 enum insn_code code = optab_handler (smul_optab, mode);
30950 insn_gen_fn gen_mul = GEN_FCN (code);
30952 gcc_assert (code != CODE_FOR_nothing);
30954 one = rs6000_load_constant_and_splat (mode, dconst1);
30956 /* x0 = 1./d estimate */
30957 x0 = gen_reg_rtx (mode);
30958 emit_insn (gen_rtx_SET (VOIDmode, x0,
30959 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30960 UNSPEC_FRES)));
30962 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30963 if (passes > 1) {
30965 /* e0 = 1. - d * x0 */
30966 e0 = gen_reg_rtx (mode);
30967 rs6000_emit_nmsub (e0, d, x0, one);
30969 /* x1 = x0 + e0 * x0 */
30970 x1 = gen_reg_rtx (mode);
30971 rs6000_emit_madd (x1, e0, x0, x0);
30973 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30974 ++i, xprev = xnext, eprev = enext) {
30976 /* enext = eprev * eprev */
30977 enext = gen_reg_rtx (mode);
30978 emit_insn (gen_mul (enext, eprev, eprev));
30980 /* xnext = xprev + enext * xprev */
30981 xnext = gen_reg_rtx (mode);
30982 rs6000_emit_madd (xnext, enext, xprev, xprev);
30985 } else
30986 xprev = x0;
30988 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30990 /* u = n * xprev */
30991 u = gen_reg_rtx (mode);
30992 emit_insn (gen_mul (u, n, xprev));
30994 /* v = n - (d * u) */
30995 v = gen_reg_rtx (mode);
30996 rs6000_emit_nmsub (v, d, u, n);
30998 /* dst = (v * xprev) + u */
30999 rs6000_emit_madd (dst, v, xprev, u);
31001 if (note_p)
31002 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
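/* The sequence above, as a scalar C model -- an illustrative sketch,
   not part of the original source.  "1.0 / d" stands in for the
   fres/xvredp estimate (an assumption: the hardware estimate is only
   good to a few bits, which is why refinement is needed at all).
   Shown for the single-pass TARGET_RECIP_PRECISION case.  */

static double
swdiv_model (double n, double d)
{
  double x0 = 1.0 / d;          /* x0 = 1/d estimate (fres)  */
  double e0 = 1.0 - d * x0;     /* fnmsub: error of the estimate  */
  double x1 = x0 + e0 * x0;     /* fmadd: x1 = x0 * (2 - d * x0)  */
  double u = n * x1;            /* candidate quotient  */
  double v = n - d * u;         /* fnmsub: residual  */
  return v * x1 + u;            /* fmadd: corrected quotient  */
}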
31005 /* Newton-Raphson approximation of single/double-precision floating point
31006 rsqrt. Assumes no trapping math and finite arguments. */
31008 void
31009 rs6000_emit_swrsqrt (rtx dst, rtx src)
31011 machine_mode mode = GET_MODE (src);
31012 rtx x0 = gen_reg_rtx (mode);
31013 rtx y = gen_reg_rtx (mode);
31015 /* Low precision estimates guarantee 5 bits of accuracy. High
31016 precision estimates guarantee 14 bits of accuracy. SFmode
31017 requires 23 bits of accuracy. DFmode requires 52 bits of
31018 accuracy. Each pass at least doubles the accuracy, leading
31019 to the following. */
31020 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
31021 if (mode == DFmode || mode == V2DFmode)
31022 passes++;
31024 REAL_VALUE_TYPE dconst3_2;
31025 int i;
31026 rtx halfthree;
31027 enum insn_code code = optab_handler (smul_optab, mode);
31028 insn_gen_fn gen_mul = GEN_FCN (code);
31030 gcc_assert (code != CODE_FOR_nothing);
31032 /* Load up the constant 1.5 either as a scalar, or as a vector. */
31033 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
31034 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
31036 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
31038 /* x0 = rsqrt estimate */
31039 emit_insn (gen_rtx_SET (VOIDmode, x0,
31040 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
31041 UNSPEC_RSQRT)));
31043 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
31044 rs6000_emit_msub (y, src, halfthree, src);
31046 for (i = 0; i < passes; i++)
31048 rtx x1 = gen_reg_rtx (mode);
31049 rtx u = gen_reg_rtx (mode);
31050 rtx v = gen_reg_rtx (mode);
31052 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
31053 emit_insn (gen_mul (u, x0, x0));
31054 rs6000_emit_nmsub (v, y, u, halfthree);
31055 emit_insn (gen_mul (x1, x0, v));
31056 x0 = x1;
31059 emit_move_insn (dst, x0);
31060 return;
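/* Likewise, a scalar C model of the rsqrt refinement -- illustrative
   only; sqrt from <math.h> stands in for the frsqrte estimate, an
   assumption made to keep the sketch self-contained.  Each pass
   computes x1 = x0 * (1.5 - (0.5 * src) * x0 * x0).  */

#include <math.h>

static double
swrsqrt_model (double src, int passes)
{
  double halfthree = 1.5;
  double x = 1.0 / sqrt (src);       /* rsqrt estimate (frsqrte)  */
  double y = src * halfthree - src;  /* y = 0.5 * src, via fmsub  */

  for (int i = 0; i < passes; i++)
    {
      double u = x * x;
      double v = halfthree - y * u;  /* fnmsub: 1.5 - y * (x * x)  */
      x = x * v;
    }
  return x;
}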
31063 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
31064 (Power7) targets. DST is the target, and SRC is the argument operand. */
31066 void
31067 rs6000_emit_popcount (rtx dst, rtx src)
31069 machine_mode mode = GET_MODE (dst);
31070 rtx tmp1, tmp2;
31072 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
31073 if (TARGET_POPCNTD)
31075 if (mode == SImode)
31076 emit_insn (gen_popcntdsi2 (dst, src));
31077 else
31078 emit_insn (gen_popcntddi2 (dst, src));
31079 return;
31082 tmp1 = gen_reg_rtx (mode);
31084 if (mode == SImode)
31086 emit_insn (gen_popcntbsi2 (tmp1, src));
31087 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
31088 NULL_RTX, 0);
31089 tmp2 = force_reg (SImode, tmp2);
31090 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
31092 else
31094 emit_insn (gen_popcntbdi2 (tmp1, src));
31095 tmp2 = expand_mult (DImode, tmp1,
31096 GEN_INT ((HOST_WIDE_INT)
31097 0x01010101 << 32 | 0x01010101),
31098 NULL_RTX, 0);
31099 tmp2 = force_reg (DImode, tmp2);
31100 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
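/* A C model of the multiply trick above (illustrative sketch, not
   part of the original source).  popcntb leaves the population count
   of each byte in the corresponding result byte; multiplying by
   0x01010101 sums all four counts into the top byte, and the shift
   by 24 extracts it.  */

static unsigned int
popcount_model (unsigned int x)
{
  unsigned int t = 0;

  /* Per-byte population counts, standing in for popcntb.  */
  for (int i = 0; i < 4; i++)
    {
      unsigned int b = (x >> (8 * i)) & 0xff;
      unsigned int c = 0;
      while (b)
        {
          c += b & 1;
          b >>= 1;
        }
      t |= c << (8 * i);
    }

  /* Each count is at most 8, so the per-byte sums in the product
     cannot carry; the top byte is exactly c0 + c1 + c2 + c3.  */
  return (t * 0x01010101u) >> 24;
}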
31105 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
31106 target, and SRC is the argument operand. */
31108 void
31109 rs6000_emit_parity (rtx dst, rtx src)
31111 machine_mode mode = GET_MODE (dst);
31112 rtx tmp;
31114 tmp = gen_reg_rtx (mode);
31116 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
31117 if (TARGET_CMPB)
31119 if (mode == SImode)
31121 emit_insn (gen_popcntbsi2 (tmp, src));
31122 emit_insn (gen_paritysi2_cmpb (dst, tmp));
31124 else
31126 emit_insn (gen_popcntbdi2 (tmp, src));
31127 emit_insn (gen_paritydi2_cmpb (dst, tmp));
31129 return;
31132 if (mode == SImode)
31134 /* Is mult+shift >= shift+xor+shift+xor? */
31135 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
31137 rtx tmp1, tmp2, tmp3, tmp4;
31139 tmp1 = gen_reg_rtx (SImode);
31140 emit_insn (gen_popcntbsi2 (tmp1, src));
31142 tmp2 = gen_reg_rtx (SImode);
31143 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
31144 tmp3 = gen_reg_rtx (SImode);
31145 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
31147 tmp4 = gen_reg_rtx (SImode);
31148 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
31149 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
31151 else
31152 rs6000_emit_popcount (tmp, src);
31153 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
31155 else
31157 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
31158 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
31160 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
31162 tmp1 = gen_reg_rtx (DImode);
31163 emit_insn (gen_popcntbdi2 (tmp1, src));
31165 tmp2 = gen_reg_rtx (DImode);
31166 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
31167 tmp3 = gen_reg_rtx (DImode);
31168 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
31170 tmp4 = gen_reg_rtx (DImode);
31171 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
31172 tmp5 = gen_reg_rtx (DImode);
31173 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
31175 tmp6 = gen_reg_rtx (DImode);
31176 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
31177 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
31179 else
31180 rs6000_emit_popcount (tmp, src);
31181 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
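/* A C model of the SImode shift/xor ladder above (illustrative, not
   part of the original source).  Bit 0 of an xor equals bit 0 of the
   corresponding sum, so folding the popcntb byte counts together with
   xors and taking "& 1" yields the parity of the whole word.  */

static unsigned int
parity_model (unsigned int x)
{
  unsigned int t = 0;

  /* Per-byte population counts, as popcntb would produce.  */
  for (int i = 0; i < 32; i++)
    t += (unsigned int) ((x >> i) & 1) << (i & ~7);

  t ^= t >> 16;   /* fold bytes 3,2 onto bytes 1,0  */
  t ^= t >> 8;    /* fold byte 1 onto byte 0  */
  return t & 1;
}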
31185 /* Expand an Altivec constant permutation for little endian mode.
31186 There are two issues: First, the two input operands must be
31187 swapped so that together they form a double-wide array in LE
31188 order. Second, the vperm instruction has surprising behavior
31189 in LE mode: it interprets the elements of the source vectors
31190 in BE mode ("left to right") and interprets the elements of
31191 the destination vector in LE mode ("right to left"). To
31192 correct for this, we must subtract each element of the permute
31193 control vector from 31.
31195 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
31196 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
31197 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
31198 serve as the permute control vector. Then, in BE mode,
31200 vperm 9,10,11,12
31202 places the desired result in vr9. However, in LE mode the
31203 vector contents will be
31205 vr10 = 00000003 00000002 00000001 00000000
31206 vr11 = 00000007 00000006 00000005 00000004
31208 The result of the vperm using the same permute control vector is
31210 vr9 = 05000000 07000000 01000000 03000000
31212 That is, the leftmost 4 bytes of vr10 are interpreted as the
31213 source for the rightmost 4 bytes of vr9, and so on.
31215 If we change the permute control vector to
31217 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31219 and issue
31221 vperm 9,11,10,12
31223 we get the desired
31225 vr9 = 00000006 00000004 00000002 00000000. */
31227 void
31228 altivec_expand_vec_perm_const_le (rtx operands[4])
31230 unsigned int i;
31231 rtx perm[16];
31232 rtx constv, unspec;
31233 rtx target = operands[0];
31234 rtx op0 = operands[1];
31235 rtx op1 = operands[2];
31236 rtx sel = operands[3];
31238 /* Unpack and adjust the constant selector. */
31239 for (i = 0; i < 16; ++i)
31241 rtx e = XVECEXP (sel, 0, i);
31242 unsigned int elt = 31 - (INTVAL (e) & 31);
31243 perm[i] = GEN_INT (elt);
31246 /* Expand to a permute, swapping the inputs and using the
31247 adjusted selector. */
31248 if (!REG_P (op0))
31249 op0 = force_reg (V16QImode, op0);
31250 if (!REG_P (op1))
31251 op1 = force_reg (V16QImode, op1);
31253 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31254 constv = force_reg (V16QImode, constv);
31255 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31256 UNSPEC_VPERM);
31257 if (!REG_P (target))
31259 rtx tmp = gen_reg_rtx (V16QImode);
31260 emit_move_insn (tmp, unspec);
31261 unspec = tmp;
31264 emit_move_insn (target, unspec);
31267 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31268 permute control vector. But here it's not a constant, so we must
31269 generate a vector NAND or NOR to do the adjustment. */
31271 void
31272 altivec_expand_vec_perm_le (rtx operands[4])
31274 rtx notx, iorx, unspec;
31275 rtx target = operands[0];
31276 rtx op0 = operands[1];
31277 rtx op1 = operands[2];
31278 rtx sel = operands[3];
31279 rtx tmp = target;
31280 rtx norreg = gen_reg_rtx (V16QImode);
31281 machine_mode mode = GET_MODE (target);
31283 /* Get everything in regs so the pattern matches. */
31284 if (!REG_P (op0))
31285 op0 = force_reg (mode, op0);
31286 if (!REG_P (op1))
31287 op1 = force_reg (mode, op1);
31288 if (!REG_P (sel))
31289 sel = force_reg (V16QImode, sel);
31290 if (!REG_P (target))
31291 tmp = gen_reg_rtx (mode);
31293 /* Invert the selector with a VNAND if available, else a VNOR.
31294 The VNAND is preferred for future fusion opportunities. */
31295 notx = gen_rtx_NOT (V16QImode, sel);
31296 iorx = (TARGET_P8_VECTOR
31297 ? gen_rtx_IOR (V16QImode, notx, notx)
31298 : gen_rtx_AND (V16QImode, notx, notx));
31299 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31301 /* Permute with operands reversed and adjusted selector. */
31302 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31303 UNSPEC_VPERM);
31305 /* Copy into target, possibly by way of a register. */
31306 if (!REG_P (target))
31308 emit_move_insn (tmp, unspec);
31309 unspec = tmp;
31312 emit_move_insn (target, unspec);
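/* A standalone sketch (plain C, hypothetical helpers) of the identity
   the inversion above relies on: NAND or NOR of a value with itself
   is its bitwise complement, so either instruction inverts SEL.  */

static unsigned char
invert_via_nand (unsigned char x)
{
  return (unsigned char) ~(x & x);   /* vnand x,x == ~x */
}

static unsigned char
invert_via_nor (unsigned char x)
{
  return (unsigned char) ~(x | x);   /* vnor x,x == ~x */
}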
31315 /* Expand an Altivec constant permutation. Return true if we match
31316 an efficient implementation; false to fall back to VPERM. */
31318 bool
31319 altivec_expand_vec_perm_const (rtx operands[4])
31321 struct altivec_perm_insn {
31322 HOST_WIDE_INT mask;
31323 enum insn_code impl;
31324 unsigned char perm[16];
31326 static const struct altivec_perm_insn patterns[] = {
31327 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31328 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31329 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31330 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31331 { OPTION_MASK_ALTIVEC,
31332 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31333 : CODE_FOR_altivec_vmrglb_direct),
31334 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31335 { OPTION_MASK_ALTIVEC,
31336 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31337 : CODE_FOR_altivec_vmrglh_direct),
31338 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31339 { OPTION_MASK_ALTIVEC,
31340 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31341 : CODE_FOR_altivec_vmrglw_direct),
31342 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31343 { OPTION_MASK_ALTIVEC,
31344 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31345 : CODE_FOR_altivec_vmrghb_direct),
31346 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31347 { OPTION_MASK_ALTIVEC,
31348 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31349 : CODE_FOR_altivec_vmrghh_direct),
31350 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31351 { OPTION_MASK_ALTIVEC,
31352 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31353 : CODE_FOR_altivec_vmrghw_direct),
31354 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31355 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31356 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31357 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31358 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31361 unsigned int i, j, elt, which;
31362 unsigned char perm[16];
31363 rtx target, op0, op1, sel, x;
31364 bool one_vec;
31366 target = operands[0];
31367 op0 = operands[1];
31368 op1 = operands[2];
31369 sel = operands[3];
31371 /* Unpack the constant selector. */
31372 for (i = which = 0; i < 16; ++i)
31374 rtx e = XVECEXP (sel, 0, i);
31375 elt = INTVAL (e) & 31;
31376 which |= (elt < 16 ? 1 : 2);
31377 perm[i] = elt;
31380 /* Simplify the constant selector based on operands. */
31381 switch (which)
31383 default:
31384 gcc_unreachable ();
31386 case 3:
31387 one_vec = false;
31388 if (!rtx_equal_p (op0, op1))
31389 break;
31390 /* FALLTHRU */
31392 case 2:
31393 for (i = 0; i < 16; ++i)
31394 perm[i] &= 15;
31395 op0 = op1;
31396 one_vec = true;
31397 break;
31399 case 1:
31400 op1 = op0;
31401 one_vec = true;
31402 break;
31405 /* Look for splat patterns. */
31406 if (one_vec)
31408 elt = perm[0];
31410 for (i = 0; i < 16; ++i)
31411 if (perm[i] != elt)
31412 break;
31413 if (i == 16)
31415 if (!BYTES_BIG_ENDIAN)
31416 elt = 15 - elt;
31417 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31418 return true;
31421 if (elt % 2 == 0)
31423 for (i = 0; i < 16; i += 2)
31424 if (perm[i] != elt || perm[i + 1] != elt + 1)
31425 break;
31426 if (i == 16)
31428 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31429 x = gen_reg_rtx (V8HImode);
31430 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31431 GEN_INT (field)));
31432 emit_move_insn (target, gen_lowpart (V16QImode, x));
31433 return true;
31437 if (elt % 4 == 0)
31439 for (i = 0; i < 16; i += 4)
31440 if (perm[i] != elt
31441 || perm[i + 1] != elt + 1
31442 || perm[i + 2] != elt + 2
31443 || perm[i + 3] != elt + 3)
31444 break;
31445 if (i == 16)
31447 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31448 x = gen_reg_rtx (V4SImode);
31449 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31450 GEN_INT (field)));
31451 emit_move_insn (target, gen_lowpart (V16QImode, x));
31452 return true;
31457 /* Look for merge and pack patterns. */
31458 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31460 bool swapped;
31462 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31463 continue;
31465 elt = patterns[j].perm[0];
31466 if (perm[0] == elt)
31467 swapped = false;
31468 else if (perm[0] == elt + 16)
31469 swapped = true;
31470 else
31471 continue;
31472 for (i = 1; i < 16; ++i)
31474 elt = patterns[j].perm[i];
31475 if (swapped)
31476 elt = (elt >= 16 ? elt - 16 : elt + 16);
31477 else if (one_vec && elt >= 16)
31478 elt -= 16;
31479 if (perm[i] != elt)
31480 break;
31482 if (i == 16)
31484 enum insn_code icode = patterns[j].impl;
31485 machine_mode omode = insn_data[icode].operand[0].mode;
31486 machine_mode imode = insn_data[icode].operand[1].mode;
31488 /* For little-endian, don't use vpkuwum and vpkuhum if the
31489 underlying vector type is not V4SImode or V8HImode, respectively.
31490 For example, using vpkuwum with a V8HImode vector picks up the even
31491 halfwords (BE numbering) when the even halfwords (LE
31492 numbering) are what we need. */
31493 if (!BYTES_BIG_ENDIAN
31494 && icode == CODE_FOR_altivec_vpkuwum_direct
31495 && ((GET_CODE (op0) == REG
31496 && GET_MODE (op0) != V4SImode)
31497 || (GET_CODE (op0) == SUBREG
31498 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31499 continue;
31500 if (!BYTES_BIG_ENDIAN
31501 && icode == CODE_FOR_altivec_vpkuhum_direct
31502 && ((GET_CODE (op0) == REG
31503 && GET_MODE (op0) != V8HImode)
31504 || (GET_CODE (op0) == SUBREG
31505 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31506 continue;
31508 /* For little-endian, the two input operands must be swapped
31509 (or swapped back) to ensure proper right-to-left numbering
31510 from 0 to 2N-1. */
31511 if (swapped ^ !BYTES_BIG_ENDIAN)
31512 std::swap (op0, op1);
31513 if (imode != V16QImode)
31515 op0 = gen_lowpart (imode, op0);
31516 op1 = gen_lowpart (imode, op1);
31518 if (omode == V16QImode)
31519 x = target;
31520 else
31521 x = gen_reg_rtx (omode);
31522 emit_insn (GEN_FCN (icode) (x, op0, op1));
31523 if (omode != V16QImode)
31524 emit_move_insn (target, gen_lowpart (V16QImode, x));
31525 return true;
31529 if (!BYTES_BIG_ENDIAN)
31531 altivec_expand_vec_perm_const_le (operands);
31532 return true;
31535 return false;
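/* A standalone sketch (plain C, hypothetical helper) of the
   operand-usage test above: bit 0 of the result records a reference
   to op0 (elements 0-15), bit 1 a reference to op1 (elements 16-31),
   so the values 1, 2, and 3 mean "op0 only", "op1 only", and "both",
   matching the switch above.  */

static unsigned int
classify_selector (const unsigned char sel[16])
{
  unsigned int i, which;

  for (i = which = 0; i < 16; i++)
    which |= ((sel[i] & 31) < 16 ? 1 : 2);
  return which;
}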
31538 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31539 Return true if we match an efficient implementation. */
31541 static bool
31542 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31543 unsigned char perm0, unsigned char perm1)
31545 rtx x;
31547 /* If both selectors come from the same operand, fold to single op. */
31548 if ((perm0 & 2) == (perm1 & 2))
31550 if (perm0 & 2)
31551 op0 = op1;
31552 else
31553 op1 = op0;
31555 /* If both operands are equal, fold to simpler permutation. */
31556 if (rtx_equal_p (op0, op1))
31558 perm0 = perm0 & 1;
31559 perm1 = (perm1 & 1) + 2;
31561 /* If the first selector comes from the second operand, swap. */
31562 else if (perm0 & 2)
31564 if (perm1 & 2)
31565 return false;
31566 perm0 -= 2;
31567 perm1 += 2;
31568 std::swap (op0, op1);
31570 /* If the second selector does not come from the second operand, fail. */
31571 else if ((perm1 & 2) == 0)
31572 return false;
31574 /* Success! */
31575 if (target != NULL)
31577 machine_mode vmode, dmode;
31578 rtvec v;
31580 vmode = GET_MODE (target);
31581 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31582 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31583 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31584 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31585 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31586 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31588 return true;
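/* A standalone sketch (plain C, hypothetical helper) of the
   canonicalization above for a two-element permute: selector values
   0-1 pick from the first operand, 2-3 from the second.  The
   same-operand folding done above is omitted here for brevity.
   Returns 0 if already canonical, 1 if the caller should swap the
   operands, and -1 if both selectors reference one operand.  */

static int
canonicalize_pair (unsigned char *perm0, unsigned char *perm1)
{
  if (!(*perm0 & 2) && (*perm1 & 2))
    return 0;
  if ((*perm0 & 2) && !(*perm1 & 2))
    {
      *perm0 -= 2;
      *perm1 += 2;
      return 1;
    }
  return -1;
}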
31591 bool
31592 rs6000_expand_vec_perm_const (rtx operands[4])
31594 rtx target, op0, op1, sel;
31595 unsigned char perm0, perm1;
31597 target = operands[0];
31598 op0 = operands[1];
31599 op1 = operands[2];
31600 sel = operands[3];
31602 /* Unpack the constant selector. */
31603 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31604 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31606 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31609 /* Test whether a constant permutation is supported. */
31611 static bool
31612 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31613 const unsigned char *sel)
31615 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31616 if (TARGET_ALTIVEC)
31617 return true;
31619 /* Check for ps_merge* or evmerge* insns. */
31620 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31621 || (TARGET_SPE && vmode == V2SImode))
31623 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31624 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31625 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31628 return false;
31631 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31633 static void
31634 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31635 machine_mode vmode, unsigned nelt, rtx perm[])
31637 machine_mode imode;
31638 rtx x;
31640 imode = vmode;
31641 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31643 imode = GET_MODE_INNER (vmode);
31644 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31645 imode = mode_for_vector (imode, nelt);
31648 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31649 x = expand_vec_perm (vmode, op0, op1, x, target);
31650 if (x != target)
31651 emit_move_insn (target, x);
31654 /* Expand an extract even operation. */
31656 void
31657 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31659 machine_mode vmode = GET_MODE (target);
31660 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31661 rtx perm[16];
31663 for (i = 0; i < nelt; i++)
31664 perm[i] = GEN_INT (i * 2);
31666 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31669 /* Expand a vector interleave operation. */
31671 void
31672 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31674 machine_mode vmode = GET_MODE (target);
31675 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31676 rtx perm[16];
31678 high = (highp ? 0 : nelt / 2);
31679 for (i = 0; i < nelt / 2; i++)
31681 perm[i * 2] = GEN_INT (i + high);
31682 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31685 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
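/* A standalone sketch (plain C, hypothetical helper) of the selectors
   the two expanders above construct.  For NELT == 4, extract-even
   yields {0,2,4,6}; interleave-high yields {0,4,1,5} and
   interleave-low yields {2,6,3,7}.  */

static void
build_selectors (unsigned int nelt, unsigned int even[],
                 unsigned int ileave[], int highp)
{
  unsigned int i, base = highp ? 0 : nelt / 2;

  for (i = 0; i < nelt; i++)
    even[i] = i * 2;                      /* elements 0,2,4,...  */
  for (i = 0; i < nelt / 2; i++)
    {
      ileave[i * 2] = i + base;           /* from op0 */
      ileave[i * 2 + 1] = i + nelt + base; /* from op1 */
    }
}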
31688 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
31689 void
31690 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31692 HOST_WIDE_INT hwi_scale (scale);
31693 REAL_VALUE_TYPE r_pow;
31694 rtvec v = rtvec_alloc (2);
31695 rtx elt;
31696 rtx scale_vec = gen_reg_rtx (V2DFmode);
31697 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31698 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31699 RTVEC_ELT (v, 0) = elt;
31700 RTVEC_ELT (v, 1) = elt;
31701 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31702 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
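/* A standalone sketch (plain C): per element, the expansion above
   computes src * 2**scale, which is what ldexp does for a scalar
   (barring overflow or underflow).  */

#include <math.h>

static double
scale_element (double x, int scale)
{
  return ldexp (x, scale);
}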
31705 /* Return an RTX representing where to find the function value of a
31706 function returning MODE. */
31707 static rtx
31708 rs6000_complex_function_value (machine_mode mode)
31710 unsigned int regno;
31711 rtx r1, r2;
31712 machine_mode inner = GET_MODE_INNER (mode);
31713 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31715 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31716 regno = FP_ARG_RETURN;
31717 else
31719 regno = GP_ARG_RETURN;
31721 /* 32-bit is OK since it'll go in r3/r4. */
31722 if (TARGET_32BIT && inner_bytes >= 4)
31723 return gen_rtx_REG (mode, regno);
31726 if (inner_bytes >= 8)
31727 return gen_rtx_REG (mode, regno);
31729 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31730 const0_rtx);
31731 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31732 GEN_INT (inner_bytes));
31733 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31736 /* Return an rtx describing a return value of MODE as a PARALLEL
31737 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
31738 stride REG_STRIDE. */
31740 static rtx
31741 rs6000_parallel_return (machine_mode mode,
31742 int n_elts, machine_mode elt_mode,
31743 unsigned int regno, unsigned int reg_stride)
31745 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
31747 int i;
31748 for (i = 0; i < n_elts; i++)
31750 rtx r = gen_rtx_REG (elt_mode, regno);
31751 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31752 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31753 regno += reg_stride;
31756 return par;
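/* A standalone sketch (plain C, hypothetical helper) of the layout
   built above: N_ELTS registers starting at REGNO with stride
   REG_STRIDE, the I-th piece at byte offset I * ELT_SIZE.  With
   n_elts == 2, regno == 3, reg_stride == 1, elt_size == 4 this is
   the -m32 -mpowerpc64 DImode case below: r3 holds bytes 0-3 and
   r4 holds bytes 4-7.  */

static void
parallel_return_layout (int n_elts, int regno, int reg_stride,
                        int elt_size, int regs[], int offsets[])
{
  int i;

  for (i = 0; i < n_elts; i++, regno += reg_stride)
    {
      regs[i] = regno;
      offsets[i] = i * elt_size;
    }
}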
31759 /* Target hook for TARGET_FUNCTION_VALUE.
31761 On the SPE, both FPs and vectors are returned in r3.
31763 On RS/6000 an integer value is in r3 and a floating-point value is in
31764 fp1, unless -msoft-float. */
31766 static rtx
31767 rs6000_function_value (const_tree valtype,
31768 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31769 bool outgoing ATTRIBUTE_UNUSED)
31771 machine_mode mode;
31772 unsigned int regno;
31773 machine_mode elt_mode;
31774 int n_elts;
31776 /* Special handling for structs in darwin64. */
31777 if (TARGET_MACHO
31778 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31780 CUMULATIVE_ARGS valcum;
31781 rtx valret;
31783 valcum.words = 0;
31784 valcum.fregno = FP_ARG_MIN_REG;
31785 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31786 /* Do a trial code generation as if this were going to be passed as
31787 an argument; if any part goes in memory, we return NULL. */
31788 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31789 if (valret)
31790 return valret;
31791 /* Otherwise fall through to standard ABI rules. */
31794 mode = TYPE_MODE (valtype);
31796 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31797 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
31799 int first_reg, n_regs;
31801 if (SCALAR_FLOAT_MODE_P (elt_mode))
31803 /* _Decimal128 must use even/odd register pairs. */
31804 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31805 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31807 else
31809 first_reg = ALTIVEC_ARG_RETURN;
31810 n_regs = 1;
31813 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
31816 /* Some return value types need to be split under the -mpowerpc64, 32-bit ABI. */
31817 if (TARGET_32BIT && TARGET_POWERPC64)
31818 switch (mode)
31820 default:
31821 break;
31822 case DImode:
31823 case SCmode:
31824 case DCmode:
31825 case TCmode:
31826 int count = GET_MODE_SIZE (mode) / 4;
31827 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
31830 if ((INTEGRAL_TYPE_P (valtype)
31831 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
31832 || (POINTER_TYPE_P (valtype) && !upc_shared_type_p (TREE_TYPE (valtype))))
31833 mode = TARGET_32BIT ? SImode : DImode;
31835 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31836 /* _Decimal128 must use an even/odd register pair. */
31837 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31838 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31839 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31840 regno = FP_ARG_RETURN;
31841 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31842 && targetm.calls.split_complex_arg)
31843 return rs6000_complex_function_value (mode);
31844 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31845 return register is used in both cases, and we won't see V2DImode/V2DFmode
31846 for pure altivec, combine the two cases. */
31847 else if (TREE_CODE (valtype) == VECTOR_TYPE
31848 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31849 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31850 regno = ALTIVEC_ARG_RETURN;
31851 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31852 && (mode == DFmode || mode == DCmode
31853 || mode == TFmode || mode == TCmode))
31854 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31855 else
31856 regno = GP_ARG_RETURN;
31858 return gen_rtx_REG (mode, regno);
31861 /* Define how to find the value returned by a library function
31862 assuming the value has mode MODE. */
31863 rtx
31864 rs6000_libcall_value (machine_mode mode)
31866 unsigned int regno;
31868 /* A long long return value needs to be split under the -mpowerpc64, 32-bit ABI. */
31869 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31870 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
31872 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31873 /* _Decimal128 must use an even/odd register pair. */
31874 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31875 else if (SCALAR_FLOAT_MODE_P (mode)
31876 && TARGET_HARD_FLOAT && TARGET_FPRS
31877 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31878 regno = FP_ARG_RETURN;
31879 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31880 return register is used in both cases, and we won't see V2DImode/V2DFmode
31881 for pure altivec, combine the two cases. */
31882 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31883 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31884 regno = ALTIVEC_ARG_RETURN;
31885 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31886 return rs6000_complex_function_value (mode);
31887 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31888 && (mode == DFmode || mode == DCmode
31889 || mode == TFmode || mode == TCmode))
31890 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31891 else
31892 regno = GP_ARG_RETURN;
31894 return gen_rtx_REG (mode, regno);
31898 /* Return true if we use LRA instead of the reload pass. */
31899 static bool
31900 rs6000_lra_p (void)
31902 return rs6000_lra_flag;
31905 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31906 Frame pointer elimination is automatically handled.
31908 For the RS/6000, if frame pointer elimination is being done, we would like
31909 to convert ap into fp, not sp.
31911 We need r30 if -mminimal-toc was specified, and there are constant pool
31912 references. */
31914 static bool
31915 rs6000_can_eliminate (const int from, const int to)
31917 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31918 ? ! frame_pointer_needed
31919 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31920 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31921 : true);
31924 /* Define the offset between two registers, FROM to be eliminated and its
31925 replacement TO, at the start of a routine. */
31926 HOST_WIDE_INT
31927 rs6000_initial_elimination_offset (int from, int to)
31929 rs6000_stack_t *info = rs6000_stack_info ();
31930 HOST_WIDE_INT offset;
31932 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31933 offset = info->push_p ? 0 : -info->total_size;
31934 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31936 offset = info->push_p ? 0 : -info->total_size;
31937 if (FRAME_GROWS_DOWNWARD)
31938 offset += info->fixed_size + info->vars_size + info->parm_size;
31940 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31941 offset = FRAME_GROWS_DOWNWARD
31942 ? info->fixed_size + info->vars_size + info->parm_size
31943 : 0;
31944 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31945 offset = info->total_size;
31946 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31947 offset = info->push_p ? info->total_size : 0;
31948 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31949 offset = 0;
31950 else
31951 gcc_unreachable ();
31953 return offset;
31956 static rtx
31957 rs6000_dwarf_register_span (rtx reg)
31959 rtx parts[8];
31960 int i, words;
31961 unsigned regno = REGNO (reg);
31962 machine_mode mode = GET_MODE (reg);
31964 if (TARGET_SPE
31965 && regno < 32
31966 && (SPE_VECTOR_MODE (GET_MODE (reg))
31967 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31968 && mode != SFmode && mode != SDmode && mode != SCmode)))
31969 ;
31970 else
31971 return NULL_RTX;
31973 regno = REGNO (reg);
31975 /* The duality of the SPE register size wreaks all kinds of havoc.
31976 This is a way of distinguishing r0 in 32-bits from r0 in
31977 64-bits. */
31978 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31979 gcc_assert (words <= 4);
31980 for (i = 0; i < words; i++, regno++)
31982 if (BYTES_BIG_ENDIAN)
31984 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31985 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31987 else
31989 parts[2 * i] = gen_rtx_REG (SImode, regno);
31990 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31994 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
31997 /* Fill in sizes for SPE register high parts in the table used by the unwinder. */
31999 static void
32000 rs6000_init_dwarf_reg_sizes_extra (tree address)
32002 if (TARGET_SPE)
32004 int i;
32005 machine_mode mode = TYPE_MODE (char_type_node);
32006 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
32007 rtx mem = gen_rtx_MEM (BLKmode, addr);
32008 rtx value = gen_int_mode (4, mode);
32010 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
32012 int column = DWARF_REG_TO_UNWIND_COLUMN
32013 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
32014 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
32016 emit_move_insn (adjust_address (mem, mode, offset), value);
32020 if (TARGET_MACHO && ! TARGET_ALTIVEC)
32022 int i;
32023 machine_mode mode = TYPE_MODE (char_type_node);
32024 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
32025 rtx mem = gen_rtx_MEM (BLKmode, addr);
32026 rtx value = gen_int_mode (16, mode);
32028 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
32029 The unwinder still needs to know the size of Altivec registers. */
32031 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
32033 int column = DWARF_REG_TO_UNWIND_COLUMN
32034 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
32035 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
32037 emit_move_insn (adjust_address (mem, mode, offset), value);
32042 /* Map internal gcc register numbers to debug format register numbers.
32043 FORMAT specifies the type of debug register number to use:
32044 0 -- debug information, except for frame-related sections
32045 1 -- DWARF .debug_frame section
32046 2 -- DWARF .eh_frame section */
32048 unsigned int
32049 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
32051 /* We never use the GCC internal number for SPE high registers.
32052 Those are mapped to the 1200..1231 range for all debug formats. */
32053 if (SPE_HIGH_REGNO_P (regno))
32054 return regno - FIRST_SPE_HIGH_REGNO + 1200;
32056 /* Except for the above, we use the internal number for non-DWARF
32057 debug information, and also for .eh_frame. */
32058 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
32059 return regno;
32061 /* On some platforms, we use the standard DWARF register
32062 numbering for .debug_info and .debug_frame. */
32063 #ifdef RS6000_USE_DWARF_NUMBERING
32064 if (regno <= 63)
32065 return regno;
32066 if (regno == LR_REGNO)
32067 return 108;
32068 if (regno == CTR_REGNO)
32069 return 109;
32070 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
32071 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
32072 The actual code emitted saves the whole of CR, so we map CR2_REGNO
32073 to the DWARF reg for CR. */
32074 if (format == 1 && regno == CR2_REGNO)
32075 return 64;
32076 if (CR_REGNO_P (regno))
32077 return regno - CR0_REGNO + 86;
32078 if (regno == CA_REGNO)
32079 return 101; /* XER */
32080 if (ALTIVEC_REGNO_P (regno))
32081 return regno - FIRST_ALTIVEC_REGNO + 1124;
32082 if (regno == VRSAVE_REGNO)
32083 return 356;
32084 if (regno == VSCR_REGNO)
32085 return 67;
32086 if (regno == SPE_ACC_REGNO)
32087 return 99;
32088 if (regno == SPEFSCR_REGNO)
32089 return 612;
32090 #endif
32091 return regno;
32094 /* Target hook for eh_return_filter_mode. */
32095 static machine_mode
32096 rs6000_eh_return_filter_mode (void)
32098 return TARGET_32BIT ? SImode : word_mode;
32101 /* Target hook for scalar_mode_supported_p. */
32102 static bool
32103 rs6000_scalar_mode_supported_p (machine_mode mode)
32105 /* -m32 does not support TImode. This is the default, from
32106 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
32107 same ABI as for -m32. But default_scalar_mode_supported_p allows
32108 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
32109 for -mpowerpc64. */
32110 if (TARGET_32BIT && mode == TImode)
32111 return false;
32113 if (DECIMAL_FLOAT_MODE_P (mode))
32114 return default_decimal_float_supported_p ();
32115 else
32116 return default_scalar_mode_supported_p (mode);
32119 /* Target hook for vector_mode_supported_p. */
32120 static bool
32121 rs6000_vector_mode_supported_p (machine_mode mode)
32124 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
32125 return true;
32127 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
32128 return true;
32130 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
32131 return true;
32133 else
32134 return false;
32137 /* Target hook for invalid_arg_for_unprototyped_fn. */
32138 static const char *
32139 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
32141 return (!rs6000_darwin64_abi
32142 && typelist == 0
32143 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
32144 && (funcdecl == NULL_TREE
32145 || (TREE_CODE (funcdecl) == FUNCTION_DECL
32146 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
32147 ? N_("AltiVec argument passed to unprototyped function")
32148 : NULL;
32151 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
32152 setup by using the __stack_chk_fail_local hidden function instead of
32153 calling __stack_chk_fail directly. Otherwise it is better to call
32154 __stack_chk_fail directly. */
32156 static tree ATTRIBUTE_UNUSED
32157 rs6000_stack_protect_fail (void)
32159 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
32160 ? default_hidden_stack_protect_fail ()
32161 : default_external_stack_protect_fail ();
32164 void
32165 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
32166 int num_operands ATTRIBUTE_UNUSED)
32168 if (rs6000_warn_cell_microcode)
32170 const char *temp;
32171 int insn_code_number = recog_memoized (insn);
32172 location_t location = INSN_LOCATION (insn);
32174 /* Punt on insns we cannot recognize. */
32175 if (insn_code_number < 0)
32176 return;
32178 temp = get_insn_template (insn_code_number, insn);
32180 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
32181 warning_at (location, OPT_mwarn_cell_microcode,
32182 "emitting microcode insn %s\t[%s] #%d",
32183 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32184 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
32185 warning_at (location, OPT_mwarn_cell_microcode,
32186 "emitting conditional microcode insn %s\t[%s] #%d",
32187 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32191 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32193 #if TARGET_ELF
32194 static unsigned HOST_WIDE_INT
32195 rs6000_asan_shadow_offset (void)
32197 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32199 #endif
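/* A standalone sketch (plain C, hypothetical helper) of how
   AddressSanitizer uses the offset returned above: the shadow byte
   for ADDR lives at (ADDR >> 3) + offset, so one shadow byte covers
   eight bytes of application memory.  */

static unsigned long long
asan_shadow_address (unsigned long long addr, unsigned long long offset)
{
  return (addr >> 3) + offset;
}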
32201 /* Mask options that we want to support inside of attribute((target)) and
32202 #pragma GCC target operations. Note that we do not include things like
32203 64/32-bit, endianness, hard/soft floating point, etc. that would have
32204 different calling sequences. */
32206 struct rs6000_opt_mask {
32207 const char *name; /* option name */
32208 HOST_WIDE_INT mask; /* mask to set */
32209 bool invert; /* invert sense of mask */
32210 bool valid_target; /* option is a target option */
32213 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32215 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32216 { "cmpb", OPTION_MASK_CMPB, false, true },
32217 { "crypto", OPTION_MASK_CRYPTO, false, true },
32218 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32219 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32220 { "fprnd", OPTION_MASK_FPRND, false, true },
32221 { "hard-dfp", OPTION_MASK_DFP, false, true },
32222 { "htm", OPTION_MASK_HTM, false, true },
32223 { "isel", OPTION_MASK_ISEL, false, true },
32224 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32225 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32226 { "mulhw", OPTION_MASK_MULHW, false, true },
32227 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32228 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32229 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32230 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32231 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32232 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32233 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32234 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32235 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32236 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32237 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32238 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
32239 { "string", OPTION_MASK_STRING, false, true },
32240 { "update", OPTION_MASK_NO_UPDATE, true , true },
32241 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
32242 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
32243 { "vsx", OPTION_MASK_VSX, false, true },
32244 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32245 #ifdef OPTION_MASK_64BIT
32246 #if TARGET_AIX_OS
32247 { "aix64", OPTION_MASK_64BIT, false, false },
32248 { "aix32", OPTION_MASK_64BIT, true, false },
32249 #else
32250 { "64", OPTION_MASK_64BIT, false, false },
32251 { "32", OPTION_MASK_64BIT, true, false },
32252 #endif
32253 #endif
32254 #ifdef OPTION_MASK_EABI
32255 { "eabi", OPTION_MASK_EABI, false, false },
32256 #endif
32257 #ifdef OPTION_MASK_LITTLE_ENDIAN
32258 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32259 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32260 #endif
32261 #ifdef OPTION_MASK_RELOCATABLE
32262 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32263 #endif
32264 #ifdef OPTION_MASK_STRICT_ALIGN
32265 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32266 #endif
32267 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32268 { "string", OPTION_MASK_STRING, false, false },
32271 /* Builtin mask mapping for printing the flags. */
32272 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32274 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32275 { "vsx", RS6000_BTM_VSX, false, false },
32276 { "spe", RS6000_BTM_SPE, false, false },
32277 { "paired", RS6000_BTM_PAIRED, false, false },
32278 { "fre", RS6000_BTM_FRE, false, false },
32279 { "fres", RS6000_BTM_FRES, false, false },
32280 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32281 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32282 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32283 { "cell", RS6000_BTM_CELL, false, false },
32284 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32285 { "crypto", RS6000_BTM_CRYPTO, false, false },
32286 { "htm", RS6000_BTM_HTM, false, false },
32287 { "hard-dfp", RS6000_BTM_DFP, false, false },
32288 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32289 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32292 /* Option variables that we want to support inside attribute((target)) and
32293 #pragma GCC target operations. */
32295 struct rs6000_opt_var {
32296 const char *name; /* option name */
32297 size_t global_offset; /* offset of the option in global_options. */
32298 size_t target_offset; /* offset of the option in target options. */
32301 static struct rs6000_opt_var const rs6000_opt_vars[] =
32303 { "friz",
32304 offsetof (struct gcc_options, x_TARGET_FRIZ),
32305 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32306 { "avoid-indexed-addresses",
32307 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32308 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32309 { "paired",
32310 offsetof (struct gcc_options, x_rs6000_paired_float),
32311 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32312 { "longcall",
32313 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32314 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32315 { "optimize-swaps",
32316 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
32317 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
32318 { "allow-movmisalign",
32319 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
32320 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
32321 { "allow-df-permute",
32322 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
32323 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
32324 { "sched-groups",
32325 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
32326 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
32327 { "always-hint",
32328 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
32329 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
32330 { "align-branch-targets",
32331 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
32332 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
32333 { "vectorize-builtins",
32334 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
32335 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
32336 { "tls-markers",
32337 offsetof (struct gcc_options, x_tls_markers),
32338 offsetof (struct cl_target_option, x_tls_markers), },
32339 { "sched-prolog",
32340 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
32341 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
32342 { "sched-epilog",
32343 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
32344 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
32345 { "gen-cell-microcode",
32346 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
32347 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
32348 { "warn-cell-microcode",
32349 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
32350 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
32353 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32354 parsing. Return true if there were no errors. */
32356 static bool
32357 rs6000_inner_target_options (tree args, bool attr_p)
32359 bool ret = true;
32361 if (args == NULL_TREE)
32362 ;
32364 else if (TREE_CODE (args) == STRING_CST)
32366 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32367 char *q;
32369 while ((q = strtok (p, ",")) != NULL)
32371 bool error_p = false;
32372 bool not_valid_p = false;
32373 const char *cpu_opt = NULL;
32375 p = NULL;
32376 if (strncmp (q, "cpu=", 4) == 0)
32378 int cpu_index = rs6000_cpu_name_lookup (q+4);
32379 if (cpu_index >= 0)
32380 rs6000_cpu_index = cpu_index;
32381 else
32383 error_p = true;
32384 cpu_opt = q+4;
32387 else if (strncmp (q, "tune=", 5) == 0)
32389 int tune_index = rs6000_cpu_name_lookup (q+5);
32390 if (tune_index >= 0)
32391 rs6000_tune_index = tune_index;
32392 else
32394 error_p = true;
32395 cpu_opt = q+5;
32398 else
32400 size_t i;
32401 bool invert = false;
32402 char *r = q;
32404 error_p = true;
32405 if (strncmp (r, "no-", 3) == 0)
32407 invert = true;
32408 r += 3;
32411 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32412 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32414 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32416 if (!rs6000_opt_masks[i].valid_target)
32417 not_valid_p = true;
32418 else
32420 error_p = false;
32421 rs6000_isa_flags_explicit |= mask;
32423 /* VSX needs altivec, so -mvsx automagically sets
32424 altivec and disables -mavoid-indexed-addresses. */
32425 if (!invert)
32427 if (mask == OPTION_MASK_VSX)
32429 mask |= OPTION_MASK_ALTIVEC;
32430 TARGET_AVOID_XFORM = 0;
32434 if (rs6000_opt_masks[i].invert)
32435 invert = !invert;
32437 if (invert)
32438 rs6000_isa_flags &= ~mask;
32439 else
32440 rs6000_isa_flags |= mask;
32442 break;
32445 if (error_p && !not_valid_p)
32447 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32448 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32450 size_t j = rs6000_opt_vars[i].global_offset;
32451 *((int *) ((char *)&global_options + j)) = !invert;
32452 error_p = false;
32453 not_valid_p = false;
32454 break;
32459 if (error_p)
32461 const char *eprefix, *esuffix;
32463 ret = false;
32464 if (attr_p)
32466 eprefix = "__attribute__((__target__(";
32467 esuffix = ")))";
32469 else
32471 eprefix = "#pragma GCC target ";
32472 esuffix = "";
32475 if (cpu_opt)
32476 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32477 q, esuffix);
32478 else if (not_valid_p)
32479 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32480 else
32481 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32486 else if (TREE_CODE (args) == TREE_LIST)
32488 do
32490 tree value = TREE_VALUE (args);
32491 if (value)
32493 bool ret2 = rs6000_inner_target_options (value, attr_p);
32494 if (!ret2)
32495 ret = false;
32497 args = TREE_CHAIN (args);
32499 while (args != NULL_TREE);
32502 else
32503 gcc_unreachable ();
32505 return ret;
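/* A standalone sketch (plain C, hypothetical helper) of the strtok
   loop above: the first call passes the string, subsequent calls
   pass NULL to continue from the previous token, and a "no-" prefix
   flips the sense of an option.  P is modified in place, which is
   why the code above works on an ASTRDUP'd copy.  */

#include <string.h>

static int
count_inverted_options (char *p)
{
  int inverted = 0;
  char *q;

  for (q = strtok (p, ","); q != NULL; q = strtok (NULL, ","))
    if (strncmp (q, "no-", 3) == 0)
      inverted++;
  return inverted;
}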
32508 /* Print out the target options as a list for -mdebug=target. */
32510 static void
32511 rs6000_debug_target_options (tree args, const char *prefix)
32513 if (args == NULL_TREE)
32514 fprintf (stderr, "%s<NULL>", prefix);
32516 else if (TREE_CODE (args) == STRING_CST)
32518 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32519 char *q;
32521 while ((q = strtok (p, ",")) != NULL)
32523 p = NULL;
32524 fprintf (stderr, "%s\"%s\"", prefix, q);
32525 prefix = ", ";
32529 else if (TREE_CODE (args) == TREE_LIST)
32531 do
32533 tree value = TREE_VALUE (args);
32534 if (value)
32536 rs6000_debug_target_options (value, prefix);
32537 prefix = ", ";
32539 args = TREE_CHAIN (args);
32541 while (args != NULL_TREE);
32544 else
32545 gcc_unreachable ();
32547 return;
32551 /* Hook to validate attribute((target("..."))). */
32553 static bool
32554 rs6000_valid_attribute_p (tree fndecl,
32555 tree ARG_UNUSED (name),
32556 tree args,
32557 int flags)
32559 struct cl_target_option cur_target;
32560 bool ret;
32561 tree old_optimize = build_optimization_node (&global_options);
32562 tree new_target, new_optimize;
32563 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32565 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32567 if (TARGET_DEBUG_TARGET)
32569 tree tname = DECL_NAME (fndecl);
32570 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32571 if (tname)
32572 fprintf (stderr, "function: %.*s\n",
32573 (int) IDENTIFIER_LENGTH (tname),
32574 IDENTIFIER_POINTER (tname));
32575 else
32576 fprintf (stderr, "function: unknown\n");
32578 fprintf (stderr, "args:");
32579 rs6000_debug_target_options (args, " ");
32580 fprintf (stderr, "\n");
32582 if (flags)
32583 fprintf (stderr, "flags: 0x%x\n", flags);
32585 fprintf (stderr, "--------------------\n");
32588 old_optimize = build_optimization_node (&global_options);
32589 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32591 /* If the function changed the optimization levels as well as setting target
32592 options, start with the optimizations specified. */
32593 if (func_optimize && func_optimize != old_optimize)
32594 cl_optimization_restore (&global_options,
32595 TREE_OPTIMIZATION (func_optimize));
32597 /* The target attributes may also change some optimization flags, so update
32598 the optimization options if necessary. */
32599 cl_target_option_save (&cur_target, &global_options);
32600 rs6000_cpu_index = rs6000_tune_index = -1;
32601 ret = rs6000_inner_target_options (args, true);
32603 /* Set up any additional state. */
32604 if (ret)
32606 ret = rs6000_option_override_internal (false);
32607 new_target = build_target_option_node (&global_options);
32609 else
32610 new_target = NULL;
32612 new_optimize = build_optimization_node (&global_options);
32614 if (!new_target)
32615 ret = false;
32617 else if (fndecl)
32619 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32621 if (old_optimize != new_optimize)
32622 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32625 cl_target_option_restore (&global_options, &cur_target);
32627 if (old_optimize != new_optimize)
32628 cl_optimization_restore (&global_options,
32629 TREE_OPTIMIZATION (old_optimize));
32631 return ret;
32635 /* Hook to validate the current #pragma GCC target and set the state, and
32636 update the macros based on what was changed. If ARGS is NULL, then
32637 POP_TARGET is used to reset the options. */
32639 bool
32640 rs6000_pragma_target_parse (tree args, tree pop_target)
32642 tree prev_tree = build_target_option_node (&global_options);
32643 tree cur_tree;
32644 struct cl_target_option *prev_opt, *cur_opt;
32645 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32646 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32648 if (TARGET_DEBUG_TARGET)
32650 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32651 fprintf (stderr, "args:");
32652 rs6000_debug_target_options (args, " ");
32653 fprintf (stderr, "\n");
32655 if (pop_target)
32657 fprintf (stderr, "pop_target:\n");
32658 debug_tree (pop_target);
32660 else
32661 fprintf (stderr, "pop_target: <NULL>\n");
32663 fprintf (stderr, "--------------------\n");
32666 if (! args)
32668 cur_tree = ((pop_target)
32669 ? pop_target
32670 : target_option_default_node);
32671 cl_target_option_restore (&global_options,
32672 TREE_TARGET_OPTION (cur_tree));
32674 else
32676 rs6000_cpu_index = rs6000_tune_index = -1;
32677 if (!rs6000_inner_target_options (args, false)
32678 || !rs6000_option_override_internal (false)
32679 || (cur_tree = build_target_option_node (&global_options))
32680 == NULL_TREE)
32682 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32683 fprintf (stderr, "invalid pragma\n");
32685 return false;
32689 target_option_current_node = cur_tree;
32691 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32692 change the macros that are defined. */
32693 if (rs6000_target_modify_macros_ptr)
32695 prev_opt = TREE_TARGET_OPTION (prev_tree);
32696 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32697 prev_flags = prev_opt->x_rs6000_isa_flags;
32699 cur_opt = TREE_TARGET_OPTION (cur_tree);
32700 cur_flags = cur_opt->x_rs6000_isa_flags;
32701 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32703 diff_bumask = (prev_bumask ^ cur_bumask);
32704 diff_flags = (prev_flags ^ cur_flags);
32706 if ((diff_flags != 0) || (diff_bumask != 0))
32708 /* Delete old macros. */
32709 rs6000_target_modify_macros_ptr (false,
32710 prev_flags & diff_flags,
32711 prev_bumask & diff_bumask);
32713 /* Define new macros. */
32714 rs6000_target_modify_macros_ptr (true,
32715 cur_flags & diff_flags,
32716 cur_bumask & diff_bumask);
32720 return true;
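/* A standalone sketch (plain C, hypothetical helper) of the mask
   arithmetic above: XOR isolates the bits that changed, and ANDing
   each side with the difference splits them into "set before, clear
   now" (macros to delete) and "clear before, set now" (macros to
   define).  */

static void
split_changed_bits (unsigned long prev, unsigned long cur,
                    unsigned long *to_delete, unsigned long *to_define)
{
  unsigned long diff = prev ^ cur;

  *to_delete = prev & diff;
  *to_define = cur & diff;
}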
32724 /* Remember the last target of rs6000_set_current_function. */
32725 static GTY(()) tree rs6000_previous_fndecl;
32727 /* Establish appropriate back-end context for processing the function
32728 FNDECL. The argument might be NULL to indicate processing at top
32729 level, outside of any function scope. */
32730 static void
32731 rs6000_set_current_function (tree fndecl)
32733 tree old_tree = (rs6000_previous_fndecl
32734 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32735 : NULL_TREE);
32737 tree new_tree = (fndecl
32738 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32739 : NULL_TREE);
32741 if (TARGET_DEBUG_TARGET)
32743 bool print_final = false;
32744 fprintf (stderr, "\n==================== rs6000_set_current_function");
32746 if (fndecl)
32747 fprintf (stderr, ", fndecl %s (%p)",
32748 (DECL_NAME (fndecl)
32749 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32750 : "<unknown>"), (void *)fndecl);
32752 if (rs6000_previous_fndecl)
32753 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32755 fprintf (stderr, "\n");
32756 if (new_tree)
32758 fprintf (stderr, "\nnew fndecl target specific options:\n");
32759 debug_tree (new_tree);
32760 print_final = true;
32763 if (old_tree)
32765 fprintf (stderr, "\nold fndecl target specific options:\n");
32766 debug_tree (old_tree);
32767 print_final = true;
32770 if (print_final)
32771 fprintf (stderr, "--------------------\n");
32774 /* Only change the context if the function changes. This hook is called
32775 several times in the course of compiling a function, and we don't want to
32776 slow things down too much or call target_reinit when it isn't safe. */
32777 if (fndecl && fndecl != rs6000_previous_fndecl)
32779 rs6000_previous_fndecl = fndecl;
32780 if (old_tree == new_tree)
32781 ;
32783 else if (new_tree && new_tree != target_option_default_node)
32785 cl_target_option_restore (&global_options,
32786 TREE_TARGET_OPTION (new_tree));
32787 if (TREE_TARGET_GLOBALS (new_tree))
32788 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32789 else
32790 TREE_TARGET_GLOBALS (new_tree)
32791 = save_target_globals_default_opts ();
32794 else if (old_tree && old_tree != target_option_default_node)
32796 new_tree = target_option_current_node;
32797 cl_target_option_restore (&global_options,
32798 TREE_TARGET_OPTION (new_tree));
32799 if (TREE_TARGET_GLOBALS (new_tree))
32800 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32801 else if (new_tree == target_option_default_node)
32802 restore_target_globals (&default_target_globals);
32803 else
32804 TREE_TARGET_GLOBALS (new_tree)
32805 = save_target_globals_default_opts ();
32811 /* Save the current options */
32813 static void
32814 rs6000_function_specific_save (struct cl_target_option *ptr,
32815 struct gcc_options *opts)
32817 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32818 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32821 /* Restore the current options */
32823 static void
32824 rs6000_function_specific_restore (struct gcc_options *opts,
32825 struct cl_target_option *ptr)
32828 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32829 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32830 (void) rs6000_option_override_internal (false);
32833 /* Print the current options */
32835 static void
32836 rs6000_function_specific_print (FILE *file, int indent,
32837 struct cl_target_option *ptr)
32839 rs6000_print_isa_options (file, indent, "Isa options set",
32840 ptr->x_rs6000_isa_flags);
32842 rs6000_print_isa_options (file, indent, "Isa options explicit",
32843 ptr->x_rs6000_isa_flags_explicit);
32846 /* Helper function to print the current isa or misc options on a line. */
32848 static void
32849 rs6000_print_options_internal (FILE *file,
32850 int indent,
32851 const char *string,
32852 HOST_WIDE_INT flags,
32853 const char *prefix,
32854 const struct rs6000_opt_mask *opts,
32855 size_t num_elements)
32857 size_t i;
32858 size_t start_column = 0;
32859 size_t cur_column;
32860 size_t max_column = 76;
32861 const char *comma = "";
32863 if (indent)
32864 start_column += fprintf (file, "%*s", indent, "");
32866 if (!flags)
32868 fprintf (file, DEBUG_FMT_S, string, "<none>");
32869 return;
32872 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32874 /* Print the various mask options. */
32875 cur_column = start_column;
32876 for (i = 0; i < num_elements; i++)
32878 if ((flags & opts[i].mask) != 0)
32880 const char *no_str = opts[i].invert ? "no-" : "";
32881 size_t len = (strlen (comma)
32882 + strlen (prefix)
32883 + strlen (no_str)
32884 + strlen (opts[i].name));
32886 cur_column += len;
32887 if (cur_column > max_column)
32889 fprintf (file, ", \\\n%*s", (int)start_column, "");
32890 cur_column = start_column + len;
32891 comma = "";
32894 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32895 opts[i].name);
32896 flags &= ~ opts[i].mask;
32897 comma = ", ";
32901 fputs ("\n", file);
32904 /* Helper function to print the current isa options on a line. */
32906 static void
32907 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32908 HOST_WIDE_INT flags)
32910 rs6000_print_options_internal (file, indent, string, flags, "-m",
32911 &rs6000_opt_masks[0],
32912 ARRAY_SIZE (rs6000_opt_masks));
32915 static void
32916 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32917 HOST_WIDE_INT flags)
32919 rs6000_print_options_internal (file, indent, string, flags, "",
32920 &rs6000_builtin_mask_names[0],
32921 ARRAY_SIZE (rs6000_builtin_mask_names));
32925 /* Hook to determine if one function can safely inline another. */
32927 static bool
32928 rs6000_can_inline_p (tree caller, tree callee)
32930 bool ret = false;
32931 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32932 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32934 /* If callee has no option attributes, then it is ok to inline. */
32935 if (!callee_tree)
32936 ret = true;
32938 /* If caller has no option attributes, but callee does then it is not ok to
32939 inline. */
32940 else if (!caller_tree)
32941 ret = false;
32943 else
32945 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32946 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32948 /* The callee's options must be a subset of the caller's; i.e., a vsx function
32949 can inline an altivec function but a non-vsx function can't inline a
32950 vsx function. */
32951 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32952 == callee_opts->x_rs6000_isa_flags)
32953 ret = true;
32956 if (TARGET_DEBUG_TARGET)
32957 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32958 (DECL_NAME (caller)
32959 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32960 : "<unknown>"),
32961 (DECL_NAME (callee)
32962 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32963 : "<unknown>"),
32964 (ret ? "can" : "cannot"));
32966 return ret;
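/* A standalone sketch (plain C, hypothetical helper) of the subset
   test above: the callee may be inlined only if every ISA flag it
   requires is also enabled for the caller.  */

static int
isa_flags_subset_p (unsigned long long caller_flags,
                    unsigned long long callee_flags)
{
  return (caller_flags & callee_flags) == callee_flags;
}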
32969 /* Allocate a stack temp and fix up the address so it meets the particular
32970 memory requirements (either offsettable or REG+REG addressing). */
32972 rtx
32973 rs6000_allocate_stack_temp (machine_mode mode,
32974 bool offsettable_p,
32975 bool reg_reg_p)
32977 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32978 rtx addr = XEXP (stack, 0);
32979 int strict_p = (reload_in_progress || reload_completed);
32981 if (!legitimate_indirect_address_p (addr, strict_p))
32983 if (offsettable_p
32984 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32985 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32987 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32988 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32991 return stack;
32994 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32995 to such a form to deal with memory reference instructions like STFIWX that
32996 only take reg+reg addressing. */
32998 rtx
32999 rs6000_address_for_fpconvert (rtx x)
33001 int strict_p = (reload_in_progress || reload_completed);
33002 rtx addr;
33004 gcc_assert (MEM_P (x));
33005 addr = XEXP (x, 0);
33006 if (! legitimate_indirect_address_p (addr, strict_p)
33007 && ! legitimate_indexed_address_p (addr, strict_p))
33009 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
33011 rtx reg = XEXP (addr, 0);
33012 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
33013 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
33014 gcc_assert (REG_P (reg));
33015 emit_insn (gen_add3_insn (reg, reg, size_rtx));
33016 addr = reg;
33018 else if (GET_CODE (addr) == PRE_MODIFY)
33020 rtx reg = XEXP (addr, 0);
33021 rtx expr = XEXP (addr, 1);
33022 gcc_assert (REG_P (reg));
33023 gcc_assert (GET_CODE (expr) == PLUS);
33024 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
33025 addr = reg;
33028 x = replace_equiv_address (x, copy_addr_to_reg (addr));
33031 return x;
33034 /* Given a memory reference, if it is not in the form for altivec memory
33035 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
33036 convert to the altivec format. */
33038 rtx
33039 rs6000_address_for_altivec (rtx x)
33041 gcc_assert (MEM_P (x));
33042 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
33044 rtx addr = XEXP (x, 0);
33045 int strict_p = (reload_in_progress || reload_completed);
33047 if (!legitimate_indexed_address_p (addr, strict_p)
33048 && !legitimate_indirect_address_p (addr, strict_p))
33049 addr = copy_to_mode_reg (Pmode, addr);
33051 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
33052 x = change_address (x, GET_MODE (x), addr);
33055 return x;
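/* A standalone sketch (plain C, hypothetical helper): ANDing an
   address with -16 clears its low four bits, producing the 16-byte
   aligned address that AltiVec loads and stores actually access.  */

static unsigned long
altivec_align_address (unsigned long addr)
{
  return addr & (unsigned long) -16;   /* same as addr & ~15UL */
}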
33058 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
33060 On the RS/6000, all integer constants are acceptable, most won't be valid
33061 for particular insns, though. Only easy FP constants are acceptable. */
33063 static bool
33064 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
33066 if (TARGET_ELF && tls_referenced_p (x))
33067 return false;
33069 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
33070 || GET_MODE (x) == VOIDmode
33071 || (TARGET_POWERPC64 && mode == DImode)
33072 || easy_fp_constant (x, mode)
33073 || easy_vector_constant (x, mode));
33077 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
33079 static bool
33080 chain_already_loaded (rtx_insn *last)
33082 for (; last != NULL; last = PREV_INSN (last))
33084 if (NONJUMP_INSN_P (last))
33086 rtx patt = PATTERN (last);
33088 if (GET_CODE (patt) == SET)
33090 rtx lhs = XEXP (patt, 0);
33092 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
33093 return true;
33097 return false;
33100 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
33102 void
33103 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33105 const bool direct_call_p
33106 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
33107 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
33108 rtx toc_load = NULL_RTX;
33109 rtx toc_restore = NULL_RTX;
33110 rtx func_addr;
33111 rtx abi_reg = NULL_RTX;
33112 rtx call[4];
33113 int n_call;
33114 rtx insn;
33116 /* Handle longcall attributes. */
33117 if (INTVAL (cookie) & CALL_LONG)
33118 func_desc = rs6000_longcall_ref (func_desc);
33120 /* Handle indirect calls. */
33121 if (GET_CODE (func_desc) != SYMBOL_REF
33122 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
33124 /* Save the TOC into its reserved slot before the call,
33125 and prepare to restore it after the call. */
33126 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
33127 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
33128 rtx stack_toc_mem = gen_frame_mem (Pmode,
33129 gen_rtx_PLUS (Pmode, stack_ptr,
33130 stack_toc_offset));
33131 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
33132 gen_rtvec (1, stack_toc_offset),
33133 UNSPEC_TOCSLOT);
33134 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_unspec);
33136 /* Can we optimize saving the TOC in the prologue or
33137 do we need to do it at every call? */
33138 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
33139 cfun->machine->save_toc_in_prologue = true;
33140 else
33142 MEM_VOLATILE_P (stack_toc_mem) = 1;
33143 emit_move_insn (stack_toc_mem, toc_reg);
33146 if (DEFAULT_ABI == ABI_ELFv2)
33148 /* A function pointer in the ELFv2 ABI is just a plain address, but
33149 the ABI requires it to be loaded into r12 before the call. */
33150 func_addr = gen_rtx_REG (Pmode, 12);
33151 emit_move_insn (func_addr, func_desc);
33152 abi_reg = func_addr;
33154 else
33156 /* A function pointer under AIX is a pointer to a data area whose
33157 first word contains the actual address of the function, whose
33158 second word contains a pointer to its TOC, and whose third word
33159 contains a value to place in the static chain register (r11).
33160 Note that if we load the static chain, our "trampoline" need
33161 not have any executable code. */
33163 /* Load up address of the actual function. */
33164 func_desc = force_reg (Pmode, func_desc);
33165 func_addr = gen_reg_rtx (Pmode);
33166 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
33168 /* Prepare to load the TOC of the called function. Note that the
33169 TOC load must happen immediately before the actual call so
33170 that unwinding the TOC registers works correctly. See the
33171 comment in frob_update_context. */
33172 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
33173 rtx func_toc_mem = gen_rtx_MEM (Pmode,
33174 gen_rtx_PLUS (Pmode, func_desc,
33175 func_toc_offset));
33176 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
33178 /* If we have a static chain, load it up. But, if the call was
33179 originally direct, the 3rd word has not been written since no
33180 trampoline has been built, so we ought not to load it, lest we
33181 overwrite a static chain value. */
33182 if (!direct_call_p
33183 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
33184 && !chain_already_loaded (get_current_sequence ()->next->last))
33186 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
33187 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
33188 rtx func_sc_mem = gen_rtx_MEM (Pmode,
33189 gen_rtx_PLUS (Pmode, func_desc,
33190 func_sc_offset));
33191 emit_move_insn (sc_reg, func_sc_mem);
33192 abi_reg = sc_reg;
33196 else
33198 /* Direct calls use the TOC: for local calls, the callee will
33199 assume the TOC register is set; for non-local calls, the
33200 PLT stub needs the TOC register. */
33201 abi_reg = toc_reg;
33202 func_addr = func_desc;
33205 /* Create the call. */
33206 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
33207 if (value != NULL_RTX)
33208 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33209 n_call = 1;
33211 if (toc_load)
33212 call[n_call++] = toc_load;
33213 if (toc_restore)
33214 call[n_call++] = toc_restore;
33216 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
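/* For illustration, the complete PARALLEL built below for an indirect
   call has roughly this shape (the SET around the CALL appears only
   when a value is returned, and the two TOC elements only when set up
   above):

	(parallel [(set (reg VALUE) (call (mem:SI FUNC_ADDR) FLAG))
		   (use FUNC_TOC_MEM)				 <- toc_load
		   (set TOC_REG (unspec ... UNSPEC_TOCSLOT))	 <- toc_restore
		   (clobber (reg LR_REGNO))])  */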
33218 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
33219 insn = emit_call_insn (insn);
33221 /* Mention all registers defined by the ABI to hold information
33222 as uses in CALL_INSN_FUNCTION_USAGE. */
33223 if (abi_reg)
33224 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
33227 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
33229 void
33230 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33232 rtx call[2];
33233 rtx insn;
33235 gcc_assert (INTVAL (cookie) == 0);
33237 /* Create the call. */
33238 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
33239 if (value != NULL_RTX)
33240 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33242 call[1] = simple_return_rtx;
33244 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
33245 insn = emit_call_insn (insn);
33247 /* Note use of the TOC register. */
33248 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
33249 /* We need to also mark a use of the link register since the function we
33250 sibling-call to will use it to return to our caller. */
33251 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
33254 /* Return whether we need to always update the saved TOC pointer when we update
33255 the stack pointer. */
33257 static bool
33258 rs6000_save_toc_in_prologue_p (void)
33260 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
33263 #ifdef HAVE_GAS_HIDDEN
33264 # define USE_HIDDEN_LINKONCE 1
33265 #else
33266 # define USE_HIDDEN_LINKONCE 0
33267 #endif
33269 /* Fills in the label name that should be used for a 476 link stack thunk. */
33271 void
33272 get_ppc476_thunk_name (char name[32])
33274 gcc_assert (TARGET_LINK_STACK);
33276 if (USE_HIDDEN_LINKONCE)
33277 sprintf (name, "__ppc476.get_thunk");
33278 else
33279 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33282 /* This function emits the simple thunk routine that is used to preserve
33283 the link stack on the 476 cpu. */
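/* Sketch of the emitted assembly (using the USE_HIDDEN_LINKONCE name;
   sectioning and visibility directives omitted): the entire thunk body
   is a single return instruction,

	__ppc476.get_thunk:
		blr
*/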
33285 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33286 static void
33287 rs6000_code_end (void)
33289 char name[32];
33290 tree decl;
33292 if (!TARGET_LINK_STACK)
33293 return;
33295 get_ppc476_thunk_name (name);
33297 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33298 build_function_type_list (void_type_node, NULL_TREE));
33299 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33300 NULL_TREE, void_type_node);
33301 TREE_PUBLIC (decl) = 1;
33302 TREE_STATIC (decl) = 1;
33304 #if RS6000_WEAK
33305 if (USE_HIDDEN_LINKONCE)
33307 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33308 targetm.asm_out.unique_section (decl, 0);
33309 switch_to_section (get_named_section (decl, NULL, 0));
33310 DECL_WEAK (decl) = 1;
33311 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33312 targetm.asm_out.globalize_label (asm_out_file, name);
33313 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33314 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33316 else
33317 #endif
33319 switch_to_section (text_section);
33320 ASM_OUTPUT_LABEL (asm_out_file, name);
33323 DECL_INITIAL (decl) = make_node (BLOCK);
33324 current_function_decl = decl;
33325 init_function_start (decl);
33326 first_function_block_is_cold = false;
33327 /* Make sure unwind info is emitted for the thunk if needed. */
33328 final_start_function (emit_barrier (), asm_out_file, 1);
33330 fputs ("\tblr\n", asm_out_file);
33332 final_end_function ();
33333 init_insn_lengths ();
33334 free_after_compilation (cfun);
33335 set_cfun (NULL);
33336 current_function_decl = NULL;
33339 /* Add r30 to hard reg set if the prologue sets it up and it is not
33340 pic_offset_table_rtx. */
33342 static void
33343 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33345 if (!TARGET_SINGLE_PIC_BASE
33346 && TARGET_TOC
33347 && TARGET_MINIMAL_TOC
33348 && get_pool_size () != 0)
33349 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33353 /* Helper function for rs6000_split_logical to emit a logical instruction after
33354 splitting the operation into single GPR registers.
33356 DEST is the destination register.
33357 OP1 and OP2 are the input source registers.
33358 CODE is the base operation (AND, IOR, XOR, NOT).
33359 MODE is the machine mode.
33360 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33361 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33362 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33364 static void
33365 rs6000_split_logical_inner (rtx dest,
33366 rtx op1,
33367 rtx op2,
33368 enum rtx_code code,
33369 machine_mode mode,
33370 bool complement_final_p,
33371 bool complement_op1_p,
33372 bool complement_op2_p)
33374 rtx bool_rtx;
33376 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33377 if (op2 && GET_CODE (op2) == CONST_INT
33378 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33379 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33381 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33382 HOST_WIDE_INT value = INTVAL (op2) & mask;
33384 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33385 if (code == AND)
33387 if (value == 0)
33389 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33390 return;
33393 else if (value == mask)
33395 if (!rtx_equal_p (dest, op1))
33396 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33397 return;
33401 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33402 into separate ORI/ORIS or XORI/XORIS instructions. */
33403 else if (code == IOR || code == XOR)
33405 if (value == 0)
33407 if (!rtx_equal_p (dest, op1))
33408 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33409 return;
33414 if (code == AND && mode == SImode
33415 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33417 emit_insn (gen_andsi3 (dest, op1, op2));
33418 return;
33421 if (complement_op1_p)
33422 op1 = gen_rtx_NOT (mode, op1);
33424 if (complement_op2_p)
33425 op2 = gen_rtx_NOT (mode, op2);
33427 /* For canonical RTL, if only one arm is inverted it is the first. */
33428 if (!complement_op1_p && complement_op2_p)
33429 std::swap (op1, op2);
33431 bool_rtx = ((code == NOT)
33432 ? gen_rtx_NOT (mode, op1)
33433 : gen_rtx_fmt_ee (code, mode, op1, op2));
33435 if (complement_final_p)
33436 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33438 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
33441 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33442 operations are split immediately during RTL generation to allow for more
33443 optimizations of the AND/IOR/XOR.
33445 OPERANDS is an array containing the destination and two input operands.
33446 CODE is the base operation (AND, IOR, XOR, NOT).
33447 MODE is the machine mode.
33448 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33449 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33450 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
33451 CLOBBER_REG is either NULL or a scratch register of type CC to allow
33452 formation of the AND instructions. */
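/* A worked example (constant chosen for illustration): on a 32-bit
   target, a DImode IOR with 0x12345678 reduces the high word to a
   simple move (IOR with 0) and, because 0x12345678 is not a
   logical_const_operand, splits the low word into an IOR with
   0x12340000 followed by an IOR with 0x5678, matching the ORIS and
   ORI patterns.  */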
33454 static void
33455 rs6000_split_logical_di (rtx operands[3],
33456 enum rtx_code code,
33457 bool complement_final_p,
33458 bool complement_op1_p,
33459 bool complement_op2_p)
33461 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33462 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33463 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33464 enum hi_lo { hi = 0, lo = 1 };
33465 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33466 size_t i;
33468 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33469 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33470 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33471 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33473 if (code == NOT)
33474 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33475 else
33477 if (GET_CODE (operands[2]) != CONST_INT)
33479 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33480 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33482 else
33484 HOST_WIDE_INT value = INTVAL (operands[2]);
33485 HOST_WIDE_INT value_hi_lo[2];
33487 gcc_assert (!complement_final_p);
33488 gcc_assert (!complement_op1_p);
33489 gcc_assert (!complement_op2_p);
33491 value_hi_lo[hi] = value >> 32;
33492 value_hi_lo[lo] = value & lower_32bits;
33494 for (i = 0; i < 2; i++)
33496 HOST_WIDE_INT sub_value = value_hi_lo[i];
33498 if (sub_value & sign_bit)
33499 sub_value |= upper_32bits;
33501 op2_hi_lo[i] = GEN_INT (sub_value);
33503 /* If this is an AND instruction, check to see if we need to load
33504 the value in a register. */
33505 if (code == AND && sub_value != -1 && sub_value != 0
33506 && !and_operand (op2_hi_lo[i], SImode))
33507 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33512 for (i = 0; i < 2; i++)
33514 /* Split large IOR/XOR operations. */
33515 if ((code == IOR || code == XOR)
33516 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33517 && !complement_final_p
33518 && !complement_op1_p
33519 && !complement_op2_p
33520 && !logical_const_operand (op2_hi_lo[i], SImode))
33522 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33523 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33524 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33525 rtx tmp = gen_reg_rtx (SImode);
33527 /* Make sure the constant is sign extended. */
33528 if ((hi_16bits & sign_bit) != 0)
33529 hi_16bits |= upper_32bits;
33531 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33532 code, SImode, false, false, false);
33534 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33535 code, SImode, false, false, false);
33537 else
33538 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33539 code, SImode, complement_final_p,
33540 complement_op1_p, complement_op2_p);
33543 return;
33546 /* Split the insns that make up boolean operations operating on multiple GPR
33547 registers. The boolean MD patterns ensure that the inputs are either
33548 exactly the same as the output registers or do not overlap them at all.
33550 OPERANDS is an array containing the destination and two input operands.
33551 CODE is the base operation (AND, IOR, XOR, NOT).
33552 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33553 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33554 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33556 void
33557 rs6000_split_logical (rtx operands[3],
33558 enum rtx_code code,
33559 bool complement_final_p,
33560 bool complement_op1_p,
33561 bool complement_op2_p)
33563 machine_mode mode = GET_MODE (operands[0]);
33564 machine_mode sub_mode;
33565 rtx op0, op1, op2;
33566 int sub_size, regno0, regno1, nregs, i;
33568 /* If this is DImode, use the specialized version that can run before
33569 register allocation. */
33570 if (mode == DImode && !TARGET_POWERPC64)
33572 rs6000_split_logical_di (operands, code, complement_final_p,
33573 complement_op1_p, complement_op2_p);
33574 return;
33577 op0 = operands[0];
33578 op1 = operands[1];
33579 op2 = (code == NOT) ? NULL_RTX : operands[2];
33580 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33581 sub_size = GET_MODE_SIZE (sub_mode);
33582 regno0 = REGNO (op0);
33583 regno1 = REGNO (op1);
33585 gcc_assert (reload_completed);
33586 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33587 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33589 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33590 gcc_assert (nregs > 1);
33592 if (op2 && REG_P (op2))
33593 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33595 for (i = 0; i < nregs; i++)
33597 int offset = i * sub_size;
33598 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33599 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33600 rtx sub_op2 = ((code == NOT)
33601 ? NULL_RTX
33602 : simplify_subreg (sub_mode, op2, mode, offset));
33604 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33605 complement_final_p, complement_op1_p,
33606 complement_op2_p);
33609 return;
33613 /* Return true if the peephole2 can combine an addis instruction and a load
33614 with an offset, so that the two can be fused together on a power8. */
33617 bool
33618 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33619 rtx addis_value, /* addis value. */
33620 rtx target, /* target register that is loaded. */
33621 rtx mem) /* bottom part of the memory addr. */
33623 rtx addr;
33624 rtx base_reg;
33626 /* Validate arguments. */
33627 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33628 return false;
33630 if (!base_reg_operand (target, GET_MODE (target)))
33631 return false;
33633 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33634 return false;
33636 /* Allow sign/zero extension. */
33637 if (GET_CODE (mem) == ZERO_EXTEND
33638 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33639 mem = XEXP (mem, 0);
33641 if (!MEM_P (mem))
33642 return false;
33644 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33645 return false;
33647 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33648 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33649 return false;
33651 /* Validate that the register used to load the high value is either the
33652 register being loaded, or we can safely replace its use.
33654 This function is only called from the peephole2 pass and we assume that
33655 there are 2 instructions in the peephole (addis and load), so we want to
33656 check if the target register was not used in the memory address and the
33657 register to hold the addis result is dead after the peephole. */
33658 if (REGNO (addis_reg) != REGNO (target))
33660 if (reg_mentioned_p (target, mem))
33661 return false;
33663 if (!peep2_reg_dead_p (2, addis_reg))
33664 return false;
33666 /* If the target register being loaded is the stack pointer, we must
33667 avoid loading any other value into it, even temporarily. */
33668 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33669 return false;
33672 base_reg = XEXP (addr, 0);
33673 return REGNO (addis_reg) == REGNO (base_reg);
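/* For illustration (hypothetical registers and symbol), the peephole
   transforms a sequence such as

	addis r10,r2,sym@toc@ha
	lwz r9,sym@toc@l(r10)

   where r10 is dead afterwards, into the fusible pair

	addis r9,r2,sym@toc@ha
	lwz r9,sym@toc@l(r9)

   so that both instructions target the register being loaded.  */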
33676 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33677 sequence. We adjust the addis register to use the target register. If the
33678 load sign extends, we adjust the code to do a zero-extending load followed
33679 by an explicit sign extension, since the fusion only covers zero-extending
33680 loads.
33682 The operands are:
33683 operands[0] register set with addis (to be replaced with target)
33684 operands[1] value set via addis
33685 operands[2] target register being loaded
33686 operands[3] D-form memory reference using operands[0]. */
33688 void
33689 expand_fusion_gpr_load (rtx *operands)
33691 rtx addis_value = operands[1];
33692 rtx target = operands[2];
33693 rtx orig_mem = operands[3];
33694 rtx new_addr, new_mem, orig_addr, offset;
33695 enum rtx_code plus_or_lo_sum;
33696 machine_mode target_mode = GET_MODE (target);
33697 machine_mode extend_mode = target_mode;
33698 machine_mode ptr_mode = Pmode;
33699 enum rtx_code extend = UNKNOWN;
33701 if (GET_CODE (orig_mem) == ZERO_EXTEND
33702 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33704 extend = GET_CODE (orig_mem);
33705 orig_mem = XEXP (orig_mem, 0);
33706 target_mode = GET_MODE (orig_mem);
33709 gcc_assert (MEM_P (orig_mem));
33711 orig_addr = XEXP (orig_mem, 0);
33712 plus_or_lo_sum = GET_CODE (orig_addr);
33713 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33715 offset = XEXP (orig_addr, 1);
33716 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33717 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33719 if (extend != UNKNOWN)
33720 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33722 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33723 UNSPEC_FUSION_GPR);
33724 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33726 if (extend == SIGN_EXTEND)
33728 int sub_off = ((BYTES_BIG_ENDIAN)
33729 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33730 : 0);
33731 rtx sign_reg
33732 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33734 emit_insn (gen_rtx_SET (VOIDmode, target,
33735 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33738 return;
33741 /* Return a string to fuse an addis instruction with a gpr load into the same
33742 register that the addis instruction set up. The address that is used
33743 is the logical address that was formed during peephole2:
33744 (lo_sum (high) (low-part))
33746 The code is complicated, so we call output_asm_insn directly, and just
33747 return "". */
33749 const char *
33750 emit_fusion_gpr_load (rtx target, rtx mem)
33752 rtx addis_value;
33753 rtx fuse_ops[10];
33754 rtx addr;
33755 rtx load_offset;
33756 const char *addis_str = NULL;
33757 const char *load_str = NULL;
33758 const char *mode_name = NULL;
33759 char insn_template[80];
33760 machine_mode mode;
33761 const char *comment_str = ASM_COMMENT_START;
33763 if (GET_CODE (mem) == ZERO_EXTEND)
33764 mem = XEXP (mem, 0);
33766 gcc_assert (REG_P (target) && MEM_P (mem));
33768 if (*comment_str == ' ')
33769 comment_str++;
33771 addr = XEXP (mem, 0);
33772 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33773 gcc_unreachable ();
33775 addis_value = XEXP (addr, 0);
33776 load_offset = XEXP (addr, 1);
33778 /* Now emit the load instruction to the same register. */
33779 mode = GET_MODE (mem);
33780 switch (mode)
33782 case QImode:
33783 mode_name = "char";
33784 load_str = "lbz";
33785 break;
33787 case HImode:
33788 mode_name = "short";
33789 load_str = "lhz";
33790 break;
33792 case SImode:
33793 mode_name = "int";
33794 load_str = "lwz";
33795 break;
33797 case DImode:
33798 gcc_assert (TARGET_POWERPC64);
33799 mode_name = "long";
33800 load_str = "ld";
33801 break;
33803 default:
33804 gcc_unreachable ();
33807 /* Emit the addis instruction. */
33808 fuse_ops[0] = target;
33809 if (satisfies_constraint_L (addis_value))
33811 fuse_ops[1] = addis_value;
33812 addis_str = "lis %0,%v1";
33815 else if (GET_CODE (addis_value) == PLUS)
33817 rtx op0 = XEXP (addis_value, 0);
33818 rtx op1 = XEXP (addis_value, 1);
33820 if (REG_P (op0) && CONST_INT_P (op1)
33821 && satisfies_constraint_L (op1))
33823 fuse_ops[1] = op0;
33824 fuse_ops[2] = op1;
33825 addis_str = "addis %0,%1,%v2";
33829 else if (GET_CODE (addis_value) == HIGH)
33831 rtx value = XEXP (addis_value, 0);
33832 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33834 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33835 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33836 if (TARGET_ELF)
33837 addis_str = "addis %0,%2,%1@toc@ha";
33839 else if (TARGET_XCOFF)
33840 addis_str = "addis %0,%1@u(%2)";
33842 else
33843 gcc_unreachable ();
33846 else if (GET_CODE (value) == PLUS)
33848 rtx op0 = XEXP (value, 0);
33849 rtx op1 = XEXP (value, 1);
33851 if (GET_CODE (op0) == UNSPEC
33852 && XINT (op0, 1) == UNSPEC_TOCREL
33853 && CONST_INT_P (op1))
33855 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33856 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33857 fuse_ops[3] = op1;
33858 if (TARGET_ELF)
33859 addis_str = "addis %0,%2,%1+%3@toc@ha";
33861 else if (TARGET_XCOFF)
33862 addis_str = "addis %0,%1+%3@u(%2)";
33864 else
33865 gcc_unreachable ();
33869 else if (satisfies_constraint_L (value))
33871 fuse_ops[1] = value;
33872 addis_str = "lis %0,%v1";
33875 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33877 fuse_ops[1] = value;
33878 addis_str = "lis %0,%1@ha";
33882 if (!addis_str)
33883 fatal_insn ("Could not generate addis value for fusion", addis_value);
33885 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33886 comment_str, mode_name);
33887 output_asm_insn (insn_template, fuse_ops);
33889 /* Emit the D-form load instruction. */
33890 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33892 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33893 fuse_ops[1] = load_offset;
33894 output_asm_insn (insn_template, fuse_ops);
33897 else if (GET_CODE (load_offset) == UNSPEC
33898 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33900 if (TARGET_ELF)
33901 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33903 else if (TARGET_XCOFF)
33904 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33906 else
33907 gcc_unreachable ();
33909 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33910 output_asm_insn (insn_template, fuse_ops);
33913 else if (GET_CODE (load_offset) == PLUS
33914 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33915 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33916 && CONST_INT_P (XEXP (load_offset, 1)))
33918 rtx tocrel_unspec = XEXP (load_offset, 0);
33919 if (TARGET_ELF)
33920 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33922 else if (TARGET_XCOFF)
33923 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33925 else
33926 gcc_unreachable ();
33928 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33929 fuse_ops[2] = XEXP (load_offset, 1);
33930 output_asm_insn (insn_template, fuse_ops);
33933 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33935 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33937 fuse_ops[1] = load_offset;
33938 output_asm_insn (insn_template, fuse_ops);
33941 else
33942 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33944 return "";
33947 /* Analyze vector computations and remove unnecessary doubleword
33948 swaps (xxswapdi instructions). This pass is performed only
33949 for little-endian VSX code generation.
33951 For this specific case, loads and stores of 4x32 and 2x64 vectors
33952 are inefficient. These are implemented using the lxvd2x and
33953 stxvd2x instructions, which invert the order of doublewords in
33954 a vector register. Thus the code generation inserts an xxswapdi
33955 after each such load, and prior to each such store. (For spill
33956 code after register assignment, an additional xxswapdi is inserted
33957 following each store in order to return a hard register to its
33958 unpermuted value.)
33960 The extra xxswapdi instructions reduce performance. This can be
33961 particularly bad for vectorized code. The purpose of this pass
33962 is to reduce the number of xxswapdi instructions required for
33963 correctness.
33965 The primary insight is that much code that operates on vectors
33966 does not care about the relative order of elements in a register,
33967 so long as the correct memory order is preserved. If we have
33968 a computation where all input values are provided by lxvd2x/xxswapdi
33969 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33970 and all intermediate computations are pure SIMD (independent of
33971 element order), then all the xxswapdi's associated with the loads
33972 and stores may be removed.
33974 This pass uses some of the infrastructure and logical ideas from
33975 the "web" pass in web.c. We create maximal webs of computations
33976 fitting the description above using union-find. Each such web is
33977 then optimized by removing its unnecessary xxswapdi instructions.
33979 The pass is placed prior to global optimization so that we can
33980 perform the optimization in the safest and simplest way possible;
33981 that is, by replacing each xxswapdi insn with a register copy insn.
33982 Subsequent forward propagation will remove copies where possible.
33984 There are some operations sensitive to element order for which we
33985 can still allow the operation, provided we modify those operations.
33986 These include CONST_VECTORs, for which we must swap the first and
33987 second halves of the constant vector; and SUBREGs, for which we
33988 must adjust the byte offset to account for the swapped doublewords.
33989 A remaining opportunity would be non-immediate-form splats, for
33990 which we should adjust the selected lane of the input. We should
33991 also make code generation adjustments for sum-across operations,
33992 since this is a common vectorizer reduction.
33994 Because we run prior to the first split, we can see loads and stores
33995 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33996 vector loads and stores that have not yet been split into a permuting
33997 load/store and a swap. (One way this can happen is with a builtin
33998 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33999 than deleting a swap, we convert the load/store into a permuting
34000 load/store (which effectively removes the swap). */
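/* A minimal illustration of the intended effect (little-endian,
   hypothetical operands):

	lxvd2x vs0,0,r3		load, doublewords reversed
	xxswapd vs0,vs0		correct the element order
	...pure SIMD arithmetic on vs0...
	xxswapd vs0,vs0		undo the element order
	stxvd2x vs0,0,r4	store, doublewords reversed

   Both xxswapd instructions are replaced by register copies, which
   subsequent forward propagation can remove.  */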
34002 /* Notes on Permutes
34004 We do not currently handle computations that contain permutes. There
34005 is a general transformation that can be performed correctly, but it
34006 may introduce more expensive code than it replaces. To handle these
34007 would require a cost model to determine when to perform the optimization.
34008 This commentary records how this could be done if desired.
34010 The most general permute is something like this (example for V16QI):
34012 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
34013 (parallel [(const_int a0) (const_int a1)
34014 ...
34015 (const_int a14) (const_int a15)]))
34017 where a0,...,a15 are in [0,31] and select the elements from op1 and op2
34018 that are produced in the result.
34020 Regardless of mode, we can convert the PARALLEL to a mask of 16
34021 byte-element selectors. Let's call this M, with M[i] representing
34022 the ith byte-element selector value. Then if we swap doublewords
34023 throughout the computation, we can get correct behavior by replacing
34024 M with M' as follows:
34026 M'[i] = { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
34027 { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
34028 { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
34029 { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
34031 This seems promising at first, since we are just replacing one mask
34032 with another. But certain masks are preferable to others. If M
34033 is a mask that matches a vmrghh pattern, for example, M' certainly
34034 will not. Instead of a single vmrghh, we would generate a load of
34035 M' and a vperm. So we would need to know how many xxswapd's we can
34036 remove as a result of this transformation to determine if it's
34037 profitable; and preferably the logic would need to be aware of all
34038 the special preferable masks.
34040 Another form of permute is an UNSPEC_VPERM, in which the mask is
34041 already in a register. In some cases, this mask may be a constant
34042 that we can discover with ud-chains, in which case the above
34043 transformation is ok. However, the common usage here is for the
34044 mask to be produced by an UNSPEC_LVSL, in which case the mask
34045 cannot be known at compile time. In such a case we would have to
34046 generate several instructions to compute M' as above at run time,
34047 and a cost model is needed again. */
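/* A sketch of the mask rewrite M -> M' described above.  This helper is
   purely illustrative (its name and interface are hypothetical) and is
   not called anywhere; M and M_PRIME are 16-entry byte-element selector
   masks with values in [0,31].  */

static void ATTRIBUTE_UNUSED
swap_permute_mask (const unsigned char m[16], unsigned char m_prime[16])
{
  for (int i = 0; i < 16; ++i)
    {
      /* The selector for position i comes from the opposite doubleword
	 of the mask.  */
      unsigned char v = m[(i + 8) % 16];
      /* Point the selector at the swapped doubleword of its source
	 operand: values 0-7 and 16-23 move up by 8, values 8-15 and
	 24-31 move down by 8.  */
      m_prime[i] = (v % 16 < 8) ? v + 8 : v - 8;
    }
}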
34049 /* This is based on the union-find logic in web.c. web_entry_base is
34050 defined in df.h. */
34051 class swap_web_entry : public web_entry_base
34053 public:
34054 /* Pointer to the insn. */
34055 rtx_insn *insn;
34056 /* Set if insn contains a mention of a vector register. All other
34057 fields are undefined if this field is unset. */
34058 unsigned int is_relevant : 1;
34059 /* Set if insn is a load. */
34060 unsigned int is_load : 1;
34061 /* Set if insn is a store. */
34062 unsigned int is_store : 1;
34063 /* Set if insn is a doubleword swap. This can either be a register swap
34064 or a permuting load or store (test is_load and is_store for this). */
34065 unsigned int is_swap : 1;
34066 /* Set if the insn has a live-in use of a parameter register. */
34067 unsigned int is_live_in : 1;
34068 /* Set if the insn has a live-out def of a return register. */
34069 unsigned int is_live_out : 1;
34070 /* Set if the insn contains a subreg reference of a vector register. */
34071 unsigned int contains_subreg : 1;
34072 /* Set if the insn contains a 128-bit integer operand. */
34073 unsigned int is_128_int : 1;
34074 /* Set if this is a call-insn. */
34075 unsigned int is_call : 1;
34076 /* Set if this insn does not perform a vector operation for which
34077 element order matters, or if we know how to fix it up if it does.
34078 Undefined if is_swap is set. */
34079 unsigned int is_swappable : 1;
34080 /* A nonzero value indicates what kind of special handling for this
34081 insn is required if doublewords are swapped. Undefined if
34082 is_swappable is not set. */
34083 unsigned int special_handling : 3;
34084 /* Set if the web represented by this entry cannot be optimized. */
34085 unsigned int web_not_optimizable : 1;
34086 /* Set if this insn should be deleted. */
34087 unsigned int will_delete : 1;
34090 enum special_handling_values {
34091 SH_NONE = 0,
34092 SH_CONST_VECTOR,
34093 SH_SUBREG,
34094 SH_NOSWAP_LD,
34095 SH_NOSWAP_ST,
34096 SH_EXTRACT,
34097 SH_SPLAT
34100 /* Union INSN with all insns containing definitions that reach USE.
34101 Detect whether USE is live-in to the current function. */
34102 static void
34103 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
34105 struct df_link *link = DF_REF_CHAIN (use);
34107 if (!link)
34108 insn_entry[INSN_UID (insn)].is_live_in = 1;
34110 while (link)
34112 if (DF_REF_IS_ARTIFICIAL (link->ref))
34113 insn_entry[INSN_UID (insn)].is_live_in = 1;
34115 if (DF_REF_INSN_INFO (link->ref))
34117 rtx def_insn = DF_REF_INSN (link->ref);
34118 (void)unionfind_union (insn_entry + INSN_UID (insn),
34119 insn_entry + INSN_UID (def_insn));
34122 link = link->next;
34126 /* Union INSN with all insns containing uses reached from DEF.
34127 Detect whether DEF is live-out from the current function. */
34128 static void
34129 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
34131 struct df_link *link = DF_REF_CHAIN (def);
34133 if (!link)
34134 insn_entry[INSN_UID (insn)].is_live_out = 1;
34136 while (link)
34138 /* This could be an eh use or some other artificial use;
34139 we treat these all the same (killing the optimization). */
34140 if (DF_REF_IS_ARTIFICIAL (link->ref))
34141 insn_entry[INSN_UID (insn)].is_live_out = 1;
34143 if (DF_REF_INSN_INFO (link->ref))
34145 rtx use_insn = DF_REF_INSN (link->ref);
34146 (void)unionfind_union (insn_entry + INSN_UID (insn),
34147 insn_entry + INSN_UID (use_insn));
34150 link = link->next;
34154 /* Return 1 iff INSN is a load insn, including permuting loads that
34155 represent an lxvd2x instruction; else return 0. */
34156 static unsigned int
34157 insn_is_load_p (rtx insn)
34159 rtx body = PATTERN (insn);
34161 if (GET_CODE (body) == SET)
34163 if (GET_CODE (SET_SRC (body)) == MEM)
34164 return 1;
34166 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
34167 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
34168 return 1;
34170 return 0;
34173 if (GET_CODE (body) != PARALLEL)
34174 return 0;
34176 rtx set = XVECEXP (body, 0, 0);
34178 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
34179 return 1;
34181 return 0;
34184 /* Return 1 iff INSN is a store insn, including permuting stores that
34185 represent an stxvd2x instruction; else return 0. */
34186 static unsigned int
34187 insn_is_store_p (rtx insn)
34189 rtx body = PATTERN (insn);
34190 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
34191 return 1;
34192 if (GET_CODE (body) != PARALLEL)
34193 return 0;
34194 rtx set = XVECEXP (body, 0, 0);
34195 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
34196 return 1;
34197 return 0;
34200 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
34201 a permuting load, or a permuting store. */
34202 static unsigned int
34203 insn_is_swap_p (rtx insn)
34205 rtx body = PATTERN (insn);
34206 if (GET_CODE (body) != SET)
34207 return 0;
34208 rtx rhs = SET_SRC (body);
34209 if (GET_CODE (rhs) != VEC_SELECT)
34210 return 0;
34211 rtx parallel = XEXP (rhs, 1);
34212 if (GET_CODE (parallel) != PARALLEL)
34213 return 0;
34214 unsigned int len = XVECLEN (parallel, 0);
34215 if (len != 2 && len != 4 && len != 8 && len != 16)
34216 return 0;
34217 for (unsigned int i = 0; i < len / 2; ++i)
34219 rtx op = XVECEXP (parallel, 0, i);
34220 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
34221 return 0;
34223 for (unsigned int i = len / 2; i < len; ++i)
34225 rtx op = XVECEXP (parallel, 0, i);
34226 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
34227 return 0;
34229 return 1;
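/* For example, the doubleword swap recognized above has this form for
   V4SI (register numbers hypothetical):

	(set (reg:V4SI 32)
	     (vec_select:V4SI (reg:V4SI 33)
			      (parallel [(const_int 2) (const_int 3)
					 (const_int 0) (const_int 1)])))  */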
34232 /* Return 1 iff OP is an operand that will not be affected by having
34233 vector doublewords swapped in memory. */
34234 static unsigned int
34235 rtx_is_swappable_p (rtx op, unsigned int *special)
34237 enum rtx_code code = GET_CODE (op);
34238 int i, j;
34239 rtx parallel;
34241 switch (code)
34243 case LABEL_REF:
34244 case SYMBOL_REF:
34245 case CLOBBER:
34246 case REG:
34247 return 1;
34249 case VEC_CONCAT:
34250 case ASM_INPUT:
34251 case ASM_OPERANDS:
34252 return 0;
34254 case CONST_VECTOR:
34256 *special = SH_CONST_VECTOR;
34257 return 1;
34260 case VEC_DUPLICATE:
34261 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
34262 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
34263 it represents a vector splat for which we can do special
34264 handling. */
34265 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
34266 return 1;
34267 else if (GET_CODE (XEXP (op, 0)) == REG
34268 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
34269 /* This catches V2DF and V2DI splat, at a minimum. */
34270 return 1;
34271 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
34272 /* If the duplicated item is from a select, defer to the select
34273 processing to see if we can change the lane for the splat. */
34274 return rtx_is_swappable_p (XEXP (op, 0), special);
34275 else
34276 return 0;
34278 case VEC_SELECT:
34279 /* A vec_extract operation is ok if we change the lane. */
34280 if (GET_CODE (XEXP (op, 0)) == REG
34281 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34282 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34283 && XVECLEN (parallel, 0) == 1
34284 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34286 *special = SH_EXTRACT;
34287 return 1;
34289 else
34290 return 0;
34292 case UNSPEC:
34294 /* Various operations are unsafe for this optimization, at least
34295 without significant additional work. Permutes are obviously
34296 problematic, as both the permute control vector and the ordering
34297 of the target values are invalidated by doubleword swapping.
34298 Vector pack and unpack modify the number of vector lanes.
34299 Merge-high/low will not operate correctly on swapped operands.
34300 Vector shifts across element boundaries are clearly uncool,
34301 as are vector select and concatenate operations. Vector
34302 sum-across instructions define one operand with a specific
34303 order-dependent element, so additional fixup code would be
34304 needed to make those work. Vector set and non-immediate-form
34305 vector splat are element-order sensitive. A few of these
34306 cases might be workable with special handling if required.
34307 Adding cost modeling would be appropriate in some cases. */
34308 int val = XINT (op, 1);
34309 switch (val)
34311 default:
34312 break;
34313 case UNSPEC_VMRGH_DIRECT:
34314 case UNSPEC_VMRGL_DIRECT:
34315 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34316 case UNSPEC_VPACK_SIGN_UNS_SAT:
34317 case UNSPEC_VPACK_UNS_UNS_MOD:
34318 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34319 case UNSPEC_VPACK_UNS_UNS_SAT:
34320 case UNSPEC_VPERM:
34321 case UNSPEC_VPERM_UNS:
34322 case UNSPEC_VPERMHI:
34323 case UNSPEC_VPERMSI:
34324 case UNSPEC_VPKPX:
34325 case UNSPEC_VSLDOI:
34326 case UNSPEC_VSLO:
34327 case UNSPEC_VSRO:
34328 case UNSPEC_VSUM2SWS:
34329 case UNSPEC_VSUM4S:
34330 case UNSPEC_VSUM4UBS:
34331 case UNSPEC_VSUMSWS:
34332 case UNSPEC_VSUMSWS_DIRECT:
34333 case UNSPEC_VSX_CONCAT:
34334 case UNSPEC_VSX_SET:
34335 case UNSPEC_VSX_SLDWI:
34336 case UNSPEC_VUNPACK_HI_SIGN:
34337 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34338 case UNSPEC_VUNPACK_LO_SIGN:
34339 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34340 case UNSPEC_VUPKHPX:
34341 case UNSPEC_VUPKHS_V4SF:
34342 case UNSPEC_VUPKHU_V4SF:
34343 case UNSPEC_VUPKLPX:
34344 case UNSPEC_VUPKLS_V4SF:
34345 case UNSPEC_VUPKLU_V4SF:
34346 case UNSPEC_VSX_CVDPSPN:
34347 case UNSPEC_VSX_CVSPDP:
34348 case UNSPEC_VSX_CVSPDPN:
34349 return 0;
34350 case UNSPEC_VSPLT_DIRECT:
34351 *special = SH_SPLAT;
34352 return 1;
34356 default:
34357 break;
34360 const char *fmt = GET_RTX_FORMAT (code);
34361 int ok = 1;
34363 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34364 if (fmt[i] == 'e' || fmt[i] == 'u')
34366 unsigned int special_op = SH_NONE;
34367 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34368 if (special_op == SH_NONE)
34369 continue;
34370 /* Ensure we never have two kinds of special handling
34371 for the same insn. */
34372 if (*special != SH_NONE && *special != special_op)
34373 return 0;
34374 *special = special_op;
34376 else if (fmt[i] == 'E')
34377 for (j = 0; j < XVECLEN (op, i); ++j)
34379 unsigned int special_op = SH_NONE;
34380 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34381 if (special_op == SH_NONE)
34382 continue;
34383 /* Ensure we never have two kinds of special handling
34384 for the same insn. */
34385 if (*special != SH_NONE && *special != special_op)
34386 return 0;
34387 *special = special_op;
34390 return ok;
34393 /* Return 1 iff INSN is an insn that will not be affected by
34394 having vector doublewords swapped in memory (in which case
34395 *SPECIAL is unchanged), or that can be modified to be correct
34396 if vector doublewords are swapped in memory (in which case
34397 *SPECIAL is changed to a value indicating how). */
34398 static unsigned int
34399 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34400 unsigned int *special)
34402 /* Calls are always bad. */
34403 if (GET_CODE (insn) == CALL_INSN)
34404 return 0;
34406 /* Loads and stores seen here are not permuting, but we can still
34407 fix them up by converting them to permuting ones. Exceptions:
34408 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34409 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34410 for the SET source. */
34411 rtx body = PATTERN (insn);
34412 int i = INSN_UID (insn);
34414 if (insn_entry[i].is_load)
34416 if (GET_CODE (body) == SET)
34418 *special = SH_NOSWAP_LD;
34419 return 1;
34421 else
34422 return 0;
34425 if (insn_entry[i].is_store)
34427 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34429 *special = SH_NOSWAP_ST;
34430 return 1;
34432 else
34433 return 0;
34436 /* A convert to single precision can be left as is provided that
34437 all of its uses are in xxspltw instructions that splat BE element
34438 zero. */
34439 if (GET_CODE (body) == SET
34440 && GET_CODE (SET_SRC (body)) == UNSPEC
34441 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
34443 df_ref def;
34444 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34446 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34448 struct df_link *link = DF_REF_CHAIN (def);
34449 if (!link)
34450 return 0;
34452 for (; link; link = link->next) {
34453 rtx use_insn = DF_REF_INSN (link->ref);
34454 rtx use_body = PATTERN (use_insn);
34455 if (GET_CODE (use_body) != SET
34456 || GET_CODE (SET_SRC (use_body)) != UNSPEC
34457 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
34458 || XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
34459 return 0;
34463 return 1;
34466 /* Otherwise check the operands for vector lane violations. */
34467 return rtx_is_swappable_p (body, special);
34470 enum chain_purpose { FOR_LOADS, FOR_STORES };
34472 /* Return true if the UD or DU chain headed by LINK is non-empty,
34473 and every entry on the chain references an insn that is a
34474 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34475 register swap must have only permuting loads as reaching defs.
34476 If PURPOSE is FOR_STORES, each such register swap must have only
34477 register swaps or permuting stores as reached uses. */
34478 static bool
34479 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34480 enum chain_purpose purpose)
34482 if (!link)
34483 return false;
34485 for (; link; link = link->next)
34487 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34488 continue;
34490 if (DF_REF_IS_ARTIFICIAL (link->ref))
34491 return false;
34493 rtx reached_insn = DF_REF_INSN (link->ref);
34494 unsigned uid = INSN_UID (reached_insn);
34495 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34497 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34498 || insn_entry[uid].is_store)
34499 return false;
34501 if (purpose == FOR_LOADS)
34503 df_ref use;
34504 FOR_EACH_INSN_INFO_USE (use, insn_info)
34506 struct df_link *swap_link = DF_REF_CHAIN (use);
34508 while (swap_link)
34510 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34511 return false;
34513 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34514 unsigned uid2 = INSN_UID (swap_def_insn);
34516 /* Only permuting loads are allowed. */
34517 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34518 return false;
34520 swap_link = swap_link->next;
34524 else if (purpose == FOR_STORES)
34526 df_ref def;
34527 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34529 struct df_link *swap_link = DF_REF_CHAIN (def);
34531 while (swap_link)
34533 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34534 return false;
34536 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34537 unsigned uid2 = INSN_UID (swap_use_insn);
34539 /* Permuting stores or register swaps are allowed. */
34540 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34541 return false;
34543 swap_link = swap_link->next;
34549 return true;
34552 /* Mark the xxswapdi instructions associated with permuting loads and
34553 stores for removal. Note that we only flag them for deletion here,
34554 as there is a possibility of a swap being reached from multiple
34555 loads, etc. */
34556 static void
34557 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34559 rtx insn = insn_entry[i].insn;
34560 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34562 if (insn_entry[i].is_load)
34564 df_ref def;
34565 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34567 struct df_link *link = DF_REF_CHAIN (def);
34569 /* We know by now that these are swaps, so we can delete
34570 them confidently. */
34571 while (link)
34573 rtx use_insn = DF_REF_INSN (link->ref);
34574 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34575 link = link->next;
34579 else if (insn_entry[i].is_store)
34581 df_ref use;
34582 FOR_EACH_INSN_INFO_USE (use, insn_info)
34584 /* Ignore uses for addressability. */
34585 machine_mode mode = GET_MODE (DF_REF_REG (use));
34586 if (!VECTOR_MODE_P (mode))
34587 continue;
34589 struct df_link *link = DF_REF_CHAIN (use);
34591 /* We know by now that these are swaps, so we can delete
34592 them confidently. */
34593 while (link)
34595 rtx def_insn = DF_REF_INSN (link->ref);
34596 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34597 link = link->next;
34603 /* OP is either a CONST_VECTOR or an expression containing one.
34604 Swap the first half of the vector with the second in the first
34605 case. Recurse to find it in the second. */
34606 static void
34607 swap_const_vector_halves (rtx op)
34609 int i;
34610 enum rtx_code code = GET_CODE (op);
34611 if (GET_CODE (op) == CONST_VECTOR)
34613 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34614 for (i = 0; i < half_units; ++i)
34616 rtx temp = CONST_VECTOR_ELT (op, i);
34617 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34618 CONST_VECTOR_ELT (op, i + half_units) = temp;
34621 else
34623 int j;
34624 const char *fmt = GET_RTX_FORMAT (code);
34625 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34626 if (fmt[i] == 'e' || fmt[i] == 'u')
34627 swap_const_vector_halves (XEXP (op, i));
34628 else if (fmt[i] == 'E')
34629 for (j = 0; j < XVECLEN (op, i); ++j)
34630 swap_const_vector_halves (XVECEXP (op, i, j));
34634 /* Find all subregs of a vector expression that perform a narrowing,
34635 and adjust the subreg index to account for doubleword swapping. */
34636 static void
34637 adjust_subreg_index (rtx op)
34639 enum rtx_code code = GET_CODE (op);
34640 if (code == SUBREG
34641 && (GET_MODE_SIZE (GET_MODE (op))
34642 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34644 unsigned int index = SUBREG_BYTE (op);
34645 if (index < 8)
34646 index += 8;
34647 else
34648 index -= 8;
34649 SUBREG_BYTE (op) = index;
34652 const char *fmt = GET_RTX_FORMAT (code);
34653 int i,j;
34654 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34655 if (fmt[i] == 'e' || fmt[i] == 'u')
34656 adjust_subreg_index (XEXP (op, i));
34657 else if (fmt[i] == 'E')
34658 for (j = 0; j < XVECLEN (op, i); ++j)
34659 adjust_subreg_index (XVECEXP (op, i, j));
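/* For example, a narrowing (subreg:DI (reg:V2DI R) 0) is rewritten to
   (subreg:DI (reg:V2DI R) 8), and vice versa, so that the subreg still
   refers to the same doubleword after the swap.  */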
34662 /* Convert the non-permuting load INSN to a permuting one. */
34663 static void
34664 permute_load (rtx_insn *insn)
34666 rtx body = PATTERN (insn);
34667 rtx mem_op = SET_SRC (body);
34668 rtx tgt_reg = SET_DEST (body);
34669 machine_mode mode = GET_MODE (tgt_reg);
34670 int n_elts = GET_MODE_NUNITS (mode);
34671 int half_elts = n_elts / 2;
34672 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34673 int i, j;
34674 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34675 XVECEXP (par, 0, i) = GEN_INT (j);
34676 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34677 XVECEXP (par, 0, i) = GEN_INT (j);
34678 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34679 SET_SRC (body) = sel;
34680 INSN_CODE (insn) = -1; /* Force re-recognition. */
34681 df_insn_rescan (insn);
34683 if (dump_file)
34684 fprintf (dump_file, "Replacing load %d with permuted load\n",
34685 INSN_UID (insn));
34688 /* Convert the non-permuting store INSN to a permuting one. */
34689 static void
34690 permute_store (rtx_insn *insn)
34692 rtx body = PATTERN (insn);
34693 rtx src_reg = SET_SRC (body);
34694 machine_mode mode = GET_MODE (src_reg);
34695 int n_elts = GET_MODE_NUNITS (mode);
34696 int half_elts = n_elts / 2;
34697 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34698 int i, j;
34699 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34700 XVECEXP (par, 0, i) = GEN_INT (j);
34701 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34702 XVECEXP (par, 0, i) = GEN_INT (j);
34703 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34704 SET_SRC (body) = sel;
34705 INSN_CODE (insn) = -1; /* Force re-recognition. */
34706 df_insn_rescan (insn);
34708 if (dump_file)
34709 fprintf (dump_file, "Replacing store %d with permuted store\n",
34710 INSN_UID (insn));
34713 /* Given INSN that contains a vector extract operation, adjust the index
34714 of the extracted lane to account for the doubleword swap. */
34715 static void
34716 adjust_extract (rtx_insn *insn)
34718 rtx pattern = PATTERN (insn);
34719 if (GET_CODE (pattern) == PARALLEL)
34720 pattern = XVECEXP (pattern, 0, 0);
34721 rtx src = SET_SRC (pattern);
34722 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34723 account for that. */
34724 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34725 rtx par = XEXP (sel, 1);
34726 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34727 int lane = INTVAL (XVECEXP (par, 0, 0));
34728 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34729 XVECEXP (par, 0, 0) = GEN_INT (lane);
34730 INSN_CODE (insn) = -1; /* Force re-recognition. */
34731 df_insn_rescan (insn);
34733 if (dump_file)
34734 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
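/* For example, with V2DI (half_elts == 1) an extract of lane 0 is
   changed to extract lane 1, and vice versa, since the doublewords
   have traded places in the register.  */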
34737 /* Given INSN that contains a vector direct-splat operation, adjust the index
34738 of the source lane to account for the doubleword swap. */
34739 static void
34740 adjust_splat (rtx_insn *insn)
34742 rtx body = PATTERN (insn);
34743 rtx unspec = XEXP (body, 1);
34744 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34745 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34746 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34747 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34748 INSN_CODE (insn) = -1; /* Force re-recognition. */
34749 df_insn_rescan (insn);
34751 if (dump_file)
34752 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34755 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34756 with special handling. Take care of that here. */
34757 static void
34758 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34760 rtx_insn *insn = insn_entry[i].insn;
34761 rtx body = PATTERN (insn);
34763 switch (insn_entry[i].special_handling)
34765 default:
34766 gcc_unreachable ();
34767 case SH_CONST_VECTOR:
34769 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34770 gcc_assert (GET_CODE (body) == SET);
34771 rtx rhs = SET_SRC (body);
34772 swap_const_vector_halves (rhs);
34773 if (dump_file)
34774 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34775 break;
34777 case SH_SUBREG:
34778 /* A subreg of the same size is already safe. For subregs that
34779 select a smaller portion of a reg, adjust the index for
34780 swapped doublewords. */
34781 adjust_subreg_index (body);
34782 if (dump_file)
34783 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34784 break;
34785 case SH_NOSWAP_LD:
34786 /* Convert a non-permuting load to a permuting one. */
34787 permute_load (insn);
34788 break;
34789 case SH_NOSWAP_ST:
34790 /* Convert a non-permuting store to a permuting one. */
34791 permute_store (insn);
34792 break;
34793 case SH_EXTRACT:
34794 /* Change the lane on an extract operation. */
34795 adjust_extract (insn);
34796 break;
34797 case SH_SPLAT:
34798 /* Change the lane on a direct-splat operation. */
34799 adjust_splat (insn);
34800 break;
34804 /* Find the insn from the Ith table entry, which is known to be a
34805 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34806 static void
34807 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34809 rtx_insn *insn = insn_entry[i].insn;
34810 rtx body = PATTERN (insn);
34811 rtx src_reg = XEXP (SET_SRC (body), 0);
34812 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34813 rtx_insn *new_insn = emit_insn_before (copy, insn);
34814 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34815 df_insn_rescan (new_insn);
34817 if (dump_file)
34819 unsigned int new_uid = INSN_UID (new_insn);
34820 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34823 df_insn_delete (insn);
34824 remove_insn (insn);
34825 insn->set_deleted ();
34828 /* Dump the swap table to DUMP_FILE. */
34829 static void
34830 dump_swap_insn_table (swap_web_entry *insn_entry)
34832 int e = get_max_uid ();
34833 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34835 for (int i = 0; i < e; ++i)
34836 if (insn_entry[i].is_relevant)
34838 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34839 fprintf (dump_file, "%6d %6d ", i,
34840 pred_entry && pred_entry->insn
34841 ? INSN_UID (pred_entry->insn) : 0);
34842 if (insn_entry[i].is_load)
34843 fputs ("load ", dump_file);
34844 if (insn_entry[i].is_store)
34845 fputs ("store ", dump_file);
34846 if (insn_entry[i].is_swap)
34847 fputs ("swap ", dump_file);
34848 if (insn_entry[i].is_live_in)
34849 fputs ("live-in ", dump_file);
34850 if (insn_entry[i].is_live_out)
34851 fputs ("live-out ", dump_file);
34852 if (insn_entry[i].contains_subreg)
34853 fputs ("subreg ", dump_file);
34854 if (insn_entry[i].is_128_int)
34855 fputs ("int128 ", dump_file);
34856 if (insn_entry[i].is_call)
34857 fputs ("call ", dump_file);
34858 if (insn_entry[i].is_swappable)
34860 fputs ("swappable ", dump_file);
34861 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34862 fputs ("special:constvec ", dump_file);
34863 else if (insn_entry[i].special_handling == SH_SUBREG)
34864 fputs ("special:subreg ", dump_file);
34865 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34866 fputs ("special:load ", dump_file);
34867 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34868 fputs ("special:store ", dump_file);
34869 else if (insn_entry[i].special_handling == SH_EXTRACT)
34870 fputs ("special:extract ", dump_file);
34871 else if (insn_entry[i].special_handling == SH_SPLAT)
34872 fputs ("special:splat ", dump_file);
34874 if (insn_entry[i].web_not_optimizable)
34875 fputs ("unoptimizable ", dump_file);
34876 if (insn_entry[i].will_delete)
34877 fputs ("delete ", dump_file);
34878 fputs ("\n", dump_file);
34880 fputs ("\n", dump_file);
34883 /* Main entry point for this pass. */
34884 unsigned int
34885 rs6000_analyze_swaps (function *fun)
34887 swap_web_entry *insn_entry;
34888 basic_block bb;
34889 rtx_insn *insn;
34891 /* Dataflow analysis for use-def chains. */
34892 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34893 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34894 df_analyze ();
34895 df_set_flags (DF_DEFER_INSN_RESCAN);
34897 /* Allocate structure to represent webs of insns. */
34898 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
    {
      unsigned int uid = INSN_UID (insn);
      if (NONDEBUG_INSN_P (insn))
	{
	  insn_entry[uid].insn = insn;

	  if (GET_CODE (insn) == CALL_INSN)
	    insn_entry[uid].is_call = 1;

	  /* Walk the uses and defs to see if we mention vector regs.
	     Record any constraints on optimization of such mentions.  */
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref mention;
	  FOR_EACH_INSN_INFO_USE (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If a use gets its value from a call insn, it will be
		 a hard register and will look like (reg:V4SI 3 3).
		 The df analysis creates two mentions for GPR3 and GPR4,
		 both DImode.  We must recognize this and treat it as a
		 vector mention to ensure the call is unioned with this
		 use.  */
	      if (mode == DImode && DF_REF_INSN_INFO (mention))
		{
		  rtx feeder = DF_REF_INSN (mention);
		  /* FIXME: It is pretty hard to get from the df mention
		     to the mode of the use in the insn.  We arbitrarily
		     pick a vector mode here, even though the use might
		     be a real DImode.  We can be too conservative
		     (create a web larger than necessary) because of
		     this, so consider eventually fixing this.  */
		  if (GET_CODE (feeder) == CALL_INSN)
		    mode = V4SImode;
		}

	      if (VECTOR_MODE_P (mode) || mode == TImode)
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode)
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  union_defs (insn_entry, insn, mention);
		}
	    }
	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	    {
	      /* We use DF_REF_REAL_REG here to get inside any subregs.  */
	      machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

	      /* If we're loading up a hard vector register for a call,
		 it looks like (set (reg:V4SI 9 9) (...)).  The df
		 analysis creates two mentions for GPR9 and GPR10, both
		 DImode.  So relying on the mode from the mentions
		 isn't sufficient to ensure we union the call into the
		 web with the parameter setup code.  Inspect the insn's
		 pattern for the SET.  */
	      rtx pat = PATTERN (insn);
	      if (mode == DImode && GET_CODE (pat) == SET
		  && VECTOR_MODE_P (GET_MODE (SET_DEST (pat))))
		mode = GET_MODE (SET_DEST (pat));

	      if (VECTOR_MODE_P (mode) || mode == TImode)
		{
		  insn_entry[uid].is_relevant = 1;
		  if (mode == TImode || mode == V1TImode)
		    insn_entry[uid].is_128_int = 1;
		  if (DF_REF_INSN_INFO (mention))
		    insn_entry[uid].contains_subreg
		      = !rtx_equal_p (DF_REF_REG (mention),
				      DF_REF_REAL_REG (mention));
		  /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		  else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		    insn_entry[uid].is_live_out = 1;
		  union_uses (insn_entry, insn, mention);
		}
	    }

	  if (insn_entry[uid].is_relevant)
	    {
	      /* Determine if this is a load or store.  */
	      insn_entry[uid].is_load = insn_is_load_p (insn);
	      insn_entry[uid].is_store = insn_is_store_p (insn);

	      /* Determine if this is a doubleword swap.  If not,
		 determine whether it can legally be swapped.  */
	      if (insn_is_swap_p (insn))
		insn_entry[uid].is_swap = 1;
	      else
		{
		  unsigned int special = SH_NONE;
		  insn_entry[uid].is_swappable
		    = insn_is_swappable_p (insn_entry, insn, &special);
		  if (special != SH_NONE && insn_entry[uid].contains_subreg)
		    insn_entry[uid].is_swappable = 0;
		  else if (special != SH_NONE)
		    insn_entry[uid].special_handling = special;
		  else if (insn_entry[uid].contains_subreg)
		    insn_entry[uid].special_handling = SH_SUBREG;
		}
	    }
	}
    }
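
  /* At this point union_defs/union_uses have unioned each relevant insn
     into a web with the other insns that share its vector values;
     unionfind_root (used below) retrieves each web's representative.  */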

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
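
/* As an illustration of what the pass removes (a sketch, not output of
   this compiler): on little-endian VSX a vector copy such as

       v = *src;
       *dst = v;

   naively expands to a permuting load and store bracketing two
   doubleword swaps:

       lxvd2x   vs0,0,r3
       xxpermdi vs0,vs0,vs0,2
       xxpermdi vs0,vs0,vs0,2
       stxvd2x  vs0,0,r4

   When every insn in the web is a permuting load/store or a swap, the
   swaps cancel out and are replaced with copies by the code above.  */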

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
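
/* TODO_df_finish asks the pass manager to call df_finish_pass once the
   pass completes, cleaning up the df chain problems added in
   rs6000_analyze_swaps.  */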

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	      && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
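
/* This factory is invoked at start-up so the pass can be wired into
   the RTL pass list.  For reference, registration follows the usual
   target-pass pattern, roughly (a sketch; the exact reference pass and
   position are chosen where the option-override code runs):

     opt_pass *swaps = make_pass_analyze_swaps (g);
     struct register_pass_info analyze_swaps_info
       = { swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
     register_pass (&analyze_swaps_info);  */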

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var (double_type_node);
      mark_addressable (fenv_var);
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
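
  /* Of the low (FPSCR) word of the mffs image, the three bits kept here
     are FPSCR[NI] and the two FPSCR[RN] rounding-mode bits; all status
     and enable bits are cleared.  The high word is not interpreted by
     mtfsf and passes through unchanged.  */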

  tree fenv_var = create_tmp_var (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower (FPSCR) word of the mffs image,
     including all exception flags.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
                                (*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);
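
  /* Roughly: update_exception_mask keeps the status bits raised in the
     current environment while dropping the summary bits (which the
     hardware regenerates) and the enable/rounding-mode byte, and
     new_exception_mask supplies the enables, rounding mode, and flags
     saved in fenv_var.  ORed together, this gives feupdateenv's
     behavior: restore the saved environment but keep any exceptions
     raised in the meantime.  */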

  tree old_fenv = create_tmp_var (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"