Refactor some code for a future change.
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob127927d9583a418367f854e51a80ea5afc0150ec
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-ssa.h"
63 #include "gimple-walk.h"
64 #include "intl.h"
65 #include "tm-constrs.h"
66 #include "tree-vectorizer.h"
67 #include "target-globals.h"
68 #include "builtins.h"
69 #include "tree-vector-builder.h"
70 #include "context.h"
71 #include "tree-pass.h"
72 #include "except.h"
73 #if TARGET_XCOFF
74 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
75 #endif
76 #include "case-cfn-macros.h"
77 #include "ppc-auxv.h"
78 #include "tree-ssa-propagate.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "rs6000-internal.h"
82 #include "opts.h"
84 /* This file should be included last. */
85 #include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
101 /* Support targetm.vectorize.builtin_mask_for_load. */
102 GTY(()) tree altivec_builtin_mask_for_load;
104 /* Set to nonzero once AIX common-mode calls have been defined. */
105 static GTY(()) int common_mode_defined;
107 #ifdef USING_ELFOS_H
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno = 0;
110 #endif
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
113 int dot_symbols;
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode;
120 #if TARGET_ELF
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128 = false;
127 #endif
129 /* Generate the manged name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128). */
131 static bool ieee128_mangling_gcc_8_1;
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size;
136 #ifdef HAVE_AS_GNU_ATTRIBUTE
137 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
138 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
139 # endif
140 /* Flag whether floating point values have been passed/returned.
141 Note that this doesn't say whether fprs are used, since the
142 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
143 should be set for soft-float values passed in gprs and ieee128
144 values passed in vsx registers. */
145 bool rs6000_passes_float = false;
146 bool rs6000_passes_long_double = false;
147 /* Flag whether vector values have been passed/returned. */
148 bool rs6000_passes_vector = false;
149 /* Flag whether small (<= 8 byte) structures have been returned. */
150 bool rs6000_returns_struct = false;
151 #endif
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
166 static int dbg_cost_ctrl;
168 /* Built in types. */
169 tree rs6000_builtin_types[RS6000_BTI_MAX];
170 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
172 /* Flag to say the TOC is initialized */
173 int toc_initialized, need_toc_init;
174 char toc_label_name[10];
176 /* Cached value of rs6000_variable_issue. This is cached in
177 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
178 static short cached_can_issue_more;
180 static GTY(()) section *read_only_data_section;
181 static GTY(()) section *private_data_section;
182 static GTY(()) section *tls_data_section;
183 static GTY(()) section *tls_private_data_section;
184 static GTY(()) section *read_only_private_data_section;
185 static GTY(()) section *sdata2_section;
187 extern GTY(()) section *toc_section;
188 section *toc_section = 0;
190 /* Describe the vector unit used for modes. */
191 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
192 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
194 /* Register classes for various constraints that are based on the target
195 switches. */
196 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
198 /* Describe the alignment of a vector. */
199 int rs6000_vector_align[NUM_MACHINE_MODES];
201 /* Map selected modes to types for builtins. */
202 GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
204 /* What modes to automatically generate reciprocal divide estimate (fre) and
205 reciprocal sqrt (frsqrte) for. */
206 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV = 0x001,		/* Use divide estimate.  */
  RECIP_DF_DIV = 0x002,
  RECIP_V4SF_DIV = 0x004,
  RECIP_V2DF_DIV = 0x008,

  RECIP_SF_RSQRT = 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
	       | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
234 /* -mrecip options. */
235 static struct
237 const char *string; /* option name */
238 unsigned int mask; /* mask bits to set */
239 } recip_options[] = {
240 { "all", RECIP_ALL },
241 { "none", RECIP_NONE },
242 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
243 | RECIP_V2DF_DIV) },
244 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
245 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
246 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
247 | RECIP_V2DF_RSQRT) },
248 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
249 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.00 (power9).  */
  CLONE_MAX
};
265 /* Map compiler ISA bits into HWCAP names. */
266 struct clone_map {
267 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
268 const char *name; /* name to use in __builtin_cpu_supports. */
271 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
280 /* Newer LIBCs explicitly export this symbol to declare that they provide
281 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
282 reference to this symbol whenever we expand a CPU builtin, so that
283 we never link against an old LIBC. */
284 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
286 /* True if we have expanded a CPU builtin. */
287 bool cpu_builtin_p = false;
289 /* Pointer to function (in rs6000-c.c) that can define or undefine target
290 macros that have changed. Languages that don't support the preprocessor
291 don't link in rs6000-c.c, so we can't call it directly. */
292 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
294 /* Simplfy register classes into simpler classifications. We assume
295 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
296 check for standard register classes (gpr/floating/altivec/vsx) and
297 floating/vector classes (float/altivec/vsx). */
299 enum rs6000_reg_type {
300 NO_REG_TYPE,
301 PSEUDO_REG_TYPE,
302 GPR_REG_TYPE,
303 VSX_REG_TYPE,
304 ALTIVEC_REG_TYPE,
305 FPR_REG_TYPE,
306 SPR_REG_TYPE,
307 CR_REG_TYPE
310 /* Map register class to register type. */
311 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
313 /* First/last register type for the 'normal' register types (i.e. general
314 purpose, floating point, altivec, and VSX registers). */
315 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
317 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
338 /* Map reload register type to a register in the register class. */
339 struct reload_reg_map_type {
340 const char *name; /* Register class name. */
341 int reg; /* Register in the register class. */
344 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
345 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
346 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
347 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
348 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
365 /* Register type masks based on the type, of valid addressing modes. */
366 struct rs6000_reg_addr {
367 enum insn_code reload_load; /* INSN to reload for loading. */
368 enum insn_code reload_store; /* INSN to reload for storing. */
369 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
370 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
371 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
372 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
373 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
376 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
378 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
379 static inline bool
380 mode_supports_pre_incdec_p (machine_mode mode)
382 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
383 != 0);
386 /* Helper function to say whether a mode supports PRE_MODIFY. */
387 static inline bool
388 mode_supports_pre_modify_p (machine_mode mode)
390 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
391 != 0);
394 /* Return true if we have D-form addressing in altivec registers. */
395 static inline bool
396 mode_supports_vmx_dform (machine_mode mode)
398 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
401 /* Return true if we have D-form addressing in VSX registers. This addressing
402 is more limited than normal d-form addressing in that the offset must be
403 aligned on a 16-byte boundary. */
404 static inline bool
405 mode_supports_dq_form (machine_mode mode)
407 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
408 != 0);
411 /* Given that there exists at least one variable that is set (produced)
412 by OUT_INSN and read (consumed) by IN_INSN, return true iff
413 IN_INSN represents one or more memory store operations and none of
414 the variables set by OUT_INSN is used by IN_INSN as the address of a
415 store operation. If either IN_INSN or OUT_INSN does not represent
416 a "single" RTL SET expression (as loosely defined by the
417 implementation of the single_set function) or a PARALLEL with only
418 SETs, CLOBBERs, and USEs inside, this function returns false.
420 This rs6000-specific version of store_data_bypass_p checks for
421 certain conditions that result in assertion failures (and internal
422 compiler errors) in the generic store_data_bypass_p function and
423 returns false rather than calling store_data_bypass_p if one of the
424 problematic conditions is detected. */
427 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
429 rtx out_set, in_set;
430 rtx out_pat, in_pat;
431 rtx out_exp, in_exp;
432 int i, j;
434 in_set = single_set (in_insn);
435 if (in_set)
437 if (MEM_P (SET_DEST (in_set)))
439 out_set = single_set (out_insn);
440 if (!out_set)
442 out_pat = PATTERN (out_insn);
443 if (GET_CODE (out_pat) == PARALLEL)
445 for (i = 0; i < XVECLEN (out_pat, 0); i++)
447 out_exp = XVECEXP (out_pat, 0, i);
448 if ((GET_CODE (out_exp) == CLOBBER)
449 || (GET_CODE (out_exp) == USE))
450 continue;
451 else if (GET_CODE (out_exp) != SET)
452 return false;
458 else
460 in_pat = PATTERN (in_insn);
461 if (GET_CODE (in_pat) != PARALLEL)
462 return false;
464 for (i = 0; i < XVECLEN (in_pat, 0); i++)
466 in_exp = XVECEXP (in_pat, 0, i);
467 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
468 continue;
469 else if (GET_CODE (in_exp) != SET)
470 return false;
472 if (MEM_P (SET_DEST (in_exp)))
474 out_set = single_set (out_insn);
475 if (!out_set)
477 out_pat = PATTERN (out_insn);
478 if (GET_CODE (out_pat) != PARALLEL)
479 return false;
480 for (j = 0; j < XVECLEN (out_pat, 0); j++)
482 out_exp = XVECEXP (out_pat, 0, j);
483 if ((GET_CODE (out_exp) == CLOBBER)
484 || (GET_CODE (out_exp) == USE))
485 continue;
486 else if (GET_CODE (out_exp) != SET)
487 return false;
493 return store_data_bypass_p (out_insn, in_insn);
497 /* Processor costs (relative to an add) */
499 const struct processor_costs *rs6000_cost;
501 /* Instruction size costs on 32bit processors. */
502 static const
503 struct processor_costs size32_cost = {
504 COSTS_N_INSNS (1), /* mulsi */
505 COSTS_N_INSNS (1), /* mulsi_const */
506 COSTS_N_INSNS (1), /* mulsi_const9 */
507 COSTS_N_INSNS (1), /* muldi */
508 COSTS_N_INSNS (1), /* divsi */
509 COSTS_N_INSNS (1), /* divdi */
510 COSTS_N_INSNS (1), /* fp */
511 COSTS_N_INSNS (1), /* dmul */
512 COSTS_N_INSNS (1), /* sdiv */
513 COSTS_N_INSNS (1), /* ddiv */
514 32, /* cache line size */
515 0, /* l1 cache */
516 0, /* l2 cache */
517 0, /* streams */
518 0, /* SF->DF convert */
521 /* Instruction size costs on 64bit processors. */
522 static const
523 struct processor_costs size64_cost = {
524 COSTS_N_INSNS (1), /* mulsi */
525 COSTS_N_INSNS (1), /* mulsi_const */
526 COSTS_N_INSNS (1), /* mulsi_const9 */
527 COSTS_N_INSNS (1), /* muldi */
528 COSTS_N_INSNS (1), /* divsi */
529 COSTS_N_INSNS (1), /* divdi */
530 COSTS_N_INSNS (1), /* fp */
531 COSTS_N_INSNS (1), /* dmul */
532 COSTS_N_INSNS (1), /* sdiv */
533 COSTS_N_INSNS (1), /* ddiv */
534 128, /* cache line size */
535 0, /* l1 cache */
536 0, /* l2 cache */
537 0, /* streams */
538 0, /* SF->DF convert */
541 /* Instruction costs on RS64A processors. */
542 static const
543 struct processor_costs rs64a_cost = {
544 COSTS_N_INSNS (20), /* mulsi */
545 COSTS_N_INSNS (12), /* mulsi_const */
546 COSTS_N_INSNS (8), /* mulsi_const9 */
547 COSTS_N_INSNS (34), /* muldi */
548 COSTS_N_INSNS (65), /* divsi */
549 COSTS_N_INSNS (67), /* divdi */
550 COSTS_N_INSNS (4), /* fp */
551 COSTS_N_INSNS (4), /* dmul */
552 COSTS_N_INSNS (31), /* sdiv */
553 COSTS_N_INSNS (31), /* ddiv */
554 128, /* cache line size */
555 128, /* l1 cache */
556 2048, /* l2 cache */
557 1, /* streams */
558 0, /* SF->DF convert */
561 /* Instruction costs on MPCCORE processors. */
562 static const
563 struct processor_costs mpccore_cost = {
564 COSTS_N_INSNS (2), /* mulsi */
565 COSTS_N_INSNS (2), /* mulsi_const */
566 COSTS_N_INSNS (2), /* mulsi_const9 */
567 COSTS_N_INSNS (2), /* muldi */
568 COSTS_N_INSNS (6), /* divsi */
569 COSTS_N_INSNS (6), /* divdi */
570 COSTS_N_INSNS (4), /* fp */
571 COSTS_N_INSNS (5), /* dmul */
572 COSTS_N_INSNS (10), /* sdiv */
573 COSTS_N_INSNS (17), /* ddiv */
574 32, /* cache line size */
575 4, /* l1 cache */
576 16, /* l2 cache */
577 1, /* streams */
578 0, /* SF->DF convert */
581 /* Instruction costs on PPC403 processors. */
582 static const
583 struct processor_costs ppc403_cost = {
584 COSTS_N_INSNS (4), /* mulsi */
585 COSTS_N_INSNS (4), /* mulsi_const */
586 COSTS_N_INSNS (4), /* mulsi_const9 */
587 COSTS_N_INSNS (4), /* muldi */
588 COSTS_N_INSNS (33), /* divsi */
589 COSTS_N_INSNS (33), /* divdi */
590 COSTS_N_INSNS (11), /* fp */
591 COSTS_N_INSNS (11), /* dmul */
592 COSTS_N_INSNS (11), /* sdiv */
593 COSTS_N_INSNS (11), /* ddiv */
594 32, /* cache line size */
595 4, /* l1 cache */
596 16, /* l2 cache */
597 1, /* streams */
598 0, /* SF->DF convert */
601 /* Instruction costs on PPC405 processors. */
602 static const
603 struct processor_costs ppc405_cost = {
604 COSTS_N_INSNS (5), /* mulsi */
605 COSTS_N_INSNS (4), /* mulsi_const */
606 COSTS_N_INSNS (3), /* mulsi_const9 */
607 COSTS_N_INSNS (5), /* muldi */
608 COSTS_N_INSNS (35), /* divsi */
609 COSTS_N_INSNS (35), /* divdi */
610 COSTS_N_INSNS (11), /* fp */
611 COSTS_N_INSNS (11), /* dmul */
612 COSTS_N_INSNS (11), /* sdiv */
613 COSTS_N_INSNS (11), /* ddiv */
614 32, /* cache line size */
615 16, /* l1 cache */
616 128, /* l2 cache */
617 1, /* streams */
618 0, /* SF->DF convert */
621 /* Instruction costs on PPC440 processors. */
622 static const
623 struct processor_costs ppc440_cost = {
624 COSTS_N_INSNS (3), /* mulsi */
625 COSTS_N_INSNS (2), /* mulsi_const */
626 COSTS_N_INSNS (2), /* mulsi_const9 */
627 COSTS_N_INSNS (3), /* muldi */
628 COSTS_N_INSNS (34), /* divsi */
629 COSTS_N_INSNS (34), /* divdi */
630 COSTS_N_INSNS (5), /* fp */
631 COSTS_N_INSNS (5), /* dmul */
632 COSTS_N_INSNS (19), /* sdiv */
633 COSTS_N_INSNS (33), /* ddiv */
634 32, /* cache line size */
635 32, /* l1 cache */
636 256, /* l2 cache */
637 1, /* streams */
638 0, /* SF->DF convert */
641 /* Instruction costs on PPC476 processors. */
642 static const
643 struct processor_costs ppc476_cost = {
644 COSTS_N_INSNS (4), /* mulsi */
645 COSTS_N_INSNS (4), /* mulsi_const */
646 COSTS_N_INSNS (4), /* mulsi_const9 */
647 COSTS_N_INSNS (4), /* muldi */
648 COSTS_N_INSNS (11), /* divsi */
649 COSTS_N_INSNS (11), /* divdi */
650 COSTS_N_INSNS (6), /* fp */
651 COSTS_N_INSNS (6), /* dmul */
652 COSTS_N_INSNS (19), /* sdiv */
653 COSTS_N_INSNS (33), /* ddiv */
654 32, /* l1 cache line size */
655 32, /* l1 cache */
656 512, /* l2 cache */
657 1, /* streams */
658 0, /* SF->DF convert */
661 /* Instruction costs on PPC601 processors. */
662 static const
663 struct processor_costs ppc601_cost = {
664 COSTS_N_INSNS (5), /* mulsi */
665 COSTS_N_INSNS (5), /* mulsi_const */
666 COSTS_N_INSNS (5), /* mulsi_const9 */
667 COSTS_N_INSNS (5), /* muldi */
668 COSTS_N_INSNS (36), /* divsi */
669 COSTS_N_INSNS (36), /* divdi */
670 COSTS_N_INSNS (4), /* fp */
671 COSTS_N_INSNS (5), /* dmul */
672 COSTS_N_INSNS (17), /* sdiv */
673 COSTS_N_INSNS (31), /* ddiv */
674 32, /* cache line size */
675 32, /* l1 cache */
676 256, /* l2 cache */
677 1, /* streams */
678 0, /* SF->DF convert */
681 /* Instruction costs on PPC603 processors. */
682 static const
683 struct processor_costs ppc603_cost = {
684 COSTS_N_INSNS (5), /* mulsi */
685 COSTS_N_INSNS (3), /* mulsi_const */
686 COSTS_N_INSNS (2), /* mulsi_const9 */
687 COSTS_N_INSNS (5), /* muldi */
688 COSTS_N_INSNS (37), /* divsi */
689 COSTS_N_INSNS (37), /* divdi */
690 COSTS_N_INSNS (3), /* fp */
691 COSTS_N_INSNS (4), /* dmul */
692 COSTS_N_INSNS (18), /* sdiv */
693 COSTS_N_INSNS (33), /* ddiv */
694 32, /* cache line size */
695 8, /* l1 cache */
696 64, /* l2 cache */
697 1, /* streams */
698 0, /* SF->DF convert */
701 /* Instruction costs on PPC604 processors. */
702 static const
703 struct processor_costs ppc604_cost = {
704 COSTS_N_INSNS (4), /* mulsi */
705 COSTS_N_INSNS (4), /* mulsi_const */
706 COSTS_N_INSNS (4), /* mulsi_const9 */
707 COSTS_N_INSNS (4), /* muldi */
708 COSTS_N_INSNS (20), /* divsi */
709 COSTS_N_INSNS (20), /* divdi */
710 COSTS_N_INSNS (3), /* fp */
711 COSTS_N_INSNS (3), /* dmul */
712 COSTS_N_INSNS (18), /* sdiv */
713 COSTS_N_INSNS (32), /* ddiv */
714 32, /* cache line size */
715 16, /* l1 cache */
716 512, /* l2 cache */
717 1, /* streams */
718 0, /* SF->DF convert */
721 /* Instruction costs on PPC604e processors. */
722 static const
723 struct processor_costs ppc604e_cost = {
724 COSTS_N_INSNS (2), /* mulsi */
725 COSTS_N_INSNS (2), /* mulsi_const */
726 COSTS_N_INSNS (2), /* mulsi_const9 */
727 COSTS_N_INSNS (2), /* muldi */
728 COSTS_N_INSNS (20), /* divsi */
729 COSTS_N_INSNS (20), /* divdi */
730 COSTS_N_INSNS (3), /* fp */
731 COSTS_N_INSNS (3), /* dmul */
732 COSTS_N_INSNS (18), /* sdiv */
733 COSTS_N_INSNS (32), /* ddiv */
734 32, /* cache line size */
735 32, /* l1 cache */
736 1024, /* l2 cache */
737 1, /* streams */
738 0, /* SF->DF convert */
741 /* Instruction costs on PPC620 processors. */
742 static const
743 struct processor_costs ppc620_cost = {
744 COSTS_N_INSNS (5), /* mulsi */
745 COSTS_N_INSNS (4), /* mulsi_const */
746 COSTS_N_INSNS (3), /* mulsi_const9 */
747 COSTS_N_INSNS (7), /* muldi */
748 COSTS_N_INSNS (21), /* divsi */
749 COSTS_N_INSNS (37), /* divdi */
750 COSTS_N_INSNS (3), /* fp */
751 COSTS_N_INSNS (3), /* dmul */
752 COSTS_N_INSNS (18), /* sdiv */
753 COSTS_N_INSNS (32), /* ddiv */
754 128, /* cache line size */
755 32, /* l1 cache */
756 1024, /* l2 cache */
757 1, /* streams */
758 0, /* SF->DF convert */
761 /* Instruction costs on PPC630 processors. */
762 static const
763 struct processor_costs ppc630_cost = {
764 COSTS_N_INSNS (5), /* mulsi */
765 COSTS_N_INSNS (4), /* mulsi_const */
766 COSTS_N_INSNS (3), /* mulsi_const9 */
767 COSTS_N_INSNS (7), /* muldi */
768 COSTS_N_INSNS (21), /* divsi */
769 COSTS_N_INSNS (37), /* divdi */
770 COSTS_N_INSNS (3), /* fp */
771 COSTS_N_INSNS (3), /* dmul */
772 COSTS_N_INSNS (17), /* sdiv */
773 COSTS_N_INSNS (21), /* ddiv */
774 128, /* cache line size */
775 64, /* l1 cache */
776 1024, /* l2 cache */
777 1, /* streams */
778 0, /* SF->DF convert */
781 /* Instruction costs on Cell processor. */
782 /* COSTS_N_INSNS (1) ~ one add. */
783 static const
784 struct processor_costs ppccell_cost = {
785 COSTS_N_INSNS (9/2)+2, /* mulsi */
786 COSTS_N_INSNS (6/2), /* mulsi_const */
787 COSTS_N_INSNS (6/2), /* mulsi_const9 */
788 COSTS_N_INSNS (15/2)+2, /* muldi */
789 COSTS_N_INSNS (38/2), /* divsi */
790 COSTS_N_INSNS (70/2), /* divdi */
791 COSTS_N_INSNS (10/2), /* fp */
792 COSTS_N_INSNS (10/2), /* dmul */
793 COSTS_N_INSNS (74/2), /* sdiv */
794 COSTS_N_INSNS (74/2), /* ddiv */
795 128, /* cache line size */
796 32, /* l1 cache */
797 512, /* l2 cache */
798 6, /* streams */
799 0, /* SF->DF convert */
802 /* Instruction costs on PPC750 and PPC7400 processors. */
803 static const
804 struct processor_costs ppc750_cost = {
805 COSTS_N_INSNS (5), /* mulsi */
806 COSTS_N_INSNS (3), /* mulsi_const */
807 COSTS_N_INSNS (2), /* mulsi_const9 */
808 COSTS_N_INSNS (5), /* muldi */
809 COSTS_N_INSNS (17), /* divsi */
810 COSTS_N_INSNS (17), /* divdi */
811 COSTS_N_INSNS (3), /* fp */
812 COSTS_N_INSNS (3), /* dmul */
813 COSTS_N_INSNS (17), /* sdiv */
814 COSTS_N_INSNS (31), /* ddiv */
815 32, /* cache line size */
816 32, /* l1 cache */
817 512, /* l2 cache */
818 1, /* streams */
819 0, /* SF->DF convert */
822 /* Instruction costs on PPC7450 processors. */
823 static const
824 struct processor_costs ppc7450_cost = {
825 COSTS_N_INSNS (4), /* mulsi */
826 COSTS_N_INSNS (3), /* mulsi_const */
827 COSTS_N_INSNS (3), /* mulsi_const9 */
828 COSTS_N_INSNS (4), /* muldi */
829 COSTS_N_INSNS (23), /* divsi */
830 COSTS_N_INSNS (23), /* divdi */
831 COSTS_N_INSNS (5), /* fp */
832 COSTS_N_INSNS (5), /* dmul */
833 COSTS_N_INSNS (21), /* sdiv */
834 COSTS_N_INSNS (35), /* ddiv */
835 32, /* cache line size */
836 32, /* l1 cache */
837 1024, /* l2 cache */
838 1, /* streams */
839 0, /* SF->DF convert */
842 /* Instruction costs on PPC8540 processors. */
843 static const
844 struct processor_costs ppc8540_cost = {
845 COSTS_N_INSNS (4), /* mulsi */
846 COSTS_N_INSNS (4), /* mulsi_const */
847 COSTS_N_INSNS (4), /* mulsi_const9 */
848 COSTS_N_INSNS (4), /* muldi */
849 COSTS_N_INSNS (19), /* divsi */
850 COSTS_N_INSNS (19), /* divdi */
851 COSTS_N_INSNS (4), /* fp */
852 COSTS_N_INSNS (4), /* dmul */
853 COSTS_N_INSNS (29), /* sdiv */
854 COSTS_N_INSNS (29), /* ddiv */
855 32, /* cache line size */
856 32, /* l1 cache */
857 256, /* l2 cache */
858 1, /* prefetch streams /*/
859 0, /* SF->DF convert */
862 /* Instruction costs on E300C2 and E300C3 cores. */
863 static const
864 struct processor_costs ppce300c2c3_cost = {
865 COSTS_N_INSNS (4), /* mulsi */
866 COSTS_N_INSNS (4), /* mulsi_const */
867 COSTS_N_INSNS (4), /* mulsi_const9 */
868 COSTS_N_INSNS (4), /* muldi */
869 COSTS_N_INSNS (19), /* divsi */
870 COSTS_N_INSNS (19), /* divdi */
871 COSTS_N_INSNS (3), /* fp */
872 COSTS_N_INSNS (4), /* dmul */
873 COSTS_N_INSNS (18), /* sdiv */
874 COSTS_N_INSNS (33), /* ddiv */
876 16, /* l1 cache */
877 16, /* l2 cache */
878 1, /* prefetch streams /*/
879 0, /* SF->DF convert */
882 /* Instruction costs on PPCE500MC processors. */
883 static const
884 struct processor_costs ppce500mc_cost = {
885 COSTS_N_INSNS (4), /* mulsi */
886 COSTS_N_INSNS (4), /* mulsi_const */
887 COSTS_N_INSNS (4), /* mulsi_const9 */
888 COSTS_N_INSNS (4), /* muldi */
889 COSTS_N_INSNS (14), /* divsi */
890 COSTS_N_INSNS (14), /* divdi */
891 COSTS_N_INSNS (8), /* fp */
892 COSTS_N_INSNS (10), /* dmul */
893 COSTS_N_INSNS (36), /* sdiv */
894 COSTS_N_INSNS (66), /* ddiv */
895 64, /* cache line size */
896 32, /* l1 cache */
897 128, /* l2 cache */
898 1, /* prefetch streams /*/
899 0, /* SF->DF convert */
902 /* Instruction costs on PPCE500MC64 processors. */
903 static const
904 struct processor_costs ppce500mc64_cost = {
905 COSTS_N_INSNS (4), /* mulsi */
906 COSTS_N_INSNS (4), /* mulsi_const */
907 COSTS_N_INSNS (4), /* mulsi_const9 */
908 COSTS_N_INSNS (4), /* muldi */
909 COSTS_N_INSNS (14), /* divsi */
910 COSTS_N_INSNS (14), /* divdi */
911 COSTS_N_INSNS (4), /* fp */
912 COSTS_N_INSNS (10), /* dmul */
913 COSTS_N_INSNS (36), /* sdiv */
914 COSTS_N_INSNS (66), /* ddiv */
915 64, /* cache line size */
916 32, /* l1 cache */
917 128, /* l2 cache */
918 1, /* prefetch streams /*/
919 0, /* SF->DF convert */
922 /* Instruction costs on PPCE5500 processors. */
923 static const
924 struct processor_costs ppce5500_cost = {
925 COSTS_N_INSNS (5), /* mulsi */
926 COSTS_N_INSNS (5), /* mulsi_const */
927 COSTS_N_INSNS (4), /* mulsi_const9 */
928 COSTS_N_INSNS (5), /* muldi */
929 COSTS_N_INSNS (14), /* divsi */
930 COSTS_N_INSNS (14), /* divdi */
931 COSTS_N_INSNS (7), /* fp */
932 COSTS_N_INSNS (10), /* dmul */
933 COSTS_N_INSNS (36), /* sdiv */
934 COSTS_N_INSNS (66), /* ddiv */
935 64, /* cache line size */
936 32, /* l1 cache */
937 128, /* l2 cache */
938 1, /* prefetch streams /*/
939 0, /* SF->DF convert */
942 /* Instruction costs on PPCE6500 processors. */
943 static const
944 struct processor_costs ppce6500_cost = {
945 COSTS_N_INSNS (5), /* mulsi */
946 COSTS_N_INSNS (5), /* mulsi_const */
947 COSTS_N_INSNS (4), /* mulsi_const9 */
948 COSTS_N_INSNS (5), /* muldi */
949 COSTS_N_INSNS (14), /* divsi */
950 COSTS_N_INSNS (14), /* divdi */
951 COSTS_N_INSNS (7), /* fp */
952 COSTS_N_INSNS (10), /* dmul */
953 COSTS_N_INSNS (36), /* sdiv */
954 COSTS_N_INSNS (66), /* ddiv */
955 64, /* cache line size */
956 32, /* l1 cache */
957 128, /* l2 cache */
958 1, /* prefetch streams /*/
959 0, /* SF->DF convert */
962 /* Instruction costs on AppliedMicro Titan processors. */
963 static const
964 struct processor_costs titan_cost = {
965 COSTS_N_INSNS (5), /* mulsi */
966 COSTS_N_INSNS (5), /* mulsi_const */
967 COSTS_N_INSNS (5), /* mulsi_const9 */
968 COSTS_N_INSNS (5), /* muldi */
969 COSTS_N_INSNS (18), /* divsi */
970 COSTS_N_INSNS (18), /* divdi */
971 COSTS_N_INSNS (10), /* fp */
972 COSTS_N_INSNS (10), /* dmul */
973 COSTS_N_INSNS (46), /* sdiv */
974 COSTS_N_INSNS (72), /* ddiv */
975 32, /* cache line size */
976 32, /* l1 cache */
977 512, /* l2 cache */
978 1, /* prefetch streams */
979 0, /* SF->DF convert */
982 /* Instruction costs on POWER4 and POWER5 processors. */
983 static const
984 struct processor_costs power4_cost = {
985 COSTS_N_INSNS (3), /* mulsi */
986 COSTS_N_INSNS (2), /* mulsi_const */
987 COSTS_N_INSNS (2), /* mulsi_const9 */
988 COSTS_N_INSNS (4), /* muldi */
989 COSTS_N_INSNS (18), /* divsi */
990 COSTS_N_INSNS (34), /* divdi */
991 COSTS_N_INSNS (3), /* fp */
992 COSTS_N_INSNS (3), /* dmul */
993 COSTS_N_INSNS (17), /* sdiv */
994 COSTS_N_INSNS (17), /* ddiv */
995 128, /* cache line size */
996 32, /* l1 cache */
997 1024, /* l2 cache */
998 8, /* prefetch streams */
999 0, /* SF->DF convert */
1002 /* Instruction costs on POWER6 processors. */
/* Field order matches struct processor_costs.  */
1003 static const
1004 struct processor_costs power6_cost = {
1005 COSTS_N_INSNS (8), /* mulsi */
1006 COSTS_N_INSNS (8), /* mulsi_const */
1007 COSTS_N_INSNS (8), /* mulsi_const9 */
1008 COSTS_N_INSNS (8), /* muldi */
1009 COSTS_N_INSNS (22), /* divsi */
1010 COSTS_N_INSNS (28), /* divdi */
1011 COSTS_N_INSNS (3), /* fp */
1012 COSTS_N_INSNS (3), /* dmul */
1013 COSTS_N_INSNS (13), /* sdiv */
1014 COSTS_N_INSNS (16), /* ddiv */
1015 128, /* cache line size */
1016 64, /* l1 cache */
1017 2048, /* l2 cache */
1018 16, /* prefetch streams */
1019 0, /* SF->DF convert */
1022 /* Instruction costs on POWER7 processors. */
/* Field order matches struct processor_costs.  */
1023 static const
1024 struct processor_costs power7_cost = {
1025 COSTS_N_INSNS (2), /* mulsi */
1026 COSTS_N_INSNS (2), /* mulsi_const */
1027 COSTS_N_INSNS (2), /* mulsi_const9 */
1028 COSTS_N_INSNS (2), /* muldi */
1029 COSTS_N_INSNS (18), /* divsi */
1030 COSTS_N_INSNS (34), /* divdi */
1031 COSTS_N_INSNS (3), /* fp */
1032 COSTS_N_INSNS (3), /* dmul */
1033 COSTS_N_INSNS (13), /* sdiv */
1034 COSTS_N_INSNS (16), /* ddiv */
1035 128, /* cache line size */
1036 32, /* l1 cache */
1037 256, /* l2 cache */
1038 12, /* prefetch streams */
1039 COSTS_N_INSNS (3), /* SF->DF convert */
1042 /* Instruction costs on POWER8 processors. */
/* Field order matches struct processor_costs.  */
1043 static const
1044 struct processor_costs power8_cost = {
1045 COSTS_N_INSNS (3), /* mulsi */
1046 COSTS_N_INSNS (3), /* mulsi_const */
1047 COSTS_N_INSNS (3), /* mulsi_const9 */
1048 COSTS_N_INSNS (3), /* muldi */
1049 COSTS_N_INSNS (19), /* divsi */
1050 COSTS_N_INSNS (35), /* divdi */
1051 COSTS_N_INSNS (3), /* fp */
1052 COSTS_N_INSNS (3), /* dmul */
1053 COSTS_N_INSNS (14), /* sdiv */
1054 COSTS_N_INSNS (17), /* ddiv */
1055 128, /* cache line size */
1056 32, /* l1 cache */
1057 256, /* l2 cache */
1058 12, /* prefetch streams */
1059 COSTS_N_INSNS (3), /* SF->DF convert */
1062 /* Instruction costs on POWER9 processors. */
/* Field order matches struct processor_costs.  */
1063 static const
1064 struct processor_costs power9_cost = {
1065 COSTS_N_INSNS (3), /* mulsi */
1066 COSTS_N_INSNS (3), /* mulsi_const */
1067 COSTS_N_INSNS (3), /* mulsi_const9 */
1068 COSTS_N_INSNS (3), /* muldi */
1069 COSTS_N_INSNS (8), /* divsi */
1070 COSTS_N_INSNS (12), /* divdi */
1071 COSTS_N_INSNS (3), /* fp */
1072 COSTS_N_INSNS (3), /* dmul */
1073 COSTS_N_INSNS (13), /* sdiv */
1074 COSTS_N_INSNS (18), /* ddiv */
1075 128, /* cache line size */
1076 32, /* l1 cache */
1077 512, /* l2 cache */
1078 8, /* prefetch streams */
1079 COSTS_N_INSNS (3), /* SF->DF convert */
1082 /* Instruction costs on POWER A2 processors. */
1083 static const
1084 struct processor_costs ppca2_cost = {
1085 COSTS_N_INSNS (16), /* mulsi */
1086 COSTS_N_INSNS (16), /* mulsi_const */
1087 COSTS_N_INSNS (16), /* mulsi_const9 */
1088 COSTS_N_INSNS (16), /* muldi */
1089 COSTS_N_INSNS (22), /* divsi */
1090 COSTS_N_INSNS (28), /* divdi */
1091 COSTS_N_INSNS (3), /* fp */
1092 COSTS_N_INSNS (3), /* dmul */
1093 COSTS_N_INSNS (59), /* sdiv */
1094 COSTS_N_INSNS (72), /* ddiv */
1096 16, /* l1 cache */
1097 2048, /* l2 cache */
1098 16, /* prefetch streams */
1099 0, /* SF->DF convert */
1102 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1103 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
/* Forward declarations for static helpers defined later in this file.  */
1106 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1107 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1108 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1109 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1110 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1111 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1112 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1113 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1114 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1115 bool);
1116 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1117 unsigned int);
/* Instruction-scheduling helpers.  */
1118 static bool is_microcoded_insn (rtx_insn *);
1119 static bool is_nonpipeline_insn (rtx_insn *);
1120 static bool is_cracked_insn (rtx_insn *);
1121 static bool is_load_insn (rtx, rtx *);
1122 static bool is_store_insn (rtx, rtx *);
1123 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1124 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1125 static bool insn_must_be_first_in_group (rtx_insn *);
1126 static bool insn_must_be_last_in_group (rtx_insn *);
1127 int easy_vector_constant (rtx, machine_mode);
1128 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1129 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1130 #if TARGET_MACHO
1131 static tree get_prev_label (tree);
1132 #endif
1133 static bool rs6000_mode_dependent_address (const_rtx);
1134 static bool rs6000_debug_mode_dependent_address (const_rtx);
1135 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1136 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1137 machine_mode, rtx);
1138 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1139 machine_mode,
1140 rtx);
1141 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1142 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1143 enum reg_class);
1144 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1145 reg_class_t,
1146 reg_class_t);
1147 static bool rs6000_debug_can_change_mode_class (machine_mode,
1148 machine_mode,
1149 reg_class_t);
/* Indirection for the reload/address hooks; initialized to the plain
   variants here.  NOTE(review): presumably repointed at the _debug
   variants when -mdebug is in effect -- confirm in the option-override
   code, which is outside this chunk.  */
1151 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1152 = rs6000_mode_dependent_address;
1154 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1155 machine_mode, rtx)
1156 = rs6000_secondary_reload_class;
1158 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1159 = rs6000_preferred_reload_class;
1161 const int INSN_NOT_AVAILABLE = -1;
1163 static void rs6000_print_isa_options (FILE *, int, const char *,
1164 HOST_WIDE_INT);
1165 static void rs6000_print_builtin_options (FILE *, int, const char *,
1166 HOST_WIDE_INT);
1167 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1169 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1170 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1171 enum rs6000_reg_type,
1172 machine_mode,
1173 secondary_reload_info *,
1174 bool);
1175 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1177 /* Hash table stuff for keeping track of TOC entries. */
1179 struct GTY((for_user)) toc_hash_struct
1181 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1182 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1183 rtx key;
/* Machine mode of KEY.  */
1184 machine_mode key_mode;
/* Number of the label emitted for this TOC entry.  */
1185 int labelno;
/* Hash traits for toc_hash_struct pointers (GC-managed).  */
1188 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1190 static hashval_t hash (toc_hash_struct *);
1191 static bool equal (toc_hash_struct *, toc_hash_struct *);
1194 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1198 /* Default register names. */
/* Bare numeric names for GPRs/FPRs/VRs and CRs; see alt_reg_names for
   the %-prefixed forms.  */
1199 char rs6000_reg_names[][8] =
1201 /* GPRs */
1202 "0", "1", "2", "3", "4", "5", "6", "7",
1203 "8", "9", "10", "11", "12", "13", "14", "15",
1204 "16", "17", "18", "19", "20", "21", "22", "23",
1205 "24", "25", "26", "27", "28", "29", "30", "31",
1206 /* FPRs */
1207 "0", "1", "2", "3", "4", "5", "6", "7",
1208 "8", "9", "10", "11", "12", "13", "14", "15",
1209 "16", "17", "18", "19", "20", "21", "22", "23",
1210 "24", "25", "26", "27", "28", "29", "30", "31",
1211 /* VRs */
1212 "0", "1", "2", "3", "4", "5", "6", "7",
1213 "8", "9", "10", "11", "12", "13", "14", "15",
1214 "16", "17", "18", "19", "20", "21", "22", "23",
1215 "24", "25", "26", "27", "28", "29", "30", "31",
1216 /* lr ctr ca ap */
1217 "lr", "ctr", "ca", "ap",
1218 /* cr0..cr7 */
1219 "0", "1", "2", "3", "4", "5", "6", "7",
1220 /* vrsave vscr sfp */
1221 "vrsave", "vscr", "sfp",
1224 #ifdef TARGET_REGNAMES
/* %-prefixed register names, in the same order as rs6000_reg_names.
   NOTE(review): presumably selected when -mregnames is given -- confirm
   where alt_reg_names is installed, outside this chunk.  */
1225 static const char alt_reg_names[][8] =
1227 /* GPRs */
1228 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1229 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1230 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1231 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1232 /* FPRs */
1233 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1234 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1235 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1236 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1237 /* VRs */
1238 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1239 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1240 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1241 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1242 /* lr ctr ca ap */
1243 "lr", "ctr", "ca", "ap",
1244 /* cr0..cr7 */
1245 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1246 /* vrsave vscr sfp */
1247 "vrsave", "vscr", "sfp",
1249 #endif
1251 /* Table of valid machine attributes. */
1253 static const struct attribute_spec rs6000_attribute_table[] =
1255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1256 affects_type_identity, handler, exclude } */
1257 { "altivec", 1, 1, false, true, false, false,
1258 rs6000_handle_altivec_attribute, NULL },
1259 { "longcall", 0, 0, false, true, true, false,
1260 rs6000_handle_longcall_attribute, NULL },
/* "shortcall" shares the longcall handler, which distinguishes the two.  */
1261 { "shortcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute, NULL },
1263 { "ms_struct", 0, 0, false, false, false, false,
1264 rs6000_handle_struct_attribute, NULL },
1265 { "gcc_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute, NULL },
1267 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1268 SUBTARGET_ATTRIBUTE_TABLE,
1269 #endif
/* Table is terminated by this all-NULL sentinel entry.  */
1270 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1273 #ifndef TARGET_PROFILE_KERNEL
1274 #define TARGET_PROFILE_KERNEL 0
1275 #endif
1277 /* Initialize the GCC target structure. */
1278 #undef TARGET_ATTRIBUTE_TABLE
1279 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1280 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1281 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1282 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1283 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1285 #undef TARGET_ASM_ALIGNED_DI_OP
1286 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1288 /* Default unaligned ops are only provided for ELF. Find the ops needed
1289 for non-ELF systems. */
1290 #ifndef OBJECT_FORMAT_ELF
1291 #if TARGET_XCOFF
1292 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1293 64-bit targets. */
1294 #undef TARGET_ASM_UNALIGNED_HI_OP
1295 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1296 #undef TARGET_ASM_UNALIGNED_SI_OP
1297 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1298 #undef TARGET_ASM_UNALIGNED_DI_OP
1299 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1300 #else
1301 /* For Darwin. */
1302 #undef TARGET_ASM_UNALIGNED_HI_OP
1303 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1304 #undef TARGET_ASM_UNALIGNED_SI_OP
1305 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1306 #undef TARGET_ASM_UNALIGNED_DI_OP
1307 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1308 #undef TARGET_ASM_ALIGNED_DI_OP
1309 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1310 #endif
1311 #endif
1313 /* This hook deals with fixups for relocatable code and DI-mode objects
1314 in 64-bit code. */
1315 #undef TARGET_ASM_INTEGER
1316 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1318 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1319 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1320 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1321 #endif
1323 #undef TARGET_SET_UP_BY_PROLOGUE
1324 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1326 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1327 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1328 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1329 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1330 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1331 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1332 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1336 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1339 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1340 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1342 #undef TARGET_INTERNAL_ARG_POINTER
1343 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1345 #undef TARGET_HAVE_TLS
1346 #define TARGET_HAVE_TLS HAVE_AS_TLS
1348 #undef TARGET_CANNOT_FORCE_CONST_MEM
1349 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1351 #undef TARGET_DELEGITIMIZE_ADDRESS
1352 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1354 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1355 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1357 #undef TARGET_LEGITIMATE_COMBINED_INSN
1358 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1360 #undef TARGET_ASM_FUNCTION_PROLOGUE
1361 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1362 #undef TARGET_ASM_FUNCTION_EPILOGUE
1363 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1368 #undef TARGET_LEGITIMIZE_ADDRESS
1369 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1371 #undef TARGET_SCHED_VARIABLE_ISSUE
1372 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1374 #undef TARGET_SCHED_ISSUE_RATE
1375 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1376 #undef TARGET_SCHED_ADJUST_COST
1377 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1378 #undef TARGET_SCHED_ADJUST_PRIORITY
1379 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1380 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1381 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1382 #undef TARGET_SCHED_INIT
1383 #define TARGET_SCHED_INIT rs6000_sched_init
1384 #undef TARGET_SCHED_FINISH
1385 #define TARGET_SCHED_FINISH rs6000_sched_finish
1386 #undef TARGET_SCHED_REORDER
1387 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1388 #undef TARGET_SCHED_REORDER2
1389 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1391 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1392 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1394 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1395 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1397 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1398 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1399 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1400 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1401 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1402 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1403 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1404 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1406 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1407 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1409 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1410 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1411 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1412 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1413 rs6000_builtin_support_vector_misalignment
1414 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1415 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1416 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1417 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1418 rs6000_builtin_vectorization_cost
1419 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1420 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1421 rs6000_preferred_simd_mode
1422 #undef TARGET_VECTORIZE_INIT_COST
1423 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1424 #undef TARGET_VECTORIZE_ADD_STMT_COST
1425 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1426 #undef TARGET_VECTORIZE_FINISH_COST
1427 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1428 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1429 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1431 #undef TARGET_LOOP_UNROLL_ADJUST
1432 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1434 #undef TARGET_INIT_BUILTINS
1435 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1436 #undef TARGET_BUILTIN_DECL
1437 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1439 #undef TARGET_FOLD_BUILTIN
1440 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1441 #undef TARGET_GIMPLE_FOLD_BUILTIN
1442 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1444 #undef TARGET_EXPAND_BUILTIN
1445 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1447 #undef TARGET_MANGLE_TYPE
1448 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1450 #undef TARGET_INIT_LIBFUNCS
1451 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1453 #if TARGET_MACHO
1454 #undef TARGET_BINDS_LOCAL_P
1455 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1456 #endif
1458 #undef TARGET_MS_BITFIELD_LAYOUT_P
1459 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1461 #undef TARGET_ASM_OUTPUT_MI_THUNK
1462 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1464 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1465 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1467 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1468 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1470 #undef TARGET_REGISTER_MOVE_COST
1471 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1472 #undef TARGET_MEMORY_MOVE_COST
1473 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1474 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1475 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1476 rs6000_ira_change_pseudo_allocno_class
1477 #undef TARGET_CANNOT_COPY_INSN_P
1478 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1479 #undef TARGET_RTX_COSTS
1480 #define TARGET_RTX_COSTS rs6000_rtx_costs
1481 #undef TARGET_ADDRESS_COST
1482 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1483 #undef TARGET_INSN_COST
1484 #define TARGET_INSN_COST rs6000_insn_cost
1486 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1487 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1489 #undef TARGET_PROMOTE_FUNCTION_MODE
1490 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1492 #undef TARGET_RETURN_IN_MEMORY
1493 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1495 #undef TARGET_RETURN_IN_MSB
1496 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1498 #undef TARGET_SETUP_INCOMING_VARARGS
1499 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1501 /* Always strict argument naming on rs6000. */
1502 #undef TARGET_STRICT_ARGUMENT_NAMING
1503 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1504 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1505 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1506 #undef TARGET_SPLIT_COMPLEX_ARG
1507 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1508 #undef TARGET_MUST_PASS_IN_STACK
1509 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1510 #undef TARGET_PASS_BY_REFERENCE
1511 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1512 #undef TARGET_ARG_PARTIAL_BYTES
1513 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1514 #undef TARGET_FUNCTION_ARG_ADVANCE
1515 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1516 #undef TARGET_FUNCTION_ARG
1517 #define TARGET_FUNCTION_ARG rs6000_function_arg
1518 #undef TARGET_FUNCTION_ARG_PADDING
1519 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1520 #undef TARGET_FUNCTION_ARG_BOUNDARY
1521 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1523 #undef TARGET_BUILD_BUILTIN_VA_LIST
1524 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1526 #undef TARGET_EXPAND_BUILTIN_VA_START
1527 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1530 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1532 #undef TARGET_EH_RETURN_FILTER_MODE
1533 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1535 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1536 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1538 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1539 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1541 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1542 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1544 #undef TARGET_FLOATN_MODE
1545 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1547 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1548 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1550 #undef TARGET_MD_ASM_ADJUST
1551 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1553 #undef TARGET_OPTION_OVERRIDE
1554 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1556 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1557 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1558 rs6000_builtin_vectorized_function
1560 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1561 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1562 rs6000_builtin_md_vectorized_function
1564 #undef TARGET_STACK_PROTECT_GUARD
1565 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1567 #if !TARGET_MACHO
1568 #undef TARGET_STACK_PROTECT_FAIL
1569 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1570 #endif
1572 #ifdef HAVE_AS_TLS
1573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1575 #endif
1577 /* Use a 32-bit anchor range. This leads to sequences like:
1579 addis tmp,anchor,high
1580 add dest,tmp,low
1582 where tmp itself acts as an anchor, and can be shared between
1583 accesses to the same 64k page. */
1584 #undef TARGET_MIN_ANCHOR_OFFSET
1585 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1586 #undef TARGET_MAX_ANCHOR_OFFSET
1587 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1588 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1589 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1590 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1591 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1593 #undef TARGET_BUILTIN_RECIPROCAL
1594 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1596 #undef TARGET_SECONDARY_RELOAD
1597 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1598 #undef TARGET_SECONDARY_MEMORY_NEEDED
1599 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1600 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1601 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1603 #undef TARGET_LEGITIMATE_ADDRESS_P
1604 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1606 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1607 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1609 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1610 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1612 #undef TARGET_CAN_ELIMINATE
1613 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1615 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1616 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1618 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1619 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1621 #undef TARGET_TRAMPOLINE_INIT
1622 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1624 #undef TARGET_FUNCTION_VALUE
1625 #define TARGET_FUNCTION_VALUE rs6000_function_value
1627 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1628 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1630 #undef TARGET_OPTION_SAVE
1631 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1633 #undef TARGET_OPTION_RESTORE
1634 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1636 #undef TARGET_OPTION_PRINT
1637 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1639 #undef TARGET_CAN_INLINE_P
1640 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1642 #undef TARGET_SET_CURRENT_FUNCTION
1643 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1645 #undef TARGET_LEGITIMATE_CONSTANT_P
1646 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1648 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1649 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1651 #undef TARGET_CAN_USE_DOLOOP_P
1652 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1654 #undef TARGET_PREDICT_DOLOOP_P
1655 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1657 #undef TARGET_HAVE_COUNT_REG_DECR_P
1658 #define TARGET_HAVE_COUNT_REG_DECR_P true
1660 /* 1000000000 is infinite cost in IVOPTs. */
1661 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1662 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1664 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1665 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1667 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1668 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1670 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1671 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1672 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1673 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1674 #undef TARGET_UNWIND_WORD_MODE
1675 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1677 #undef TARGET_OFFLOAD_OPTIONS
1678 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1680 #undef TARGET_C_MODE_FOR_SUFFIX
1681 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1683 #undef TARGET_INVALID_BINARY_OP
1684 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1686 #undef TARGET_OPTAB_SUPPORTED_P
1687 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1689 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1690 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1692 #undef TARGET_COMPARE_VERSION_PRIORITY
1693 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1695 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1696 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1697 rs6000_generate_version_dispatcher_body
1699 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1700 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1701 rs6000_get_function_versions_dispatcher
1703 #undef TARGET_OPTION_FUNCTION_VERSIONS
1704 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1706 #undef TARGET_HARD_REGNO_NREGS
1707 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1708 #undef TARGET_HARD_REGNO_MODE_OK
1709 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1711 #undef TARGET_MODES_TIEABLE_P
1712 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1714 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1715 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1716 rs6000_hard_regno_call_part_clobbered
1718 #undef TARGET_SLOW_UNALIGNED_ACCESS
1719 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1721 #undef TARGET_CAN_CHANGE_MODE_CLASS
1722 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1724 #undef TARGET_CONSTANT_ALIGNMENT
1725 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1727 #undef TARGET_STARTING_FRAME_OFFSET
1728 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1730 #if TARGET_ELF && RS6000_WEAK
1731 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1732 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1733 #endif
1735 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1736 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1738 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1739 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1742 /* Processor table. */
1743 struct rs6000_ptt
1745 const char *const name; /* Canonical processor name. */
1746 const enum processor_type processor; /* Processor type enum value. */
1747 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
/* One entry per RS6000_CPU line in rs6000-cpus.def.  */
1750 static struct rs6000_ptt const processor_target_table[] =
1752 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1753 #include "rs6000-cpus.def"
1754 #undef RS6000_CPU
1757 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1758 name is invalid. */
1760 static int
1761 rs6000_cpu_name_lookup (const char *name)
1763 size_t i;
1765 if (name != NULL)
1767 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1768 if (! strcmp (name, processor_target_table[i].name))
1769 return (int)i;
1772 return -1;
1776 /* Return number of consecutive hard regs needed starting at reg REGNO
1777 to hold something of mode MODE.
1778 This is ordinarily the length in words of a value of mode MODE
1779 but can be less for certain modes in special long registers.
1781 POWER and PowerPC GPRs hold 32 bits worth;
1782 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1784 static int
1785 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1787 unsigned HOST_WIDE_INT reg_size;
1789 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1790 128-bit floating point that can go in vector registers, which has VSX
1791 memory addressing. */
1792 if (FP_REGNO_P (regno))
1793 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1794 ? UNITS_PER_VSX_WORD
1795 : UNITS_PER_FP_WORD);
1797 else if (ALTIVEC_REGNO_P (regno))
1798 reg_size = UNITS_PER_ALTIVEC_WORD;
1800 else
1801 reg_size = UNITS_PER_WORD;
1803 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1806 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1807 MODE. */
1808 static int
1809 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1811 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1813 if (COMPLEX_MODE_P (mode))
1814 mode = GET_MODE_INNER (mode);
1816 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1817 register combinations, and use PTImode where we need to deal with quad
1818 word memory operations. Don't allow quad words in the argument or frame
1819 pointer registers, just registers 0..31. */
1820 if (mode == PTImode)
1821 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1822 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1823 && ((regno & 1) == 0));
1825 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1826 implementations. Don't allow an item to be split between a FP register
1827 and an Altivec register. Allow TImode in all VSX registers if the user
1828 asked for it. */
1829 if (TARGET_VSX && VSX_REGNO_P (regno)
1830 && (VECTOR_MEM_VSX_P (mode)
1831 || FLOAT128_VECTOR_P (mode)
1832 || reg_addr[mode].scalar_in_vmx_p
1833 || mode == TImode
1834 || (TARGET_VADDUQM && mode == V1TImode)))
1836 if (FP_REGNO_P (regno))
1837 return FP_REGNO_P (last_regno);
1839 if (ALTIVEC_REGNO_P (regno))
1841 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1842 return 0;
1844 return ALTIVEC_REGNO_P (last_regno);
1848 /* The GPRs can hold any mode, but values bigger than one register
1849 cannot go past R31. */
1850 if (INT_REGNO_P (regno))
1851 return INT_REGNO_P (last_regno);
1853 /* The float registers (except for VSX vector modes) can only hold floating
1854 modes and DImode. */
1855 if (FP_REGNO_P (regno))
1857 if (FLOAT128_VECTOR_P (mode))
1858 return false;
1860 if (SCALAR_FLOAT_MODE_P (mode)
1861 && (mode != TDmode || (regno % 2) == 0)
1862 && FP_REGNO_P (last_regno))
1863 return 1;
1865 if (GET_MODE_CLASS (mode) == MODE_INT)
1867 if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1868 return 1;
1870 if (TARGET_P8_VECTOR && (mode == SImode))
1871 return 1;
1873 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1874 return 1;
1877 return 0;
1880 /* The CR register can only hold CC modes. */
1881 if (CR_REGNO_P (regno))
1882 return GET_MODE_CLASS (mode) == MODE_CC;
1884 if (CA_REGNO_P (regno))
1885 return mode == Pmode || mode == SImode;
1887 /* AltiVec only in AldyVec registers. */
1888 if (ALTIVEC_REGNO_P (regno))
1889 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1890 || mode == V1TImode);
1892 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1893 and it must be able to fit within the register set. */
1895 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1898 /* Implement TARGET_HARD_REGNO_NREGS. */
1900 static unsigned int
1901 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1903 return rs6000_hard_regno_nregs[mode][regno];
1906 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1908 static bool
1909 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1911 return rs6000_hard_regno_mode_ok_p[mode][regno];
1914 /* Implement TARGET_MODES_TIEABLE_P.
1916 PTImode cannot tie with other modes because PTImode is restricted to even
1917 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1918 57744).
1920 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1921 128-bit floating point on VSX systems ties with other vectors. */
1923 static bool
1924 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1926 if (mode1 == PTImode)
1927 return mode2 == PTImode;
1928 if (mode2 == PTImode)
1929 return false;
1931 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1932 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1933 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1934 return false;
1936 if (SCALAR_FLOAT_MODE_P (mode1))
1937 return SCALAR_FLOAT_MODE_P (mode2);
1938 if (SCALAR_FLOAT_MODE_P (mode2))
1939 return false;
1941 if (GET_MODE_CLASS (mode1) == MODE_CC)
1942 return GET_MODE_CLASS (mode2) == MODE_CC;
1943 if (GET_MODE_CLASS (mode2) == MODE_CC)
1944 return false;
1946 return true;
1949 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1951 static bool
1952 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1953 machine_mode mode)
1955 if (TARGET_32BIT
1956 && TARGET_POWERPC64
1957 && GET_MODE_SIZE (mode) > 4
1958 && INT_REGNO_P (regno))
1959 return true;
1961 if (TARGET_VSX
1962 && FP_REGNO_P (regno)
1963 && GET_MODE_SIZE (mode) > 8
1964 && !FLOAT128_2REG_P (mode))
1965 return true;
1967 return false;
1970 /* Print interesting facts about registers. */
1971 static void
1972 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1974 int r, m;
1976 for (r = first_regno; r <= last_regno; ++r)
1978 const char *comma = "";
1979 int len;
1981 if (first_regno == last_regno)
1982 fprintf (stderr, "%s:\t", reg_name);
1983 else
1984 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1986 len = 8;
1987 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1988 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1990 if (len > 70)
1992 fprintf (stderr, ",\n\t");
1993 len = 8;
1994 comma = "";
1997 if (rs6000_hard_regno_nregs[m][r] > 1)
1998 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1999 rs6000_hard_regno_nregs[m][r]);
2000 else
2001 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2003 comma = ", ";
2006 if (call_used_or_fixed_reg_p (r))
2008 if (len > 70)
2010 fprintf (stderr, ",\n\t");
2011 len = 8;
2012 comma = "";
2015 len += fprintf (stderr, "%s%s", comma, "call-used");
2016 comma = ", ";
2019 if (fixed_regs[r])
2021 if (len > 70)
2023 fprintf (stderr, ",\n\t");
2024 len = 8;
2025 comma = "";
2028 len += fprintf (stderr, "%s%s", comma, "fixed");
2029 comma = ", ";
2032 if (len > 70)
2034 fprintf (stderr, ",\n\t");
2035 comma = "";
2038 len += fprintf (stderr, "%sreg-class = %s", comma,
2039 reg_class_names[(int)rs6000_regno_regclass[r]]);
2040 comma = ", ";
2042 if (len > 70)
2044 fprintf (stderr, ",\n\t");
2045 comma = "";
2048 fprintf (stderr, "%sregno = %d\n", comma, r);
2052 static const char *
2053 rs6000_debug_vector_unit (enum rs6000_vector v)
2055 const char *ret;
2057 switch (v)
2059 case VECTOR_NONE: ret = "none"; break;
2060 case VECTOR_ALTIVEC: ret = "altivec"; break;
2061 case VECTOR_VSX: ret = "vsx"; break;
2062 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2063 default: ret = "unknown"; break;
2066 return ret;
2069 /* Inner function printing just the address mask for a particular reload
2070 register class. */
2071 DEBUG_FUNCTION char *
2072 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2074 static char ret[8];
2075 char *p = ret;
2077 if ((mask & RELOAD_REG_VALID) != 0)
2078 *p++ = 'v';
2079 else if (keep_spaces)
2080 *p++ = ' ';
2082 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2083 *p++ = 'm';
2084 else if (keep_spaces)
2085 *p++ = ' ';
2087 if ((mask & RELOAD_REG_INDEXED) != 0)
2088 *p++ = 'i';
2089 else if (keep_spaces)
2090 *p++ = ' ';
2092 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2093 *p++ = 'O';
2094 else if ((mask & RELOAD_REG_OFFSET) != 0)
2095 *p++ = 'o';
2096 else if (keep_spaces)
2097 *p++ = ' ';
2099 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2100 *p++ = '+';
2101 else if (keep_spaces)
2102 *p++ = ' ';
2104 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2105 *p++ = '+';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2109 if ((mask & RELOAD_REG_AND_M16) != 0)
2110 *p++ = '&';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2114 *p = '\0';
2116 return ret;
2119 /* Print the address masks in a human readble fashion. */
2120 DEBUG_FUNCTION void
2121 rs6000_debug_print_mode (ssize_t m)
2123 ssize_t rc;
2124 int spaces = 0;
2126 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2127 for (rc = 0; rc < N_RELOAD_REG; rc++)
2128 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2129 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2131 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2132 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2134 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2135 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2136 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2137 spaces = 0;
2139 else
2140 spaces += sizeof (" Reload=sl") - 1;
2142 if (reg_addr[m].scalar_in_vmx_p)
2144 fprintf (stderr, "%*s Upper=y", spaces, "");
2145 spaces = 0;
2147 else
2148 spaces += sizeof (" Upper=y") - 1;
2150 if (rs6000_vector_unit[m] != VECTOR_NONE
2151 || rs6000_vector_mem[m] != VECTOR_NONE)
2153 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2154 spaces, "",
2155 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2156 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2159 fputs ("\n", stderr);
/* printf-style templates for the -mdebug=reg dump below.  DEBUG_FMT_ID
   left-justifies the label in a 32-character field so values line up.  */
2162 #define DEBUG_FMT_ID "%-32s= "
2163 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2164 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2165 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2167 /* Print various interesting information with -mdebug=reg. */
2168 static void
2169 rs6000_debug_reg_global (void)
2171 static const char *const tf[2] = { "false", "true" };
/* NL is set to "\n" once a section wanting a trailing newline has been
   printed; it stays null otherwise.  */
2172 const char *nl = (const char *)0;
2173 int m;
2174 size_t m1, m2, v;
2175 char costly_num[20];
2176 char nop_num[20];
2177 char flags_buffer[40];
2178 const char *costly_str;
2179 const char *nop_str;
2180 const char *trace_str;
2181 const char *abi_str;
2182 const char *cmodel_str;
2183 struct cl_target_option cl_opts;
2185 /* Modes we want tieable information on. */
2186 static const machine_mode print_tieable_modes[] = {
2187 QImode,
2188 HImode,
2189 SImode,
2190 DImode,
2191 TImode,
2192 PTImode,
2193 SFmode,
2194 DFmode,
2195 TFmode,
2196 IFmode,
2197 KFmode,
2198 SDmode,
2199 DDmode,
2200 TDmode,
2201 V16QImode,
2202 V8HImode,
2203 V4SImode,
2204 V2DImode,
2205 V1TImode,
2206 V32QImode,
2207 V16HImode,
2208 V8SImode,
2209 V4DImode,
2210 V2TImode,
2211 V4SFmode,
2212 V2DFmode,
2213 V8SFmode,
2214 V4DFmode,
2215 CCmode,
2216 CCUNSmode,
2217 CCEQmode,
2220 /* Virtual regs we are interested in. */
2221 const static struct {
2222 int regno; /* register number. */
2223 const char *name; /* register name. */
2224 } virtual_regs[] = {
2225 { STACK_POINTER_REGNUM, "stack pointer:" },
2226 { TOC_REGNUM, "toc: " },
2227 { STATIC_CHAIN_REGNUM, "static chain: " },
2228 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2229 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2230 { ARG_POINTER_REGNUM, "arg pointer: " },
2231 { FRAME_POINTER_REGNUM, "frame pointer:" },
2232 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2233 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2234 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2235 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2236 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2237 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2238 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2239 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
2240 { LAST_VIRTUAL_REGISTER, "last virtual: " },
/* Dump which modes each hard-register class can hold.  */
2243 fputs ("\nHard register information:\n", stderr);
2244 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2245 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2246 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2247 LAST_ALTIVEC_REGNO,
2248 "vs");
2249 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2250 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2251 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2252 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2253 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2254 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2256 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2257 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2258 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
/* Dump the register class currently behind each constraint letter.  */
2260 fprintf (stderr,
2261 "\n"
2262 "d reg_class = %s\n"
2263 "f reg_class = %s\n"
2264 "v reg_class = %s\n"
2265 "wa reg_class = %s\n"
2266 "we reg_class = %s\n"
2267 "wr reg_class = %s\n"
2268 "wx reg_class = %s\n"
2269 "wA reg_class = %s\n"
2270 "\n",
2271 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2272 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2273 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2274 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2275 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2276 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2280 nl = "\n";
2281 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2282 rs6000_debug_print_mode (m);
2284 fputs ("\n", stderr);
/* For each pair of distinct modes in print_tieable_modes, report those
   that TARGET_MODES_TIEABLE_P accepts.  */
2286 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2288 machine_mode mode1 = print_tieable_modes[m1];
2289 bool first_time = true;
2291 nl = (const char *)0;
2292 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2294 machine_mode mode2 = print_tieable_modes[m2];
2295 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2297 if (first_time)
2299 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2300 nl = "\n";
2301 first_time = false;
2304 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2308 if (!first_time)
2309 fputs ("\n", stderr);
2312 if (nl)
2313 fputs (nl, stderr);
/* Reciprocal-estimate settings, one line per mode that has any.  */
2315 if (rs6000_recip_control)
2317 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2319 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2320 if (rs6000_recip_bits[m])
2322 fprintf (stderr,
2323 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2324 GET_MODE_NAME (m),
2325 (RS6000_RECIP_AUTO_RE_P (m)
2326 ? "auto"
2327 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2328 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2329 ? "auto"
2330 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2333 fputs ("\n", stderr);
/* Flags implied by any explicit -mcpu=/-mtune= options.  */
2336 if (rs6000_cpu_index >= 0)
2338 const char *name = processor_target_table[rs6000_cpu_index].name;
2339 HOST_WIDE_INT flags
2340 = processor_target_table[rs6000_cpu_index].target_enable;
2342 sprintf (flags_buffer, "-mcpu=%s flags", name);
2343 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2345 else
2346 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2348 if (rs6000_tune_index >= 0)
2350 const char *name = processor_target_table[rs6000_tune_index].name;
2351 HOST_WIDE_INT flags
2352 = processor_target_table[rs6000_tune_index].target_enable;
2354 sprintf (flags_buffer, "-mtune=%s flags", name);
2355 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2357 else
2358 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2360 cl_target_option_save (&cl_opts, &global_options);
2361 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2362 rs6000_isa_flags);
2364 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2365 rs6000_isa_flags_explicit);
2367 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2368 rs6000_builtin_mask);
2370 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2372 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2373 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
/* Decode enumeration-valued tuning knobs into strings; an unknown value
   falls back to printing the raw number via the *_num buffers.  */
2375 switch (rs6000_sched_costly_dep)
2377 case max_dep_latency:
2378 costly_str = "max_dep_latency";
2379 break;
2381 case no_dep_costly:
2382 costly_str = "no_dep_costly";
2383 break;
2385 case all_deps_costly:
2386 costly_str = "all_deps_costly";
2387 break;
2389 case true_store_to_load_dep_costly:
2390 costly_str = "true_store_to_load_dep_costly";
2391 break;
2393 case store_to_load_dep_costly:
2394 costly_str = "store_to_load_dep_costly";
2395 break;
2397 default:
2398 costly_str = costly_num;
2399 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2400 break;
2403 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2405 switch (rs6000_sched_insert_nops)
2407 case sched_finish_regroup_exact:
2408 nop_str = "sched_finish_regroup_exact";
2409 break;
2411 case sched_finish_pad_groups:
2412 nop_str = "sched_finish_pad_groups";
2413 break;
2415 case sched_finish_none:
2416 nop_str = "sched_finish_none";
2417 break;
2419 default:
2420 nop_str = nop_num;
2421 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2422 break;
2425 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
/* SDATA_NONE deliberately prints nothing.  */
2427 switch (rs6000_sdata)
2429 default:
2430 case SDATA_NONE:
2431 break;
2433 case SDATA_DATA:
2434 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2435 break;
2437 case SDATA_SYSV:
2438 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2439 break;
2441 case SDATA_EABI:
2442 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2443 break;
2447 switch (rs6000_traceback)
2449 case traceback_default: trace_str = "default"; break;
2450 case traceback_none: trace_str = "none"; break;
2451 case traceback_part: trace_str = "part"; break;
2452 case traceback_full: trace_str = "full"; break;
2453 default: trace_str = "unknown"; break;
2456 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2458 switch (rs6000_current_cmodel)
2460 case CMODEL_SMALL: cmodel_str = "small"; break;
2461 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2462 case CMODEL_LARGE: cmodel_str = "large"; break;
2463 default: cmodel_str = "unknown"; break;
2466 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2468 switch (rs6000_current_abi)
2470 case ABI_NONE: abi_str = "none"; break;
2471 case ABI_AIX: abi_str = "aix"; break;
2472 case ABI_ELFv2: abi_str = "ELFv2"; break;
2473 case ABI_V4: abi_str = "V4"; break;
2474 case ABI_DARWIN: abi_str = "darwin"; break;
2475 default: abi_str = "unknown"; break;
2478 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2480 if (rs6000_altivec_abi)
2481 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2483 if (rs6000_darwin64_abi)
2484 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2486 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2487 (TARGET_SOFT_FLOAT ? "true" : "false"));
2489 if (TARGET_LINK_STACK)
2490 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2492 if (TARGET_P8_FUSION)
2494 char options[80];
2496 strcpy (options, "power8");
2497 if (TARGET_P8_FUSION_SIGN)
2498 strcat (options, ", sign");
2500 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
/* Remaining miscellaneous one-line settings.  */
2503 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2504 TARGET_SECURE_PLT ? "secure" : "bss");
2505 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2506 aix_struct_return ? "aix" : "sysv");
2507 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2508 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2509 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2510 tf[!!rs6000_align_branch_targets]);
2511 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2512 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2513 rs6000_long_double_type_size);
2514 if (rs6000_long_double_type_size > 64)
2516 fprintf (stderr, DEBUG_FMT_S, "long double type",
2517 TARGET_IEEEQUAD ? "IEEE" : "IBM")
2518 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2519 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2521 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2522 (int)rs6000_sched_restricted_insns_priority);
2523 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2524 (int)END_BUILTINS);
2525 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2526 (int)RS6000_BUILTIN_COUNT);
2528 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2529 (int)TARGET_FLOAT128_ENABLE_TYPE);
2531 if (TARGET_VSX)
2532 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2533 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2535 if (TARGET_DIRECT_MOVE_128)
2536 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2537 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2541 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2542 legitimate address support to figure out the appropriate addressing to
2543 use. */
2545 static void
2546 rs6000_setup_reg_addr_masks (void)
2548 ssize_t rc, reg, m, nregs;
2549 addr_mask_type any_addr_mask, addr_mask;
2551 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2553 machine_mode m2 = (machine_mode) m;
2554 bool complex_p = false;
2555 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2556 size_t msize;
2558 if (COMPLEX_MODE_P (m2))
2560 complex_p = true;
2561 m2 = GET_MODE_INNER (m2);
2564 msize = GET_MODE_SIZE (m2);
2566 /* SDmode is special in that we want to access it only via REG+REG
2567 addressing on power7 and above, since we want to use the LFIWZX and
2568 STFIWZX instructions to load it. */
2569 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2571 any_addr_mask = 0;
2572 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2574 addr_mask = 0;
2575 reg = reload_reg_map[rc].reg;
2577 /* Can mode values go in the GPR/FPR/Altivec registers? */
2578 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2580 bool small_int_vsx_p = (small_int_p
2581 && (rc == RELOAD_REG_FPR
2582 || rc == RELOAD_REG_VMX));
2584 nregs = rs6000_hard_regno_nregs[m][reg];
2585 addr_mask |= RELOAD_REG_VALID;
2587 /* Indicate if the mode takes more than 1 physical register. If
2588 it takes a single register, indicate it can do REG+REG
2589 addressing. Small integers in VSX registers can only do
2590 REG+REG addressing. */
2591 if (small_int_vsx_p)
2592 addr_mask |= RELOAD_REG_INDEXED;
2593 else if (nregs > 1 || m == BLKmode || complex_p)
2594 addr_mask |= RELOAD_REG_MULTIPLE;
2595 else
2596 addr_mask |= RELOAD_REG_INDEXED;
2598 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2599 addressing. If we allow scalars into Altivec registers,
2600 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2602 For VSX systems, we don't allow update addressing for
2603 DFmode/SFmode if those registers can go in both the
2604 traditional floating point registers and Altivec registers.
2605 The load/store instructions for the Altivec registers do not
2606 have update forms. If we allowed update addressing, it seems
2607 to break IV-OPT code using floating point if the index type is
2608 int instead of long (PR target/81550 and target/84042). */
2610 if (TARGET_UPDATE
2611 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2612 && msize <= 8
2613 && !VECTOR_MODE_P (m2)
2614 && !FLOAT128_VECTOR_P (m2)
2615 && !complex_p
2616 && (m != E_DFmode || !TARGET_VSX)
2617 && (m != E_SFmode || !TARGET_P8_VECTOR)
2618 && !small_int_vsx_p)
2620 addr_mask |= RELOAD_REG_PRE_INCDEC;
2622 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2623 we don't allow PRE_MODIFY for some multi-register
2624 operations. */
2625 switch (m)
2627 default:
2628 addr_mask |= RELOAD_REG_PRE_MODIFY;
2629 break;
2631 case E_DImode:
2632 if (TARGET_POWERPC64)
2633 addr_mask |= RELOAD_REG_PRE_MODIFY;
2634 break;
2636 case E_DFmode:
2637 case E_DDmode:
2638 if (TARGET_HARD_FLOAT)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2645 /* GPR and FPR registers can do REG+OFFSET addressing, except
2646 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2647 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2648 if ((addr_mask != 0) && !indexed_only_p
2649 && msize <= 8
2650 && (rc == RELOAD_REG_GPR
2651 || ((msize == 8 || m2 == SFmode)
2652 && (rc == RELOAD_REG_FPR
2653 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2654 addr_mask |= RELOAD_REG_OFFSET;
2656 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2657 instructions are enabled. The offset for 128-bit VSX registers is
2658 only 12-bits. While GPRs can handle the full offset range, VSX
2659 registers can only handle the restricted range. */
2660 else if ((addr_mask != 0) && !indexed_only_p
2661 && msize == 16 && TARGET_P9_VECTOR
2662 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2663 || (m2 == TImode && TARGET_VSX)))
2665 addr_mask |= RELOAD_REG_OFFSET;
2666 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2667 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2670 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2671 addressing on 128-bit types. */
2672 if (rc == RELOAD_REG_VMX && msize == 16
2673 && (addr_mask & RELOAD_REG_VALID) != 0)
2674 addr_mask |= RELOAD_REG_AND_M16;
2676 reg_addr[m].addr_mask[rc] = addr_mask;
2677 any_addr_mask |= addr_mask;
2680 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2685 /* Initialize the various global tables that are based on register size. */
2686 static void
2687 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2689 ssize_t r, m, c;
2690 int align64;
2691 int align32;
2693 /* Precalculate REGNO_REG_CLASS. */
2694 rs6000_regno_regclass[0] = GENERAL_REGS;
2695 for (r = 1; r < 32; ++r)
2696 rs6000_regno_regclass[r] = BASE_REGS;
2698 for (r = 32; r < 64; ++r)
2699 rs6000_regno_regclass[r] = FLOAT_REGS;
2701 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2702 rs6000_regno_regclass[r] = NO_REGS;
2704 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2705 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2707 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2708 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2709 rs6000_regno_regclass[r] = CR_REGS;
2711 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2712 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2713 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2714 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2715 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2716 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2717 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2719 /* Precalculate register class to simpler reload register class. We don't
2720 need all of the register classes that are combinations of different
2721 classes, just the simple ones that have constraint letters. */
2722 for (c = 0; c < N_REG_CLASSES; c++)
2723 reg_class_to_reg_type[c] = NO_REG_TYPE;
2725 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2726 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2728 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2734 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2736 if (TARGET_VSX)
2738 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2739 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2741 else
2743 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2744 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2747 /* Precalculate the valid memory formats as well as the vector information,
2748 this must be set up before the rs6000_hard_regno_nregs_internal calls
2749 below. */
2750 gcc_assert ((int)VECTOR_NONE == 0);
2751 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2752 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2754 gcc_assert ((int)CODE_FOR_nothing == 0);
2755 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2757 gcc_assert ((int)NO_REGS == 0);
2758 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2760 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2761 believes it can use native alignment or still uses 128-bit alignment. */
2762 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2764 align64 = 64;
2765 align32 = 32;
2767 else
2769 align64 = 128;
2770 align32 = 128;
2773 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2774 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2775 if (TARGET_FLOAT128_TYPE)
2777 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2778 rs6000_vector_align[KFmode] = 128;
2780 if (FLOAT128_IEEE_P (TFmode))
2782 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2783 rs6000_vector_align[TFmode] = 128;
2787 /* V2DF mode, VSX only. */
2788 if (TARGET_VSX)
2790 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2791 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2792 rs6000_vector_align[V2DFmode] = align64;
2795 /* V4SF mode, either VSX or Altivec. */
2796 if (TARGET_VSX)
2798 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2799 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2800 rs6000_vector_align[V4SFmode] = align32;
2802 else if (TARGET_ALTIVEC)
2804 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2806 rs6000_vector_align[V4SFmode] = align32;
2809 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2810 and stores. */
2811 if (TARGET_ALTIVEC)
2813 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2814 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2815 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2816 rs6000_vector_align[V4SImode] = align32;
2817 rs6000_vector_align[V8HImode] = align32;
2818 rs6000_vector_align[V16QImode] = align32;
2820 if (TARGET_VSX)
2822 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2823 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2824 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2826 else
2828 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2829 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2830 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2834 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2835 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2836 if (TARGET_VSX)
2838 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2839 rs6000_vector_unit[V2DImode]
2840 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2841 rs6000_vector_align[V2DImode] = align64;
2843 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2844 rs6000_vector_unit[V1TImode]
2845 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2846 rs6000_vector_align[V1TImode] = 128;
2849 /* DFmode, see if we want to use the VSX unit. Memory is handled
2850 differently, so don't set rs6000_vector_mem. */
2851 if (TARGET_VSX)
2853 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2854 rs6000_vector_align[DFmode] = 64;
2857 /* SFmode, see if we want to use the VSX unit. */
2858 if (TARGET_P8_VECTOR)
2860 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2861 rs6000_vector_align[SFmode] = 32;
2864 /* Allow TImode in VSX register and set the VSX memory macros. */
2865 if (TARGET_VSX)
2867 rs6000_vector_mem[TImode] = VECTOR_VSX;
2868 rs6000_vector_align[TImode] = align64;
2871 /* Register class constraints for the constraints that depend on compile
2872 switches. When the VSX code was added, different constraints were added
2873 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2874 of the VSX registers are used. The register classes for scalar floating
2875 point types is set, based on whether we allow that type into the upper
2876 (Altivec) registers. GCC has register classes to target the Altivec
2877 registers for load/store operations, to select using a VSX memory
2878 operation instead of the traditional floating point operation. The
2879 constraints are:
2881 d - Register class to use with traditional DFmode instructions.
2882 f - Register class to use with traditional SFmode instructions.
2883 v - Altivec register.
2884 wa - Any VSX register.
2885 wc - Reserved to represent individual CR bits (used in LLVM).
2886 wn - always NO_REGS.
2887 wr - GPR if 64-bit mode is permitted.
2888 wx - Float register if we can do 32-bit int stores. */
2890 if (TARGET_HARD_FLOAT)
2892 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2893 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2896 if (TARGET_VSX)
2897 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2899 /* Add conditional constraints based on various options, to allow us to
2900 collapse multiple insn patterns. */
2901 if (TARGET_ALTIVEC)
2902 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2904 if (TARGET_POWERPC64)
2906 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2907 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2910 if (TARGET_STFIWX)
2911 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2913 /* Support for new direct moves (ISA 3.0 + 64bit). */
2914 if (TARGET_DIRECT_MOVE_128)
2915 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2917 /* Set up the reload helper and direct move functions. */
2918 if (TARGET_VSX || TARGET_ALTIVEC)
2920 if (TARGET_64BIT)
2922 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2923 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2924 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2925 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2926 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2927 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2928 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2929 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2930 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2931 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2932 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2933 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2934 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2935 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2936 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2937 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2938 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2939 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2940 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2941 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2943 if (FLOAT128_VECTOR_P (KFmode))
2945 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2946 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2949 if (FLOAT128_VECTOR_P (TFmode))
2951 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2952 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2955 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2956 available. */
2957 if (TARGET_NO_SDMODE_STACK)
2959 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2960 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2963 if (TARGET_VSX)
2965 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2966 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2969 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2971 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2972 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2973 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2974 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2975 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2976 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2977 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2978 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2979 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2981 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2982 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2983 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2984 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2985 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2986 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2987 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2988 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2989 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2991 if (FLOAT128_VECTOR_P (KFmode))
2993 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2994 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2997 if (FLOAT128_VECTOR_P (TFmode))
2999 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3000 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3004 else
3006 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3007 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3008 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3009 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3010 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3011 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3012 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3013 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3014 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3015 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3016 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3017 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3018 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3019 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3020 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3021 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3022 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3023 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3024 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3025 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3027 if (FLOAT128_VECTOR_P (KFmode))
3029 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3030 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3033 if (FLOAT128_IEEE_P (TFmode))
3035 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3036 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3039 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3040 available. */
3041 if (TARGET_NO_SDMODE_STACK)
3043 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3044 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3047 if (TARGET_VSX)
3049 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3050 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3053 if (TARGET_DIRECT_MOVE)
3055 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3056 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3057 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3061 reg_addr[DFmode].scalar_in_vmx_p = true;
3062 reg_addr[DImode].scalar_in_vmx_p = true;
3064 if (TARGET_P8_VECTOR)
3066 reg_addr[SFmode].scalar_in_vmx_p = true;
3067 reg_addr[SImode].scalar_in_vmx_p = true;
3069 if (TARGET_P9_VECTOR)
3071 reg_addr[HImode].scalar_in_vmx_p = true;
3072 reg_addr[QImode].scalar_in_vmx_p = true;
3077 /* Precalculate HARD_REGNO_NREGS. */
3078 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3079 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3080 rs6000_hard_regno_nregs[m][r]
3081 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3083 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_mode_ok_p[m][r]
3087 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3089 /* Precalculate CLASS_MAX_NREGS sizes. */
3090 for (c = 0; c < LIM_REG_CLASSES; ++c)
3092 int reg_size;
3094 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3095 reg_size = UNITS_PER_VSX_WORD;
3097 else if (c == ALTIVEC_REGS)
3098 reg_size = UNITS_PER_ALTIVEC_WORD;
3100 else if (c == FLOAT_REGS)
3101 reg_size = UNITS_PER_FP_WORD;
3103 else
3104 reg_size = UNITS_PER_WORD;
3106 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3108 machine_mode m2 = (machine_mode)m;
3109 int reg_size2 = reg_size;
3111 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3112 in VSX. */
3113 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3114 reg_size2 = UNITS_PER_FP_WORD;
3116 rs6000_class_max_nregs[m][c]
3117 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3121 /* Calculate which modes to automatically generate code to use a the
3122 reciprocal divide and square root instructions. In the future, possibly
3123 automatically generate the instructions even if the user did not specify
3124 -mrecip. The older machines double precision reciprocal sqrt estimate is
3125 not accurate enough. */
3126 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3127 if (TARGET_FRES)
3128 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3129 if (TARGET_FRE)
3130 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3131 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3132 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3133 if (VECTOR_UNIT_VSX_P (V2DFmode))
3134 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3136 if (TARGET_FRSQRTES)
3137 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3138 if (TARGET_FRSQRTE)
3139 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3140 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3141 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3142 if (VECTOR_UNIT_VSX_P (V2DFmode))
3143 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3145 if (rs6000_recip_control)
3147 if (!flag_finite_math_only)
3148 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3149 "-ffast-math");
3150 if (flag_trapping_math)
3151 warning (0, "%qs requires %qs or %qs", "-mrecip",
3152 "-fno-trapping-math", "-ffast-math");
3153 if (!flag_reciprocal_math)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3155 "-ffast-math");
3156 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3158 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3159 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3160 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3162 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3163 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3164 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3166 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3167 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3168 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3170 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3171 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3172 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3174 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3175 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3176 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3178 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3179 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3180 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3182 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3183 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3184 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3186 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3187 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3188 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3192 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3193 legitimate address support to figure out the appropriate addressing to
3194 use. */
3195 rs6000_setup_reg_addr_masks ();
3197 if (global_init_p || TARGET_DEBUG_TARGET)
3199 if (TARGET_DEBUG_REG)
3200 rs6000_debug_reg_global ();
3202 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3203 fprintf (stderr,
3204 "SImode variable mult cost = %d\n"
3205 "SImode constant mult cost = %d\n"
3206 "SImode short constant mult cost = %d\n"
3207 "DImode multipliciation cost = %d\n"
3208 "SImode division cost = %d\n"
3209 "DImode division cost = %d\n"
3210 "Simple fp operation cost = %d\n"
3211 "DFmode multiplication cost = %d\n"
3212 "SFmode division cost = %d\n"
3213 "DFmode division cost = %d\n"
3214 "cache line size = %d\n"
3215 "l1 cache size = %d\n"
3216 "l2 cache size = %d\n"
3217 "simultaneous prefetches = %d\n"
3218 "\n",
3219 rs6000_cost->mulsi,
3220 rs6000_cost->mulsi_const,
3221 rs6000_cost->mulsi_const9,
3222 rs6000_cost->muldi,
3223 rs6000_cost->divsi,
3224 rs6000_cost->divdi,
3225 rs6000_cost->fp,
3226 rs6000_cost->dmul,
3227 rs6000_cost->sdiv,
3228 rs6000_cost->ddiv,
3229 rs6000_cost->cache_line_size,
3230 rs6000_cost->l1_cache_size,
3231 rs6000_cost->l2_cache_size,
3232 rs6000_cost->simultaneous_prefetches);
#if TARGET_MACHO
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.

   Adjusts the rs6000 option state to match the Darwin ABI: AltiVec is
   always part of the ABI, 64-bit targets imply -maltivec, and the
   JBSR longcall optimisation is only kept for kernel (static) code.  */

static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec; it cannot (validly) be
     turned off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  if (TARGET_64BIT && !TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
    }

  /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
     optimisation, and it does not work for the most generic case (where
     the symbol is undefined external, but there is no symbol stub).  */
  if (TARGET_64BIT)
    rs6000_default_long_calls = 0;

  /* ld_classic is (so far) still used for kernel (static) code, and
     supports the JBSR longcall / branch islands.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;

      /* Allow a kext author to do -mkernel -mhard-float.  */
      if (!(rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
	rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes AltiVec.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && TARGET_64BIT
      && !(rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) explicitly overrode it with
     -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4 unless
     targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && !(rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && !global_options_set.x_rs6000_cpu_index)
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
}
#endif
3296 /* If not otherwise specified by a target, make 'long double' equivalent to
3297 'double'. */
3299 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3300 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3301 #endif
3303 /* Return the builtin mask of the various options used that could affect which
3304 builtins were used. In the past we used target_flags, but we've run out of
3305 bits, and some options are no longer in target_flags. */
3307 HOST_WIDE_INT
3308 rs6000_builtin_mask_calculate (void)
3310 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3311 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3312 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3313 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3314 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3315 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3316 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3317 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3318 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3319 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3320 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3321 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3322 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3323 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3324 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3325 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3326 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3327 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3328 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3329 | ((TARGET_LONG_DOUBLE_128
3330 && TARGET_HARD_FLOAT
3331 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3332 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3333 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3336 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3337 to clobber the XER[CA] bit because clobbering that bit without telling
3338 the compiler worked just fine with versions of GCC before GCC 5, and
3339 breaking a lot of older code in ways that are hard to track down is
3340 not such a great idea. */
3342 static rtx_insn *
3343 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3344 vec<const char *> &/*constraints*/,
3345 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3347 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3348 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3349 return NULL;
3352 /* Override command line options.
3354 Combine build-specific configuration information with options
3355 specified on the command line to set various state variables which
3356 influence code generation, optimization, and expansion of built-in
3357 functions. Assure that command-line configuration preferences are
3358 compatible with each other and with the build configuration; issue
3359 warnings while adjusting configuration or error messages while
3360 rejecting configuration.
3362 Upon entry to this function:
3364 This function is called once at the beginning of
3365 compilation, and then again at the start and end of compiling
3366 each section of code that has a different configuration, as
3367 indicated, for example, by adding the
3369 __attribute__((__target__("cpu=power9")))
3371 qualifier to a function definition or, for example, by bracketing
3372 code between
3374 #pragma GCC target("altivec")
3378 #pragma GCC reset_options
3380 directives. Parameter global_init_p is true for the initial
3381 invocation, which initializes global variables, and false for all
3382 subsequent invocations.
3385 Various global state information is assumed to be valid. This
3386 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3387 default CPU specified at build configure time, TARGET_DEFAULT,
3388 representing the default set of option flags for the default
3389 target, and global_options_set.x_rs6000_isa_flags, representing
3390 which options were requested on the command line.
3392 Upon return from this function:
3394 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3395 was set by name on the command line. Additionally, if certain
3396 attributes are automatically enabled or disabled by this function
3397 in order to assure compatibility between options and
3398 configuration, the flags associated with those attributes are
3399 also set. By setting these "explicit bits", we avoid the risk
3400 that other code might accidentally overwrite these particular
3401 attributes with "default values".
3403 The various bits of rs6000_isa_flags are set to indicate the
3404 target options that have been selected for the most current
3405 compilation efforts. This has the effect of also turning on the
3406 associated TARGET_XXX values since these are macros which are
3407 generally defined to test the corresponding bit of the
3408 rs6000_isa_flags variable.
3410 The variable rs6000_builtin_mask is set to represent the target
3411 options for the most current compilation efforts, consistent with
3412 the current contents of rs6000_isa_flags. This variable controls
3413 expansion of built-in functions.
3415 Various other global variables and fields of global structures
3416 (over 50 in all) are initialized to reflect the desired options
3417 for the most current compilation efforts. */
3419 static bool
3420 rs6000_option_override_internal (bool global_init_p)
3422 bool ret = true;
3424 HOST_WIDE_INT set_masks;
3425 HOST_WIDE_INT ignore_masks;
3426 int cpu_index = -1;
3427 int tune_index;
3428 struct cl_target_option *main_target_opt
3429 = ((global_init_p || target_option_default_node == NULL)
3430 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3432 /* Print defaults. */
3433 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3434 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3436 /* Remember the explicit arguments. */
3437 if (global_init_p)
3438 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3440 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3441 library functions, so warn about it. The flag may be useful for
3442 performance studies from time to time though, so don't disable it
3443 entirely. */
3444 if (global_options_set.x_rs6000_alignment_flags
3445 && rs6000_alignment_flags == MASK_ALIGN_POWER
3446 && DEFAULT_ABI == ABI_DARWIN
3447 && TARGET_64BIT)
3448 warning (0, "%qs is not supported for 64-bit Darwin;"
3449 " it is incompatible with the installed C and C++ libraries",
3450 "-malign-power");
3452 /* Numerous experiment shows that IRA based loop pressure
3453 calculation works better for RTL loop invariant motion on targets
3454 with enough (>= 32) registers. It is an expensive optimization.
3455 So it is on only for peak performance. */
3456 if (optimize >= 3 && global_init_p
3457 && !global_options_set.x_flag_ira_loop_pressure)
3458 flag_ira_loop_pressure = 1;
3460 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3461 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3462 options were already specified. */
3463 if (flag_sanitize & SANITIZE_USER_ADDRESS
3464 && !global_options_set.x_flag_asynchronous_unwind_tables)
3465 flag_asynchronous_unwind_tables = 1;
3467 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3468 loop unroller is active. It is only checked during unrolling, so
3469 we can just set it on by default. */
3470 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3471 flag_variable_expansion_in_unroller = 1;
3473 /* Set the pointer size. */
3474 if (TARGET_64BIT)
3476 rs6000_pmode = DImode;
3477 rs6000_pointer_size = 64;
3479 else
3481 rs6000_pmode = SImode;
3482 rs6000_pointer_size = 32;
3485 /* Some OSs don't support saving the high part of 64-bit registers on context
3486 switch. Other OSs don't support saving Altivec registers. On those OSs,
3487 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3488 if the user wants either, the user must explicitly specify them and we
3489 won't interfere with the user's specification. */
3491 set_masks = POWERPC_MASKS;
3492 #ifdef OS_MISSING_POWERPC64
3493 if (OS_MISSING_POWERPC64)
3494 set_masks &= ~OPTION_MASK_POWERPC64;
3495 #endif
3496 #ifdef OS_MISSING_ALTIVEC
3497 if (OS_MISSING_ALTIVEC)
3498 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3499 | OTHER_VSX_VECTOR_MASKS);
3500 #endif
3502 /* Don't override by the processor default if given explicitly. */
3503 set_masks &= ~rs6000_isa_flags_explicit;
3505 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3506 the cpu in a target attribute or pragma, but did not specify a tuning
3507 option, use the cpu for the tuning option rather than the option specified
3508 with -mtune on the command line. Process a '--with-cpu' configuration
3509 request as an implicit --cpu. */
3510 if (rs6000_cpu_index >= 0)
3511 cpu_index = rs6000_cpu_index;
3512 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3513 cpu_index = main_target_opt->x_rs6000_cpu_index;
3514 else if (OPTION_TARGET_CPU_DEFAULT)
3515 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3517 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3518 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3519 with those from the cpu, except for options that were explicitly set. If
3520 we don't have a cpu, do not override the target bits set in
3521 TARGET_DEFAULT. */
3522 if (cpu_index >= 0)
3524 rs6000_cpu_index = cpu_index;
3525 rs6000_isa_flags &= ~set_masks;
3526 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3527 & set_masks);
3529 else
3531 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3532 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3533 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3534 to using rs6000_isa_flags, we need to do the initialization here.
3536 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3537 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3538 HOST_WIDE_INT flags;
3539 if (TARGET_DEFAULT)
3540 flags = TARGET_DEFAULT;
3541 else
3543 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3544 const char *default_cpu = (!TARGET_POWERPC64
3545 ? "powerpc"
3546 : (BYTES_BIG_ENDIAN
3547 ? "powerpc64"
3548 : "powerpc64le"));
3549 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3550 flags = processor_target_table[default_cpu_index].target_enable;
3552 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3555 if (rs6000_tune_index >= 0)
3556 tune_index = rs6000_tune_index;
3557 else if (cpu_index >= 0)
3558 rs6000_tune_index = tune_index = cpu_index;
3559 else
3561 size_t i;
3562 enum processor_type tune_proc
3563 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3565 tune_index = -1;
3566 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3567 if (processor_target_table[i].processor == tune_proc)
3569 tune_index = i;
3570 break;
3574 if (cpu_index >= 0)
3575 rs6000_cpu = processor_target_table[cpu_index].processor;
3576 else
3577 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3579 gcc_assert (tune_index >= 0);
3580 rs6000_tune = processor_target_table[tune_index].processor;
3582 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3583 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3584 || rs6000_cpu == PROCESSOR_PPCE5500)
3586 if (TARGET_ALTIVEC)
3587 error ("AltiVec not supported in this target");
3590 /* If we are optimizing big endian systems for space, use the load/store
3591 multiple instructions. */
3592 if (BYTES_BIG_ENDIAN && optimize_size)
3593 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3595 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3596 because the hardware doesn't support the instructions used in little
3597 endian mode, and causes an alignment trap. The 750 does not cause an
3598 alignment trap (except when the target is unaligned). */
3600 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3602 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3603 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3604 warning (0, "%qs is not supported on little endian systems",
3605 "-mmultiple");
3608 /* If little-endian, default to -mstrict-align on older processors.
3609 Testing for htm matches power8 and later. */
3610 if (!BYTES_BIG_ENDIAN
3611 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3612 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3614 if (!rs6000_fold_gimple)
3615 fprintf (stderr,
3616 "gimple folding of rs6000 builtins has been disabled.\n");
3618 /* Add some warnings for VSX. */
3619 if (TARGET_VSX)
3621 const char *msg = NULL;
3622 if (!TARGET_HARD_FLOAT)
3624 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3625 msg = N_("%<-mvsx%> requires hardware floating point");
3626 else
3628 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3629 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3632 else if (TARGET_AVOID_XFORM > 0)
3633 msg = N_("%<-mvsx%> needs indexed addressing");
3634 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3635 & OPTION_MASK_ALTIVEC))
3637 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3638 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3639 else
3640 msg = N_("%<-mno-altivec%> disables vsx");
3643 if (msg)
3645 warning (0, msg);
3646 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3647 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3651 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3652 the -mcpu setting to enable options that conflict. */
3653 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3654 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3655 | OPTION_MASK_ALTIVEC
3656 | OPTION_MASK_VSX)) != 0)
3657 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3658 | OPTION_MASK_DIRECT_MOVE)
3659 & ~rs6000_isa_flags_explicit);
3661 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3662 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3664 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3665 off all of the options that depend on those flags. */
3666 ignore_masks = rs6000_disable_incompatible_switches ();
3668 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3669 unless the user explicitly used the -mno-<option> to disable the code. */
3670 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3671 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3672 else if (TARGET_P9_MINMAX)
3674 if (cpu_index >= 0)
3676 if (cpu_index == PROCESSOR_POWER9)
3678 /* legacy behavior: allow -mcpu=power9 with certain
3679 capabilities explicitly disabled. */
3680 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3682 else
3683 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3684 "for <xxx> less than power9", "-mcpu");
3686 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3687 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3688 & rs6000_isa_flags_explicit))
3689 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3690 were explicitly cleared. */
3691 error ("%qs incompatible with explicitly disabled options",
3692 "-mpower9-minmax");
3693 else
3694 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3696 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3697 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3698 else if (TARGET_VSX)
3699 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3700 else if (TARGET_POPCNTD)
3701 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3702 else if (TARGET_DFP)
3703 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_CMPB)
3705 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3706 else if (TARGET_FPRND)
3707 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3708 else if (TARGET_POPCNTB)
3709 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3710 else if (TARGET_ALTIVEC)
3711 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3713 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3715 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3716 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3717 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3720 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3722 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3723 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3724 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3727 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3729 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3730 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3731 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3734 if (TARGET_P8_VECTOR && !TARGET_VSX)
3736 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3737 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3738 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3739 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3741 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3742 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3743 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3745 else
3747 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3748 not explicit. */
3749 rs6000_isa_flags |= OPTION_MASK_VSX;
3750 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3754 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3757 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3758 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3761 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3762 silently turn off quad memory mode. */
3763 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3765 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3766 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3768 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3769 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3771 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3772 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3775 /* Non-atomic quad memory load/store are disabled for little endian, since
3776 the words are reversed, but atomic operations can still be done by
3777 swapping the words. */
3778 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3780 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3781 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3782 "mode"));
3784 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3787 /* Assume if the user asked for normal quad memory instructions, they want
3788 the atomic versions as well, unless they explicity told us not to use quad
3789 word atomic instructions. */
3790 if (TARGET_QUAD_MEMORY
3791 && !TARGET_QUAD_MEMORY_ATOMIC
3792 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3793 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3795 /* If we can shrink-wrap the TOC register save separately, then use
3796 -msave-toc-indirect unless explicitly disabled. */
3797 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3798 && flag_shrink_wrap_separate
3799 && optimize_function_for_speed_p (cfun))
3800 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3802 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3803 generating power8 instructions. Power9 does not optimize power8 fusion
3804 cases. */
3805 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3807 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3808 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3809 else
3810 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3813 /* Setting additional fusion flags turns on base fusion. */
3814 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3816 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3818 if (TARGET_P8_FUSION_SIGN)
3819 error ("%qs requires %qs", "-mpower8-fusion-sign",
3820 "-mpower8-fusion");
3822 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3824 else
3825 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3828 /* Power8 does not fuse sign extended loads with the addis. If we are
3829 optimizing at high levels for speed, convert a sign extended load into a
3830 zero extending load, and an explicit sign extension. */
3831 if (TARGET_P8_FUSION
3832 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3833 && optimize_function_for_speed_p (cfun)
3834 && optimize >= 3)
3835 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3837 /* ISA 3.0 vector instructions include ISA 2.07. */
3838 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3840 /* We prefer to not mention undocumented options in
3841 error messages. However, if users have managed to select
3842 power9-vector without selecting power8-vector, they
3843 already know about undocumented flags. */
3844 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
3845 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3846 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3847 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3849 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3850 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3851 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3853 else
3855 /* OPTION_MASK_P9_VECTOR is explicit and
3856 OPTION_MASK_P8_VECTOR is not explicit. */
3857 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3858 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3862 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
3863 support. If we only have ISA 2.06 support, and the user did not specify
3864 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3865 but we don't enable the full vectorization support */
3866 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3867 TARGET_ALLOW_MOVMISALIGN = 1;
3869 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3871 if (TARGET_ALLOW_MOVMISALIGN > 0
3872 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3873 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3875 TARGET_ALLOW_MOVMISALIGN = 0;
3878 /* Determine when unaligned vector accesses are permitted, and when
3879 they are preferred over masked Altivec loads. Note that if
3880 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3881 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3882 not true. */
3883 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3885 if (!TARGET_VSX)
3887 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3888 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3890 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3893 else if (!TARGET_ALLOW_MOVMISALIGN)
3895 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3896 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3897 "-mallow-movmisalign");
3899 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3903 /* Use long double size to select the appropriate long double. We use
3904 TYPE_PRECISION to differentiate the 3 different long double types. We map
3905 128 into the precision used for TFmode. */
3906 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3907 ? 64
3908 : FLOAT_PRECISION_TFmode);
3910 /* Set long double size before the IEEE 128-bit tests. */
3911 if (!global_options_set.x_rs6000_long_double_type_size)
3913 if (main_target_opt != NULL
3914 && (main_target_opt->x_rs6000_long_double_type_size
3915 != default_long_double_size))
3916 error ("target attribute or pragma changes %<long double%> size");
3917 else
3918 rs6000_long_double_type_size = default_long_double_size;
3920 else if (rs6000_long_double_type_size == 128)
3921 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3922 else if (global_options_set.x_rs6000_ieeequad)
3924 if (global_options.x_rs6000_ieeequad)
3925 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3926 else
3927 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3930 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3931 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3932 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3933 those systems will not pick up this default. Warn if the user changes the
3934 default unless -Wno-psabi. */
3935 if (!global_options_set.x_rs6000_ieeequad)
3936 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3938 else
3940 if (global_options.x_rs6000_ieeequad
3941 && (!TARGET_POPCNTD || !TARGET_VSX))
3942 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3944 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3946 static bool warned_change_long_double;
3947 if (!warned_change_long_double)
3949 warned_change_long_double = true;
3950 if (TARGET_IEEEQUAD)
3951 warning (OPT_Wpsabi, "Using IEEE extended precision "
3952 "%<long double%>");
3953 else
3954 warning (OPT_Wpsabi, "Using IBM extended precision "
3955 "%<long double%>");
3960 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3961 sytems. In GCC 7, we would enable the the IEEE 128-bit floating point
3962 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3963 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3964 the keyword as well as the type. */
3965 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3967 /* IEEE 128-bit floating point requires VSX support. */
3968 if (TARGET_FLOAT128_KEYWORD)
3970 if (!TARGET_VSX)
3972 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3973 error ("%qs requires VSX support", "%<-mfloat128%>");
3975 TARGET_FLOAT128_TYPE = 0;
3976 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3977 | OPTION_MASK_FLOAT128_HW);
3979 else if (!TARGET_FLOAT128_TYPE)
3981 TARGET_FLOAT128_TYPE = 1;
3982 warning (0, "The %<-mfloat128%> option may not be fully supported");
3986 /* Enable the __float128 keyword under Linux by default. */
3987 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3988 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3989 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
3991 /* If we have are supporting the float128 type and full ISA 3.0 support,
3992 enable -mfloat128-hardware by default. However, don't enable the
3993 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
3994 because sometimes the compiler wants to put things in an integer
3995 container, and if we don't have __int128 support, it is impossible. */
3996 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
3997 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
3998 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
3999 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4001 if (TARGET_FLOAT128_HW
4002 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4004 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4005 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4007 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4010 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4012 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4013 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4015 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4018 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4019 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4021 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4022 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4023 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4024 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4026 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4029 /* -mpcrel requires prefixed load/store addressing. */
4030 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4032 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4033 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4035 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4038 /* Print the options after updating the defaults. */
4039 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4040 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4042 /* E500mc does "better" if we inline more aggressively. Respect the
4043 user's opinion, though. */
4044 if (rs6000_block_move_inline_limit == 0
4045 && (rs6000_tune == PROCESSOR_PPCE500MC
4046 || rs6000_tune == PROCESSOR_PPCE500MC64
4047 || rs6000_tune == PROCESSOR_PPCE5500
4048 || rs6000_tune == PROCESSOR_PPCE6500))
4049 rs6000_block_move_inline_limit = 128;
4051 /* store_one_arg depends on expand_block_move to handle at least the
4052 size of reg_parm_stack_space. */
4053 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4054 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4056 if (global_init_p)
4058 /* If the appropriate debug option is enabled, replace the target hooks
4059 with debug versions that call the real version and then prints
4060 debugging information. */
4061 if (TARGET_DEBUG_COST)
4063 targetm.rtx_costs = rs6000_debug_rtx_costs;
4064 targetm.address_cost = rs6000_debug_address_cost;
4065 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4068 if (TARGET_DEBUG_ADDR)
4070 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4071 targetm.legitimize_address = rs6000_debug_legitimize_address;
4072 rs6000_secondary_reload_class_ptr
4073 = rs6000_debug_secondary_reload_class;
4074 targetm.secondary_memory_needed
4075 = rs6000_debug_secondary_memory_needed;
4076 targetm.can_change_mode_class
4077 = rs6000_debug_can_change_mode_class;
4078 rs6000_preferred_reload_class_ptr
4079 = rs6000_debug_preferred_reload_class;
4080 rs6000_mode_dependent_address_ptr
4081 = rs6000_debug_mode_dependent_address;
4084 if (rs6000_veclibabi_name)
4086 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4087 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4088 else
4090 error ("unknown vectorization library ABI type (%qs) for "
4091 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4092 ret = false;
4097 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4098 target attribute or pragma which automatically enables both options,
4099 unless the altivec ABI was set. This is set by default for 64-bit, but
4100 not for 32-bit. */
4101 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4103 TARGET_FLOAT128_TYPE = 0;
4104 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4105 | OPTION_MASK_FLOAT128_KEYWORD)
4106 & ~rs6000_isa_flags_explicit);
4109 /* Enable Altivec ABI for AIX -maltivec. */
4110 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4112 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4113 error ("target attribute or pragma changes AltiVec ABI");
4114 else
4115 rs6000_altivec_abi = 1;
4118 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4119 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4120 be explicitly overridden in either case. */
4121 if (TARGET_ELF)
4123 if (!global_options_set.x_rs6000_altivec_abi
4124 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4126 if (main_target_opt != NULL &&
4127 !main_target_opt->x_rs6000_altivec_abi)
4128 error ("target attribute or pragma changes AltiVec ABI");
4129 else
4130 rs6000_altivec_abi = 1;
4134 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4135 So far, the only darwin64 targets are also MACH-O. */
4136 if (TARGET_MACHO
4137 && DEFAULT_ABI == ABI_DARWIN
4138 && TARGET_64BIT)
4140 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4141 error ("target attribute or pragma changes darwin64 ABI");
4142 else
4144 rs6000_darwin64_abi = 1;
4145 /* Default to natural alignment, for better performance. */
4146 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4150 /* Place FP constants in the constant pool instead of TOC
4151 if section anchors enabled. */
4152 if (flag_section_anchors
4153 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4154 TARGET_NO_FP_IN_TOC = 1;
4156 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4157 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4159 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4160 SUBTARGET_OVERRIDE_OPTIONS;
4161 #endif
4162 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4163 SUBSUBTARGET_OVERRIDE_OPTIONS;
4164 #endif
4165 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4166 SUB3TARGET_OVERRIDE_OPTIONS;
4167 #endif
4169 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4170 after the subtarget override options are done. */
4171 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4173 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4174 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4176 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4179 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4180 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4182 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4183 && rs6000_tune != PROCESSOR_POWER5
4184 && rs6000_tune != PROCESSOR_POWER6
4185 && rs6000_tune != PROCESSOR_POWER7
4186 && rs6000_tune != PROCESSOR_POWER8
4187 && rs6000_tune != PROCESSOR_POWER9
4188 && rs6000_tune != PROCESSOR_FUTURE
4189 && rs6000_tune != PROCESSOR_PPCA2
4190 && rs6000_tune != PROCESSOR_CELL
4191 && rs6000_tune != PROCESSOR_PPC476);
4192 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4193 || rs6000_tune == PROCESSOR_POWER5
4194 || rs6000_tune == PROCESSOR_POWER7
4195 || rs6000_tune == PROCESSOR_POWER8);
4196 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4197 || rs6000_tune == PROCESSOR_POWER5
4198 || rs6000_tune == PROCESSOR_POWER6
4199 || rs6000_tune == PROCESSOR_POWER7
4200 || rs6000_tune == PROCESSOR_POWER8
4201 || rs6000_tune == PROCESSOR_POWER9
4202 || rs6000_tune == PROCESSOR_FUTURE
4203 || rs6000_tune == PROCESSOR_PPCE500MC
4204 || rs6000_tune == PROCESSOR_PPCE500MC64
4205 || rs6000_tune == PROCESSOR_PPCE5500
4206 || rs6000_tune == PROCESSOR_PPCE6500);
4208 /* Allow debug switches to override the above settings. These are set to -1
4209 in rs6000.opt to indicate the user hasn't directly set the switch. */
4210 if (TARGET_ALWAYS_HINT >= 0)
4211 rs6000_always_hint = TARGET_ALWAYS_HINT;
4213 if (TARGET_SCHED_GROUPS >= 0)
4214 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4216 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4217 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4219 rs6000_sched_restricted_insns_priority
4220 = (rs6000_sched_groups ? 1 : 0);
4222 /* Handle -msched-costly-dep option. */
4223 rs6000_sched_costly_dep
4224 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4226 if (rs6000_sched_costly_dep_str)
4228 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4229 rs6000_sched_costly_dep = no_dep_costly;
4230 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4231 rs6000_sched_costly_dep = all_deps_costly;
4232 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4233 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4234 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4235 rs6000_sched_costly_dep = store_to_load_dep_costly;
4236 else
4237 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4238 atoi (rs6000_sched_costly_dep_str));
4241 /* Handle -minsert-sched-nops option. */
4242 rs6000_sched_insert_nops
4243 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4245 if (rs6000_sched_insert_nops_str)
4247 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4248 rs6000_sched_insert_nops = sched_finish_none;
4249 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4250 rs6000_sched_insert_nops = sched_finish_pad_groups;
4251 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4252 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4253 else
4254 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4255 atoi (rs6000_sched_insert_nops_str));
4258 /* Handle stack protector */
4259 if (!global_options_set.x_rs6000_stack_protector_guard)
4260 #ifdef TARGET_THREAD_SSP_OFFSET
4261 rs6000_stack_protector_guard = SSP_TLS;
4262 #else
4263 rs6000_stack_protector_guard = SSP_GLOBAL;
4264 #endif
4266 #ifdef TARGET_THREAD_SSP_OFFSET
4267 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4268 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4269 #endif
4271 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4273 char *endp;
4274 const char *str = rs6000_stack_protector_guard_offset_str;
4276 errno = 0;
4277 long offset = strtol (str, &endp, 0);
4278 if (!*str || *endp || errno)
4279 error ("%qs is not a valid number in %qs", str,
4280 "-mstack-protector-guard-offset=");
4282 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4283 || (TARGET_64BIT && (offset & 3)))
4284 error ("%qs is not a valid offset in %qs", str,
4285 "-mstack-protector-guard-offset=");
4287 rs6000_stack_protector_guard_offset = offset;
4290 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4292 const char *str = rs6000_stack_protector_guard_reg_str;
4293 int reg = decode_reg_name (str);
4295 if (!IN_RANGE (reg, 1, 31))
4296 error ("%qs is not a valid base register in %qs", str,
4297 "-mstack-protector-guard-reg=");
4299 rs6000_stack_protector_guard_reg = reg;
4302 if (rs6000_stack_protector_guard == SSP_TLS
4303 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4304 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4306 if (global_init_p)
4308 #ifdef TARGET_REGNAMES
4309 /* If the user desires alternate register names, copy in the
4310 alternate names now. */
4311 if (TARGET_REGNAMES)
4312 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4313 #endif
4315 /* Set aix_struct_return last, after the ABI is determined.
4316 If -maix-struct-return or -msvr4-struct-return was explicitly
4317 used, don't override with the ABI default. */
4318 if (!global_options_set.x_aix_struct_return)
4319 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4321 #if 0
4322 /* IBM XL compiler defaults to unsigned bitfields. */
4323 if (TARGET_XL_COMPAT)
4324 flag_signed_bitfields = 0;
4325 #endif
4327 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4328 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4330 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4332 /* We can only guarantee the availability of DI pseudo-ops when
4333 assembling for 64-bit targets. */
4334 if (!TARGET_64BIT)
4336 targetm.asm_out.aligned_op.di = NULL;
4337 targetm.asm_out.unaligned_op.di = NULL;
4341 /* Set branch target alignment, if not optimizing for size. */
4342 if (!optimize_size)
4344 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4345 aligned 8byte to avoid misprediction by the branch predictor. */
4346 if (rs6000_tune == PROCESSOR_TITAN
4347 || rs6000_tune == PROCESSOR_CELL)
4349 if (flag_align_functions && !str_align_functions)
4350 str_align_functions = "8";
4351 if (flag_align_jumps && !str_align_jumps)
4352 str_align_jumps = "8";
4353 if (flag_align_loops && !str_align_loops)
4354 str_align_loops = "8";
4356 if (rs6000_align_branch_targets)
4358 if (flag_align_functions && !str_align_functions)
4359 str_align_functions = "16";
4360 if (flag_align_jumps && !str_align_jumps)
4361 str_align_jumps = "16";
4362 if (flag_align_loops && !str_align_loops)
4364 can_override_loop_align = 1;
4365 str_align_loops = "16";
4369 if (flag_align_jumps && !str_align_jumps)
4370 str_align_jumps = "16";
4371 if (flag_align_loops && !str_align_loops)
4372 str_align_loops = "16";
4375 /* Arrange to save and restore machine status around nested functions. */
4376 init_machine_status = rs6000_init_machine_status;
4378 /* We should always be splitting complex arguments, but we can't break
4379 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4380 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4381 targetm.calls.split_complex_arg = NULL;
4383 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4384 if (DEFAULT_ABI == ABI_AIX)
4385 targetm.calls.custom_function_descriptors = 0;
4388 /* Initialize rs6000_cost with the appropriate target costs. */
4389 if (optimize_size)
4390 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4391 else
4392 switch (rs6000_tune)
4394 case PROCESSOR_RS64A:
4395 rs6000_cost = &rs64a_cost;
4396 break;
4398 case PROCESSOR_MPCCORE:
4399 rs6000_cost = &mpccore_cost;
4400 break;
4402 case PROCESSOR_PPC403:
4403 rs6000_cost = &ppc403_cost;
4404 break;
4406 case PROCESSOR_PPC405:
4407 rs6000_cost = &ppc405_cost;
4408 break;
4410 case PROCESSOR_PPC440:
4411 rs6000_cost = &ppc440_cost;
4412 break;
4414 case PROCESSOR_PPC476:
4415 rs6000_cost = &ppc476_cost;
4416 break;
4418 case PROCESSOR_PPC601:
4419 rs6000_cost = &ppc601_cost;
4420 break;
4422 case PROCESSOR_PPC603:
4423 rs6000_cost = &ppc603_cost;
4424 break;
4426 case PROCESSOR_PPC604:
4427 rs6000_cost = &ppc604_cost;
4428 break;
4430 case PROCESSOR_PPC604e:
4431 rs6000_cost = &ppc604e_cost;
4432 break;
4434 case PROCESSOR_PPC620:
4435 rs6000_cost = &ppc620_cost;
4436 break;
4438 case PROCESSOR_PPC630:
4439 rs6000_cost = &ppc630_cost;
4440 break;
4442 case PROCESSOR_CELL:
4443 rs6000_cost = &ppccell_cost;
4444 break;
4446 case PROCESSOR_PPC750:
4447 case PROCESSOR_PPC7400:
4448 rs6000_cost = &ppc750_cost;
4449 break;
4451 case PROCESSOR_PPC7450:
4452 rs6000_cost = &ppc7450_cost;
4453 break;
4455 case PROCESSOR_PPC8540:
4456 case PROCESSOR_PPC8548:
4457 rs6000_cost = &ppc8540_cost;
4458 break;
4460 case PROCESSOR_PPCE300C2:
4461 case PROCESSOR_PPCE300C3:
4462 rs6000_cost = &ppce300c2c3_cost;
4463 break;
4465 case PROCESSOR_PPCE500MC:
4466 rs6000_cost = &ppce500mc_cost;
4467 break;
4469 case PROCESSOR_PPCE500MC64:
4470 rs6000_cost = &ppce500mc64_cost;
4471 break;
4473 case PROCESSOR_PPCE5500:
4474 rs6000_cost = &ppce5500_cost;
4475 break;
4477 case PROCESSOR_PPCE6500:
4478 rs6000_cost = &ppce6500_cost;
4479 break;
4481 case PROCESSOR_TITAN:
4482 rs6000_cost = &titan_cost;
4483 break;
4485 case PROCESSOR_POWER4:
4486 case PROCESSOR_POWER5:
4487 rs6000_cost = &power4_cost;
4488 break;
4490 case PROCESSOR_POWER6:
4491 rs6000_cost = &power6_cost;
4492 break;
4494 case PROCESSOR_POWER7:
4495 rs6000_cost = &power7_cost;
4496 break;
4498 case PROCESSOR_POWER8:
4499 rs6000_cost = &power8_cost;
4500 break;
4502 case PROCESSOR_POWER9:
4503 case PROCESSOR_FUTURE:
4504 rs6000_cost = &power9_cost;
4505 break;
4507 case PROCESSOR_PPCA2:
4508 rs6000_cost = &ppca2_cost;
4509 break;
4511 default:
4512 gcc_unreachable ();
4515 if (global_init_p)
4517 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4518 param_simultaneous_prefetches,
4519 rs6000_cost->simultaneous_prefetches);
4520 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4521 param_l1_cache_size,
4522 rs6000_cost->l1_cache_size);
4523 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4524 param_l1_cache_line_size,
4525 rs6000_cost->cache_line_size);
4526 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4527 param_l2_cache_size,
4528 rs6000_cost->l2_cache_size);
4530 /* Increase loop peeling limits based on performance analysis. */
4531 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4532 param_max_peeled_insns, 400);
4533 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4534 param_max_completely_peeled_insns, 400);
4536 /* Use the 'model' -fsched-pressure algorithm by default. */
4537 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4538 param_sched_pressure_algorithm,
4539 SCHED_PRESSURE_MODEL);
4541 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4542 turns -fweb and -frename-registers on. */
4543 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4544 || (global_options_set.x_flag_unroll_all_loops
4545 && flag_unroll_all_loops))
4547 if (!global_options_set.x_unroll_only_small_loops)
4548 unroll_only_small_loops = 0;
4549 if (!global_options_set.x_flag_rename_registers)
4550 flag_rename_registers = 1;
4551 if (!global_options_set.x_flag_web)
4552 flag_web = 1;
4555 /* If using typedef char *va_list, signal that
4556 __builtin_va_start (&ap, 0) can be optimized to
4557 ap = __builtin_next_arg (0). */
4558 if (DEFAULT_ABI != ABI_V4)
4559 targetm.expand_builtin_va_start = NULL;
4562 /* If not explicitly specified via option, decide whether to generate indexed
4563 load/store instructions. A value of -1 indicates that the
4564 initial value of this variable has not been overwritten. During
4565 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4566 if (TARGET_AVOID_XFORM == -1)
4567 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4568 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4569 need indexed accesses and the type used is the scalar type of the element
4570 being loaded or stored. */
4571 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4572 && !TARGET_ALTIVEC);
4574 /* Set the -mrecip options. */
4575 if (rs6000_recip_name)
4577 char *p = ASTRDUP (rs6000_recip_name);
4578 char *q;
4579 unsigned int mask, i;
4580 bool invert;
4582 while ((q = strtok (p, ",")) != NULL)
4584 p = NULL;
4585 if (*q == '!')
4587 invert = true;
4588 q++;
4590 else
4591 invert = false;
4593 if (!strcmp (q, "default"))
4594 mask = ((TARGET_RECIP_PRECISION)
4595 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4596 else
4598 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4599 if (!strcmp (q, recip_options[i].string))
4601 mask = recip_options[i].mask;
4602 break;
4605 if (i == ARRAY_SIZE (recip_options))
4607 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4608 invert = false;
4609 mask = 0;
4610 ret = false;
4614 if (invert)
4615 rs6000_recip_control &= ~mask;
4616 else
4617 rs6000_recip_control |= mask;
4621 /* Set the builtin mask of the various options used that could affect which
4622 builtins were used. In the past we used target_flags, but we've run out
4623 of bits, and some options are no longer in target_flags. */
4624 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4625 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4626 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4627 rs6000_builtin_mask);
4629 /* Initialize all of the registers. */
4630 rs6000_init_hard_regno_mode_ok (global_init_p);
4632 /* Save the initial options in case the user does function specific options */
4633 if (global_init_p)
4634 target_option_default_node = target_option_current_node
4635 = build_target_option_node (&global_options);
4637 /* If not explicitly specified via option, decide whether to generate the
4638 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4639 if (TARGET_LINK_STACK == -1)
4640 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4642 /* Deprecate use of -mno-speculate-indirect-jumps. */
4643 if (!rs6000_speculate_indirect_jumps)
4644 warning (0, "%qs is deprecated and not recommended in any circumstances",
4645 "-mno-speculate-indirect-jumps");
4647 return ret;
4650 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4651 define the target cpu type. */
4653 static void
4654 rs6000_option_override (void)
4656 (void) rs6000_option_override_internal (true);
4660 /* Implement targetm.vectorize.builtin_mask_for_load. */
4661 static tree
4662 rs6000_builtin_mask_for_load (void)
4664 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4665 if ((TARGET_ALTIVEC && !TARGET_VSX)
4666 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4667 return altivec_builtin_mask_for_load;
4668 else
4669 return 0;
4672 /* Implement LOOP_ALIGN. */
4673 align_flags
4674 rs6000_loop_align (rtx label)
4676 basic_block bb;
4677 int ninsns;
4679 /* Don't override loop alignment if -falign-loops was specified. */
4680 if (!can_override_loop_align)
4681 return align_loops;
4683 bb = BLOCK_FOR_INSN (label);
4684 ninsns = num_loop_insns(bb->loop_father);
4686 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4687 if (ninsns > 4 && ninsns <= 8
4688 && (rs6000_tune == PROCESSOR_POWER4
4689 || rs6000_tune == PROCESSOR_POWER5
4690 || rs6000_tune == PROCESSOR_POWER6
4691 || rs6000_tune == PROCESSOR_POWER7
4692 || rs6000_tune == PROCESSOR_POWER8))
4693 return align_flags (5);
4694 else
4695 return align_loops;
4698 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4699 after applying N number of iterations. This routine does not determine
4700 how may iterations are required to reach desired alignment. */
4702 static bool
4703 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4705 if (is_packed)
4706 return false;
4708 if (TARGET_32BIT)
4710 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4711 return true;
4713 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4714 return true;
4716 return false;
4718 else
4720 if (TARGET_MACHO)
4721 return false;
4723 /* Assuming that all other types are naturally aligned. CHECKME! */
4724 return true;
4728 /* Return true if the vector misalignment factor is supported by the
4729 target. */
4730 static bool
4731 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4732 const_tree type,
4733 int misalignment,
4734 bool is_packed)
4736 if (TARGET_VSX)
4738 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4739 return true;
4741 /* Return if movmisalign pattern is not supported for this mode. */
4742 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4743 return false;
4745 if (misalignment == -1)
4747 /* Misalignment factor is unknown at compile time but we know
4748 it's word aligned. */
4749 if (rs6000_vector_alignment_reachable (type, is_packed))
4751 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4753 if (element_size == 64 || element_size == 32)
4754 return true;
4757 return false;
4760 /* VSX supports word-aligned vector. */
4761 if (misalignment % 4 == 0)
4762 return true;
4764 return false;
4767 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4768 static int
4769 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4770 tree vectype, int misalign)
4772 unsigned elements;
4773 tree elem_type;
4775 switch (type_of_cost)
4777 case scalar_stmt:
4778 case scalar_store:
4779 case vector_stmt:
4780 case vector_store:
4781 case vec_to_scalar:
4782 case scalar_to_vec:
4783 case cond_branch_not_taken:
4784 return 1;
4785 case scalar_load:
4786 case vector_load:
4787 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4788 return 2;
4790 case vec_perm:
4791 /* Power7 has only one permute unit, make it a bit expensive. */
4792 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4793 return 3;
4794 else
4795 return 1;
4797 case vec_promote_demote:
4798 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4799 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4800 return 4;
4801 else
4802 return 1;
4804 case cond_branch_taken:
4805 return 3;
4807 case unaligned_load:
4808 case vector_gather_load:
4809 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4810 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4811 return 2;
4813 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4815 elements = TYPE_VECTOR_SUBPARTS (vectype);
4816 if (elements == 2)
4817 /* Double word aligned. */
4818 return 4;
4820 if (elements == 4)
4822 switch (misalign)
4824 case 8:
4825 /* Double word aligned. */
4826 return 4;
4828 case -1:
4829 /* Unknown misalignment. */
4830 case 4:
4831 case 12:
4832 /* Word aligned. */
4833 return 33;
4835 default:
4836 gcc_unreachable ();
4841 if (TARGET_ALTIVEC)
4842 /* Misaligned loads are not supported. */
4843 gcc_unreachable ();
4845 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4846 return 4;
4848 case unaligned_store:
4849 case vector_scatter_store:
4850 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4851 return 1;
4853 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4855 elements = TYPE_VECTOR_SUBPARTS (vectype);
4856 if (elements == 2)
4857 /* Double word aligned. */
4858 return 2;
4860 if (elements == 4)
4862 switch (misalign)
4864 case 8:
4865 /* Double word aligned. */
4866 return 2;
4868 case -1:
4869 /* Unknown misalignment. */
4870 case 4:
4871 case 12:
4872 /* Word aligned. */
4873 return 23;
4875 default:
4876 gcc_unreachable ();
4881 if (TARGET_ALTIVEC)
4882 /* Misaligned stores are not supported. */
4883 gcc_unreachable ();
4885 return 2;
4887 case vec_construct:
4888 /* This is a rough approximation assuming non-constant elements
4889 constructed into a vector via element insertion. FIXME:
4890 vec_construct is not granular enough for uniformly good
4891 decisions. If the initialization is a splat, this is
4892 cheaper than we estimate. Improve this someday. */
4893 elem_type = TREE_TYPE (vectype);
4894 /* 32-bit vectors loaded into registers are stored as double
4895 precision, so we need 2 permutes, 2 converts, and 1 merge
4896 to construct a vector of short floats from them. */
4897 if (SCALAR_FLOAT_TYPE_P (elem_type)
4898 && TYPE_PRECISION (elem_type) == 32)
4899 return 5;
4900 /* On POWER9, integer vector types are built up in GPRs and then
4901 use a direct move (2 cycles). For POWER8 this is even worse,
4902 as we need two direct moves and a merge, and the direct moves
4903 are five cycles. */
4904 else if (INTEGRAL_TYPE_P (elem_type))
4906 if (TARGET_P9_VECTOR)
4907 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4908 else
4909 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4911 else
4912 /* V2DFmode doesn't need a direct move. */
4913 return 2;
4915 default:
4916 gcc_unreachable ();
4920 /* Implement targetm.vectorize.preferred_simd_mode. */
4922 static machine_mode
4923 rs6000_preferred_simd_mode (scalar_mode mode)
4925 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4927 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4928 return vmode.require ();
4930 return word_mode;
/* Bookkeeping shared between the vectorizer cost hooks
   (rs6000_init_cost / rs6000_add_stmt_cost / rs6000_finish_cost).  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;	/* Loop being costed; NULL outside a loop.  */
  unsigned cost[3];		/* Indexed by vect_prologue/body/epilogue.  */
} rs6000_cost_data;
4939 /* Test for likely overcommitment of vector hardware resources. If a
4940 loop iteration is relatively large, and too large a percentage of
4941 instructions in the loop are vectorized, the cost model may not
4942 adequately reflect delays from unavailable vector resources.
4943 Penalize the loop body cost for this case. */
4945 static void
4946 rs6000_density_test (rs6000_cost_data *data)
4948 const int DENSITY_PCT_THRESHOLD = 85;
4949 const int DENSITY_SIZE_THRESHOLD = 70;
4950 const int DENSITY_PENALTY = 10;
4951 struct loop *loop = data->loop_info;
4952 basic_block *bbs = get_loop_body (loop);
4953 int nbbs = loop->num_nodes;
4954 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4955 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4956 int i, density_pct;
4958 for (i = 0; i < nbbs; i++)
4960 basic_block bb = bbs[i];
4961 gimple_stmt_iterator gsi;
4963 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4965 gimple *stmt = gsi_stmt (gsi);
4966 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4968 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4969 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4970 not_vec_cost++;
4974 free (bbs);
4975 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4977 if (density_pct > DENSITY_PCT_THRESHOLD
4978 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4980 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_NOTE, vect_location,
4983 "density %d%%, cost %d exceeds threshold, penalizing "
4984 "loop body cost by %d%%", density_pct,
4985 vec_cost + not_vec_cost, DENSITY_PENALTY);
4989 /* Implement targetm.vectorize.init_cost. */
4991 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
4992 instruction is needed by the vectorization. */
4993 static bool rs6000_vect_nonmem;
4995 static void *
4996 rs6000_init_cost (struct loop *loop_info)
4998 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4999 data->loop_info = loop_info;
5000 data->cost[vect_prologue] = 0;
5001 data->cost[vect_body] = 0;
5002 data->cost[vect_epilogue] = 0;
5003 rs6000_vect_nonmem = false;
5004 return data;
5007 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5008 For some statement, we would like to further fine-grain tweak the cost on
5009 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5010 information on statement operation codes etc. One typical case here is
5011 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5012 for scalar cost, but it should be priced more whatever transformed to either
5013 compare + branch or compare + isel instructions. */
5015 static unsigned
5016 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5017 struct _stmt_vec_info *stmt_info)
5019 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5020 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5022 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5023 if (subcode == COND_EXPR)
5024 return 2;
5027 return 0;
5030 /* Implement targetm.vectorize.add_stmt_cost. */
5032 static unsigned
5033 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5034 struct _stmt_vec_info *stmt_info, int misalign,
5035 enum vect_cost_model_location where)
5037 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5038 unsigned retval = 0;
5040 if (flag_vect_cost_model)
5042 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5043 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5044 misalign);
5045 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5046 /* Statements in an inner loop relative to the loop being
5047 vectorized are weighted more heavily. The value here is
5048 arbitrary and could potentially be improved with analysis. */
5049 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5050 count *= 50; /* FIXME. */
5052 retval = (unsigned) (count * stmt_cost);
5053 cost_data->cost[where] += retval;
5055 /* Check whether we're doing something other than just a copy loop.
5056 Not all such loops may be profitably vectorized; see
5057 rs6000_finish_cost. */
5058 if ((kind == vec_to_scalar || kind == vec_perm
5059 || kind == vec_promote_demote || kind == vec_construct
5060 || kind == scalar_to_vec)
5061 || (where == vect_body && kind == vector_stmt))
5062 rs6000_vect_nonmem = true;
5065 return retval;
5068 /* Implement targetm.vectorize.finish_cost. */
5070 static void
5071 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5072 unsigned *body_cost, unsigned *epilogue_cost)
5074 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5076 if (cost_data->loop_info)
5077 rs6000_density_test (cost_data);
5079 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5080 that require versioning for any reason. The vectorization is at
5081 best a wash inside the loop, and the versioning checks make
5082 profitability highly unlikely and potentially quite harmful. */
5083 if (cost_data->loop_info)
5085 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5086 if (!rs6000_vect_nonmem
5087 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5088 && LOOP_REQUIRES_VERSIONING (vec_info))
5089 cost_data->cost[vect_body] += 10000;
5092 *prologue_cost = cost_data->cost[vect_prologue];
5093 *body_cost = cost_data->cost[vect_body];
5094 *epilogue_cost = cost_data->cost[vect_epilogue];
/* Implement targetm.vectorize.destroy_cost_data.  Release the
   accumulator allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
5105 /* Implement targetm.loop_unroll_adjust. */
5107 static unsigned
5108 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5110 if (unroll_only_small_loops)
5112 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5113 example we may want to unroll very small loops more times (4 perhaps).
5114 We also should use a PARAM for this. */
5115 if (loop->ninsns <= 10)
5116 return MIN (2, nunroll);
5117 else
5118 return 0;
5121 return nunroll;
5124 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5125 library with vectorized intrinsics. */
5127 static tree
5128 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5129 tree type_in)
5131 char name[32];
5132 const char *suffix = NULL;
5133 tree fntype, new_fndecl, bdecl = NULL_TREE;
5134 int n_args = 1;
5135 const char *bname;
5136 machine_mode el_mode, in_mode;
5137 int n, in_n;
5139 /* Libmass is suitable for unsafe math only as it does not correctly support
5140 parts of IEEE with the required precision such as denormals. Only support
5141 it if we have VSX to use the simd d2 or f4 functions.
5142 XXX: Add variable length support. */
5143 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5144 return NULL_TREE;
5146 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5147 n = TYPE_VECTOR_SUBPARTS (type_out);
5148 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5149 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5150 if (el_mode != in_mode
5151 || n != in_n)
5152 return NULL_TREE;
5154 switch (fn)
5156 CASE_CFN_ATAN2:
5157 CASE_CFN_HYPOT:
5158 CASE_CFN_POW:
5159 n_args = 2;
5160 gcc_fallthrough ();
5162 CASE_CFN_ACOS:
5163 CASE_CFN_ACOSH:
5164 CASE_CFN_ASIN:
5165 CASE_CFN_ASINH:
5166 CASE_CFN_ATAN:
5167 CASE_CFN_ATANH:
5168 CASE_CFN_CBRT:
5169 CASE_CFN_COS:
5170 CASE_CFN_COSH:
5171 CASE_CFN_ERF:
5172 CASE_CFN_ERFC:
5173 CASE_CFN_EXP2:
5174 CASE_CFN_EXP:
5175 CASE_CFN_EXPM1:
5176 CASE_CFN_LGAMMA:
5177 CASE_CFN_LOG10:
5178 CASE_CFN_LOG1P:
5179 CASE_CFN_LOG2:
5180 CASE_CFN_LOG:
5181 CASE_CFN_SIN:
5182 CASE_CFN_SINH:
5183 CASE_CFN_SQRT:
5184 CASE_CFN_TAN:
5185 CASE_CFN_TANH:
5186 if (el_mode == DFmode && n == 2)
5188 bdecl = mathfn_built_in (double_type_node, fn);
5189 suffix = "d2"; /* pow -> powd2 */
5191 else if (el_mode == SFmode && n == 4)
5193 bdecl = mathfn_built_in (float_type_node, fn);
5194 suffix = "4"; /* powf -> powf4 */
5196 else
5197 return NULL_TREE;
5198 if (!bdecl)
5199 return NULL_TREE;
5200 break;
5202 default:
5203 return NULL_TREE;
5206 gcc_assert (suffix != NULL);
5207 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5208 if (!bname)
5209 return NULL_TREE;
5211 strcpy (name, bname + sizeof ("__builtin_") - 1);
5212 strcat (name, suffix);
5214 if (n_args == 1)
5215 fntype = build_function_type_list (type_out, type_in, NULL);
5216 else if (n_args == 2)
5217 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5218 else
5219 gcc_unreachable ();
5221 /* Build a function declaration for the vectorized function. */
5222 new_fndecl = build_decl (BUILTINS_LOCATION,
5223 FUNCTION_DECL, get_identifier (name), fntype);
5224 TREE_PUBLIC (new_fndecl) = 1;
5225 DECL_EXTERNAL (new_fndecl) = 1;
5226 DECL_IS_NOVOPS (new_fndecl) = 1;
5227 TREE_READONLY (new_fndecl) = 1;
5229 return new_fndecl;
5232 /* Returns a function decl for a vectorized version of the builtin function
5233 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5234 if it is not available. */
5236 static tree
5237 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5238 tree type_in)
5240 machine_mode in_mode, out_mode;
5241 int in_n, out_n;
5243 if (TARGET_DEBUG_BUILTIN)
5244 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5245 combined_fn_name (combined_fn (fn)),
5246 GET_MODE_NAME (TYPE_MODE (type_out)),
5247 GET_MODE_NAME (TYPE_MODE (type_in)));
5249 if (TREE_CODE (type_out) != VECTOR_TYPE
5250 || TREE_CODE (type_in) != VECTOR_TYPE)
5251 return NULL_TREE;
5253 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5254 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5255 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5256 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5258 switch (fn)
5260 CASE_CFN_COPYSIGN:
5261 if (VECTOR_UNIT_VSX_P (V2DFmode)
5262 && out_mode == DFmode && out_n == 2
5263 && in_mode == DFmode && in_n == 2)
5264 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5265 if (VECTOR_UNIT_VSX_P (V4SFmode)
5266 && out_mode == SFmode && out_n == 4
5267 && in_mode == SFmode && in_n == 4)
5268 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5269 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5270 && out_mode == SFmode && out_n == 4
5271 && in_mode == SFmode && in_n == 4)
5272 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5273 break;
5274 CASE_CFN_CEIL:
5275 if (VECTOR_UNIT_VSX_P (V2DFmode)
5276 && out_mode == DFmode && out_n == 2
5277 && in_mode == DFmode && in_n == 2)
5278 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5279 if (VECTOR_UNIT_VSX_P (V4SFmode)
5280 && out_mode == SFmode && out_n == 4
5281 && in_mode == SFmode && in_n == 4)
5282 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5283 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5284 && out_mode == SFmode && out_n == 4
5285 && in_mode == SFmode && in_n == 4)
5286 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5287 break;
5288 CASE_CFN_FLOOR:
5289 if (VECTOR_UNIT_VSX_P (V2DFmode)
5290 && out_mode == DFmode && out_n == 2
5291 && in_mode == DFmode && in_n == 2)
5292 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5293 if (VECTOR_UNIT_VSX_P (V4SFmode)
5294 && out_mode == SFmode && out_n == 4
5295 && in_mode == SFmode && in_n == 4)
5296 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5297 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5298 && out_mode == SFmode && out_n == 4
5299 && in_mode == SFmode && in_n == 4)
5300 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5301 break;
5302 CASE_CFN_FMA:
5303 if (VECTOR_UNIT_VSX_P (V2DFmode)
5304 && out_mode == DFmode && out_n == 2
5305 && in_mode == DFmode && in_n == 2)
5306 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5307 if (VECTOR_UNIT_VSX_P (V4SFmode)
5308 && out_mode == SFmode && out_n == 4
5309 && in_mode == SFmode && in_n == 4)
5310 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5311 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5312 && out_mode == SFmode && out_n == 4
5313 && in_mode == SFmode && in_n == 4)
5314 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5315 break;
5316 CASE_CFN_TRUNC:
5317 if (VECTOR_UNIT_VSX_P (V2DFmode)
5318 && out_mode == DFmode && out_n == 2
5319 && in_mode == DFmode && in_n == 2)
5320 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5321 if (VECTOR_UNIT_VSX_P (V4SFmode)
5322 && out_mode == SFmode && out_n == 4
5323 && in_mode == SFmode && in_n == 4)
5324 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5325 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5326 && out_mode == SFmode && out_n == 4
5327 && in_mode == SFmode && in_n == 4)
5328 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5329 break;
5330 CASE_CFN_NEARBYINT:
5331 if (VECTOR_UNIT_VSX_P (V2DFmode)
5332 && flag_unsafe_math_optimizations
5333 && out_mode == DFmode && out_n == 2
5334 && in_mode == DFmode && in_n == 2)
5335 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5336 if (VECTOR_UNIT_VSX_P (V4SFmode)
5337 && flag_unsafe_math_optimizations
5338 && out_mode == SFmode && out_n == 4
5339 && in_mode == SFmode && in_n == 4)
5340 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5341 break;
5342 CASE_CFN_RINT:
5343 if (VECTOR_UNIT_VSX_P (V2DFmode)
5344 && !flag_trapping_math
5345 && out_mode == DFmode && out_n == 2
5346 && in_mode == DFmode && in_n == 2)
5347 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5348 if (VECTOR_UNIT_VSX_P (V4SFmode)
5349 && !flag_trapping_math
5350 && out_mode == SFmode && out_n == 4
5351 && in_mode == SFmode && in_n == 4)
5352 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5353 break;
5354 default:
5355 break;
5358 /* Generate calls to libmass if appropriate. */
5359 if (rs6000_veclib_handler)
5360 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5362 return NULL_TREE;
5365 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5367 static tree
5368 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5369 tree type_in)
5371 machine_mode in_mode, out_mode;
5372 int in_n, out_n;
5374 if (TARGET_DEBUG_BUILTIN)
5375 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5376 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5377 GET_MODE_NAME (TYPE_MODE (type_out)),
5378 GET_MODE_NAME (TYPE_MODE (type_in)));
5380 if (TREE_CODE (type_out) != VECTOR_TYPE
5381 || TREE_CODE (type_in) != VECTOR_TYPE)
5382 return NULL_TREE;
5384 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5385 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5386 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5387 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5389 enum rs6000_builtins fn
5390 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5391 switch (fn)
5393 case RS6000_BUILTIN_RSQRTF:
5394 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5395 && out_mode == SFmode && out_n == 4
5396 && in_mode == SFmode && in_n == 4)
5397 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5398 break;
5399 case RS6000_BUILTIN_RSQRT:
5400 if (VECTOR_UNIT_VSX_P (V2DFmode)
5401 && out_mode == DFmode && out_n == 2
5402 && in_mode == DFmode && in_n == 2)
5403 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5404 break;
5405 case RS6000_BUILTIN_RECIPF:
5406 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5407 && out_mode == SFmode && out_n == 4
5408 && in_mode == SFmode && in_n == 4)
5409 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5410 break;
5411 case RS6000_BUILTIN_RECIP:
5412 if (VECTOR_UNIT_VSX_P (V2DFmode)
5413 && out_mode == DFmode && out_n == 2
5414 && in_mode == DFmode && in_n == 2)
5415 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5416 break;
5417 default:
5418 break;
5420 return NULL_TREE;
/* Default CPU string for rs6000*_file_start functions; set from
   TARGET_CPU_DEFAULT in rs6000_file_start.  */
static const char *rs6000_default_cpu;
#ifdef USING_ELFOS_H
/* The ".machine" cpu name for the current compilation, as computed by
   rs6000_machine_from_flags.  */
const char *rs6000_machine;

/* Map the enabled ISA flags to the most specific ".machine" cpu name.
   Each test asks whether FLAGS enables anything beyond the previous ISA
   level; the first hit, checked from newest to oldest, wins.  */

const char *
rs6000_machine_from_flags (void)
{
  HOST_WIDE_INT flags = rs6000_isa_flags;

  /* Disable the flags that should never influence the .machine selection.  */
  flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);

  if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
    return "future";
  if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
    return "power9";
  if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
    return "power8";
  if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
    return "power7";
  if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
    return "power6";
  if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
    return "power5";
  if ((flags & ISA_2_1_MASKS) != 0)
    return "power4";
  if ((flags & OPTION_MASK_POWERPC64) != 0)
    return "ppc64";
  return "ppc";
}

/* Emit the ".machine" directive for RS6000_MACHINE into the asm file.  */

void
emit_asm_machine (void)
{
  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
}
#endif
5463 /* Do anything needed at the start of the asm file. */
5465 static void
5466 rs6000_file_start (void)
5468 char buffer[80];
5469 const char *start = buffer;
5470 FILE *file = asm_out_file;
5472 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5474 default_file_start ();
5476 if (flag_verbose_asm)
5478 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5480 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5482 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5483 start = "";
5486 if (global_options_set.x_rs6000_cpu_index)
5488 fprintf (file, "%s -mcpu=%s", start,
5489 processor_target_table[rs6000_cpu_index].name);
5490 start = "";
5493 if (global_options_set.x_rs6000_tune_index)
5495 fprintf (file, "%s -mtune=%s", start,
5496 processor_target_table[rs6000_tune_index].name);
5497 start = "";
5500 if (PPC405_ERRATUM77)
5502 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5503 start = "";
5506 #ifdef USING_ELFOS_H
5507 switch (rs6000_sdata)
5509 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5510 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5511 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5512 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5515 if (rs6000_sdata && g_switch_value)
5517 fprintf (file, "%s -G %d", start,
5518 g_switch_value);
5519 start = "";
5521 #endif
5523 if (*start == '\0')
5524 putc ('\n', file);
5527 #ifdef USING_ELFOS_H
5528 rs6000_machine = rs6000_machine_from_flags ();
5529 emit_asm_machine ();
5530 #endif
5532 if (DEFAULT_ABI == ABI_ELFv2)
5533 fprintf (file, "\t.abiversion 2\n");
5537 /* Return nonzero if this function is known to have a null epilogue. */
5540 direct_return (void)
5542 if (reload_completed)
5544 rs6000_stack_t *info = rs6000_stack_info ();
5546 if (info->first_gp_reg_save == 32
5547 && info->first_fp_reg_save == 64
5548 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5549 && ! info->lr_save_p
5550 && ! info->cr_save_p
5551 && info->vrsave_size == 0
5552 && ! info->push_p)
5553 return 1;
5556 return 0;
5559 /* Helper for num_insns_constant. Calculate number of instructions to
5560 load VALUE to a single gpr using combinations of addi, addis, ori,
5561 oris and sldi instructions. */
5563 static int
5564 num_insns_constant_gpr (HOST_WIDE_INT value)
5566 /* signed constant loadable with addi */
5567 if (SIGNED_INTEGER_16BIT_P (value))
5568 return 1;
5570 /* constant loadable with addis */
5571 else if ((value & 0xffff) == 0
5572 && (value >> 31 == -1 || value >> 31 == 0))
5573 return 1;
5575 /* PADDI can support up to 34 bit signed integers. */
5576 else if (TARGET_PREFIXED_ADDR && SIGNED_INTEGER_34BIT_P (value))
5577 return 1;
5579 else if (TARGET_POWERPC64)
5581 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5582 HOST_WIDE_INT high = value >> 31;
5584 if (high == 0 || high == -1)
5585 return 2;
5587 high >>= 1;
5589 if (low == 0)
5590 return num_insns_constant_gpr (high) + 1;
5591 else if (high == 0)
5592 return num_insns_constant_gpr (low) + 1;
5593 else
5594 return (num_insns_constant_gpr (high)
5595 + num_insns_constant_gpr (low) + 1);
5598 else
5599 return 2;
5602 /* Helper for num_insns_constant. Allow constants formed by the
5603 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5604 and handle modes that require multiple gprs. */
5606 static int
5607 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5609 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5610 int total = 0;
5611 while (nregs-- > 0)
5613 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5614 int insns = num_insns_constant_gpr (low);
5615 if (insns > 2
5616 /* We won't get more than 2 from num_insns_constant_gpr
5617 except when TARGET_POWERPC64 and mode is DImode or
5618 wider, so the register mode must be DImode. */
5619 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5620 insns = 2;
5621 total += insns;
5622 value >>= BITS_PER_WORD;
5624 return total;
5627 /* Return the number of instructions it takes to form a constant in as
5628 many gprs are needed for MODE. */
5631 num_insns_constant (rtx op, machine_mode mode)
5633 HOST_WIDE_INT val;
5635 switch (GET_CODE (op))
5637 case CONST_INT:
5638 val = INTVAL (op);
5639 break;
5641 case CONST_WIDE_INT:
5643 int insns = 0;
5644 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5645 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5646 DImode);
5647 return insns;
5650 case CONST_DOUBLE:
5652 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5654 if (mode == SFmode || mode == SDmode)
5656 long l;
5658 if (mode == SDmode)
5659 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5660 else
5661 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5662 /* See the first define_split in rs6000.md handling a
5663 const_double_operand. */
5664 val = l;
5665 mode = SImode;
5667 else if (mode == DFmode || mode == DDmode)
5669 long l[2];
5671 if (mode == DDmode)
5672 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5673 else
5674 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5676 /* See the second (32-bit) and third (64-bit) define_split
5677 in rs6000.md handling a const_double_operand. */
5678 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5679 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5680 mode = DImode;
5682 else if (mode == TFmode || mode == TDmode
5683 || mode == KFmode || mode == IFmode)
5685 long l[4];
5686 int insns;
5688 if (mode == TDmode)
5689 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5690 else
5691 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5693 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5694 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5695 insns = num_insns_constant_multi (val, DImode);
5696 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5697 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5698 insns += num_insns_constant_multi (val, DImode);
5699 return insns;
5701 else
5702 gcc_unreachable ();
5704 break;
5706 default:
5707 gcc_unreachable ();
5710 return num_insns_constant_multi (val, mode);
5713 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5714 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5715 corresponding element of the vector, but for V4SFmode, the
5716 corresponding "float" is interpreted as an SImode integer. */
5718 HOST_WIDE_INT
5719 const_vector_elt_as_int (rtx op, unsigned int elt)
5721 rtx tmp;
5723 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5724 gcc_assert (GET_MODE (op) != V2DImode
5725 && GET_MODE (op) != V2DFmode);
5727 tmp = CONST_VECTOR_ELT (op, elt);
5728 if (GET_MODE (op) == V4SFmode)
5729 tmp = gen_lowpart (SImode, tmp);
5730 return INTVAL (tmp);
5733 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5734 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5735 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5736 all items are set to the same value and contain COPIES replicas of the
5737 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5738 operand and the others are set to the value of the operand's msb. */
5740 static bool
5741 vspltis_constant (rtx op, unsigned step, unsigned copies)
5743 machine_mode mode = GET_MODE (op);
5744 machine_mode inner = GET_MODE_INNER (mode);
5746 unsigned i;
5747 unsigned nunits;
5748 unsigned bitsize;
5749 unsigned mask;
5751 HOST_WIDE_INT val;
5752 HOST_WIDE_INT splat_val;
5753 HOST_WIDE_INT msb_val;
5755 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5756 return false;
5758 nunits = GET_MODE_NUNITS (mode);
5759 bitsize = GET_MODE_BITSIZE (inner);
5760 mask = GET_MODE_MASK (inner);
5762 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5763 splat_val = val;
5764 msb_val = val >= 0 ? 0 : -1;
5766 /* Construct the value to be splatted, if possible. If not, return 0. */
5767 for (i = 2; i <= copies; i *= 2)
5769 HOST_WIDE_INT small_val;
5770 bitsize /= 2;
5771 small_val = splat_val >> bitsize;
5772 mask >>= bitsize;
5773 if (splat_val != ((HOST_WIDE_INT)
5774 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5775 | (small_val & mask)))
5776 return false;
5777 splat_val = small_val;
5780 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5781 if (EASY_VECTOR_15 (splat_val))
5784 /* Also check if we can splat, and then add the result to itself. Do so if
5785 the value is positive, of if the splat instruction is using OP's mode;
5786 for splat_val < 0, the splat and the add should use the same mode. */
5787 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5788 && (splat_val >= 0 || (step == 1 && copies == 1)))
5791 /* Also check if are loading up the most significant bit which can be done by
5792 loading up -1 and shifting the value left by -1. */
5793 else if (EASY_VECTOR_MSB (splat_val, inner))
5796 else
5797 return false;
5799 /* Check if VAL is present in every STEP-th element, and the
5800 other elements are filled with its most significant bit. */
5801 for (i = 1; i < nunits; ++i)
5803 HOST_WIDE_INT desired_val;
5804 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5805 if ((i & (step - 1)) == 0)
5806 desired_val = val;
5807 else
5808 desired_val = msb_val;
5810 if (desired_val != const_vector_elt_as_int (op, elt))
5811 return false;
5814 return true;
5817 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
5818 instruction, filling in the bottom elements with 0 or -1.
5820 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5821 for the number of zeroes to shift in, or negative for the number of 0xff
5822 bytes to shift in.
5824 OP is a CONST_VECTOR. */
5827 vspltis_shifted (rtx op)
5829 machine_mode mode = GET_MODE (op);
5830 machine_mode inner = GET_MODE_INNER (mode);
5832 unsigned i, j;
5833 unsigned nunits;
5834 unsigned mask;
5836 HOST_WIDE_INT val;
5838 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5839 return false;
5841 /* We need to create pseudo registers to do the shift, so don't recognize
5842 shift vector constants after reload. */
5843 if (!can_create_pseudo_p ())
5844 return false;
5846 nunits = GET_MODE_NUNITS (mode);
5847 mask = GET_MODE_MASK (inner);
5849 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5851 /* Check if the value can really be the operand of a vspltis[bhw]. */
5852 if (EASY_VECTOR_15 (val))
5855 /* Also check if we are loading up the most significant bit which can be done
5856 by loading up -1 and shifting the value left by -1. */
5857 else if (EASY_VECTOR_MSB (val, inner))
5860 else
5861 return 0;
5863 /* Check if VAL is present in every STEP-th element until we find elements
5864 that are 0 or all 1 bits. */
5865 for (i = 1; i < nunits; ++i)
5867 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5868 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5870 /* If the value isn't the splat value, check for the remaining elements
5871 being 0/-1. */
5872 if (val != elt_val)
5874 if (elt_val == 0)
5876 for (j = i+1; j < nunits; ++j)
5878 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5879 if (const_vector_elt_as_int (op, elt2) != 0)
5880 return 0;
5883 return (nunits - i) * GET_MODE_SIZE (inner);
5886 else if ((elt_val & mask) == mask)
5888 for (j = i+1; j < nunits; ++j)
5890 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5891 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5892 return 0;
5895 return -((nunits - i) * GET_MODE_SIZE (inner));
5898 else
5899 return 0;
5903 /* If all elements are equal, we don't need to do VLSDOI. */
5904 return 0;
5908 /* Return true if OP is of the given MODE and can be synthesized
5909 with a vspltisb, vspltish or vspltisw. */
5911 bool
5912 easy_altivec_constant (rtx op, machine_mode mode)
5914 unsigned step, copies;
5916 if (mode == VOIDmode)
5917 mode = GET_MODE (op);
5918 else if (mode != GET_MODE (op))
5919 return false;
5921 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5922 constants. */
5923 if (mode == V2DFmode)
5924 return zero_constant (op, mode);
5926 else if (mode == V2DImode)
5928 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5929 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5930 return false;
5932 if (zero_constant (op, mode))
5933 return true;
5935 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5936 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5937 return true;
5939 return false;
5942 /* V1TImode is a special container for TImode. Ignore for now. */
5943 else if (mode == V1TImode)
5944 return false;
5946 /* Start with a vspltisw. */
5947 step = GET_MODE_NUNITS (mode) / 4;
5948 copies = 1;
5950 if (vspltis_constant (op, step, copies))
5951 return true;
5953 /* Then try with a vspltish. */
5954 if (step == 1)
5955 copies <<= 1;
5956 else
5957 step >>= 1;
5959 if (vspltis_constant (op, step, copies))
5960 return true;
5962 /* And finally a vspltisb. */
5963 if (step == 1)
5964 copies <<= 1;
5965 else
5966 step >>= 1;
5968 if (vspltis_constant (op, step, copies))
5969 return true;
5971 if (vspltis_shifted (op) != 0)
5972 return true;
5974 return false;
5977 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5978 result is OP. Abort if it is not possible. */
5981 gen_easy_altivec_constant (rtx op)
5983 machine_mode mode = GET_MODE (op);
5984 int nunits = GET_MODE_NUNITS (mode);
5985 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5986 unsigned step = nunits / 4;
5987 unsigned copies = 1;
5989 /* Start with a vspltisw. */
5990 if (vspltis_constant (op, step, copies))
5991 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5993 /* Then try with a vspltish. */
5994 if (step == 1)
5995 copies <<= 1;
5996 else
5997 step >>= 1;
5999 if (vspltis_constant (op, step, copies))
6000 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6002 /* And finally a vspltisb. */
6003 if (step == 1)
6004 copies <<= 1;
6005 else
6006 step >>= 1;
6008 if (vspltis_constant (op, step, copies))
6009 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6011 gcc_unreachable ();
6014 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6015 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6017 Return the number of instructions needed (1 or 2) into the address pointed
6018 via NUM_INSNS_PTR.
6020 Return the constant that is being split via CONSTANT_PTR. */
6022 bool
6023 xxspltib_constant_p (rtx op,
6024 machine_mode mode,
6025 int *num_insns_ptr,
6026 int *constant_ptr)
6028 size_t nunits = GET_MODE_NUNITS (mode);
6029 size_t i;
6030 HOST_WIDE_INT value;
6031 rtx element;
6033 /* Set the returned values to out of bound values. */
6034 *num_insns_ptr = -1;
6035 *constant_ptr = 256;
6037 if (!TARGET_P9_VECTOR)
6038 return false;
6040 if (mode == VOIDmode)
6041 mode = GET_MODE (op);
6043 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6044 return false;
6046 /* Handle (vec_duplicate <constant>). */
6047 if (GET_CODE (op) == VEC_DUPLICATE)
6049 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6050 && mode != V2DImode)
6051 return false;
6053 element = XEXP (op, 0);
6054 if (!CONST_INT_P (element))
6055 return false;
6057 value = INTVAL (element);
6058 if (!IN_RANGE (value, -128, 127))
6059 return false;
6062 /* Handle (const_vector [...]). */
6063 else if (GET_CODE (op) == CONST_VECTOR)
6065 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6066 && mode != V2DImode)
6067 return false;
6069 element = CONST_VECTOR_ELT (op, 0);
6070 if (!CONST_INT_P (element))
6071 return false;
6073 value = INTVAL (element);
6074 if (!IN_RANGE (value, -128, 127))
6075 return false;
6077 for (i = 1; i < nunits; i++)
6079 element = CONST_VECTOR_ELT (op, i);
6080 if (!CONST_INT_P (element))
6081 return false;
6083 if (value != INTVAL (element))
6084 return false;
6088 /* Handle integer constants being loaded into the upper part of the VSX
6089 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6090 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6091 else if (CONST_INT_P (op))
6093 if (!SCALAR_INT_MODE_P (mode))
6094 return false;
6096 value = INTVAL (op);
6097 if (!IN_RANGE (value, -128, 127))
6098 return false;
6100 if (!IN_RANGE (value, -1, 0))
6102 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6103 return false;
6105 if (EASY_VECTOR_15 (value))
6106 return false;
6110 else
6111 return false;
6113 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6114 sign extend. Special case 0/-1 to allow getting any VSX register instead
6115 of an Altivec register. */
6116 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6117 && EASY_VECTOR_15 (value))
6118 return false;
6120 /* Return # of instructions and the constant byte for XXSPLTIB. */
6121 if (mode == V16QImode)
6122 *num_insns_ptr = 1;
6124 else if (IN_RANGE (value, -1, 0))
6125 *num_insns_ptr = 1;
6127 else
6128 *num_insns_ptr = 2;
6130 *constant_ptr = (int) value;
6131 return true;
6134 const char *
6135 output_vec_const_move (rtx *operands)
6137 int shift;
6138 machine_mode mode;
6139 rtx dest, vec;
6141 dest = operands[0];
6142 vec = operands[1];
6143 mode = GET_MODE (dest);
6145 if (TARGET_VSX)
6147 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6148 int xxspltib_value = 256;
6149 int num_insns = -1;
6151 if (zero_constant (vec, mode))
6153 if (TARGET_P9_VECTOR)
6154 return "xxspltib %x0,0";
6156 else if (dest_vmx_p)
6157 return "vspltisw %0,0";
6159 else
6160 return "xxlxor %x0,%x0,%x0";
6163 if (all_ones_constant (vec, mode))
6165 if (TARGET_P9_VECTOR)
6166 return "xxspltib %x0,255";
6168 else if (dest_vmx_p)
6169 return "vspltisw %0,-1";
6171 else if (TARGET_P8_VECTOR)
6172 return "xxlorc %x0,%x0,%x0";
6174 else
6175 gcc_unreachable ();
6178 if (TARGET_P9_VECTOR
6179 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6181 if (num_insns == 1)
6183 operands[2] = GEN_INT (xxspltib_value & 0xff);
6184 return "xxspltib %x0,%2";
6187 return "#";
6191 if (TARGET_ALTIVEC)
6193 rtx splat_vec;
6195 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6196 if (zero_constant (vec, mode))
6197 return "vspltisw %0,0";
6199 if (all_ones_constant (vec, mode))
6200 return "vspltisw %0,-1";
6202 /* Do we need to construct a value using VSLDOI? */
6203 shift = vspltis_shifted (vec);
6204 if (shift != 0)
6205 return "#";
6207 splat_vec = gen_easy_altivec_constant (vec);
6208 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6209 operands[1] = XEXP (splat_vec, 0);
6210 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6211 return "#";
6213 switch (GET_MODE (splat_vec))
6215 case E_V4SImode:
6216 return "vspltisw %0,%1";
6218 case E_V8HImode:
6219 return "vspltish %0,%1";
6221 case E_V16QImode:
6222 return "vspltisb %0,%1";
6224 default:
6225 gcc_unreachable ();
6229 gcc_unreachable ();
6232 /* Initialize vector TARGET to VALS. */
6234 void
6235 rs6000_expand_vector_init (rtx target, rtx vals)
6237 machine_mode mode = GET_MODE (target);
6238 machine_mode inner_mode = GET_MODE_INNER (mode);
6239 int n_elts = GET_MODE_NUNITS (mode);
6240 int n_var = 0, one_var = -1;
6241 bool all_same = true, all_const_zero = true;
6242 rtx x, mem;
6243 int i;
6245 for (i = 0; i < n_elts; ++i)
6247 x = XVECEXP (vals, 0, i);
6248 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6249 ++n_var, one_var = i;
6250 else if (x != CONST0_RTX (inner_mode))
6251 all_const_zero = false;
6253 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6254 all_same = false;
6257 if (n_var == 0)
6259 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6260 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6261 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6263 /* Zero register. */
6264 emit_move_insn (target, CONST0_RTX (mode));
6265 return;
6267 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6269 /* Splat immediate. */
6270 emit_insn (gen_rtx_SET (target, const_vec));
6271 return;
6273 else
6275 /* Load from constant pool. */
6276 emit_move_insn (target, const_vec);
6277 return;
6281 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6282 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6284 rtx op[2];
6285 size_t i;
6286 size_t num_elements = all_same ? 1 : 2;
6287 for (i = 0; i < num_elements; i++)
6289 op[i] = XVECEXP (vals, 0, i);
6290 /* Just in case there is a SUBREG with a smaller mode, do a
6291 conversion. */
6292 if (GET_MODE (op[i]) != inner_mode)
6294 rtx tmp = gen_reg_rtx (inner_mode);
6295 convert_move (tmp, op[i], 0);
6296 op[i] = tmp;
6298 /* Allow load with splat double word. */
6299 else if (MEM_P (op[i]))
6301 if (!all_same)
6302 op[i] = force_reg (inner_mode, op[i]);
6304 else if (!REG_P (op[i]))
6305 op[i] = force_reg (inner_mode, op[i]);
6308 if (all_same)
6310 if (mode == V2DFmode)
6311 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6312 else
6313 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6315 else
6317 if (mode == V2DFmode)
6318 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6319 else
6320 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6322 return;
6325 /* Special case initializing vector int if we are on 64-bit systems with
6326 direct move or we have the ISA 3.0 instructions. */
6327 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6328 && TARGET_DIRECT_MOVE_64BIT)
6330 if (all_same)
6332 rtx element0 = XVECEXP (vals, 0, 0);
6333 if (MEM_P (element0))
6334 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6335 else
6336 element0 = force_reg (SImode, element0);
6338 if (TARGET_P9_VECTOR)
6339 emit_insn (gen_vsx_splat_v4si (target, element0));
6340 else
6342 rtx tmp = gen_reg_rtx (DImode);
6343 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6344 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6346 return;
6348 else
6350 rtx elements[4];
6351 size_t i;
6353 for (i = 0; i < 4; i++)
6354 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6356 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6357 elements[2], elements[3]));
6358 return;
6362 /* With single precision floating point on VSX, know that internally single
6363 precision is actually represented as a double, and either make 2 V2DF
6364 vectors, and convert these vectors to single precision, or do one
6365 conversion, and splat the result to the other elements. */
6366 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6368 if (all_same)
6370 rtx element0 = XVECEXP (vals, 0, 0);
6372 if (TARGET_P9_VECTOR)
6374 if (MEM_P (element0))
6375 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6377 emit_insn (gen_vsx_splat_v4sf (target, element0));
6380 else
6382 rtx freg = gen_reg_rtx (V4SFmode);
6383 rtx sreg = force_reg (SFmode, element0);
6384 rtx cvt = (TARGET_XSCVDPSPN
6385 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6386 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6388 emit_insn (cvt);
6389 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6390 const0_rtx));
6393 else
6395 rtx dbl_even = gen_reg_rtx (V2DFmode);
6396 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6397 rtx flt_even = gen_reg_rtx (V4SFmode);
6398 rtx flt_odd = gen_reg_rtx (V4SFmode);
6399 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6400 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6401 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6402 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6404 /* Use VMRGEW if we can instead of doing a permute. */
6405 if (TARGET_P8_VECTOR)
6407 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6408 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6409 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6410 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6411 if (BYTES_BIG_ENDIAN)
6412 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6413 else
6414 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6416 else
6418 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6419 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6420 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6421 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6422 rs6000_expand_extract_even (target, flt_even, flt_odd);
6425 return;
6428 /* Special case initializing vector short/char that are splats if we are on
6429 64-bit systems with direct move. */
6430 if (all_same && TARGET_DIRECT_MOVE_64BIT
6431 && (mode == V16QImode || mode == V8HImode))
6433 rtx op0 = XVECEXP (vals, 0, 0);
6434 rtx di_tmp = gen_reg_rtx (DImode);
6436 if (!REG_P (op0))
6437 op0 = force_reg (GET_MODE_INNER (mode), op0);
6439 if (mode == V16QImode)
6441 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6442 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6443 return;
6446 if (mode == V8HImode)
6448 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6449 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6450 return;
6454 /* Store value to stack temp. Load vector element. Splat. However, splat
6455 of 64-bit items is not supported on Altivec. */
6456 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6458 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6459 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6460 XVECEXP (vals, 0, 0));
6461 x = gen_rtx_UNSPEC (VOIDmode,
6462 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6463 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6464 gen_rtvec (2,
6465 gen_rtx_SET (target, mem),
6466 x)));
6467 x = gen_rtx_VEC_SELECT (inner_mode, target,
6468 gen_rtx_PARALLEL (VOIDmode,
6469 gen_rtvec (1, const0_rtx)));
6470 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6471 return;
6474 /* One field is non-constant. Load constant then overwrite
6475 varying field. */
6476 if (n_var == 1)
6478 rtx copy = copy_rtx (vals);
6480 /* Load constant part of vector, substitute neighboring value for
6481 varying element. */
6482 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6483 rs6000_expand_vector_init (target, copy);
6485 /* Insert variable. */
6486 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6487 return;
6490 /* Construct the vector in memory one field at a time
6491 and load the whole vector. */
6492 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6493 for (i = 0; i < n_elts; i++)
6494 emit_move_insn (adjust_address_nv (mem, inner_mode,
6495 i * GET_MODE_SIZE (inner_mode)),
6496 XVECEXP (vals, 0, i));
6497 emit_move_insn (target, mem);
6500 /* Set field ELT of TARGET to VAL. */
6502 void
6503 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6505 machine_mode mode = GET_MODE (target);
6506 machine_mode inner_mode = GET_MODE_INNER (mode);
6507 rtx reg = gen_reg_rtx (mode);
6508 rtx mask, mem, x;
6509 int width = GET_MODE_SIZE (inner_mode);
6510 int i;
6512 val = force_reg (GET_MODE (val), val);
6514 if (VECTOR_MEM_VSX_P (mode))
6516 rtx insn = NULL_RTX;
6517 rtx elt_rtx = GEN_INT (elt);
6519 if (mode == V2DFmode)
6520 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6522 else if (mode == V2DImode)
6523 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6525 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6527 if (mode == V4SImode)
6528 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6529 else if (mode == V8HImode)
6530 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6531 else if (mode == V16QImode)
6532 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6533 else if (mode == V4SFmode)
6534 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6537 if (insn)
6539 emit_insn (insn);
6540 return;
6544 /* Simplify setting single element vectors like V1TImode. */
6545 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6547 emit_move_insn (target, gen_lowpart (mode, val));
6548 return;
6551 /* Load single variable value. */
6552 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6553 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6554 x = gen_rtx_UNSPEC (VOIDmode,
6555 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6556 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6557 gen_rtvec (2,
6558 gen_rtx_SET (reg, mem),
6559 x)));
6561 /* Linear sequence. */
6562 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6563 for (i = 0; i < 16; ++i)
6564 XVECEXP (mask, 0, i) = GEN_INT (i);
6566 /* Set permute mask to insert element into target. */
6567 for (i = 0; i < width; ++i)
6568 XVECEXP (mask, 0, elt*width + i)
6569 = GEN_INT (i + 0x10);
6570 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6572 if (BYTES_BIG_ENDIAN)
6573 x = gen_rtx_UNSPEC (mode,
6574 gen_rtvec (3, target, reg,
6575 force_reg (V16QImode, x)),
6576 UNSPEC_VPERM);
6577 else
6579 if (TARGET_P9_VECTOR)
6580 x = gen_rtx_UNSPEC (mode,
6581 gen_rtvec (3, reg, target,
6582 force_reg (V16QImode, x)),
6583 UNSPEC_VPERMR);
6584 else
6586 /* Invert selector. We prefer to generate VNAND on P8 so
6587 that future fusion opportunities can kick in, but must
6588 generate VNOR elsewhere. */
6589 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6590 rtx iorx = (TARGET_P8_VECTOR
6591 ? gen_rtx_IOR (V16QImode, notx, notx)
6592 : gen_rtx_AND (V16QImode, notx, notx));
6593 rtx tmp = gen_reg_rtx (V16QImode);
6594 emit_insn (gen_rtx_SET (tmp, iorx));
6596 /* Permute with operands reversed and adjusted selector. */
6597 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6598 UNSPEC_VPERM);
6602 emit_insn (gen_rtx_SET (target, x));
6605 /* Extract field ELT from VEC into TARGET. */
6607 void
6608 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6610 machine_mode mode = GET_MODE (vec);
6611 machine_mode inner_mode = GET_MODE_INNER (mode);
6612 rtx mem;
6614 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6616 switch (mode)
6618 default:
6619 break;
6620 case E_V1TImode:
6621 emit_move_insn (target, gen_lowpart (TImode, vec));
6622 break;
6623 case E_V2DFmode:
6624 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6625 return;
6626 case E_V2DImode:
6627 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6628 return;
6629 case E_V4SFmode:
6630 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6631 return;
6632 case E_V16QImode:
6633 if (TARGET_DIRECT_MOVE_64BIT)
6635 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6636 return;
6638 else
6639 break;
6640 case E_V8HImode:
6641 if (TARGET_DIRECT_MOVE_64BIT)
6643 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6644 return;
6646 else
6647 break;
6648 case E_V4SImode:
6649 if (TARGET_DIRECT_MOVE_64BIT)
6651 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6652 return;
6654 break;
6657 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6658 && TARGET_DIRECT_MOVE_64BIT)
6660 if (GET_MODE (elt) != DImode)
6662 rtx tmp = gen_reg_rtx (DImode);
6663 convert_move (tmp, elt, 0);
6664 elt = tmp;
6666 else if (!REG_P (elt))
6667 elt = force_reg (DImode, elt);
6669 switch (mode)
6671 case E_V1TImode:
6672 emit_move_insn (target, gen_lowpart (TImode, vec));
6673 return;
6675 case E_V2DFmode:
6676 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6677 return;
6679 case E_V2DImode:
6680 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6681 return;
6683 case E_V4SFmode:
6684 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6685 return;
6687 case E_V4SImode:
6688 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6689 return;
6691 case E_V8HImode:
6692 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6693 return;
6695 case E_V16QImode:
6696 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6697 return;
6699 default:
6700 gcc_unreachable ();
6704 /* Allocate mode-sized buffer. */
6705 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6707 emit_move_insn (mem, vec);
6708 if (CONST_INT_P (elt))
6710 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6712 /* Add offset to field within buffer matching vector element. */
6713 mem = adjust_address_nv (mem, inner_mode,
6714 modulo_elt * GET_MODE_SIZE (inner_mode));
6715 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6717 else
6719 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6720 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6721 rtx new_addr = gen_reg_rtx (Pmode);
6723 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6724 if (ele_size > 1)
6725 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6726 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6727 new_addr = change_address (mem, inner_mode, new_addr);
6728 emit_move_insn (target, new_addr);
6732 /* Helper function to return an address mask based on a physical register. */
6734 static addr_mask_type
6735 hard_reg_and_mode_to_addr_mask (rtx reg, machine_mode mode)
6737 unsigned int r = reg_or_subregno (reg);
6738 addr_mask_type addr_mask;
6740 gcc_assert (HARD_REGISTER_NUM_P (r));
6741 if (INT_REGNO_P (r))
6742 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
6744 else if (FP_REGNO_P (r))
6745 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
6747 else if (ALTIVEC_REGNO_P (r))
6748 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
6750 else
6751 gcc_unreachable ();
6753 return addr_mask;
6756 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6757 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6758 temporary (BASE_TMP) to fixup the address. Return the new memory address
6759 that is valid for reads or writes to a given register (SCALAR_REG). */
6762 rs6000_adjust_vec_address (rtx scalar_reg,
6763 rtx mem,
6764 rtx element,
6765 rtx base_tmp,
6766 machine_mode scalar_mode)
6768 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6769 rtx addr = XEXP (mem, 0);
6770 rtx element_offset;
6771 rtx new_addr;
6772 bool valid_addr_p;
6774 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6775 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6777 /* Calculate what we need to add to the address to get the element
6778 address. */
6779 if (CONST_INT_P (element))
6780 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6781 else
6783 int byte_shift = exact_log2 (scalar_size);
6784 gcc_assert (byte_shift >= 0);
6786 if (byte_shift == 0)
6787 element_offset = element;
6789 else
6791 if (TARGET_POWERPC64)
6792 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6793 else
6794 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6796 element_offset = base_tmp;
6800 /* Create the new address pointing to the element within the vector. If we
6801 are adding 0, we don't have to change the address. */
6802 if (element_offset == const0_rtx)
6803 new_addr = addr;
6805 /* A simple indirect address can be converted into a reg + offset
6806 address. */
6807 else if (REG_P (addr) || SUBREG_P (addr))
6808 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6810 /* Optimize D-FORM addresses with constant offset with a constant element, to
6811 include the element offset in the address directly. */
6812 else if (GET_CODE (addr) == PLUS)
6814 rtx op0 = XEXP (addr, 0);
6815 rtx op1 = XEXP (addr, 1);
6816 rtx insn;
6818 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6819 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6821 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6822 rtx offset_rtx = GEN_INT (offset);
6824 /* 16-bit offset. */
6825 if (SIGNED_INTEGER_16BIT_P (offset)
6826 && (scalar_size < 8 || (offset & 0x3) == 0))
6827 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6829 /* 34-bit offset if we have prefixed addresses. */
6830 else if (TARGET_PREFIXED_ADDR && SIGNED_INTEGER_34BIT_P (offset))
6831 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6833 else
6835 /* Offset overflowed, move offset to the temporary (which will
6836 likely be split), and do X-FORM addressing. */
6837 emit_move_insn (base_tmp, offset_rtx);
6838 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6841 else
6843 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6844 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6846 /* Note, ADDI requires the register being added to be a base
6847 register. If the register was R0, load it up into the temporary
6848 and do the add. */
6849 if (op1_reg_p
6850 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6852 insn = gen_add3_insn (base_tmp, op1, element_offset);
6853 gcc_assert (insn != NULL_RTX);
6854 emit_insn (insn);
6857 else if (ele_reg_p
6858 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6860 insn = gen_add3_insn (base_tmp, element_offset, op1);
6861 gcc_assert (insn != NULL_RTX);
6862 emit_insn (insn);
6865 /* Make sure we don't overwrite the temporary if the element being
6866 extracted is variable, and we've put the offset into base_tmp
6867 previously. */
6868 else if (reg_mentioned_p (base_tmp, element_offset))
6869 emit_insn (gen_add2_insn (base_tmp, op1));
6871 else
6873 emit_move_insn (base_tmp, op1);
6874 emit_insn (gen_add2_insn (base_tmp, element_offset));
6877 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6881 else
6883 emit_move_insn (base_tmp, addr);
6884 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6887 /* If we have a PLUS, we need to see whether the particular register class
6888 allows for D-FORM or X-FORM addressing. */
6889 if (GET_CODE (new_addr) == PLUS)
6891 rtx op1 = XEXP (new_addr, 1);
6892 addr_mask_type addr_mask
6893 = hard_reg_and_mode_to_addr_mask (scalar_reg, scalar_mode);
6895 if (REG_P (op1) || SUBREG_P (op1))
6896 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6897 else
6898 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6901 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6902 valid_addr_p = true;
6904 else
6905 valid_addr_p = false;
6907 if (!valid_addr_p)
6909 emit_move_insn (base_tmp, new_addr);
6910 new_addr = base_tmp;
6913 return change_address (mem, scalar_mode, new_addr);
6916 /* Split a variable vec_extract operation into the component instructions. */
6918 void
6919 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6920 rtx tmp_altivec)
6922 machine_mode mode = GET_MODE (src);
6923 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6924 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6925 int byte_shift = exact_log2 (scalar_size);
6927 gcc_assert (byte_shift >= 0);
6929 /* If we are given a memory address, optimize to load just the element. We
6930 don't have to adjust the vector element number on little endian
6931 systems. */
6932 if (MEM_P (src))
6934 int num_elements = GET_MODE_NUNITS (mode);
6935 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6937 emit_insn (gen_anddi3 (element, element, num_ele_m1));
6938 gcc_assert (REG_P (tmp_gpr));
6939 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
6940 tmp_gpr, scalar_mode));
6941 return;
6944 else if (REG_P (src) || SUBREG_P (src))
6946 int num_elements = GET_MODE_NUNITS (mode);
6947 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6948 int bit_shift = 7 - exact_log2 (num_elements);
6949 rtx element2;
6950 unsigned int dest_regno = reg_or_subregno (dest);
6951 unsigned int src_regno = reg_or_subregno (src);
6952 unsigned int element_regno = reg_or_subregno (element);
6954 gcc_assert (REG_P (tmp_gpr));
6956 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6957 a general purpose register. */
6958 if (TARGET_P9_VECTOR
6959 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6960 && INT_REGNO_P (dest_regno)
6961 && ALTIVEC_REGNO_P (src_regno)
6962 && INT_REGNO_P (element_regno))
6964 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6965 rtx element_si = gen_rtx_REG (SImode, element_regno);
6967 if (mode == V16QImode)
6968 emit_insn (BYTES_BIG_ENDIAN
6969 ? gen_vextublx (dest_si, element_si, src)
6970 : gen_vextubrx (dest_si, element_si, src));
6972 else if (mode == V8HImode)
6974 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6975 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6976 emit_insn (BYTES_BIG_ENDIAN
6977 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6978 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6982 else
6984 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6985 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
6986 emit_insn (BYTES_BIG_ENDIAN
6987 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
6988 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
6991 return;
6995 gcc_assert (REG_P (tmp_altivec));
6997 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
6998 an XOR, otherwise we need to subtract. The shift amount is so VSLO
6999 will shift the element into the upper position (adding 3 to convert a
7000 byte shift into a bit shift). */
7001 if (scalar_size == 8)
7003 if (!BYTES_BIG_ENDIAN)
7005 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7006 element2 = tmp_gpr;
7008 else
7009 element2 = element;
7011 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7012 bit. */
7013 emit_insn (gen_rtx_SET (tmp_gpr,
7014 gen_rtx_AND (DImode,
7015 gen_rtx_ASHIFT (DImode,
7016 element2,
7017 GEN_INT (6)),
7018 GEN_INT (64))));
7020 else
7022 if (!BYTES_BIG_ENDIAN)
7024 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7026 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7027 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7028 element2 = tmp_gpr;
7030 else
7031 element2 = element;
7033 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7036 /* Get the value into the lower byte of the Altivec register where VSLO
7037 expects it. */
7038 if (TARGET_P9_VECTOR)
7039 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7040 else if (can_create_pseudo_p ())
7041 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7042 else
7044 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7045 emit_move_insn (tmp_di, tmp_gpr);
7046 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7049 /* Do the VSLO to get the value into the final location. */
7050 switch (mode)
7052 case E_V2DFmode:
7053 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7054 return;
7056 case E_V2DImode:
7057 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7058 return;
7060 case E_V4SFmode:
7062 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7063 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7064 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7065 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7066 tmp_altivec));
7068 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7069 return;
7072 case E_V4SImode:
7073 case E_V8HImode:
7074 case E_V16QImode:
7076 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7077 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7078 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7079 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7080 tmp_altivec));
7081 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7082 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7083 GEN_INT (64 - bits_in_element)));
7084 return;
7087 default:
7088 gcc_unreachable ();
7091 return;
7093 else
7094 gcc_unreachable ();
7097 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7098 selects whether the alignment is abi mandated, optional, or
7099 both abi and optional alignment. */
7101 unsigned int
7102 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7104 if (how != align_opt)
7106 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7107 align = 128;
7110 if (how != align_abi)
7112 if (TREE_CODE (type) == ARRAY_TYPE
7113 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7115 if (align < BITS_PER_WORD)
7116 align = BITS_PER_WORD;
7120 return align;
7123 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7124 instructions simply ignore the low bits; VSX memory instructions
7125 are aligned to 4 or 8 bytes. */
7127 static bool
7128 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7130 return (STRICT_ALIGNMENT
7131 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7132 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7133 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7134 && (int) align < VECTOR_ALIGN (mode)))));
7137 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7139 bool
7140 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7142 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7144 if (computed != 128)
7146 static bool warned;
7147 if (!warned && warn_psabi)
7149 warned = true;
7150 inform (input_location,
7151 "the layout of aggregates containing vectors with"
7152 " %d-byte alignment has changed in GCC 5",
7153 computed / BITS_PER_UNIT);
7156 /* In current GCC there is no special case. */
7157 return false;
7160 return false;
7163 /* AIX increases natural record alignment to doubleword if the first
7164 field is an FP double while the FP fields remain word aligned. */
7166 unsigned int
7167 rs6000_special_round_type_align (tree type, unsigned int computed,
7168 unsigned int specified)
7170 unsigned int align = MAX (computed, specified);
7171 tree field = TYPE_FIELDS (type);
7173 /* Skip all non field decls */
7174 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7175 field = DECL_CHAIN (field);
7177 if (field != NULL && field != type)
7179 type = TREE_TYPE (field);
7180 while (TREE_CODE (type) == ARRAY_TYPE)
7181 type = TREE_TYPE (type);
7183 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7184 align = MAX (align, 64);
7187 return align;
/* Darwin increases record alignment to the natural alignment of
   the first field.

   TYPE is the record type; COMPUTED and SPECIFIED are the alignments
   computed so far and specified by the user; the larger of the two is
   the starting point.  Returns the possibly-increased alignment.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  /* A packed record never gains extra alignment.  */
  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do {
    tree field = TYPE_FIELDS (type);
    /* Skip all non field decls */
    while (field != NULL && TREE_CODE (field) != FIELD_DECL)
      field = DECL_CHAIN (field);
    if (! field)
      break;
    /* A packed field does not contribute any extra alignment.  */
    if (DECL_PACKED (field))
      return align;
    type = TREE_TYPE (field);
    /* Descend through arrays to the element type.  */
    while (TREE_CODE (type) == ARRAY_TYPE)
      type = TREE_TYPE (type);
  } while (AGGREGATE_TYPE_P (type));

  /* Once a non-aggregate leading field type is found, its natural
     alignment becomes a lower bound for the record's alignment.  */
  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.

   Only meaningful for ELF targets with -msdata in effect and the V.4
   ABI; returns 0 otherwise.  OP may be a plain SYMBOL_REF or a
   (const (plus (symbol_ref) (const_int))).  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  /* The symbol itself must have been flagged as small data.  */
  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
7268 /* Return true if either operand is a general purpose register. */
7270 bool
7271 gpr_or_gpr_p (rtx op0, rtx op1)
7273 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7274 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7277 /* Return true if this is a move direct operation between GPR registers and
7278 floating point/VSX registers. */
7280 bool
7281 direct_move_p (rtx op0, rtx op1)
7283 if (!REG_P (op0) || !REG_P (op1))
7284 return false;
7286 if (!TARGET_DIRECT_MOVE)
7287 return false;
7289 int regno0 = REGNO (op0);
7290 int regno1 = REGNO (op1);
7291 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7292 return false;
7294 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7295 return true;
7297 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7298 return true;
7300 return false;
/* Return true if ADDR is an acceptable address for a quad memory
   operation of mode MODE (either LQ/STQ for general purpose registers,
   or LXV/STXV for vector registers under ISA 3.0).  STRICT selects
   strict register checking for the base register.

   NOTE(review): the previous version of this comment described a GPR_P
   parameter that this function does not take.  */

bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  /* Quad memory operations are exactly 16 bytes.  */
  if (GET_MODE_SIZE (mode) != 16)
    return false;

  /* A plain register indirect address is always acceptable.  */
  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  /* Otherwise only reg + aligned constant offset is allowed.  */
  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
7344 /* Return true if this is a load or store quad operation. This function does
7345 not handle the atomic quad memory instructions. */
7347 bool
7348 quad_load_store_p (rtx op0, rtx op1)
7350 bool ret;
7352 if (!TARGET_QUAD_MEMORY)
7353 ret = false;
7355 else if (REG_P (op0) && MEM_P (op1))
7356 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7357 && quad_memory_operand (op1, GET_MODE (op1))
7358 && !reg_overlap_mentioned_p (op0, op1));
7360 else if (MEM_P (op0) && REG_P (op1))
7361 ret = (quad_memory_operand (op0, GET_MODE (op0))
7362 && quad_int_reg_operand (op1, GET_MODE (op1)));
7364 else
7365 ret = false;
7367 if (TARGET_DEBUG_ADDR)
7369 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7370 ret ? "true" : "false");
7371 debug_rtx (gen_rtx_SET (op0, op1));
7374 return ret;
7377 /* Given an address, return a constant offset term if one exists. */
7379 static rtx
7380 address_offset (rtx op)
7382 if (GET_CODE (op) == PRE_INC
7383 || GET_CODE (op) == PRE_DEC)
7384 op = XEXP (op, 0);
7385 else if (GET_CODE (op) == PRE_MODIFY
7386 || GET_CODE (op) == LO_SUM)
7387 op = XEXP (op, 1);
7389 if (GET_CODE (op) == CONST)
7390 op = XEXP (op, 0);
7392 if (GET_CODE (op) == PLUS)
7393 op = XEXP (op, 1);
7395 if (CONST_INT_P (op))
7396 return op;
7398 return NULL_RTX;
/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
   the mode.  If we can't find (or don't know) the alignment of the symbol
   we assume (optimistically) that it's sufficiently aligned [??? maybe we
   should be pessimistic].  Offsets are validated in the same way as for
   reg + offset.  */
static bool
darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
{
  /* We should not get here with this.  */
  gcc_checking_assert (! mode_supports_dq_form (mode));

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Look through a Mach-O PIC offset unspec to the wrapped expression.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
    x = XVECEXP (x, 0, 0);

  rtx sym = NULL_RTX;
  unsigned HOST_WIDE_INT offset = 0;

  /* Decompose X into an optional symbol and an optional offset.  */
  if (GET_CODE (x) == PLUS)
    {
      sym = XEXP (x, 0);
      if (! SYMBOL_REF_P (sym))
	return false;
      if (!CONST_INT_P (XEXP (x, 1)))
	return false;
      offset = INTVAL (XEXP (x, 1));
    }
  else if (SYMBOL_REF_P (x))
    sym = x;
  else if (CONST_INT_P (x))
    offset = INTVAL (x);
  else if (GET_CODE (x) == LABEL_REF)
    offset = 0; // We assume code labels are Pmode aligned
  else
    return false; // not sure what we have here.

  /* If we don't know the alignment of the thing to which the symbol refers,
     we assume optimistically it is "enough".
     ??? maybe we should be pessimistic instead.  */
  unsigned align = 0;

  if (sym)
    {
      tree decl = SYMBOL_REF_DECL (sym);
#if TARGET_MACHO
      if (MACHO_SYMBOL_INDIRECTION_P (sym))
	/* The decl in an indirection symbol is the original one, which might
	   be less aligned than the indirection.  Our indirections are always
	   pointer-aligned.  */
	;
      else
#endif
      if (decl && DECL_ALIGN (decl))
	align = DECL_ALIGN_UNIT (decl);
    }

  /* EXTRA accounts for multi-register accesses beyond the first word.  */
  unsigned int extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!TARGET_POWERPC64)
	extra = 4;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    default:
      break;
    }

  /* We only care if the access(es) would cause a change to the high part.  */
  offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  /* No constant offset term means nothing further to validate.  */
  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  /* 64-bit GPR load/store (DS-form) requires a word-aligned offset.  */
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  /* EXTRA is the number of bytes accessed past the first word.  */
  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  /* No constant offset term means nothing further to validate.  */
  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  /* DS-form requires the bottom two bits of the offset to be zero,
     regardless of whether this is a 32-bit or 64-bit target.  */
  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  /* EXTRA is the number of bytes accessed past the first word.  */
  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
7605 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7607 static bool
7608 reg_offset_addressing_ok_p (machine_mode mode)
7610 switch (mode)
7612 case E_V16QImode:
7613 case E_V8HImode:
7614 case E_V4SFmode:
7615 case E_V4SImode:
7616 case E_V2DFmode:
7617 case E_V2DImode:
7618 case E_V1TImode:
7619 case E_TImode:
7620 case E_TFmode:
7621 case E_KFmode:
7622 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7623 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7624 a vector mode, if we want to use the VSX registers to move it around,
7625 we need to restrict ourselves to reg+reg addressing. Similarly for
7626 IEEE 128-bit floating point that is passed in a single vector
7627 register. */
7628 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7629 return mode_supports_dq_form (mode);
7630 break;
7632 case E_SDmode:
7633 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7634 addressing for the LFIWZX and STFIWX instructions. */
7635 if (TARGET_NO_SDMODE_STACK)
7636 return false;
7637 break;
7639 default:
7640 break;
7643 return true;
7646 static bool
7647 virtual_stack_registers_memory_p (rtx op)
7649 int regnum;
7651 if (REG_P (op))
7652 regnum = REGNO (op);
7654 else if (GET_CODE (op) == PLUS
7655 && REG_P (XEXP (op, 0))
7656 && CONST_INT_P (XEXP (op, 1)))
7657 regnum = REGNO (XEXP (op, 0));
7659 else
7660 return false;
7662 return (regnum >= FIRST_VIRTUAL_REGISTER
7663 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
/* Return true if a MODE sized memory accesses to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (!SYMBOL_REF_P (op))
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_dq_form (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()..  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.
     The guaranteed alignment is further limited by the lowest set bit
     of OFFSET: an access at that offset can only rely on that much
     alignment relative to the symbol.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
7760 static bool
7761 constant_pool_expr_p (rtx op)
7763 rtx base, offset;
7765 split_const (op, &base, &offset);
7766 return (SYMBOL_REF_P (base)
7767 && CONSTANT_POOL_ADDRESS_P (base)
7768 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  Returns the rtx for the TOC reference; for
   -mcmodel=medium/large after register allocation this is a lo_sum, and
   the HIGH part is emitted as an insn.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	{
	  fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		   GET_RTX_NAME (GET_CODE (symbol)));
	  debug_rtx (symbol);
	}
    }

  /* After register allocation, a use of the TOC register must be
     recorded explicitly.  */
  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  /* Large/medium code model after RA: split into HIGH + LO_SUM.  */
  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  /* Separate an optional constant offset from the tocrel base.  */
  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

  /* The base must be the UNSPEC_TOCREL produced by create_TOC_reference,
     with the TOC register as its second operand.  */
  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}
7865 /* Return true if X is a constant pool address, and also for cmodel=medium
7866 if X is a toc-relative address known to be offsettable within MODE. */
7868 bool
7869 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7870 bool strict)
7872 const_rtx tocrel_base, tocrel_offset;
7873 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7874 && (TARGET_CMODEL != CMODEL_MEDIUM
7875 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7876 || mode == QImode
7877 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7878 INTVAL (tocrel_offset), mode)));
7881 static bool
7882 legitimate_small_data_p (machine_mode mode, rtx x)
7884 return (DEFAULT_ABI == ABI_V4
7885 && !flag_pic && !TARGET_TOC
7886 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7887 && small_data_operand (x, mode));
/* Return true if X is a legitimate reg + const_int address for MODE.
   STRICT selects strict register checking.  WORST_CASE requests the
   conservative answer for multi-register modes whose register class
   is not yet known (see the mode switch below).  */

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* DQ-form modes have their own stricter offset rules.  */
  if (mode_supports_dq_form (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (!CONST_INT_P (XEXP (x, 1)))
    return false;

  /* EXTRA accounts for the bytes of a multi-register access past the
     first register, since each part needs its own valid offset.  */
  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  /* Prefixed instructions allow a much larger (34-bit) offset range.  */
  if (TARGET_PREFIXED_ADDR)
    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
  else
    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
7957 bool
7958 legitimate_indexed_address_p (rtx x, int strict)
7960 rtx op0, op1;
7962 if (GET_CODE (x) != PLUS)
7963 return false;
7965 op0 = XEXP (x, 0);
7966 op1 = XEXP (x, 1);
7968 return (REG_P (op0) && REG_P (op1)
7969 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7970 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7971 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7972 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7975 bool
7976 avoiding_indexed_address_p (machine_mode mode)
7978 /* Avoid indexed addressing for modes that have non-indexed
7979 load/store instruction forms. */
7980 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7983 bool
7984 legitimate_indirect_address_p (rtx x, int strict)
7986 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7989 bool
7990 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7992 if (!TARGET_MACHO || !flag_pic
7993 || mode != SImode || !MEM_P (x))
7994 return false;
7995 x = XEXP (x, 0);
7997 if (GET_CODE (x) != LO_SUM)
7998 return false;
7999 if (!REG_P (XEXP (x, 0)))
8000 return false;
8001 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8002 return false;
8003 x = XEXP (x, 1);
8005 return CONSTANT_P (x);
/* Return true if X is a valid LO_SUM address for MODE.  STRICT selects
   strict register checking of the base register.  */

static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_dq_form (mode))
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF || TARGET_MACHO)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says opposite.  In most cases, LRA through different
	 transformations can generate correct code for address reloads.
	 It cannot manage only some LO_SUM cases.  So we need to add
	 code here saying that some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      /* Multi-register accesses could step past the low 16 bits.  */
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }

  return false;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  /* Modes restricted to reg+reg (or DQ-form) addressing: force both
     operands into registers rather than building reg+offset forms.  */
  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_dq_form (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }
  if (SYMBOL_REF_P (x))
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case E_TFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
    case E_IFmode:
    case E_KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  /* reg + out-of-range constant: split the constant into a high part
     added into a register and a low 16-bit displacement.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;
      low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 GEN_INT (high_int)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  /* reg + non-constant: force the other operand into a register and
     use indexed addressing, unless that is being avoided.  */
  else if (GET_CODE (x) == PLUS
	   && REG_P (XEXP (x, 0))
	   && !CONST_INT_P (XEXP (x, 1))
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  /* 32-bit no-TOC constants: materialize via HIGH/LO_SUM.  */
  else if ((TARGET_ELF
#if TARGET_MACHO
	    || !MACHO_DYNAMIC_NO_PIC_P
#endif
	    )
	   && TARGET_32BIT
	   && TARGET_NO_TOC_OR_PCREL
	   && !flag_pic
	   && !CONST_INT_P (x)
	   && !CONST_WIDE_INT_P (x)
	   && !CONST_DOUBLE_P (x)
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (Pmode, reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  /* Eligible constant pool entries become TOC references.  */
  else if (TARGET_TOC
	   && SYMBOL_REF_P (x)
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}
/* Debug version of rs6000_legitimize_address.
   Wraps the real function, capturing any insns it emits in a sequence
   so they can be dumped before being re-emitted.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  /* Collect any insns emitted during legitimization so we can print
     them and then emit them for real below.  */
  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}
8240 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8241 We need to emit DTP-relative relocations. */
8243 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8244 static void
8245 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8247 switch (size)
8249 case 4:
8250 fputs ("\t.long\t", file);
8251 break;
8252 case 8:
8253 fputs (DOUBLE_INT_ASM_OP, file);
8254 break;
8255 default:
8256 gcc_unreachable ();
8258 output_addr_const (file, x);
8259 if (TARGET_ELF)
8260 fputs ("@dtprel+0x8000", file);
8261 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8263 switch (SYMBOL_REF_TLS_MODEL (x))
8265 case 0:
8266 break;
8267 case TLS_MODEL_LOCAL_EXEC:
8268 fputs ("@le", file);
8269 break;
8270 case TLS_MODEL_INITIAL_EXEC:
8271 fputs ("@ie", file);
8272 break;
8273 case TLS_MODEL_GLOBAL_DYNAMIC:
8274 case TLS_MODEL_LOCAL_DYNAMIC:
8275 fputs ("@m", file);
8276 break;
8277 default:
8278 gcc_unreachable ();
8283 /* Return true if X is a symbol that refers to real (rather than emulated)
8284 TLS. */
8286 static bool
8287 rs6000_real_tls_symbol_ref_p (rtx x)
8289 return (SYMBOL_REF_P (x)
8290 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8293 /* In the name of slightly smaller debug output, and to cater to
8294 general assembler lossage, recognize various UNSPEC sequences
8295 and turn them back into a direct symbol reference. */
8297 static rtx
8298 rs6000_delegitimize_address (rtx orig_x)
8300 rtx x, y, offset;
8302 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8303 orig_x = XVECEXP (orig_x, 0, 0);
8305 orig_x = delegitimize_mem_from_attrs (orig_x);
8307 x = orig_x;
8308 if (MEM_P (x))
8309 x = XEXP (x, 0);
8311 y = x;
8312 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8313 y = XEXP (y, 1);
8315 offset = NULL_RTX;
8316 if (GET_CODE (y) == PLUS
8317 && GET_MODE (y) == Pmode
8318 && CONST_INT_P (XEXP (y, 1)))
8320 offset = XEXP (y, 1);
8321 y = XEXP (y, 0);
8324 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8326 y = XVECEXP (y, 0, 0);
8328 #ifdef HAVE_AS_TLS
8329 /* Do not associate thread-local symbols with the original
8330 constant pool symbol. */
8331 if (TARGET_XCOFF
8332 && SYMBOL_REF_P (y)
8333 && CONSTANT_POOL_ADDRESS_P (y)
8334 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8335 return orig_x;
8336 #endif
8338 if (offset != NULL_RTX)
8339 y = gen_rtx_PLUS (Pmode, y, offset);
8340 if (!MEM_P (orig_x))
8341 return y;
8342 else
8343 return replace_equiv_address_nv (orig_x, y);
8346 if (TARGET_MACHO
8347 && GET_CODE (orig_x) == LO_SUM
8348 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8350 y = XEXP (XEXP (orig_x, 1), 0);
8351 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8352 return XVECEXP (y, 0, 0);
8355 return orig_x;
8358 /* Return true if X shouldn't be emitted into the debug info.
8359 The linker doesn't like .toc section references from
8360 .debug_* sections, so reject .toc section symbols. */
8362 static bool
8363 rs6000_const_not_ok_for_debug_p (rtx x)
8365 if (GET_CODE (x) == UNSPEC)
8366 return true;
8367 if (SYMBOL_REF_P (x)
8368 && CONSTANT_POOL_ADDRESS_P (x))
8370 rtx c = get_pool_constant (x);
8371 machine_mode cmode = get_pool_mode (x);
8372 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8373 return true;
8376 return false;
8379 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8381 static bool
8382 rs6000_legitimate_combined_insn (rtx_insn *insn)
8384 int icode = INSN_CODE (insn);
8386 /* Reject creating doloop insns. Combine should not be allowed
8387 to create these for a number of reasons:
8388 1) In a nested loop, if combine creates one of these in an
8389 outer loop and the register allocator happens to allocate ctr
8390 to the outer loop insn, then the inner loop can't use ctr.
8391 Inner loops ought to be more highly optimized.
8392 2) Combine often wants to create one of these from what was
8393 originally a three insn sequence, first combining the three
8394 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8395 allocated ctr, the splitter takes use back to the three insn
8396 sequence. It's better to stop combine at the two insn
8397 sequence.
8398 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8399 insns, the register allocator sometimes uses floating point
8400 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8401 jump insn and output reloads are not implemented for jumps,
8402 the ctrsi/ctrdi splitters need to handle all possible cases.
8403 That's a pain, and it gets to be seriously difficult when a
8404 splitter that runs after reload needs memory to transfer from
8405 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8406 for the difficult case. It's better to not create problems
8407 in the first place. */
8408 if (icode != CODE_FOR_nothing
8409 && (icode == CODE_FOR_bdz_si
8410 || icode == CODE_FOR_bdz_di
8411 || icode == CODE_FOR_bdnz_si
8412 || icode == CODE_FOR_bdnz_di
8413 || icode == CODE_FOR_bdztf_si
8414 || icode == CODE_FOR_bdztf_di
8415 || icode == CODE_FOR_bdnztf_si
8416 || icode == CODE_FOR_bdnztf_di))
8417 return false;
8419 return true;
8422 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8424 static GTY(()) rtx rs6000_tls_symbol;
8425 static rtx
8426 rs6000_tls_get_addr (void)
8428 if (!rs6000_tls_symbol)
8429 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8431 return rs6000_tls_symbol;
8434 /* Construct the SYMBOL_REF for TLS GOT references. */
8436 static GTY(()) rtx rs6000_got_symbol;
8438 rs6000_got_sym (void)
8440 if (!rs6000_got_symbol)
8442 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8443 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8444 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8447 return rs6000_got_symbol;
8450 /* AIX Thread-Local Address support. */
8452 static rtx
8453 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8455 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8456 const char *name;
8457 char *tlsname;
8459 name = XSTR (addr, 0);
8460 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8461 or the symbol will be in TLS private data section. */
8462 if (name[strlen (name) - 1] != ']'
8463 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8464 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8466 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8467 strcpy (tlsname, name);
8468 strcat (tlsname,
8469 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8470 tlsaddr = copy_rtx (addr);
8471 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8473 else
8474 tlsaddr = addr;
8476 /* Place addr into TOC constant pool. */
8477 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8479 /* Output the TOC entry and create the MEM referencing the value. */
8480 if (constant_pool_expr_p (XEXP (sym, 0))
8481 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8483 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8484 mem = gen_const_mem (Pmode, tocref);
8485 set_mem_alias_set (mem, get_TOC_alias_set ());
8487 else
8488 return sym;
8490 /* Use global-dynamic for local-dynamic. */
8491 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8492 || model == TLS_MODEL_LOCAL_DYNAMIC)
8494 /* Create new TOC reference for @m symbol. */
8495 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8496 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8497 strcpy (tlsname, "*LCM");
8498 strcat (tlsname, name + 3);
8499 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8500 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8501 tocref = create_TOC_reference (modaddr, NULL_RTX);
8502 rtx modmem = gen_const_mem (Pmode, tocref);
8503 set_mem_alias_set (modmem, get_TOC_alias_set ());
8505 rtx modreg = gen_reg_rtx (Pmode);
8506 emit_insn (gen_rtx_SET (modreg, modmem));
8508 tmpreg = gen_reg_rtx (Pmode);
8509 emit_insn (gen_rtx_SET (tmpreg, mem));
8511 dest = gen_reg_rtx (Pmode);
8512 if (TARGET_32BIT)
8513 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8514 else
8515 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8516 return dest;
8518 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8519 else if (TARGET_32BIT)
8521 tlsreg = gen_reg_rtx (SImode);
8522 emit_insn (gen_tls_get_tpointer (tlsreg));
8524 else
8525 tlsreg = gen_rtx_REG (DImode, 13);
8527 /* Load the TOC value into temporary register. */
8528 tmpreg = gen_reg_rtx (Pmode);
8529 emit_insn (gen_rtx_SET (tmpreg, mem));
8530 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8531 gen_rtx_MINUS (Pmode, addr, tlsreg));
8533 /* Add TOC symbol value to TLS pointer. */
8534 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8536 return dest;
8539 /* Passes the tls arg value for global dynamic and local dynamic
8540 emit_library_call_value in rs6000_legitimize_tls_address to
8541 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8542 marker relocs put on __tls_get_addr calls. */
8543 static rtx global_tlsarg;
8545 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8546 this (thread-local) address. */
8548 static rtx
8549 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8551 rtx dest, insn;
8553 if (TARGET_XCOFF)
8554 return rs6000_legitimize_tls_address_aix (addr, model);
8556 dest = gen_reg_rtx (Pmode);
8557 if (model == TLS_MODEL_LOCAL_EXEC
8558 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8560 rtx tlsreg;
8562 if (TARGET_64BIT)
8564 tlsreg = gen_rtx_REG (Pmode, 13);
8565 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8567 else
8569 tlsreg = gen_rtx_REG (Pmode, 2);
8570 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8572 emit_insn (insn);
8574 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8576 rtx tlsreg, tmp;
8578 tmp = gen_reg_rtx (Pmode);
8579 if (TARGET_64BIT)
8581 tlsreg = gen_rtx_REG (Pmode, 13);
8582 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8584 else
8586 tlsreg = gen_rtx_REG (Pmode, 2);
8587 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8589 emit_insn (insn);
8590 if (TARGET_64BIT)
8591 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8592 else
8593 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8594 emit_insn (insn);
8596 else
8598 rtx got, tga, tmp1, tmp2;
8600 /* We currently use relocations like @got@tlsgd for tls, which
8601 means the linker will handle allocation of tls entries, placing
8602 them in the .got section. So use a pointer to the .got section,
8603 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8604 or to secondary GOT sections used by 32-bit -fPIC. */
8605 if (rs6000_pcrel_p (cfun))
8606 got = const0_rtx;
8607 else if (TARGET_64BIT)
8608 got = gen_rtx_REG (Pmode, 2);
8609 else
8611 if (flag_pic == 1)
8612 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8613 else
8615 rtx gsym = rs6000_got_sym ();
8616 got = gen_reg_rtx (Pmode);
8617 if (flag_pic == 0)
8618 rs6000_emit_move (got, gsym, Pmode);
8619 else
8621 rtx mem, lab;
8623 tmp1 = gen_reg_rtx (Pmode);
8624 tmp2 = gen_reg_rtx (Pmode);
8625 mem = gen_const_mem (Pmode, tmp1);
8626 lab = gen_label_rtx ();
8627 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8628 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8629 if (TARGET_LINK_STACK)
8630 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8631 emit_move_insn (tmp2, mem);
8632 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8633 set_unique_reg_note (last, REG_EQUAL, gsym);
8638 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8640 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8641 UNSPEC_TLSGD);
8642 tga = rs6000_tls_get_addr ();
8643 rtx argreg = gen_rtx_REG (Pmode, 3);
8644 emit_insn (gen_rtx_SET (argreg, arg));
8645 global_tlsarg = arg;
8646 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8647 global_tlsarg = NULL_RTX;
8649 /* Make a note so that the result of this call can be CSEd. */
8650 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8651 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8652 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8654 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8656 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8657 tga = rs6000_tls_get_addr ();
8658 tmp1 = gen_reg_rtx (Pmode);
8659 rtx argreg = gen_rtx_REG (Pmode, 3);
8660 emit_insn (gen_rtx_SET (argreg, arg));
8661 global_tlsarg = arg;
8662 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8663 global_tlsarg = NULL_RTX;
8665 /* Make a note so that the result of this call can be CSEd. */
8666 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8667 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8668 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8670 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8672 if (TARGET_64BIT)
8673 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8674 else
8675 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8677 else if (rs6000_tls_size == 32)
8679 tmp2 = gen_reg_rtx (Pmode);
8680 if (TARGET_64BIT)
8681 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8682 else
8683 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8684 emit_insn (insn);
8685 if (TARGET_64BIT)
8686 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8687 else
8688 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8690 else
8692 tmp2 = gen_reg_rtx (Pmode);
8693 if (TARGET_64BIT)
8694 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8695 else
8696 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8697 emit_insn (insn);
8698 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8700 emit_insn (insn);
8702 else
8704 /* IE, or 64-bit offset LE. */
8705 tmp2 = gen_reg_rtx (Pmode);
8706 if (TARGET_64BIT)
8707 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8708 else
8709 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8710 emit_insn (insn);
8711 if (rs6000_pcrel_p (cfun))
8713 if (TARGET_64BIT)
8714 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8715 else
8716 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8718 else if (TARGET_64BIT)
8719 insn = gen_tls_tls_64 (dest, tmp2, addr);
8720 else
8721 insn = gen_tls_tls_32 (dest, tmp2, addr);
8722 emit_insn (insn);
8726 return dest;
8729 /* Only create the global variable for the stack protect guard if we are using
8730 the global flavor of that guard. */
8731 static tree
8732 rs6000_init_stack_protect_guard (void)
8734 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8735 return default_stack_protect_guard ();
8737 return NULL_TREE;
8740 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8742 static bool
8743 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8745 if (GET_CODE (x) == HIGH
8746 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8747 return true;
8749 /* A TLS symbol in the TOC cannot contain a sum. */
8750 if (GET_CODE (x) == CONST
8751 && GET_CODE (XEXP (x, 0)) == PLUS
8752 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8753 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8754 return true;
8756 /* Do not place an ELF TLS symbol in the constant pool. */
8757 return TARGET_ELF && tls_referenced_p (x);
8760 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8761 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8762 can be addressed relative to the toc pointer. */
8764 static bool
8765 use_toc_relative_ref (rtx sym, machine_mode mode)
8767 return ((constant_pool_expr_p (sym)
8768 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8769 get_pool_mode (sym)))
8770 || (TARGET_CMODEL == CMODEL_MEDIUM
8771 && SYMBOL_REF_LOCAL_P (sym)
8772 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8775 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8776 that is a valid memory address for an instruction.
8777 The MODE argument is the machine mode for the MEM expression
8778 that wants to use this address.
8780 On the RS/6000, there are four valid address: a SYMBOL_REF that
8781 refers to a constant pool entry of an address (or the sum of it
8782 plus a constant), a short (16-bit signed) constant plus a register,
8783 the sum of two registers, or a register indirect, possibly with an
8784 auto-increment. For DFmode, DDmode and DImode with a constant plus
8785 register, we must ensure that both words are addressable or PowerPC64
8786 with offset word aligned.
8788 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8789 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8790 because adjacent memory cells are accessed by adding word-sized offsets
8791 during assembly output. */
8792 static bool
8793 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8795 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8796 bool quad_offset_p = mode_supports_dq_form (mode);
8798 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8799 if (VECTOR_MEM_ALTIVEC_P (mode)
8800 && GET_CODE (x) == AND
8801 && CONST_INT_P (XEXP (x, 1))
8802 && INTVAL (XEXP (x, 1)) == -16)
8803 x = XEXP (x, 0);
8805 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8806 return 0;
8807 if (legitimate_indirect_address_p (x, reg_ok_strict))
8808 return 1;
8809 if (TARGET_UPDATE
8810 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8811 && mode_supports_pre_incdec_p (mode)
8812 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8813 return 1;
8815 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8816 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8817 return 1;
8819 /* Handle restricted vector d-form offsets in ISA 3.0. */
8820 if (quad_offset_p)
8822 if (quad_address_p (x, mode, reg_ok_strict))
8823 return 1;
8825 else if (virtual_stack_registers_memory_p (x))
8826 return 1;
8828 else if (reg_offset_p)
8830 if (legitimate_small_data_p (mode, x))
8831 return 1;
8832 if (legitimate_constant_pool_address_p (x, mode,
8833 reg_ok_strict || lra_in_progress))
8834 return 1;
8837 /* For TImode, if we have TImode in VSX registers, only allow register
8838 indirect addresses. This will allow the values to go in either GPRs
8839 or VSX registers without reloading. The vector types would tend to
8840 go into VSX registers, so we allow REG+REG, while TImode seems
8841 somewhat split, in that some uses are GPR based, and some VSX based. */
8842 /* FIXME: We could loosen this by changing the following to
8843 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8844 but currently we cannot allow REG+REG addressing for TImode. See
8845 PR72827 for complete details on how this ends up hoodwinking DSE. */
8846 if (mode == TImode && TARGET_VSX)
8847 return 0;
8848 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8849 if (! reg_ok_strict
8850 && reg_offset_p
8851 && GET_CODE (x) == PLUS
8852 && REG_P (XEXP (x, 0))
8853 && (XEXP (x, 0) == virtual_stack_vars_rtx
8854 || XEXP (x, 0) == arg_pointer_rtx)
8855 && CONST_INT_P (XEXP (x, 1)))
8856 return 1;
8857 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8858 return 1;
8859 if (!FLOAT128_2REG_P (mode)
8860 && (TARGET_HARD_FLOAT
8861 || TARGET_POWERPC64
8862 || (mode != DFmode && mode != DDmode))
8863 && (TARGET_POWERPC64 || mode != DImode)
8864 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8865 && mode != PTImode
8866 && !avoiding_indexed_address_p (mode)
8867 && legitimate_indexed_address_p (x, reg_ok_strict))
8868 return 1;
8869 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8870 && mode_supports_pre_modify_p (mode)
8871 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8872 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8873 reg_ok_strict, false)
8874 || (!avoiding_indexed_address_p (mode)
8875 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8876 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8878 /* There is no prefixed version of the load/store with update. */
8879 rtx addr = XEXP (x, 1);
8880 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8882 if (reg_offset_p && !quad_offset_p
8883 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8884 return 1;
8885 return 0;
8888 /* Debug version of rs6000_legitimate_address_p. */
8889 static bool
8890 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8891 bool reg_ok_strict)
8893 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8894 fprintf (stderr,
8895 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8896 "strict = %d, reload = %s, code = %s\n",
8897 ret ? "true" : "false",
8898 GET_MODE_NAME (mode),
8899 reg_ok_strict,
8900 (reload_completed ? "after" : "before"),
8901 GET_RTX_NAME (GET_CODE (x)));
8902 debug_rtx (x);
8904 return ret;
8907 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8909 static bool
8910 rs6000_mode_dependent_address_p (const_rtx addr,
8911 addr_space_t as ATTRIBUTE_UNUSED)
8913 return rs6000_mode_dependent_address_ptr (addr);
8916 /* Go to LABEL if ADDR (a legitimate address expression)
8917 has an effect that depends on the machine mode it is used for.
8919 On the RS/6000 this is true of all integral offsets (since AltiVec
8920 and VSX modes don't allow them) or is a pre-increment or decrement.
8922 ??? Except that due to conceptual problems in offsettable_address_p
8923 we can't really report the problems of integral offsets. So leave
8924 this assuming that the adjustable offset must be valid for the
8925 sub-words of a TFmode operand, which is what we had before. */
8927 static bool
8928 rs6000_mode_dependent_address (const_rtx addr)
8930 switch (GET_CODE (addr))
8932 case PLUS:
8933 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8934 is considered a legitimate address before reload, so there
8935 are no offset restrictions in that case. Note that this
8936 condition is safe in strict mode because any address involving
8937 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8938 been rejected as illegitimate. */
8939 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8940 && XEXP (addr, 0) != arg_pointer_rtx
8941 && CONST_INT_P (XEXP (addr, 1)))
8943 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8944 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8945 if (TARGET_PREFIXED_ADDR)
8946 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8947 else
8948 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8950 break;
8952 case LO_SUM:
8953 /* Anything in the constant pool is sufficiently aligned that
8954 all bytes have the same high part address. */
8955 return !legitimate_constant_pool_address_p (addr, QImode, false);
8957 /* Auto-increment cases are now treated generically in recog.c. */
8958 case PRE_MODIFY:
8959 return TARGET_UPDATE;
8961 /* AND is only allowed in Altivec loads. */
8962 case AND:
8963 return true;
8965 default:
8966 break;
8969 return false;
8972 /* Debug version of rs6000_mode_dependent_address. */
8973 static bool
8974 rs6000_debug_mode_dependent_address (const_rtx addr)
8976 bool ret = rs6000_mode_dependent_address (addr);
8978 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8979 ret ? "true" : "false");
8980 debug_rtx (addr);
8982 return ret;
8985 /* Implement FIND_BASE_TERM. */
8988 rs6000_find_base_term (rtx op)
8990 rtx base;
8992 base = op;
8993 if (GET_CODE (base) == CONST)
8994 base = XEXP (base, 0);
8995 if (GET_CODE (base) == PLUS)
8996 base = XEXP (base, 0);
8997 if (GET_CODE (base) == UNSPEC)
8998 switch (XINT (base, 1))
9000 case UNSPEC_TOCREL:
9001 case UNSPEC_MACHOPIC_OFFSET:
9002 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9003 for aliasing purposes. */
9004 return XVECEXP (base, 0, 0);
9007 return op;
9010 /* More elaborate version of recog's offsettable_memref_p predicate
9011 that works around the ??? note of rs6000_mode_dependent_address.
9012 In particular it accepts
9014 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9016 in 32-bit mode, that the recog predicate rejects. */
9018 static bool
9019 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9021 bool worst_case;
9023 if (!MEM_P (op))
9024 return false;
9026 /* First mimic offsettable_memref_p. */
9027 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9028 return true;
9030 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9031 the latter predicate knows nothing about the mode of the memory
9032 reference and, therefore, assumes that it is the largest supported
9033 mode (TFmode). As a consequence, legitimate offsettable memory
9034 references are rejected. rs6000_legitimate_offset_address_p contains
9035 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9036 at least with a little bit of help here given that we know the
9037 actual registers used. */
9038 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9039 || GET_MODE_SIZE (reg_mode) == 4);
9040 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9041 strict, worst_case);
9044 /* Determine the reassociation width to be used in reassociate_bb.
9045 This takes into account how many parallel operations we
9046 can actually do of a given type, and also the latency.
9048 int add/sub 6/cycle
9049 mul 2/cycle
9050 vect add/sub/mul 2/cycle
9051 fp add/sub/mul 2/cycle
9052 dfp 1/cycle
9055 static int
9056 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9057 machine_mode mode)
9059 switch (rs6000_tune)
9061 case PROCESSOR_POWER8:
9062 case PROCESSOR_POWER9:
9063 case PROCESSOR_FUTURE:
9064 if (DECIMAL_FLOAT_MODE_P (mode))
9065 return 1;
9066 if (VECTOR_MODE_P (mode))
9067 return 4;
9068 if (INTEGRAL_MODE_P (mode))
9069 return 1;
9070 if (FLOAT_MODE_P (mode))
9071 return 4;
9072 break;
9073 default:
9074 break;
9076 return 1;
9079 /* Change register usage conditional on target flags. */
9080 static void
9081 rs6000_conditional_register_usage (void)
9083 int i;
9085 if (TARGET_DEBUG_TARGET)
9086 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9088 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9089 if (TARGET_64BIT)
9090 fixed_regs[13] = call_used_regs[13] = 1;
9092 /* Conditionally disable FPRs. */
9093 if (TARGET_SOFT_FLOAT)
9094 for (i = 32; i < 64; i++)
9095 fixed_regs[i] = call_used_regs[i] = 1;
9097 /* The TOC register is not killed across calls in a way that is
9098 visible to the compiler. */
9099 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9100 call_used_regs[2] = 0;
9102 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9103 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9105 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9106 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9107 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9109 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9110 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9111 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9113 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9114 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9116 if (!TARGET_ALTIVEC && !TARGET_VSX)
9118 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9119 fixed_regs[i] = call_used_regs[i] = 1;
9120 call_used_regs[VRSAVE_REGNO] = 1;
9123 if (TARGET_ALTIVEC || TARGET_VSX)
9124 global_regs[VSCR_REGNO] = 1;
9126 if (TARGET_ALTIVEC_ABI)
9128 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9129 call_used_regs[i] = 1;
9131 /* AIX reserves VR20:31 in non-extended ABI mode. */
9132 if (TARGET_XCOFF)
9133 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9134 fixed_regs[i] = call_used_regs[i] = 1;
9139 /* Output insns to set DEST equal to the constant SOURCE as a series of
9140 lis, ori and shl instructions and return TRUE. */
9142 bool
9143 rs6000_emit_set_const (rtx dest, rtx source)
9145 machine_mode mode = GET_MODE (dest);
9146 rtx temp, set;
9147 rtx_insn *insn;
9148 HOST_WIDE_INT c;
9150 gcc_checking_assert (CONST_INT_P (source));
9151 c = INTVAL (source);
9152 switch (mode)
9154 case E_QImode:
9155 case E_HImode:
9156 emit_insn (gen_rtx_SET (dest, source));
9157 return true;
9159 case E_SImode:
9160 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9162 emit_insn (gen_rtx_SET (copy_rtx (temp),
9163 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9164 emit_insn (gen_rtx_SET (dest,
9165 gen_rtx_IOR (SImode, copy_rtx (temp),
9166 GEN_INT (c & 0xffff))));
9167 break;
9169 case E_DImode:
9170 if (!TARGET_POWERPC64)
9172 rtx hi, lo;
9174 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9175 DImode);
9176 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9177 DImode);
9178 emit_move_insn (hi, GEN_INT (c >> 32));
9179 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9180 emit_move_insn (lo, GEN_INT (c));
9182 else
9183 rs6000_emit_set_long_const (dest, c);
9184 break;
9186 default:
9187 gcc_unreachable ();
9190 insn = get_last_insn ();
9191 set = single_set (insn);
9192 if (! CONSTANT_P (SET_SRC (set)))
9193 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9195 return true;
9198 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9199 Output insns to set DEST equal to the constant C as a series of
9200 lis, ori and shl instructions. */
9202 static void
9203 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9205 rtx temp;
9206 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9208 ud1 = c & 0xffff;
9209 c = c >> 16;
9210 ud2 = c & 0xffff;
9211 c = c >> 16;
9212 ud3 = c & 0xffff;
9213 c = c >> 16;
9214 ud4 = c & 0xffff;
9216 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9217 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9218 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9220 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9221 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9223 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9225 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9226 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9227 if (ud1 != 0)
9228 emit_move_insn (dest,
9229 gen_rtx_IOR (DImode, copy_rtx (temp),
9230 GEN_INT (ud1)));
9232 else if (ud3 == 0 && ud4 == 0)
9234 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9236 gcc_assert (ud2 & 0x8000);
9237 emit_move_insn (copy_rtx (temp),
9238 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9239 if (ud1 != 0)
9240 emit_move_insn (copy_rtx (temp),
9241 gen_rtx_IOR (DImode, copy_rtx (temp),
9242 GEN_INT (ud1)));
9243 emit_move_insn (dest,
9244 gen_rtx_ZERO_EXTEND (DImode,
9245 gen_lowpart (SImode,
9246 copy_rtx (temp))));
9248 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9249 || (ud4 == 0 && ! (ud3 & 0x8000)))
9251 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9253 emit_move_insn (copy_rtx (temp),
9254 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9255 if (ud2 != 0)
9256 emit_move_insn (copy_rtx (temp),
9257 gen_rtx_IOR (DImode, copy_rtx (temp),
9258 GEN_INT (ud2)));
9259 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9260 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9261 GEN_INT (16)));
9262 if (ud1 != 0)
9263 emit_move_insn (dest,
9264 gen_rtx_IOR (DImode, copy_rtx (temp),
9265 GEN_INT (ud1)));
9267 else
9269 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9271 emit_move_insn (copy_rtx (temp),
9272 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9273 if (ud3 != 0)
9274 emit_move_insn (copy_rtx (temp),
9275 gen_rtx_IOR (DImode, copy_rtx (temp),
9276 GEN_INT (ud3)));
9278 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9279 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9280 GEN_INT (32)));
9281 if (ud2 != 0)
9282 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9283 gen_rtx_IOR (DImode, copy_rtx (temp),
9284 GEN_INT (ud2 << 16)));
9285 if (ud1 != 0)
9286 emit_move_insn (dest,
9287 gen_rtx_IOR (DImode, copy_rtx (temp),
9288 GEN_INT (ud1)));
9292 /* Helper for the following. Get rid of [r+r] memory refs
9293 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9295 static void
9296 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9298 if (MEM_P (operands[0])
9299 && !REG_P (XEXP (operands[0], 0))
9300 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9301 GET_MODE (operands[0]), false))
9302 operands[0]
9303 = replace_equiv_address (operands[0],
9304 copy_addr_to_reg (XEXP (operands[0], 0)));
9306 if (MEM_P (operands[1])
9307 && !REG_P (XEXP (operands[1], 0))
9308 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9309 GET_MODE (operands[1]), false))
9310 operands[1]
9311 = replace_equiv_address (operands[1],
9312 copy_addr_to_reg (XEXP (operands[1], 0)));
9315 /* Generate a vector of constants to permute MODE for a little-endian
9316 storage operation by swapping the two halves of a vector. */
9317 static rtvec
9318 rs6000_const_vec (machine_mode mode)
9320 int i, subparts;
9321 rtvec v;
9323 switch (mode)
9325 case E_V1TImode:
9326 subparts = 1;
9327 break;
9328 case E_V2DFmode:
9329 case E_V2DImode:
9330 subparts = 2;
9331 break;
9332 case E_V4SFmode:
9333 case E_V4SImode:
9334 subparts = 4;
9335 break;
9336 case E_V8HImode:
9337 subparts = 8;
9338 break;
9339 case E_V16QImode:
9340 subparts = 16;
9341 break;
9342 default:
9343 gcc_unreachable();
9346 v = rtvec_alloc (subparts);
9348 for (i = 0; i < subparts / 2; ++i)
9349 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9350 for (i = subparts / 2; i < subparts; ++i)
9351 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9353 return v;
9356 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9357 store operation. */
9358 void
9359 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9361 /* Scalar permutations are easier to express in integer modes rather than
9362 floating-point modes, so cast them here. We use V1TImode instead
9363 of TImode to ensure that the values don't go through GPRs. */
9364 if (FLOAT128_VECTOR_P (mode))
9366 dest = gen_lowpart (V1TImode, dest);
9367 source = gen_lowpart (V1TImode, source);
9368 mode = V1TImode;
9371 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9372 scalar. */
9373 if (mode == TImode || mode == V1TImode)
9374 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9375 GEN_INT (64))));
9376 else
9378 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9379 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9383 /* Emit a little-endian load from vector memory location SOURCE to VSX
9384 register DEST in mode MODE. The load is done with two permuting
9385 insn's that represent an lxvd2x and xxpermdi. */
9386 void
9387 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9389 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
9390 V1TImode). */
9391 if (mode == TImode || mode == V1TImode)
9393 mode = V2DImode;
9394 dest = gen_lowpart (V2DImode, dest);
9395 source = adjust_address (source, V2DImode, 0);
9398 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9399 rs6000_emit_le_vsx_permute (tmp, source, mode);
9400 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9403 /* Emit a little-endian store to vector memory location DEST from VSX
9404 register SOURCE in mode MODE. The store is done with two permuting
9405 insn's that represent an xxpermdi and an stxvd2x. */
9406 void
9407 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9409 /* This should never be called during or after LRA, because it does
9410 not re-permute the source register. It is intended only for use
9411 during expand. */
9412 gcc_assert (!lra_in_progress && !reload_completed);
9414 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9415 V1TImode). */
9416 if (mode == TImode || mode == V1TImode)
9418 mode = V2DImode;
9419 dest = adjust_address (dest, V2DImode, 0);
9420 source = gen_lowpart (V2DImode, source);
9423 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9424 rs6000_emit_le_vsx_permute (tmp, source, mode);
9425 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9428 /* Emit a sequence representing a little-endian VSX load or store,
9429 moving data from SOURCE to DEST in mode MODE. This is done
9430 separately from rs6000_emit_move to ensure it is called only
9431 during expand. LE VSX loads and stores introduced later are
9432 handled with a split. The expand-time RTL generation allows
9433 us to optimize away redundant pairs of register-permutes. */
9434 void
9435 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9437 gcc_assert (!BYTES_BIG_ENDIAN
9438 && VECTOR_MEM_VSX_P (mode)
9439 && !TARGET_P9_VECTOR
9440 && !gpr_or_gpr_p (dest, source)
9441 && (MEM_P (source) ^ MEM_P (dest)));
9443 if (MEM_P (source))
9445 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9446 rs6000_emit_le_vsx_load (dest, source, mode);
9448 else
9450 if (!REG_P (source))
9451 source = force_reg (mode, source);
9452 rs6000_emit_le_vsx_store (dest, source, mode);
9456 /* Return whether a SFmode or SImode move can be done without converting one
9457 mode to another. This arrises when we have:
9459 (SUBREG:SF (REG:SI ...))
9460 (SUBREG:SI (REG:SF ...))
9462 and one of the values is in a floating point/vector register, where SFmode
9463 scalars are stored in DFmode format. */
9465 bool
9466 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9468 if (TARGET_ALLOW_SF_SUBREG)
9469 return true;
9471 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9472 return true;
9474 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9475 return true;
9477 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9478 if (SUBREG_P (dest))
9480 rtx dest_subreg = SUBREG_REG (dest);
9481 rtx src_subreg = SUBREG_REG (src);
9482 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9485 return false;
9489 /* Helper function to change moves with:
9491 (SUBREG:SF (REG:SI)) and
9492 (SUBREG:SI (REG:SF))
9494 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9495 values are stored as DFmode values in the VSX registers. We need to convert
9496 the bits before we can use a direct move or operate on the bits in the
9497 vector register as an integer type.
9499 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
9501 static bool
9502 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9504 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9505 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9506 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9508 rtx inner_source = SUBREG_REG (source);
9509 machine_mode inner_mode = GET_MODE (inner_source);
9511 if (mode == SImode && inner_mode == SFmode)
9513 emit_insn (gen_movsi_from_sf (dest, inner_source));
9514 return true;
9517 if (mode == SFmode && inner_mode == SImode)
9519 emit_insn (gen_movsf_from_si (dest, inner_source));
9520 return true;
9524 return false;
9527 /* Emit a move from SOURCE to DEST in mode MODE. */
/* Normalizes the operands (TLS references, constants forced to memory or
   the TOC, SDmode spill fixups for LRA, Darwin 128-bit float splitting),
   then emits the final SET.  */
9528 void
9529 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
/* Work on a local operand array so the cases below can rewrite either
   side before the final SET is emitted.  */
9531 rtx operands[2];
9532 operands[0] = dest;
9533 operands[1] = source;
9535 if (TARGET_DEBUG_ADDR)
9537 fprintf (stderr,
9538 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9539 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9540 GET_MODE_NAME (mode),
9541 lra_in_progress,
9542 reload_completed,
9543 can_create_pseudo_p ());
9544 debug_rtx (dest);
9545 fprintf (stderr, "source:\n");
9546 debug_rtx (source);
9549 /* Check that we get CONST_WIDE_INT only when we should. */
9550 if (CONST_WIDE_INT_P (operands[1])
9551 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9552 gcc_unreachable ();
9554 #ifdef HAVE_AS_GNU_ATTRIBUTE
9555 /* If we use a long double type, set the flags in .gnu_attribute that say
9556 what the long double type is. This is to allow the linker's warning
9557 message for the wrong long double to be useful, even if the function does
9558 not do a call (for example, doing a 128-bit add on power9 if the long
9559 double type is IEEE 128-bit. Do not set this if __ibm128 or __float128 are
9560 used if they aren't the default long double type. */
9561 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9563 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9564 rs6000_passes_float = rs6000_passes_long_double = true;
9566 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9567 rs6000_passes_float = rs6000_passes_long_double = true;
9569 #endif
9571 /* See if we need to special case SImode/SFmode SUBREG moves. */
9572 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9573 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9574 return;
9576 /* Check if GCC is setting up a block move that will end up using FP
9577 registers as temporaries. We must make sure this is acceptable. */
9578 if (MEM_P (operands[0])
9579 && MEM_P (operands[1])
9580 && mode == DImode
9581 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9582 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9583 && ! (rs6000_slow_unaligned_access (SImode,
9584 (MEM_ALIGN (operands[0]) > 32
9585 ? 32 : MEM_ALIGN (operands[0])))
9586 || rs6000_slow_unaligned_access (SImode,
9587 (MEM_ALIGN (operands[1]) > 32
9588 ? 32 : MEM_ALIGN (operands[1]))))
9589 && ! MEM_VOLATILE_P (operands [0])
9590 && ! MEM_VOLATILE_P (operands [1]))
/* Split the slow unaligned DImode mem-to-mem copy into two SImode
   moves instead.  */
9592 emit_move_insn (adjust_address (operands[0], SImode, 0),
9593 adjust_address (operands[1], SImode, 0));
9594 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9595 adjust_address (copy_rtx (operands[1]), SImode, 4));
9596 return;
9599 if (can_create_pseudo_p () && MEM_P (operands[0])
9600 && !gpc_reg_operand (operands[1], mode))
9601 operands[1] = force_reg (mode, operands[1]);
9603 /* Recognize the case where operand[1] is a reference to thread-local
9604 data and load its address to a register. */
9605 if (tls_referenced_p (operands[1]))
9607 enum tls_model model;
9608 rtx tmp = operands[1];
9609 rtx addend = NULL;
9611 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9613 addend = XEXP (XEXP (tmp, 0), 1);
9614 tmp = XEXP (XEXP (tmp, 0), 0);
9617 gcc_assert (SYMBOL_REF_P (tmp));
9618 model = SYMBOL_REF_TLS_MODEL (tmp);
9619 gcc_assert (model != 0);
9621 tmp = rs6000_legitimize_tls_address (tmp, model);
9622 if (addend)
9624 tmp = gen_rtx_PLUS (mode, tmp, addend);
9625 tmp = force_operand (tmp, operands[0]);
9627 operands[1] = tmp;
9630 /* 128-bit constant floating-point values on Darwin should really be loaded
9631 as two parts. However, this premature splitting is a problem when DFmode
9632 values can go into Altivec registers. */
9633 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9634 && !reg_addr[DFmode].scalar_in_vmx_p)
9636 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9637 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9638 DFmode);
9639 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9640 GET_MODE_SIZE (DFmode)),
9641 simplify_gen_subreg (DFmode, operands[1], mode,
9642 GET_MODE_SIZE (DFmode)),
9643 DFmode);
9644 return;
9647 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9648 p1:SD) if p1 is not of floating point class and p0 is spilled as
9649 we can have no analogous movsd_store for this. */
9650 if (lra_in_progress && mode == DDmode
9651 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9652 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9653 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9654 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9656 enum reg_class cl;
9657 int regno = REGNO (SUBREG_REG (operands[1]));
9659 if (!HARD_REGISTER_NUM_P (regno))
9661 cl = reg_preferred_class (regno);
9662 regno = reg_renumber[regno];
9663 if (regno < 0)
9664 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9666 if (regno >= 0 && ! FP_REGNO_P (regno))
9668 mode = SDmode;
9669 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9670 operands[1] = SUBREG_REG (operands[1]);
9673 if (lra_in_progress
9674 && mode == SDmode
9675 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9676 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9677 && (REG_P (operands[1])
9678 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9680 int regno = reg_or_subregno (operands[1]);
9681 enum reg_class cl;
9683 if (!HARD_REGISTER_NUM_P (regno))
9685 cl = reg_preferred_class (regno);
9686 gcc_assert (cl != NO_REGS);
9687 regno = reg_renumber[regno];
9688 if (regno < 0)
9689 regno = ira_class_hard_regs[cl][0];
9691 if (FP_REGNO_P (regno))
9693 if (GET_MODE (operands[0]) != DDmode)
9694 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9695 emit_insn (gen_movsd_store (operands[0], operands[1]));
9697 else if (INT_REGNO_P (regno))
9698 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9699 else
9700 gcc_unreachable();
9701 return;
9703 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9704 p:DD)) if p0 is not of floating point class and p1 is spilled as
9705 we can have no analogous movsd_load for this. */
9706 if (lra_in_progress && mode == DDmode
9707 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9708 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9709 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9710 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9712 enum reg_class cl;
9713 int regno = REGNO (SUBREG_REG (operands[0]));
9715 if (!HARD_REGISTER_NUM_P (regno))
9717 cl = reg_preferred_class (regno);
9718 regno = reg_renumber[regno];
9719 if (regno < 0)
9720 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9722 if (regno >= 0 && ! FP_REGNO_P (regno))
9724 mode = SDmode;
9725 operands[0] = SUBREG_REG (operands[0]);
9726 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9729 if (lra_in_progress
9730 && mode == SDmode
9731 && (REG_P (operands[0])
9732 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9733 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9734 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9736 int regno = reg_or_subregno (operands[0]);
9737 enum reg_class cl;
9739 if (!HARD_REGISTER_NUM_P (regno))
9741 cl = reg_preferred_class (regno);
9742 gcc_assert (cl != NO_REGS);
9743 regno = reg_renumber[regno];
9744 if (regno < 0)
9745 regno = ira_class_hard_regs[cl][0];
9747 if (FP_REGNO_P (regno))
9749 if (GET_MODE (operands[1]) != DDmode)
9750 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9751 emit_insn (gen_movsd_load (operands[0], operands[1]));
9753 else if (INT_REGNO_P (regno))
9754 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9755 else
9756 gcc_unreachable();
9757 return;
9760 /* FIXME: In the long term, this switch statement should go away
9761 and be replaced by a sequence of tests based on things like
9762 mode == Pmode. */
9763 switch (mode)
9765 case E_HImode:
9766 case E_QImode:
9767 if (CONSTANT_P (operands[1])
9768 && !CONST_INT_P (operands[1]))
9769 operands[1] = force_const_mem (mode, operands[1]);
9770 break;
9772 case E_TFmode:
9773 case E_TDmode:
9774 case E_IFmode:
9775 case E_KFmode:
9776 if (FLOAT128_2REG_P (mode))
9777 rs6000_eliminate_indexed_memrefs (operands);
9778 /* fall through */
9780 case E_DFmode:
9781 case E_DDmode:
9782 case E_SFmode:
9783 case E_SDmode:
9784 if (CONSTANT_P (operands[1])
9785 && ! easy_fp_constant (operands[1], mode))
9786 operands[1] = force_const_mem (mode, operands[1]);
9787 break;
9789 case E_V16QImode:
9790 case E_V8HImode:
9791 case E_V4SFmode:
9792 case E_V4SImode:
9793 case E_V2DFmode:
9794 case E_V2DImode:
9795 case E_V1TImode:
9796 if (CONSTANT_P (operands[1])
9797 && !easy_vector_constant (operands[1], mode))
9798 operands[1] = force_const_mem (mode, operands[1]);
9799 break;
9801 case E_SImode:
9802 case E_DImode:
9803 /* Use default pattern for address of ELF small data */
9804 if (TARGET_ELF
9805 && mode == Pmode
9806 && DEFAULT_ABI == ABI_V4
9807 && (SYMBOL_REF_P (operands[1])
9808 || GET_CODE (operands[1]) == CONST)
9809 && small_data_operand (operands[1], mode))
9811 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9812 return;
9815 /* Use the default pattern for loading up PC-relative addresses. */
9816 if (TARGET_PCREL && mode == Pmode
9817 && pcrel_local_or_external_address (operands[1], Pmode))
9819 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9820 return;
/* NOTE(review): "mode == Pmode && mode == SImode" restricts this to
   32-bit targets only; presumably intentional for SVR4 -fpic=1 —
   confirm.  */
9823 if (DEFAULT_ABI == ABI_V4
9824 && mode == Pmode && mode == SImode
9825 && flag_pic == 1 && got_operand (operands[1], mode))
9827 emit_insn (gen_movsi_got (operands[0], operands[1]));
9828 return;
9831 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9832 && TARGET_NO_TOC_OR_PCREL
9833 && ! flag_pic
9834 && mode == Pmode
9835 && CONSTANT_P (operands[1])
9836 && GET_CODE (operands[1]) != HIGH
9837 && !CONST_INT_P (operands[1]))
9839 rtx target = (!can_create_pseudo_p ()
9840 ? operands[0]
9841 : gen_reg_rtx (mode));
9843 /* If this is a function address on -mcall-aixdesc,
9844 convert it to the address of the descriptor. */
9845 if (DEFAULT_ABI == ABI_AIX
9846 && SYMBOL_REF_P (operands[1])
9847 && XSTR (operands[1], 0)[0] == '.')
9849 const char *name = XSTR (operands[1], 0);
9850 rtx new_ref;
9851 while (*name == '.')
9852 name++;
9853 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9854 CONSTANT_POOL_ADDRESS_P (new_ref)
9855 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9856 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9857 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9858 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9859 operands[1] = new_ref;
9862 if (DEFAULT_ABI == ABI_DARWIN)
9864 #if TARGET_MACHO
9865 /* This is not PIC code, but could require the subset of
9866 indirections used by mdynamic-no-pic. */
9867 if (MACHO_DYNAMIC_NO_PIC_P)
9869 /* Take care of any required data indirection. */
9870 operands[1] = rs6000_machopic_legitimize_pic_address (
9871 operands[1], mode, operands[0]);
9872 if (operands[0] != operands[1])
9873 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9874 return;
9876 #endif
9877 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9878 emit_insn (gen_macho_low (Pmode, operands[0],
9879 target, operands[1]));
9880 return;
9883 emit_insn (gen_elf_high (target, operands[1]));
9884 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9885 return;
9888 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9889 and we have put it in the TOC, we just need to make a TOC-relative
9890 reference to it. */
9891 if (TARGET_TOC
9892 && SYMBOL_REF_P (operands[1])
9893 && use_toc_relative_ref (operands[1], mode))
9894 operands[1] = create_TOC_reference (operands[1], operands[0]);
9895 else if (mode == Pmode
9896 && CONSTANT_P (operands[1])
9897 && GET_CODE (operands[1]) != HIGH
9898 && ((REG_P (operands[0])
9899 && FP_REGNO_P (REGNO (operands[0])))
9900 || !CONST_INT_P (operands[1])
9901 || (num_insns_constant (operands[1], mode)
9902 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9903 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9904 && (TARGET_CMODEL == CMODEL_SMALL
9905 || can_create_pseudo_p ()
9906 || (REG_P (operands[0])
9907 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9910 #if TARGET_MACHO
9911 /* Darwin uses a special PIC legitimizer. */
9912 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9914 operands[1] =
9915 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9916 operands[0]);
9917 if (operands[0] != operands[1])
9918 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9919 return;
9921 #endif
9923 /* If we are to limit the number of things we put in the TOC and
9924 this is a symbol plus a constant we can add in one insn,
9925 just put the symbol in the TOC and add the constant. */
9926 if (GET_CODE (operands[1]) == CONST
9927 && TARGET_NO_SUM_IN_TOC
9928 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9929 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9930 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9931 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9932 && ! side_effects_p (operands[0]))
9934 rtx sym =
9935 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9936 rtx other = XEXP (XEXP (operands[1], 0), 1);
9938 sym = force_reg (mode, sym);
9939 emit_insn (gen_add3_insn (operands[0], sym, other));
9940 return;
9943 operands[1] = force_const_mem (mode, operands[1]);
9945 if (TARGET_TOC
9946 && SYMBOL_REF_P (XEXP (operands[1], 0))
9947 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9949 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9950 operands[0]);
9951 operands[1] = gen_const_mem (mode, tocref);
9952 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9955 break;
9957 case E_TImode:
9958 if (!VECTOR_MEM_VSX_P (TImode))
9959 rs6000_eliminate_indexed_memrefs (operands);
9960 break;
9962 case E_PTImode:
9963 rs6000_eliminate_indexed_memrefs (operands);
9964 break;
9966 default:
9967 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9970 /* Above, we may have called force_const_mem which may have returned
9971 an invalid address. If we can, fix this up; otherwise, reload will
9972 have to deal with it. */
9973 if (MEM_P (operands[1]))
9974 operands[1] = validize_mem (operands[1]);
9976 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9980 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
/* Registers the libgcc (or XL-compat) function names for MODE, an IBM
   extended-double 128-bit mode (IFmode, or TFmode when it is IBM
   format).  */
9981 static void
9982 init_float128_ibm (machine_mode mode)
9984 if (!TARGET_XL_COMPAT)
9986 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
9987 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
9988 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
9989 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
/* Soft-float targets additionally need the comparison and conversion
   helpers.  */
9991 if (!TARGET_HARD_FLOAT)
9993 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
9994 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
9995 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
9996 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
9997 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
9998 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
9999 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10000 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10002 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10003 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10004 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10005 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10006 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10007 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10008 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10009 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10012 else
10014 set_optab_libfunc (add_optab, mode, "_xlqadd");
10015 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10016 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10017 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10020 /* Add various conversions for IFmode to use the traditional TFmode
10021 names. */
10022 if (mode == IFmode)
10024 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10025 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10026 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10027 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10028 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10029 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
/* 128-bit integer conversions only exist on 64-bit targets.  */
10031 if (TARGET_POWERPC64)
10033 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10034 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10035 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10036 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10041 /* Create a decl for either complex long double multiply or complex long double
10042 divide when long double is IEEE 128-bit floating point. We can't use
10043 __multc3 and __divtc3 because the original long double using IBM extended
10044 double used those names. The complex multiply/divide functions are encoded
10045 as builtin functions with a complex result and 4 scalar inputs. */
10047 static void
10048 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10050 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10051 name, NULL_TREE);
10053 set_builtin_decl (fncode, fndecl, true);
10055 if (TARGET_DEBUG_BUILTIN)
10056 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10058 return;
10061 /* Set up IEEE 128-bit floating point routines. Use different names if the
10062 arguments can be passed in a vector register. The historical PowerPC
10063 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10064 continue to use that if we aren't using vector registers to pass IEEE
10065 128-bit floating point. */
10067 static void
10068 init_float128_ieee (machine_mode mode)
10070 if (FLOAT128_VECTOR_P (mode))
10072 static bool complex_muldiv_init_p = false;
10074 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10075 we have clone or target attributes, this will be called a second
10076 time. We want to create the built-in function only once. */
10077 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10079 complex_muldiv_init_p = true;
/* The builtin codes for TCmode complex multiply/divide are offsets
   from the generic complex-mode builtin ranges.  */
10080 built_in_function fncode_mul =
10081 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10082 - MIN_MODE_COMPLEX_FLOAT);
10083 built_in_function fncode_div =
10084 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10085 - MIN_MODE_COMPLEX_FLOAT);
/* Complex result, four scalar long double arguments.  */
10087 tree fntype = build_function_type_list (complex_long_double_type_node,
10088 long_double_type_node,
10089 long_double_type_node,
10090 long_double_type_node,
10091 long_double_type_node,
10092 NULL_TREE);
10094 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10095 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10098 set_optab_libfunc (add_optab, mode, "__addkf3");
10099 set_optab_libfunc (sub_optab, mode, "__subkf3");
10100 set_optab_libfunc (neg_optab, mode, "__negkf2");
10101 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10102 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10103 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10104 set_optab_libfunc (abs_optab, mode, "__abskf2");
10105 set_optab_libfunc (powi_optab, mode, "__powikf2");
10107 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10108 set_optab_libfunc (ne_optab, mode, "__nekf2");
10109 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10110 set_optab_libfunc (ge_optab, mode, "__gekf2");
10111 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10112 set_optab_libfunc (le_optab, mode, "__lekf2");
10113 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10115 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10116 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10117 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10118 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10120 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10121 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10122 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2")
10124 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10125 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10126 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10128 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10129 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10130 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10131 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10132 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10133 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10135 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10136 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10137 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10138 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10140 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10141 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10142 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10143 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
/* 128-bit integer conversions only exist on 64-bit targets.  */
10145 if (TARGET_POWERPC64)
10147 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10148 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10149 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10150 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
/* Non-vector ABI: use the historical _q_<op> names.  */
10154 else
10156 set_optab_libfunc (add_optab, mode, "_q_add");
10157 set_optab_libfunc (sub_optab, mode, "_q_sub");
10158 set_optab_libfunc (neg_optab, mode, "_q_neg");
10159 set_optab_libfunc (smul_optab, mode, "_q_mul");
10160 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10161 if (TARGET_PPC_GPOPT)
10162 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10164 set_optab_libfunc (eq_optab, mode, "_q_feq");
10165 set_optab_libfunc (ne_optab, mode, "_q_fne");
10166 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10167 set_optab_libfunc (ge_optab, mode, "_q_fge");
10168 set_optab_libfunc (lt_optab, mode, "_q_flt");
10169 set_optab_libfunc (le_optab, mode, "_q_fle");
10171 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10172 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10173 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10174 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10175 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10176 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10177 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10178 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10182 static void
10183 rs6000_init_libfuncs (void)
10185 /* __float128 support. */
10186 if (TARGET_FLOAT128_TYPE)
10188 init_float128_ibm (IFmode);
10189 init_float128_ieee (KFmode);
10192 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10193 if (TARGET_LONG_DOUBLE_128)
10195 if (!TARGET_IEEEQUAD)
10196 init_float128_ibm (TFmode);
10198 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10199 else
10200 init_float128_ieee (TFmode);
10204 /* Emit a potentially record-form instruction, setting DST from SRC.
10205 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10206 signed comparison of DST with zero. If DOT is 1, the generated RTL
10207 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10208 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10209 a separate COMPARE. */
10211 void
10212 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10214 if (dot == 0)
10216 emit_move_insn (dst, src);
10217 return;
10220 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10222 emit_move_insn (dst, src);
10223 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10224 return;
10227 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10228 if (dot == 1)
10230 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10231 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10233 else
10235 rtx set = gen_rtx_SET (dst, src);
10236 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  /* CODE must be some kind of comparison, and MODE must be a
     condition-code mode.  */
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
	      && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense: a signed comparison never pairs with the
     unsigned CC mode.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
	      || mode != CCUNSmode);

  /* Conversely, an unsigned comparison requires the unsigned CC mode.  */
  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
	      || mode == CCUNSmode);

  /* Unordered-aware codes only exist for floating-point compares.  */
  gcc_assert (mode == CCFPmode
	      || (code != ORDERED && code != UNORDERED
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT
		  && code != UNGE && code != UNLE));

  /* These are invalid; the information is not there.  CCEQmode only
     records (in)equality.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */

bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  /* Only the rotate-and-mask insns' modes are handled.  */
  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      /* Sign bit clear: VAL + lowest-set-bit clears a single stretch of
	 ones; exact_log2 then yields -1 (rejected below) if there was
	 more than one stretch.  */
      bit = val & -val;		/* Lowest set bit.  */
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      /* All bits set: the mask covers the whole register.  */
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      /* Sign and low bit both set: the mask wraps around; the complement
	 is then a single non-wrapping stretch of ones, so analyze that.  */
      val = ~val;
      bit = val & -val;		/* Lowest set bit of the hole.  */
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      /* Sign bit set, low bit clear: the stretch must run up to the top
	 bit; VAL + lowest-set-bit overflows to zero exactly when it is a
	 single stretch.  */
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;	/* More than one stretch of ones: force rejection.  */
    }

  /* NB was computed one past the top bit of the stretch.  */
  nb--;

  /* Reject multiple stretches (exact_log2 returned -1) or out-of-range
     positions.  */
  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
10328 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10329 or rldicr instruction, to implement an AND with it in mode MODE. */
10331 bool
10332 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10334 int nb, ne;
10336 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10337 return false;
10339 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10340 does not wrap. */
10341 if (mode == DImode)
10342 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10344 /* For SImode, rlwinm can do everything. */
10345 if (mode == SImode)
10346 return (nb < 32 && ne < 32);
10348 return false;
10351 /* Return the instruction template for an AND with mask in mode MODE, with
10352 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10354 const char *
10355 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10357 int nb, ne;
10359 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10360 gcc_unreachable ();
10362 if (mode == DImode && ne == 0)
10364 operands[3] = GEN_INT (63 - nb);
10365 if (dot)
10366 return "rldicl. %0,%1,0,%3";
10367 return "rldicl %0,%1,0,%3";
10370 if (mode == DImode && nb == 63)
10372 operands[3] = GEN_INT (63 - ne);
10373 if (dot)
10374 return "rldicr. %0,%1,0,%3";
10375 return "rldicr %0,%1,0,%3";
10378 if (nb < 32 && ne < 32)
10380 operands[3] = GEN_INT (31 - nb);
10381 operands[4] = GEN_INT (31 - ne);
10382 if (dot)
10383 return "rlwinm. %0,%1,0,%3,%4";
10384 return "rlwinm %0,%1,0,%3,%4";
10387 gcc_unreachable ();
/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  /* SH stays -1 for a variable (register) shift amount.  */
  int sh = -1;
  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.
   OPERANDS[2] is the shift amount, OPERANDS[3] the mask, and OPERANDS[4]
   the original shift rtx (inspected only for its code); OPERANDS[3] (and,
   for rlwnm/rlwinm, OPERANDS[4]) are rewritten to the encoded mask-boundary
   operands.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  if (mode == DImode && ne == 0)
    {
      /* Mask reaches bit 0: rld[i]cl.  A shift right by SH is done as a
	 rotate left by 64 - SH.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  if (mode == DImode && nb == 63)
    {
      /* Mask reaches bit 63: rld[i]cr.  */
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      /* The mask starts exactly at the shift amount: rld[i]c.  */
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      /* Mask within the low 32 bits: rlw[i]nm.  A shift right by SH is
	 done as a rotate left by 32 - SH.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  Unlike rs6000_is_valid_shift_mask,
   the shift amount here must be a constant.  */

bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.
   OPERANDS[2] is the shift amount, OPERANDS[3] the mask, and OPERANDS[4]
   the original shift rtx (inspected only for its code); OPERANDS[3] (and,
   for rlwimi, OPERANDS[4]) are rewritten to the encoded mask-boundary
   operands.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      /* A shift right by SH is done as a rotate left by 32 - SH.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
	return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}
/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insn;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  /* Lowest set bit of VAL.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  /* First clear bit above the bottom run of ones, i.e. the start of the
     lowest hole.  */
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  /* VAL with its bottom run of ones cleared.  */
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  /* Start of the next run of ones, i.e. the end of the lowest hole.  */
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;
  /* VAL + BIT3 - BIT2 is VAL with the lowest hole filled in.  */
  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  /* A record-form result is never requested at expand time.  */
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
	{
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  The BIT1/BIT2/VAL1/BIT3 computation mirrors the
     one in rs6000_is_valid_2insn_and above.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
10761 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
10762 for lfq and stfq insns iff the registers are hard registers. */
10765 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10767 /* We might have been passed a SUBREG. */
10768 if (!REG_P (reg1) || !REG_P (reg2))
10769 return 0;
10771 /* We might have been passed non floating point registers. */
10772 if (!FP_REGNO_P (REGNO (reg1))
10773 || !FP_REGNO_P (REGNO (reg2)))
10774 return 0;
10776 return (REGNO (reg1) == REGNO (reg2) - 1);
10779 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10780 addr1 and addr2 must be in consecutive memory locations
10781 (addr2 == addr1 + 8). */
10784 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10786 rtx addr1, addr2;
10787 unsigned int reg1, reg2;
10788 int offset1, offset2;
10790 /* The mems cannot be volatile. */
10791 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10792 return 0;
10794 addr1 = XEXP (mem1, 0);
10795 addr2 = XEXP (mem2, 0);
10797 /* Extract an offset (if used) from the first addr. */
10798 if (GET_CODE (addr1) == PLUS)
10800 /* If not a REG, return zero. */
10801 if (!REG_P (XEXP (addr1, 0)))
10802 return 0;
10803 else
10805 reg1 = REGNO (XEXP (addr1, 0));
10806 /* The offset must be constant! */
10807 if (!CONST_INT_P (XEXP (addr1, 1)))
10808 return 0;
10809 offset1 = INTVAL (XEXP (addr1, 1));
10812 else if (!REG_P (addr1))
10813 return 0;
10814 else
10816 reg1 = REGNO (addr1);
10817 /* This was a simple (mem (reg)) expression. Offset is 0. */
10818 offset1 = 0;
10821 /* And now for the second addr. */
10822 if (GET_CODE (addr2) == PLUS)
10824 /* If not a REG, return zero. */
10825 if (!REG_P (XEXP (addr2, 0)))
10826 return 0;
10827 else
10829 reg2 = REGNO (XEXP (addr2, 0));
10830 /* The offset must be constant. */
10831 if (!CONST_INT_P (XEXP (addr2, 1)))
10832 return 0;
10833 offset2 = INTVAL (XEXP (addr2, 1));
10836 else if (!REG_P (addr2))
10837 return 0;
10838 else
10840 reg2 = REGNO (addr2);
10841 /* This was a simple (mem (reg)) expression. Offset is 0. */
10842 offset2 = 0;
10845 /* Both of these must have the same base register. */
10846 if (reg1 != reg2)
10847 return 0;
10849 /* The offset for the second addr must be 8 more than the first addr. */
10850 if (offset2 != offset1 + 8)
10851 return 0;
10853 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10854 instructions. */
10855 return 1;
10858 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
10859 need to use DDmode, in all other cases we can use the same mode. */
10860 static machine_mode
10861 rs6000_secondary_memory_needed_mode (machine_mode mode)
10863 if (lra_in_progress && mode == SDmode)
10864 return DDmode;
10865 return mode;
/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  /* Look through a SUBREG to the inner register.  */
  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (!HARD_REGISTER_NUM_P (regno))
    {
      /* Before register allocation a pseudo has no class yet.  */
      if (!lra_in_progress && !reload_completed)
	return PSEUDO_REG_TYPE;

      /* During/after reload, see whether the pseudo was assigned a hard
	 register; if not it is still just a pseudo.  */
      regno = true_regnum (reg);
      if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
	return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  /* Note for the caller when the hard register is an Altivec one.  */
  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}
10905 /* Helper function to return the cost of adding a TOC entry address. */
10907 static inline int
10908 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10910 int ret;
10912 if (TARGET_CMODEL != CMODEL_SMALL)
10913 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10915 else
10916 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10918 return ret;
10921 /* Helper function for rs6000_secondary_reload to determine whether the memory
10922 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10923 needs reloading. Return negative if the memory is not handled by the memory
10924 helper functions and to try a different reload method, 0 if no additional
10925 instructions are need, and positive to give the extra cost for the
10926 memory. */
10928 static int
10929 rs6000_secondary_reload_memory (rtx addr,
10930 enum reg_class rclass,
10931 machine_mode mode)
10933 int extra_cost = 0;
10934 rtx reg, and_arg, plus_arg0, plus_arg1;
10935 addr_mask_type addr_mask;
10936 const char *type = NULL;
10937 const char *fail_msg = NULL;
10939 if (GPR_REG_CLASS_P (rclass))
10940 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10942 else if (rclass == FLOAT_REGS)
10943 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10945 else if (rclass == ALTIVEC_REGS)
10946 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10948 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10949 else if (rclass == VSX_REGS)
10950 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10951 & ~RELOAD_REG_AND_M16);
10953 /* If the register allocator hasn't made up its mind yet on the register
10954 class to use, settle on defaults to use. */
10955 else if (rclass == NO_REGS)
10957 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10958 & ~RELOAD_REG_AND_M16);
10960 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10961 addr_mask &= ~(RELOAD_REG_INDEXED
10962 | RELOAD_REG_PRE_INCDEC
10963 | RELOAD_REG_PRE_MODIFY);
10966 else
10967 addr_mask = 0;
10969 /* If the register isn't valid in this register class, just return now. */
10970 if ((addr_mask & RELOAD_REG_VALID) == 0)
10972 if (TARGET_DEBUG_ADDR)
10974 fprintf (stderr,
10975 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10976 "not valid in class\n",
10977 GET_MODE_NAME (mode), reg_class_names[rclass]);
10978 debug_rtx (addr);
10981 return -1;
10984 switch (GET_CODE (addr))
10986 /* Does the register class supports auto update forms for this mode? We
10987 don't need a scratch register, since the powerpc only supports
10988 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10989 case PRE_INC:
10990 case PRE_DEC:
10991 reg = XEXP (addr, 0);
10992 if (!base_reg_operand (addr, GET_MODE (reg)))
10994 fail_msg = "no base register #1";
10995 extra_cost = -1;
10998 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11000 extra_cost = 1;
11001 type = "update";
11003 break;
11005 case PRE_MODIFY:
11006 reg = XEXP (addr, 0);
11007 plus_arg1 = XEXP (addr, 1);
11008 if (!base_reg_operand (reg, GET_MODE (reg))
11009 || GET_CODE (plus_arg1) != PLUS
11010 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11012 fail_msg = "bad PRE_MODIFY";
11013 extra_cost = -1;
11016 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11018 extra_cost = 1;
11019 type = "update";
11021 break;
11023 /* Do we need to simulate AND -16 to clear the bottom address bits used
11024 in VMX load/stores? Only allow the AND for vector sizes. */
11025 case AND:
11026 and_arg = XEXP (addr, 0);
11027 if (GET_MODE_SIZE (mode) != 16
11028 || !CONST_INT_P (XEXP (addr, 1))
11029 || INTVAL (XEXP (addr, 1)) != -16)
11031 fail_msg = "bad Altivec AND #1";
11032 extra_cost = -1;
11035 if (rclass != ALTIVEC_REGS)
11037 if (legitimate_indirect_address_p (and_arg, false))
11038 extra_cost = 1;
11040 else if (legitimate_indexed_address_p (and_arg, false))
11041 extra_cost = 2;
11043 else
11045 fail_msg = "bad Altivec AND #2";
11046 extra_cost = -1;
11049 type = "and";
11051 break;
11053 /* If this is an indirect address, make sure it is a base register. */
11054 case REG:
11055 case SUBREG:
11056 if (!legitimate_indirect_address_p (addr, false))
11058 extra_cost = 1;
11059 type = "move";
11061 break;
11063 /* If this is an indexed address, make sure the register class can handle
11064 indexed addresses for this mode. */
11065 case PLUS:
11066 plus_arg0 = XEXP (addr, 0);
11067 plus_arg1 = XEXP (addr, 1);
11069 /* (plus (plus (reg) (constant)) (constant)) is generated during
11070 push_reload processing, so handle it now. */
11071 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11073 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11075 extra_cost = 1;
11076 type = "offset";
11080 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11081 push_reload processing, so handle it now. */
11082 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11084 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11086 extra_cost = 1;
11087 type = "indexed #2";
11091 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11093 fail_msg = "no base register #2";
11094 extra_cost = -1;
11097 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11099 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11100 || !legitimate_indexed_address_p (addr, false))
11102 extra_cost = 1;
11103 type = "indexed";
11107 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11108 && CONST_INT_P (plus_arg1))
11110 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11112 extra_cost = 1;
11113 type = "vector d-form offset";
11117 /* Make sure the register class can handle offset addresses. */
11118 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11120 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11122 extra_cost = 1;
11123 type = "offset #2";
11127 else
11129 fail_msg = "bad PLUS";
11130 extra_cost = -1;
11133 break;
11135 case LO_SUM:
11136 /* Quad offsets are restricted and can't handle normal addresses. */
11137 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11139 extra_cost = -1;
11140 type = "vector d-form lo_sum";
11143 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11145 fail_msg = "bad LO_SUM";
11146 extra_cost = -1;
11149 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11151 extra_cost = 1;
11152 type = "lo_sum";
11154 break;
11156 /* Static addresses need to create a TOC entry. */
11157 case CONST:
11158 case SYMBOL_REF:
11159 case LABEL_REF:
11160 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11162 extra_cost = -1;
11163 type = "vector d-form lo_sum #2";
11166 else
11168 type = "address";
11169 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11171 break;
11173 /* TOC references look like offsetable memory. */
11174 case UNSPEC:
11175 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11177 fail_msg = "bad UNSPEC";
11178 extra_cost = -1;
11181 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11183 extra_cost = -1;
11184 type = "vector d-form lo_sum #3";
11187 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11189 extra_cost = 1;
11190 type = "toc reference";
11192 break;
11194 default:
11196 fail_msg = "bad address";
11197 extra_cost = -1;
11201 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11203 if (extra_cost < 0)
11204 fprintf (stderr,
11205 "rs6000_secondary_reload_memory error: mode = %s, "
11206 "class = %s, addr_mask = '%s', %s\n",
11207 GET_MODE_NAME (mode),
11208 reg_class_names[rclass],
11209 rs6000_debug_addr_mask (addr_mask, false),
11210 (fail_msg != NULL) ? fail_msg : "<bad address>");
11212 else
11213 fprintf (stderr,
11214 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11215 "addr_mask = '%s', extra cost = %d, %s\n",
11216 GET_MODE_NAME (mode),
11217 reg_class_names[rclass],
11218 rs6000_debug_addr_mask (addr_mask, false),
11219 extra_cost,
11220 (type) ? type : "<none>");
11222 debug_rtx (addr);
11225 return extra_cost;
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */

static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  /* Add support for various direct moves available.  In this function, we only
     look at cases where we don't need any extra registers, and one or more
     simple move insns are issued.  Originally small integers are not allowed
     in FPR/VSX registers.  Single precision binary floating is not a simple
     move because we need to convert to the single precision memory layout.
     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
     need special direct move handling, which we do not support yet.  */
  if (TARGET_DIRECT_MOVE
      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
    {
      if (TARGET_POWERPC64)
	{
	  /* ISA 2.07: MTVSRD or MVFVSRD.  */
	  if (size == 8)
	    return true;

	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (TARGET_P8_VECTOR)
	{
	  if (mode == SImode)
	    return true;

	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
	    return true;
	}

      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
      if (mode == SDmode)
	return true;
    }

  /* Move to/from SPR.  */
  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
    return true;

  return false;
}
/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register.  Return
   true (and fill in SRI's insn code and extra cost) if there is such a
   helper pattern, false if not.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX point registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX point registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access
	 to the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  /* SFmode needs layout conversion on the way through, so it is not a
     simple move and uses a helper pattern in each direction.  */
  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case.

   TO_TYPE and FROM_TYPE are the destination and source register types, MODE
   is the mode of the value being moved, and ALTIVEC_P selects the
   Altivec-specific variant of the direct-move patterns.  SRI may be null;
   when non-null and a simple move suffices, it is cleared to indicate no
   extra insn or cost is needed.  */

static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
			      enum rs6000_reg_type from_type,
			      machine_mode mode,
			      secondary_reload_info *sri,
			      bool altivec_p)
{
  /* Fall back to load/store reloads if either type is not a register.  */
  if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
    return false;

  /* If we haven't allocated registers yet, assume the move can be done for the
     standard register types.  */
  if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
      || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
      || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
    return true;

  /* Moves to the same set of registers is a simple move for non-specialized
     registers.  */
  if (to_type == from_type && IS_STD_REG_TYPE (to_type))
    return true;

  /* Check whether a simple move can be done directly.  */
  if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
    {
      if (sri)
	{
	  sri->icode = CODE_FOR_nothing;
	  sri->extra_cost = 0;
	}
      return true;
    }

  /* Now check if we can do it in a few steps.  */
  return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
					      altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.

   Implements TARGET_SECONDARY_RELOAD.  Internally, DONE_P records that one
   of the special cases below has fully decided the reload, and DEFAULT_P
   records that we should defer to default_secondary_reload instead.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;
  enum insn_code icode;
  bool default_p = false;
  bool done_p = false;

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && SUBREG_P (x)
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  /* Pick the mode-specific reload helper for the direction of the move.  */
  icode = ((in_p)
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || CONST_DOUBLE_P (x)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  /* Likewise for multi-register (32-bit) integer values with offsets that
     would wrap around the offset field.  */
  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  gcc_assert (ret != ALL_REGS);

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
      debug_rtx (x);
    }

  return ret;
}
11638 /* Better tracing for rs6000_secondary_reload_inner. */
11640 static void
11641 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11642 bool store_p)
11644 rtx set, clobber;
11646 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11648 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11649 store_p ? "store" : "load");
11651 if (store_p)
11652 set = gen_rtx_SET (mem, reg);
11653 else
11654 set = gen_rtx_SET (reg, mem);
11656 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11657 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
/* Abort helper for rs6000_secondary_reload_inner: dump the offending reload
   via rs6000_secondary_reload_trace, then abort.  LINE is the caller's
   __LINE__, to identify which check failed.  */

static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.

   REG is the register being loaded/stored, MEM the memory location, SCRATCH
   a base-register-class scratch available for address computation, and
   STORE_P distinguishes store from load.  Emits the fixup insns plus the
   final move, or aborts via rs6000_secondary_reload_fail if the operands do
   not match what rs6000_secondary_reload_memory promised.  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  /* The register must be a hard register and the scratch a valid base reg.  */
  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Select the addressing-capability mask for the register's bank.  */
  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  /* Materialize the increment/decrement, then address indirectly.  */
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  /* NOTE(review): this uses the value register REG as the new
	     address rather than the updated base OP0 — looks suspicious;
	     confirm against upstream before relying on this path.  */
	  new_addr = reg;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || SUBREG_P (op0))
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  /* Emit the AND explicitly, clobbering CR0 as the machine "andi."
	     style insn does.  */
	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets (DQ-form) must be a multiple of 16; force the whole
	 address into the scratch otherwise.  */
      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

      /* Symbolic addresses: load the address into the scratch first.  */
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.

   REG is the GPR, MEM the memory operand, SCRATCH a scratch base register,
   and STORE_P distinguishes store from load.  The offending address is
   computed into SCRATCH (or into the PRE_MODIFY base register, which doubles
   as the scratch in that case) and the move is emitted indirect.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  /* Only hard GPRs (GENERAL_REGS/BASE_REGS) reach this helper.  */
  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      /* For PRE_MODIFY the base register itself receives the new address,
	 so use it instead of the scratch.  */
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  /* Constants and addresses need a GPR-reachable class.  */
  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* Keep integer values out of the FP half of GEN_OR_FLOAT_REGS.  */
  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
    return GENERAL_REGS;

  return rclass;
}
12068 /* Debug version of rs6000_preferred_reload_class. */
12069 static enum reg_class
12070 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12072 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12074 fprintf (stderr,
12075 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12076 "mode = %s, x:\n",
12077 reg_class_names[ret], reg_class_names[rclass],
12078 GET_MODE_NAME (GET_MODE (x)));
12079 debug_rtx (x);
12081 return ret;
12084 /* If we are copying between FP or AltiVec registers and anything else, we need
12085 a memory location. The exception is when we are targeting ppc64 and the
12086 move to/from fpr to gpr instructions are available. Also, under VSX, you
12087 can copy vector registers from the FP register set to the Altivec register
12088 set and vice versa. */
12090 static bool
12091 rs6000_secondary_memory_needed (machine_mode mode,
12092 reg_class_t from_class,
12093 reg_class_t to_class)
12095 enum rs6000_reg_type from_type, to_type;
12096 bool altivec_p = ((from_class == ALTIVEC_REGS)
12097 || (to_class == ALTIVEC_REGS));
12099 /* If a simple/direct move is available, we don't need secondary memory */
12100 from_type = reg_class_to_reg_type[(int)from_class];
12101 to_type = reg_class_to_reg_type[(int)to_class];
12103 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12104 (secondary_reload_info *)0, altivec_p))
12105 return false;
12107 /* If we have a floating point or vector register class, we need to use
12108 memory to transfer the data. */
12109 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12110 return true;
12112 return false;
12115 /* Debug version of rs6000_secondary_memory_needed. */
12116 static bool
12117 rs6000_debug_secondary_memory_needed (machine_mode mode,
12118 reg_class_t from_class,
12119 reg_class_t to_class)
12121 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12123 fprintf (stderr,
12124 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12125 "to_class = %s, mode = %s\n",
12126 ret ? "true" : "false",
12127 reg_class_names[from_class],
12128 reg_class_names[to_class],
12129 GET_MODE_NAME (mode));
12131 return ret;
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (SYMBOL_REF_P (in)
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  /* Resolve IN to a hard register number, or -1 if it is not (or is not yet
     known to be) a hard register.  */
  if (REG_P (in))
    {
      regno = REGNO (in);
      if (!HARD_REGISTER_NUM_P (regno))
	{
	  regno = true_regnum (in);
	  if (!HARD_REGISTER_NUM_P (regno))
	    regno = -1;
	}
    }
  else if (SUBREG_P (in))
    {
      regno = true_regnum (in);
      if (!HARD_REGISTER_NUM_P (regno))
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}
12227 /* Debug version of rs6000_secondary_reload_class. */
12228 static enum reg_class
12229 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12230 machine_mode mode, rtx in)
12232 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12233 fprintf (stderr,
12234 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12235 "mode = %s, input rtx:\n",
12236 reg_class_names[ret], reg_class_names[rclass],
12237 GET_MODE_NAME (mode));
12238 debug_rtx (in);
12240 return ret;
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   Return true if a subreg-style mode change FROM -> TO is valid for
   registers in RCLASS; false forces the change through memory.  */

static bool
rs6000_can_change_mode_class (machine_mode from,
			      machine_mode to,
			      reg_class_t rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return true;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return false;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return false;

	  if (from_size < 8 || to_size < 8)
	    return false;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return false;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return false;

	  return true;
	}
      else
	return true;
    }

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
	return false;

      return (from_size == 8 || from_size == 16);
    }

  /* Exactly one of FROM/TO being an Altivec vector mode is not allowed in
     the Altivec registers.  */
  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return false;

  return true;
}
12322 /* Debug version of rs6000_can_change_mode_class. */
12323 static bool
12324 rs6000_debug_can_change_mode_class (machine_mode from,
12325 machine_mode to,
12326 reg_class_t rclass)
12328 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12330 fprintf (stderr,
12331 "rs6000_can_change_mode_class, return %s, from = %s, "
12332 "to = %s, rclass = %s\n",
12333 ret ? "true" : "false",
12334 GET_MODE_NAME (from), GET_MODE_NAME (to),
12335 reg_class_names[rclass]);
12337 return ret;
/* Return a string to do a move operation of 128 bits of data.

   OPERANDS[0] is the destination, OPERANDS[1] the source.  Returns either
   the assembler template for a single machine insn, or "#" when the move
   must be split later (see rs6000_split_128bit_ok_p).  Aborts on operand
   combinations no pattern should have accepted.  */

const char *
rs6000_output_move_128bit (rtx operands[])
{
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  int dest_regno;
  int src_regno;
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

  /* Classify the destination: GPR, FPR, Altivec, or (FPR|Altivec) = VSX.  */
  if (REG_P (dest))
    {
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;
    }
  else
    {
      dest_regno = -1;
      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
    }

  /* Classify the source the same way.  */
  if (REG_P (src))
    {
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;
    }
  else
    {
      src_regno = -1;
      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
    }

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
    {
      if (dest_gpr_p)
	{
	  if (src_gpr_p)
	    return "#";

	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
	    return "#";
	}

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (src_vsx_p)
	    return "xxlor %x0,%x1,%x1";

	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mtvsrdd %x0,%1,%L1"
		    : "mtvsrdd %x0,%L1,%1");

	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)
	return "#";
    }

  /* Loads.  */
  else if (dest_regno >= 0 && MEM_P (src))
    {
      if (dest_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "lq %0,%1";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	  else
	    return "lxvd2x %x0,%y1";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)
	return "#";
    }

  /* Stores.  */
  else if (src_regno >= 0 && MEM_P (dest))
    {
      if (src_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "stq %1,%0";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (dest, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	  else
	    return "stxvd2x %x1,%y0";
	}

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

      else if (src_fp_p)
	return "#";
    }

  /* Constants.  */
  else if (dest_regno >= 0
	   && (CONST_INT_P (src)
	       || CONST_WIDE_INT_P (src)
	       || CONST_DOUBLE_P (src)
	       || GET_CODE (src) == CONST_VECTOR))
    {
      if (dest_gpr_p)
	return "#";

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);
    }

  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
12513 /* Validate a 128-bit move. */
12514 bool
12515 rs6000_move_128bit_ok_p (rtx operands[])
12517 machine_mode mode = GET_MODE (operands[0]);
12518 return (gpc_reg_operand (operands[0], mode)
12519 || gpc_reg_operand (operands[1], mode));
12522 /* Return true if a 128-bit move needs to be split. */
12523 bool
12524 rs6000_split_128bit_ok_p (rtx operands[])
12526 if (!reload_completed)
12527 return false;
12529 if (!gpr_or_gpr_p (operands[0], operands[1]))
12530 return false;
12532 if (quad_load_store_p (operands[0], operands[1]))
12533 return false;
12535 return true;
12539 /* Given a comparison operation, return the bit number in CCR to test. We
12540 know this is a valid comparison.
12542 SCC_P is 1 if this is for an scc. That means that %D will have been
12543 used instead of %C, so the bits will be in different places.
12545 Return -1 if OP isn't a valid comparison for some reason. */
12548 ccr_bit (rtx op, int scc_p)
12550 enum rtx_code code = GET_CODE (op);
12551 machine_mode cc_mode;
12552 int cc_regnum;
12553 int base_bit;
12554 rtx reg;
12556 if (!COMPARISON_P (op))
12557 return -1;
12559 reg = XEXP (op, 0);
12561 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12562 return -1;
12564 cc_mode = GET_MODE (reg);
12565 cc_regnum = REGNO (reg);
12566 base_bit = 4 * (cc_regnum - CR0_REGNO);
12568 validate_condition_mode (code, cc_mode);
12570 /* When generating a sCOND operation, only positive conditions are
12571 allowed. */
12572 if (scc_p)
12573 switch (code)
12575 case EQ:
12576 case GT:
12577 case LT:
12578 case UNORDERED:
12579 case GTU:
12580 case LTU:
12581 break;
12582 default:
12583 return -1;
12586 switch (code)
12588 case NE:
12589 return scc_p ? base_bit + 3 : base_bit + 2;
12590 case EQ:
12591 return base_bit + 2;
12592 case GT: case GTU: case UNLE:
12593 return base_bit + 1;
12594 case LT: case LTU: case UNGE:
12595 return base_bit;
12596 case ORDERED: case UNORDERED:
12597 return base_bit + 3;
12599 case GE: case GEU:
12600 /* If scc, we will have done a cror to put the bit in the
12601 unordered position. So test that bit. For integer, this is ! LT
12602 unless this is an scc insn. */
12603 return scc_p ? base_bit + 3 : base_bit;
12605 case LE: case LEU:
12606 return scc_p ? base_bit + 3 : base_bit + 1;
12608 default:
12609 return -1;
12613 /* Return the GOT register. */
12616 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12618 /* The second flow pass currently (June 1999) can't update
12619 regs_ever_live without disturbing other parts of the compiler, so
12620 update it here to make the prolog/epilogue code happy. */
12621 if (!can_create_pseudo_p ()
12622 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12623 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12625 crtl->uses_pic_offset_table = 1;
12627 return pic_offset_table_rtx;
12630 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12632 /* Write out a function code label. */
12634 void
12635 rs6000_output_function_entry (FILE *file, const char *fname)
12637 if (fname[0] != '.')
12639 switch (DEFAULT_ABI)
12641 default:
12642 gcc_unreachable ();
12644 case ABI_AIX:
12645 if (DOT_SYMBOLS)
12646 putc ('.', file);
12647 else
12648 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12649 break;
12651 case ABI_ELFv2:
12652 case ABI_V4:
12653 case ABI_DARWIN:
12654 break;
12658 RS6000_OUTPUT_BASENAME (file, fname);
12661 /* Print an operand. Recognize special options, documented below. */
12663 #if TARGET_ELF
12664 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12665 only introduced by the linker, when applying the sda21
12666 relocation. */
12667 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12668 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12669 #else
12670 #define SMALL_DATA_RELOC "sda21"
12671 #define SMALL_DATA_REG 0
12672 #endif
12674 void
12675 print_operand (FILE *file, rtx x, int code)
12677 int i;
12678 unsigned HOST_WIDE_INT uval;
12680 switch (code)
12682 /* %a is output_address. */
12684 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12685 output_operand. */
12687 case 'D':
12688 /* Like 'J' but get to the GT bit only. */
12689 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12691 output_operand_lossage ("invalid %%D value");
12692 return;
12695 /* Bit 1 is GT bit. */
12696 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12698 /* Add one for shift count in rlinm for scc. */
12699 fprintf (file, "%d", i + 1);
12700 return;
12702 case 'e':
12703 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12704 if (! INT_P (x))
12706 output_operand_lossage ("invalid %%e value");
12707 return;
12710 uval = INTVAL (x);
12711 if ((uval & 0xffff) == 0 && uval != 0)
12712 putc ('s', file);
12713 return;
12715 case 'E':
12716 /* X is a CR register. Print the number of the EQ bit of the CR */
12717 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12718 output_operand_lossage ("invalid %%E value");
12719 else
12720 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12721 return;
12723 case 'f':
12724 /* X is a CR register. Print the shift count needed to move it
12725 to the high-order four bits. */
12726 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12727 output_operand_lossage ("invalid %%f value");
12728 else
12729 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12730 return;
12732 case 'F':
12733 /* Similar, but print the count for the rotate in the opposite
12734 direction. */
12735 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12736 output_operand_lossage ("invalid %%F value");
12737 else
12738 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12739 return;
12741 case 'G':
12742 /* X is a constant integer. If it is negative, print "m",
12743 otherwise print "z". This is to make an aze or ame insn. */
12744 if (!CONST_INT_P (x))
12745 output_operand_lossage ("invalid %%G value");
12746 else if (INTVAL (x) >= 0)
12747 putc ('z', file);
12748 else
12749 putc ('m', file);
12750 return;
12752 case 'h':
12753 /* If constant, output low-order five bits. Otherwise, write
12754 normally. */
12755 if (INT_P (x))
12756 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12757 else
12758 print_operand (file, x, 0);
12759 return;
12761 case 'H':
12762 /* If constant, output low-order six bits. Otherwise, write
12763 normally. */
12764 if (INT_P (x))
12765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12766 else
12767 print_operand (file, x, 0);
12768 return;
12770 case 'I':
12771 /* Print `i' if this is a constant, else nothing. */
12772 if (INT_P (x))
12773 putc ('i', file);
12774 return;
12776 case 'j':
12777 /* Write the bit number in CCR for jump. */
12778 i = ccr_bit (x, 0);
12779 if (i == -1)
12780 output_operand_lossage ("invalid %%j code");
12781 else
12782 fprintf (file, "%d", i);
12783 return;
12785 case 'J':
12786 /* Similar, but add one for shift count in rlinm for scc and pass
12787 scc flag to `ccr_bit'. */
12788 i = ccr_bit (x, 1);
12789 if (i == -1)
12790 output_operand_lossage ("invalid %%J code");
12791 else
12792 /* If we want bit 31, write a shift count of zero, not 32. */
12793 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12794 return;
12796 case 'k':
12797 /* X must be a constant. Write the 1's complement of the
12798 constant. */
12799 if (! INT_P (x))
12800 output_operand_lossage ("invalid %%k value");
12801 else
12802 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12803 return;
12805 case 'K':
12806 /* X must be a symbolic constant on ELF. Write an
12807 expression suitable for an 'addi' that adds in the low 16
12808 bits of the MEM. */
12809 if (GET_CODE (x) == CONST)
12811 if (GET_CODE (XEXP (x, 0)) != PLUS
12812 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12813 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12814 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12815 output_operand_lossage ("invalid %%K value");
12817 print_operand_address (file, x);
12818 fputs ("@l", file);
12819 return;
12821 /* %l is output_asm_label. */
12823 case 'L':
12824 /* Write second word of DImode or DFmode reference. Works on register
12825 or non-indexed memory only. */
12826 if (REG_P (x))
12827 fputs (reg_names[REGNO (x) + 1], file);
12828 else if (MEM_P (x))
12830 machine_mode mode = GET_MODE (x);
12831 /* Handle possible auto-increment. Since it is pre-increment and
12832 we have already done it, we can just use an offset of word. */
12833 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12834 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12835 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12836 UNITS_PER_WORD));
12837 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12838 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12839 UNITS_PER_WORD));
12840 else
12841 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12842 UNITS_PER_WORD),
12843 0));
12845 if (small_data_operand (x, GET_MODE (x)))
12846 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12847 reg_names[SMALL_DATA_REG]);
12849 return;
12851 case 'N': /* Unused */
12852 /* Write the number of elements in the vector times 4. */
12853 if (GET_CODE (x) != PARALLEL)
12854 output_operand_lossage ("invalid %%N value");
12855 else
12856 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12857 return;
12859 case 'O': /* Unused */
12860 /* Similar, but subtract 1 first. */
12861 if (GET_CODE (x) != PARALLEL)
12862 output_operand_lossage ("invalid %%O value");
12863 else
12864 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12865 return;
12867 case 'p':
12868 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12869 if (! INT_P (x)
12870 || INTVAL (x) < 0
12871 || (i = exact_log2 (INTVAL (x))) < 0)
12872 output_operand_lossage ("invalid %%p value");
12873 else
12874 fprintf (file, "%d", i);
12875 return;
12877 case 'P':
12878 /* The operand must be an indirect memory reference. The result
12879 is the register name. */
12880 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12881 || REGNO (XEXP (x, 0)) >= 32)
12882 output_operand_lossage ("invalid %%P value");
12883 else
12884 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12885 return;
12887 case 'q':
12888 /* This outputs the logical code corresponding to a boolean
12889 expression. The expression may have one or both operands
12890 negated (if one, only the first one). For condition register
12891 logical operations, it will also treat the negated
12892 CR codes as NOTs, but not handle NOTs of them. */
12894 const char *const *t = 0;
12895 const char *s;
12896 enum rtx_code code = GET_CODE (x);
12897 static const char * const tbl[3][3] = {
12898 { "and", "andc", "nor" },
12899 { "or", "orc", "nand" },
12900 { "xor", "eqv", "xor" } };
12902 if (code == AND)
12903 t = tbl[0];
12904 else if (code == IOR)
12905 t = tbl[1];
12906 else if (code == XOR)
12907 t = tbl[2];
12908 else
12909 output_operand_lossage ("invalid %%q value");
12911 if (GET_CODE (XEXP (x, 0)) != NOT)
12912 s = t[0];
12913 else
12915 if (GET_CODE (XEXP (x, 1)) == NOT)
12916 s = t[2];
12917 else
12918 s = t[1];
12921 fputs (s, file);
12923 return;
12925 case 'Q':
12926 if (! TARGET_MFCRF)
12927 return;
12928 fputc (',', file);
12929 /* FALLTHRU */
12931 case 'R':
12932 /* X is a CR register. Print the mask for `mtcrf'. */
12933 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12934 output_operand_lossage ("invalid %%R value");
12935 else
12936 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12937 return;
12939 case 's':
12940 /* Low 5 bits of 32 - value */
12941 if (! INT_P (x))
12942 output_operand_lossage ("invalid %%s value");
12943 else
12944 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12945 return;
12947 case 't':
12948 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12949 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12951 output_operand_lossage ("invalid %%t value");
12952 return;
12955 /* Bit 3 is OV bit. */
12956 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12958 /* If we want bit 31, write a shift count of zero, not 32. */
12959 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12960 return;
12962 case 'T':
12963 /* Print the symbolic name of a branch target register. */
12964 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12965 x = XVECEXP (x, 0, 0);
12966 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12967 && REGNO (x) != CTR_REGNO))
12968 output_operand_lossage ("invalid %%T value");
12969 else if (REGNO (x) == LR_REGNO)
12970 fputs ("lr", file);
12971 else
12972 fputs ("ctr", file);
12973 return;
12975 case 'u':
12976 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12977 for use in unsigned operand. */
12978 if (! INT_P (x))
12980 output_operand_lossage ("invalid %%u value");
12981 return;
12984 uval = INTVAL (x);
12985 if ((uval & 0xffff) == 0)
12986 uval >>= 16;
12988 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
12989 return;
12991 case 'v':
12992 /* High-order 16 bits of constant for use in signed operand. */
12993 if (! INT_P (x))
12994 output_operand_lossage ("invalid %%v value");
12995 else
12996 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
12997 (INTVAL (x) >> 16) & 0xffff);
12998 return;
13000 case 'U':
13001 /* Print `u' if this has an auto-increment or auto-decrement. */
13002 if (MEM_P (x)
13003 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13004 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13005 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13006 putc ('u', file);
13007 return;
13009 case 'V':
13010 /* Print the trap code for this operand. */
13011 switch (GET_CODE (x))
13013 case EQ:
13014 fputs ("eq", file); /* 4 */
13015 break;
13016 case NE:
13017 fputs ("ne", file); /* 24 */
13018 break;
13019 case LT:
13020 fputs ("lt", file); /* 16 */
13021 break;
13022 case LE:
13023 fputs ("le", file); /* 20 */
13024 break;
13025 case GT:
13026 fputs ("gt", file); /* 8 */
13027 break;
13028 case GE:
13029 fputs ("ge", file); /* 12 */
13030 break;
13031 case LTU:
13032 fputs ("llt", file); /* 2 */
13033 break;
13034 case LEU:
13035 fputs ("lle", file); /* 6 */
13036 break;
13037 case GTU:
13038 fputs ("lgt", file); /* 1 */
13039 break;
13040 case GEU:
13041 fputs ("lge", file); /* 5 */
13042 break;
13043 default:
13044 output_operand_lossage ("invalid %%V value");
13046 break;
13048 case 'w':
13049 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13050 normally. */
13051 if (INT_P (x))
13052 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13053 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13054 else
13055 print_operand (file, x, 0);
13056 return;
13058 case 'x':
13059 /* X is a FPR or Altivec register used in a VSX context. */
13060 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13061 output_operand_lossage ("invalid %%x value");
13062 else
13064 int reg = REGNO (x);
13065 int vsx_reg = (FP_REGNO_P (reg)
13066 ? reg - 32
13067 : reg - FIRST_ALTIVEC_REGNO + 32);
13069 #ifdef TARGET_REGNAMES
13070 if (TARGET_REGNAMES)
13071 fprintf (file, "%%vs%d", vsx_reg);
13072 else
13073 #endif
13074 fprintf (file, "%d", vsx_reg);
13076 return;
13078 case 'X':
13079 if (MEM_P (x)
13080 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13081 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13082 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13083 putc ('x', file);
13084 return;
13086 case 'Y':
13087 /* Like 'L', for third word of TImode/PTImode */
13088 if (REG_P (x))
13089 fputs (reg_names[REGNO (x) + 2], file);
13090 else if (MEM_P (x))
13092 machine_mode mode = GET_MODE (x);
13093 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13094 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13095 output_address (mode, plus_constant (Pmode,
13096 XEXP (XEXP (x, 0), 0), 8));
13097 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13098 output_address (mode, plus_constant (Pmode,
13099 XEXP (XEXP (x, 0), 0), 8));
13100 else
13101 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13102 if (small_data_operand (x, GET_MODE (x)))
13103 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13104 reg_names[SMALL_DATA_REG]);
13106 return;
13108 case 'z':
13109 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13110 x = XVECEXP (x, 0, 1);
13111 /* X is a SYMBOL_REF. Write out the name preceded by a
13112 period and without any trailing data in brackets. Used for function
13113 names. If we are configured for System V (or the embedded ABI) on
13114 the PowerPC, do not emit the period, since those systems do not use
13115 TOCs and the like. */
13116 if (!SYMBOL_REF_P (x))
13118 output_operand_lossage ("invalid %%z value");
13119 return;
13122 /* For macho, check to see if we need a stub. */
13123 if (TARGET_MACHO)
13125 const char *name = XSTR (x, 0);
13126 #if TARGET_MACHO
13127 if (darwin_symbol_stubs
13128 && MACHOPIC_INDIRECT
13129 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13130 name = machopic_indirection_name (x, /*stub_p=*/true);
13131 #endif
13132 assemble_name (file, name);
13134 else if (!DOT_SYMBOLS)
13135 assemble_name (file, XSTR (x, 0));
13136 else
13137 rs6000_output_function_entry (file, XSTR (x, 0));
13138 return;
13140 case 'Z':
13141 /* Like 'L', for last word of TImode/PTImode. */
13142 if (REG_P (x))
13143 fputs (reg_names[REGNO (x) + 3], file);
13144 else if (MEM_P (x))
13146 machine_mode mode = GET_MODE (x);
13147 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13148 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13149 output_address (mode, plus_constant (Pmode,
13150 XEXP (XEXP (x, 0), 0), 12));
13151 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13152 output_address (mode, plus_constant (Pmode,
13153 XEXP (XEXP (x, 0), 0), 12));
13154 else
13155 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13156 if (small_data_operand (x, GET_MODE (x)))
13157 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13158 reg_names[SMALL_DATA_REG]);
13160 return;
13162 /* Print AltiVec memory operand. */
13163 case 'y':
13165 rtx tmp;
13167 gcc_assert (MEM_P (x));
13169 tmp = XEXP (x, 0);
13171 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13172 && GET_CODE (tmp) == AND
13173 && CONST_INT_P (XEXP (tmp, 1))
13174 && INTVAL (XEXP (tmp, 1)) == -16)
13175 tmp = XEXP (tmp, 0);
13176 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13177 && GET_CODE (tmp) == PRE_MODIFY)
13178 tmp = XEXP (tmp, 1);
13179 if (REG_P (tmp))
13180 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13181 else
13183 if (GET_CODE (tmp) != PLUS
13184 || !REG_P (XEXP (tmp, 0))
13185 || !REG_P (XEXP (tmp, 1)))
13187 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13188 break;
13191 if (REGNO (XEXP (tmp, 0)) == 0)
13192 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13193 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13194 else
13195 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13196 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13198 break;
13201 case 0:
13202 if (REG_P (x))
13203 fprintf (file, "%s", reg_names[REGNO (x)]);
13204 else if (MEM_P (x))
13206 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13207 know the width from the mode. */
13208 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13209 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13210 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13211 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13212 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13213 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13214 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13215 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13216 else
13217 output_address (GET_MODE (x), XEXP (x, 0));
13219 else if (toc_relative_expr_p (x, false,
13220 &tocrel_base_oac, &tocrel_offset_oac))
13221 /* This hack along with a corresponding hack in
13222 rs6000_output_addr_const_extra arranges to output addends
13223 where the assembler expects to find them. eg.
13224 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13225 without this hack would be output as "x@toc+4". We
13226 want "x+4@toc". */
13227 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13228 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13229 output_addr_const (file, XVECEXP (x, 0, 0));
13230 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13231 output_addr_const (file, XVECEXP (x, 0, 1));
13232 else
13233 output_addr_const (file, x);
13234 return;
13236 case '&':
13237 if (const char *name = get_some_local_dynamic_name ())
13238 assemble_name (file, name);
13239 else
13240 output_operand_lossage ("'%%&' used without any "
13241 "local dynamic TLS references");
13242 return;
13244 default:
13245 output_operand_lossage ("invalid %%xn code");
13249 /* Print the address of an operand. */
13251 void
13252 print_operand_address (FILE *file, rtx x)
13254 if (REG_P (x))
13255 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13257 /* Is it a PC-relative address? */
13258 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13260 HOST_WIDE_INT offset;
13262 if (GET_CODE (x) == CONST)
13263 x = XEXP (x, 0);
13265 if (GET_CODE (x) == PLUS)
13267 offset = INTVAL (XEXP (x, 1));
13268 x = XEXP (x, 0);
13270 else
13271 offset = 0;
13273 output_addr_const (file, x);
13275 if (offset)
13276 fprintf (file, "%+" PRId64, offset);
13278 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13279 fprintf (file, "@got");
13281 fprintf (file, "@pcrel");
13283 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13284 || GET_CODE (x) == LABEL_REF)
13286 output_addr_const (file, x);
13287 if (small_data_operand (x, GET_MODE (x)))
13288 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13289 reg_names[SMALL_DATA_REG]);
13290 else
13291 gcc_assert (!TARGET_TOC);
13293 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13294 && REG_P (XEXP (x, 1)))
13296 if (REGNO (XEXP (x, 0)) == 0)
13297 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13298 reg_names[ REGNO (XEXP (x, 0)) ]);
13299 else
13300 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13301 reg_names[ REGNO (XEXP (x, 1)) ]);
13303 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13304 && CONST_INT_P (XEXP (x, 1)))
13305 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13306 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13307 #if TARGET_MACHO
13308 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13309 && CONSTANT_P (XEXP (x, 1)))
13311 fprintf (file, "lo16(");
13312 output_addr_const (file, XEXP (x, 1));
13313 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13315 #endif
13316 #if TARGET_ELF
13317 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13318 && CONSTANT_P (XEXP (x, 1)))
13320 output_addr_const (file, XEXP (x, 1));
13321 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13323 #endif
13324 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13326 /* This hack along with a corresponding hack in
13327 rs6000_output_addr_const_extra arranges to output addends
13328 where the assembler expects to find them. eg.
13329 (lo_sum (reg 9)
13330 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13331 without this hack would be output as "x@toc+8@l(9)". We
13332 want "x+8@toc@l(9)". */
13333 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13334 if (GET_CODE (x) == LO_SUM)
13335 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13336 else
13337 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13339 else
13340 output_addr_const (file, x);
13343 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13345 bool
13346 rs6000_output_addr_const_extra (FILE *file, rtx x)
13348 if (GET_CODE (x) == UNSPEC)
13349 switch (XINT (x, 1))
13351 case UNSPEC_TOCREL:
13352 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13353 && REG_P (XVECEXP (x, 0, 1))
13354 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13355 output_addr_const (file, XVECEXP (x, 0, 0));
13356 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13358 if (INTVAL (tocrel_offset_oac) >= 0)
13359 fprintf (file, "+");
13360 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13362 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13364 putc ('-', file);
13365 assemble_name (file, toc_label_name);
13366 need_toc_init = 1;
13368 else if (TARGET_ELF)
13369 fputs ("@toc", file);
13370 return true;
13372 #if TARGET_MACHO
13373 case UNSPEC_MACHOPIC_OFFSET:
13374 output_addr_const (file, XVECEXP (x, 0, 0));
13375 putc ('-', file);
13376 machopic_output_function_base_name (file);
13377 return true;
13378 #endif
13380 return false;
13383 /* Target hook for assembling integer objects. The PowerPC version has
13384 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13385 is defined. It also needs to handle DI-mode objects on 64-bit
13386 targets. */
13388 static bool
13389 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13391 #ifdef RELOCATABLE_NEEDS_FIXUP
13392 /* Special handling for SI values. */
13393 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13395 static int recurse = 0;
13397 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13398 the .fixup section. Since the TOC section is already relocated, we
13399 don't need to mark it here. We used to skip the text section, but it
13400 should never be valid for relocated addresses to be placed in the text
13401 section. */
13402 if (DEFAULT_ABI == ABI_V4
13403 && (TARGET_RELOCATABLE || flag_pic > 1)
13404 && in_section != toc_section
13405 && !recurse
13406 && !CONST_SCALAR_INT_P (x)
13407 && CONSTANT_P (x))
13409 char buf[256];
13411 recurse = 1;
13412 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13413 fixuplabelno++;
13414 ASM_OUTPUT_LABEL (asm_out_file, buf);
13415 fprintf (asm_out_file, "\t.long\t(");
13416 output_addr_const (asm_out_file, x);
13417 fprintf (asm_out_file, ")@fixup\n");
13418 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13419 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13420 fprintf (asm_out_file, "\t.long\t");
13421 assemble_name (asm_out_file, buf);
13422 fprintf (asm_out_file, "\n\t.previous\n");
13423 recurse = 0;
13424 return true;
13426 /* Remove initial .'s to turn a -mcall-aixdesc function
13427 address into the address of the descriptor, not the function
13428 itself. */
13429 else if (SYMBOL_REF_P (x)
13430 && XSTR (x, 0)[0] == '.'
13431 && DEFAULT_ABI == ABI_AIX)
13433 const char *name = XSTR (x, 0);
13434 while (*name == '.')
13435 name++;
13437 fprintf (asm_out_file, "\t.long\t%s\n", name);
13438 return true;
13441 #endif /* RELOCATABLE_NEEDS_FIXUP */
13442 return default_assemble_integer (x, size, aligned_p);
13445 /* Return a template string for assembly to emit when making an
13446 external call. FUNOP is the call mem argument operand number. */
13448 static const char *
13449 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13451 /* -Wformat-overflow workaround, without which gcc thinks that %u
13452 might produce 10 digits. */
13453 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13455 char arg[12];
13456 arg[0] = 0;
13457 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13459 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13460 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13461 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13462 sprintf (arg, "(%%&@tlsld)");
13465 /* The magic 32768 offset here corresponds to the offset of
13466 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13467 char z[11];
13468 sprintf (z, "%%z%u%s", funop,
13469 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13470 ? "+32768" : ""));
13472 static char str[32]; /* 1 spare */
13473 if (rs6000_pcrel_p (cfun))
13474 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13475 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13476 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13477 sibcall ? "" : "\n\tnop");
13478 else if (DEFAULT_ABI == ABI_V4)
13479 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13480 flag_pic ? "@plt" : "");
13481 #if TARGET_MACHO
13482 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
13483 else if (DEFAULT_ABI == ABI_DARWIN)
13485 /* The cookie is in operand func+2. */
13486 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13487 int cookie = INTVAL (operands[funop + 2]);
13488 if (cookie & CALL_LONG)
13490 tree funname = get_identifier (XSTR (operands[funop], 0));
13491 tree labelname = get_prev_label (funname);
13492 gcc_checking_assert (labelname && !sibcall);
13494 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13495 instruction will reach 'foo', otherwise link as 'bl L42'".
13496 "L42" should be a 'branch island', that will do a far jump to
13497 'foo'. Branch islands are generated in
13498 macho_branch_islands(). */
13499 sprintf (str, "jbsr %%z%u,%.10s", funop,
13500 IDENTIFIER_POINTER (labelname));
13502 else
13503 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13504 after the call. */
13505 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13507 #endif
13508 else
13509 gcc_unreachable ();
13510 return str;
13513 const char *
13514 rs6000_call_template (rtx *operands, unsigned int funop)
13516 return rs6000_call_template_1 (operands, funop, false);
13519 const char *
13520 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13522 return rs6000_call_template_1 (operands, funop, true);
13525 /* As above, for indirect calls. */
13527 static const char *
13528 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13529 bool sibcall)
/* OPERANDS[FUNOP] is the call target (register or PLT UNSPEC) and
   SIBCALL selects sibling-call output.  Returns a pointer into a
   static buffer, so the result must be consumed before the next
   call to this function.  */
13531 /* -Wformat-overflow workaround, without which gcc thinks that %u
13532 might produce 10 digits. Note that -Wformat-overflow will not
13533 currently warn here for str[], so do not rely on a warning to
13534 ensure str[] is correctly sized. */
13535 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13537 /* Currently, funop is either 0 or 1. The maximum string is always
13538 a !speculate 64-bit __tls_get_addr call.
13540 ABI_ELFv2, pcrel:
13541 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13542 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13543 . 9 crset 2\n\t
13544 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13545 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13546 . 8 beq%T1l-
13547 .---
13548 .142
13550 ABI_AIX:
13551 . 9 ld 2,%3\n\t
13552 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13553 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13554 . 9 crset 2\n\t
13555 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13556 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13557 . 10 beq%T1l-\n\t
13558 . 10 ld 2,%4(1)
13559 .---
13560 .151
13562 ABI_ELFv2:
13563 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13564 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13565 . 9 crset 2\n\t
13566 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13567 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13568 . 10 beq%T1l-\n\t
13569 . 10 ld 2,%3(1)
13570 .---
13571 .142
13573 ABI_V4:
13574 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13575 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13576 . 9 crset 2\n\t
13577 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13578 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13579 . 8 beq%T1l-
13580 .---
13581 .141 */
13582 static char str[160]; /* 8 spare */
13583 char *s = str;
13584 const char *ptrload = TARGET_64BIT ? "d" : "wz";
/* AIX calls via a function descriptor: reload the TOC pointer (r2)
   from the descriptor before the call.  */
13586 if (DEFAULT_ABI == ABI_AIX)
13587 s += sprintf (s,
13588 "l%s 2,%%%u\n\t",
13589 ptrload, funop + 2);
13591 /* We don't need the extra code to stop indirect call speculation if
13592 calling via LR. */
13593 bool speculate = (TARGET_MACHO
13594 || rs6000_speculate_indirect_jumps
13595 || (REG_P (operands[funop])
13596 && REGNO (operands[funop]) == LR_REGNO));
/* Inline PLT call: emit PLTSEQ/PLTCALL marker relocs (prefixed with a
   TLSGD/TLSLD marker reloc for __tls_get_addr calls, if any).
   NOTE(review): presumably these let a PLTSEQ-aware linker edit the
   call sequence -- confirm against the HAVE_AS_PLTSEQ support.  */
13598 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13600 const char *rel64 = TARGET_64BIT ? "64" : "";
13601 char tls[29];
13602 tls[0] = 0;
13603 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13605 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13606 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13607 rel64, funop + 1);
13608 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13609 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13610 rel64);
13613 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13614 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13615 && flag_pic == 2 ? "+32768" : "");
13616 if (!speculate)
13618 s += sprintf (s,
13619 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13620 tls, rel64, notoc, funop, addend);
13621 s += sprintf (s, "crset 2\n\t");
13623 s += sprintf (s,
13624 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13625 tls, rel64, notoc, funop, addend);
13627 else if (!speculate)
13628 s += sprintf (s, "crset 2\n\t");
/* Now the branch itself; the "beq%T?l-" forms pair with the
   "crset 2" emitted above when speculation must be suppressed.  */
13630 if (rs6000_pcrel_p (cfun))
13632 if (speculate)
13633 sprintf (s, "b%%T%ul", funop);
13634 else
13635 sprintf (s, "beq%%T%ul-", funop);
13637 else if (DEFAULT_ABI == ABI_AIX)
13639 if (speculate)
13640 sprintf (s,
13641 "b%%T%ul\n\t"
13642 "l%s 2,%%%u(1)",
13643 funop, ptrload, funop + 3);
13644 else
13645 sprintf (s,
13646 "beq%%T%ul-\n\t"
13647 "l%s 2,%%%u(1)",
13648 funop, ptrload, funop + 3);
13650 else if (DEFAULT_ABI == ABI_ELFv2)
13652 if (speculate)
13653 sprintf (s,
13654 "b%%T%ul\n\t"
13655 "l%s 2,%%%u(1)",
13656 funop, ptrload, funop + 2);
13657 else
13658 sprintf (s,
13659 "beq%%T%ul-\n\t"
13660 "l%s 2,%%%u(1)",
13661 funop, ptrload, funop + 2);
13663 else
13665 if (speculate)
13666 sprintf (s,
13667 "b%%T%u%s",
13668 funop, sibcall ? "" : "l");
13669 else
13670 sprintf (s,
13671 "beq%%T%u%s-%s",
13672 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13674 return str;
13677 const char *
13678 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13680 return rs6000_indirect_call_template_1 (operands, funop, false);
13683 const char *
13684 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13686 return rs6000_indirect_call_template_1 (operands, funop, true);
13689 #if HAVE_AS_PLTSEQ
13690 /* Output indirect call insns. WHICH identifies the type of sequence. */
13691 const char *
13692 rs6000_pltseq_template (rtx *operands, int which)
/* Returns a pointer into a static buffer; the result must be consumed
   before the next call.  Only used for ELFv2 and SVR4 (see assert
   below).  */
13694 const char *rel64 = TARGET_64BIT ? "64" : "";
13695 char tls[30];
13696 tls[0] = 0;
/* A __tls_get_addr call carries an extra TLSGD/TLSLD marker reloc on
   the instruction; the ".-%c" offset points back at the start of the
   (possibly prefixed) instruction being annotated.  */
13697 if (GET_CODE (operands[3]) == UNSPEC)
13699 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13700 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13701 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13702 off, rel64);
13703 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13704 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13705 off, rel64);
13708 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13709 static char str[96]; /* 10 spare */
/* NOTE(review): '2' vs '4' appears to select the offset of the 16-bit
   immediate field within the instruction by endianness -- confirm.  */
13710 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13711 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13712 && flag_pic == 2 ? "+32768" : "");
13713 switch (which)
13715 case RS6000_PLTSEQ_TOCSAVE:
13716 sprintf (str,
13717 "st%s\n\t"
13718 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13719 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13720 tls, rel64);
13721 break;
13722 case RS6000_PLTSEQ_PLT16_HA:
13723 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13724 sprintf (str,
13725 "lis %%0,0\n\t"
13726 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13727 tls, off, rel64);
13728 else
13729 sprintf (str,
13730 "addis %%0,%%1,0\n\t"
13731 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13732 tls, off, rel64, addend);
13733 break;
13734 case RS6000_PLTSEQ_PLT16_LO:
13735 sprintf (str,
13736 "l%s %%0,0(%%1)\n\t"
13737 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13738 TARGET_64BIT ? "d" : "wz",
13739 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13740 break;
13741 case RS6000_PLTSEQ_MTCTR:
13742 sprintf (str,
13743 "mtctr %%1\n\t"
13744 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13745 tls, rel64, addend);
13746 break;
13747 case RS6000_PLTSEQ_PLT_PCREL34:
13748 sprintf (str,
13749 "pl%s %%0,0(0),1\n\t"
13750 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13751 TARGET_64BIT ? "d" : "wz",
13752 tls, rel64);
13753 break;
13754 default:
13755 gcc_unreachable ();
13757 return str;
13759 #endif
13761 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13762 /* Emit an assembler directive to set symbol visibility for DECL to
13763 VISIBILITY_TYPE. */
13765 static void
13766 rs6000_assemble_visibility (tree decl, int vis)
/* DECL is the symbol's declaration; VIS indexes visibility_types
   below.  NOTE(review): VIS is assumed to follow the order NULL /
   protected / hidden / internal -- confirm against the visibility
   enum used by callers.  */
/* Nothing is emitted for XCOFF.  */
13768 if (TARGET_XCOFF)
13769 return;
13771 /* Functions need to have their entry point symbol visibility set as
13772 well as their descriptor symbol visibility. */
13773 if (DEFAULT_ABI == ABI_AIX
13774 && DOT_SYMBOLS
13775 && TREE_CODE (decl) == FUNCTION_DECL)
13777 static const char * const visibility_types[] = {
13778 NULL, "protected", "hidden", "internal"
13781 const char *name, *type;
13783 name = ((* targetm.strip_name_encoding)
13784 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13785 type = visibility_types[vis];
/* Emit the directive for both the descriptor symbol NAME and the
   entry-point symbol .NAME.  */
13787 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13788 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13790 else
13791 default_assemble_visibility (decl, vis);
13793 #endif
13795 enum rtx_code
13796 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13798 /* Reversal of FP compares takes care -- an ordered compare
13799 becomes an unordered compare and vice versa. */
13800 if (mode == CCFPmode
13801 && (!flag_finite_math_only
13802 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13803 || code == UNEQ || code == LTGT))
13804 return reverse_condition_maybe_unordered (code);
13805 else
13806 return reverse_condition (code);
13809 /* Generate a compare for CODE. Return a brand-new rtx that
13810 represents the result of the compare. */
13812 static rtx
13813 rs6000_generate_compare (rtx cmp, machine_mode mode)
13815 machine_mode comp_mode;
13816 rtx compare_result;
13817 enum rtx_code code = GET_CODE (cmp);
13818 rtx op0 = XEXP (cmp, 0);
13819 rtx op1 = XEXP (cmp, 1);
13821 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13822 comp_mode = CCmode;
13823 else if (FLOAT_MODE_P (mode))
13824 comp_mode = CCFPmode;
13825 else if (code == GTU || code == LTU
13826 || code == GEU || code == LEU)
13827 comp_mode = CCUNSmode;
13828 else if ((code == EQ || code == NE)
13829 && unsigned_reg_p (op0)
13830 && (unsigned_reg_p (op1)
13831 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13832 /* These are unsigned values, perhaps there will be a later
13833 ordering compare that can be shared with this one. */
13834 comp_mode = CCUNSmode;
13835 else
13836 comp_mode = CCmode;
13838 /* If we have an unsigned compare, make sure we don't have a signed value as
13839 an immediate. */
13840 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13841 && INTVAL (op1) < 0)
13843 op0 = copy_rtx_if_shared (op0);
13844 op1 = force_reg (GET_MODE (op0), op1);
13845 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13848 /* First, the compare. */
13849 compare_result = gen_reg_rtx (comp_mode);
13851 /* IEEE 128-bit support in VSX registers when we do not have hardware
13852 support. */
13853 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13855 rtx libfunc = NULL_RTX;
13856 bool check_nan = false;
13857 rtx dest;
13859 switch (code)
13861 case EQ:
13862 case NE:
13863 libfunc = optab_libfunc (eq_optab, mode);
13864 break;
13866 case GT:
13867 case GE:
13868 libfunc = optab_libfunc (ge_optab, mode);
13869 break;
13871 case LT:
13872 case LE:
13873 libfunc = optab_libfunc (le_optab, mode);
13874 break;
13876 case UNORDERED:
13877 case ORDERED:
13878 libfunc = optab_libfunc (unord_optab, mode);
13879 code = (code == UNORDERED) ? NE : EQ;
13880 break;
13882 case UNGE:
13883 case UNGT:
13884 check_nan = true;
13885 libfunc = optab_libfunc (ge_optab, mode);
13886 code = (code == UNGE) ? GE : GT;
13887 break;
13889 case UNLE:
13890 case UNLT:
13891 check_nan = true;
13892 libfunc = optab_libfunc (le_optab, mode);
13893 code = (code == UNLE) ? LE : LT;
13894 break;
13896 case UNEQ:
13897 case LTGT:
13898 check_nan = true;
13899 libfunc = optab_libfunc (eq_optab, mode);
13900 code = (code = UNEQ) ? EQ : NE;
13901 break;
13903 default:
13904 gcc_unreachable ();
13907 gcc_assert (libfunc);
13909 if (!check_nan)
13910 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13911 SImode, op0, mode, op1, mode);
13913 /* The library signals an exception for signalling NaNs, so we need to
13914 handle isgreater, etc. by first checking isordered. */
13915 else
13917 rtx ne_rtx, normal_dest, unord_dest;
13918 rtx unord_func = optab_libfunc (unord_optab, mode);
13919 rtx join_label = gen_label_rtx ();
13920 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13921 rtx unord_cmp = gen_reg_rtx (comp_mode);
13924 /* Test for either value being a NaN. */
13925 gcc_assert (unord_func);
13926 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13927 SImode, op0, mode, op1, mode);
13929 /* Set value (0) if either value is a NaN, and jump to the join
13930 label. */
13931 dest = gen_reg_rtx (SImode);
13932 emit_move_insn (dest, const1_rtx);
13933 emit_insn (gen_rtx_SET (unord_cmp,
13934 gen_rtx_COMPARE (comp_mode, unord_dest,
13935 const0_rtx)));
13937 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13938 emit_jump_insn (gen_rtx_SET (pc_rtx,
13939 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13940 join_ref,
13941 pc_rtx)));
13943 /* Do the normal comparison, knowing that the values are not
13944 NaNs. */
13945 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13946 SImode, op0, mode, op1, mode);
13948 emit_insn (gen_cstoresi4 (dest,
13949 gen_rtx_fmt_ee (code, SImode, normal_dest,
13950 const0_rtx),
13951 normal_dest, const0_rtx));
13953 /* Join NaN and non-Nan paths. Compare dest against 0. */
13954 emit_label (join_label);
13955 code = NE;
13958 emit_insn (gen_rtx_SET (compare_result,
13959 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13962 else
13964 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13965 CLOBBERs to match cmptf_internal2 pattern. */
13966 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13967 && FLOAT128_IBM_P (GET_MODE (op0))
13968 && TARGET_HARD_FLOAT)
13969 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13970 gen_rtvec (10,
13971 gen_rtx_SET (compare_result,
13972 gen_rtx_COMPARE (comp_mode, op0, op1)),
13973 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13974 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13975 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13976 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13977 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13978 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13979 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13980 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13981 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
13982 else if (GET_CODE (op1) == UNSPEC
13983 && XINT (op1, 1) == UNSPEC_SP_TEST)
13985 rtx op1b = XVECEXP (op1, 0, 0);
13986 comp_mode = CCEQmode;
13987 compare_result = gen_reg_rtx (CCEQmode);
13988 if (TARGET_64BIT)
13989 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
13990 else
13991 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
13993 else
13994 emit_insn (gen_rtx_SET (compare_result,
13995 gen_rtx_COMPARE (comp_mode, op0, op1)));
13998 validate_condition_mode (code, GET_MODE (compare_result));
14000 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14004 /* Return the diagnostic message string if the binary operation OP is
14005 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14007 static const char*
14008 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14009 const_tree type1,
14010 const_tree type2)
14012 machine_mode mode1 = TYPE_MODE (type1);
14013 machine_mode mode2 = TYPE_MODE (type2);
14015 /* For complex modes, use the inner type. */
14016 if (COMPLEX_MODE_P (mode1))
14017 mode1 = GET_MODE_INNER (mode1);
14019 if (COMPLEX_MODE_P (mode2))
14020 mode2 = GET_MODE_INNER (mode2);
14022 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14023 double to intermix unless -mfloat128-convert. */
14024 if (mode1 == mode2)
14025 return NULL;
14027 if (!TARGET_FLOAT128_CVT)
14029 if ((mode1 == KFmode && mode2 == IFmode)
14030 || (mode1 == IFmode && mode2 == KFmode))
14031 return N_("__float128 and __ibm128 cannot be used in the same "
14032 "expression");
14034 if (TARGET_IEEEQUAD
14035 && ((mode1 == IFmode && mode2 == TFmode)
14036 || (mode1 == TFmode && mode2 == IFmode)))
14037 return N_("__ibm128 and long double cannot be used in the same "
14038 "expression");
14040 if (!TARGET_IEEEQUAD
14041 && ((mode1 == KFmode && mode2 == TFmode)
14042 || (mode1 == TFmode && mode2 == KFmode)))
14043 return N_("__float128 and long double cannot be used in the same "
14044 "expression");
14047 return NULL;
14051 /* Expand floating point conversion to/from __float128 and __ibm128. */
14053 void
14054 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
/* DEST and SRC must have different modes; UNSIGNED_P selects
   unsigned integer <-> float conversions.  Emits either a plain
   FLOAT_EXTEND move, a hardware conversion insn, or a libcall.  */
14056 machine_mode dest_mode = GET_MODE (dest);
14057 machine_mode src_mode = GET_MODE (src);
14058 convert_optab cvt = unknown_optab;
14059 bool do_move = false;
14060 rtx libfunc = NULL_RTX;
14061 rtx dest2;
14062 typedef rtx (*rtx_2func_t) (rtx, rtx);
14063 rtx_2func_t hw_convert = (rtx_2func_t)0;
/* Index into hw_conversions: 0 for KFmode, 1 for TFmode.  */
14064 size_t kf_or_tf;
14066 struct hw_conv_t {
14067 rtx_2func_t from_df;
14068 rtx_2func_t from_sf;
14069 rtx_2func_t from_si_sign;
14070 rtx_2func_t from_si_uns;
14071 rtx_2func_t from_di_sign;
14072 rtx_2func_t from_di_uns;
14073 rtx_2func_t to_df;
14074 rtx_2func_t to_sf;
14075 rtx_2func_t to_si_sign;
14076 rtx_2func_t to_si_uns;
14077 rtx_2func_t to_di_sign;
14078 rtx_2func_t to_di_uns;
14079 } hw_conversions[2] = {
14080 /* conversions to/from KFmode */
14082 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14083 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14084 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14085 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14086 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14087 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14088 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14089 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14090 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14091 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14092 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14093 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14096 /* conversions to/from TFmode */
14098 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14099 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14100 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14101 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14102 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14103 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14104 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14105 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14106 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14107 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14108 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14109 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
/* An identity conversion must not reach this function.  */
14113 if (dest_mode == src_mode)
14114 gcc_unreachable ();
14116 /* Eliminate memory operations. */
14117 if (MEM_P (src))
14118 src = force_reg (src_mode, src);
/* A memory destination is handled by converting into a temporary
   register first, then storing it.  */
14120 if (MEM_P (dest))
14122 rtx tmp = gen_reg_rtx (dest_mode);
14123 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14124 rs6000_emit_move (dest, tmp, dest_mode);
14125 return;
14128 /* Convert to IEEE 128-bit floating point. */
14129 if (FLOAT128_IEEE_P (dest_mode))
14131 if (dest_mode == KFmode)
14132 kf_or_tf = 0;
14133 else if (dest_mode == TFmode)
14134 kf_or_tf = 1;
14135 else
14136 gcc_unreachable ();
14138 switch (src_mode)
14140 case E_DFmode:
14141 cvt = sext_optab;
14142 hw_convert = hw_conversions[kf_or_tf].from_df;
14143 break;
14145 case E_SFmode:
14146 cvt = sext_optab;
14147 hw_convert = hw_conversions[kf_or_tf].from_sf;
14148 break;
14150 case E_KFmode:
14151 case E_IFmode:
14152 case E_TFmode:
14153 if (FLOAT128_IBM_P (src_mode))
14154 cvt = sext_optab;
14155 else
14156 do_move = true;
14157 break;
14159 case E_SImode:
14160 if (unsigned_p)
14162 cvt = ufloat_optab;
14163 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14165 else
14167 cvt = sfloat_optab;
14168 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14170 break;
14172 case E_DImode:
14173 if (unsigned_p)
14175 cvt = ufloat_optab;
14176 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14178 else
14180 cvt = sfloat_optab;
14181 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14183 break;
14185 default:
14186 gcc_unreachable ();
14190 /* Convert from IEEE 128-bit floating point. */
14191 else if (FLOAT128_IEEE_P (src_mode))
14193 if (src_mode == KFmode)
14194 kf_or_tf = 0;
14195 else if (src_mode == TFmode)
14196 kf_or_tf = 1;
14197 else
14198 gcc_unreachable ();
14200 switch (dest_mode)
14202 case E_DFmode:
14203 cvt = trunc_optab;
14204 hw_convert = hw_conversions[kf_or_tf].to_df;
14205 break;
14207 case E_SFmode:
14208 cvt = trunc_optab;
14209 hw_convert = hw_conversions[kf_or_tf].to_sf;
14210 break;
14212 case E_KFmode:
14213 case E_IFmode:
14214 case E_TFmode:
14215 if (FLOAT128_IBM_P (dest_mode))
14216 cvt = trunc_optab;
14217 else
14218 do_move = true;
14219 break;
14221 case E_SImode:
14222 if (unsigned_p)
14224 cvt = ufix_optab;
14225 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14227 else
14229 cvt = sfix_optab;
14230 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14232 break;
14234 case E_DImode:
14235 if (unsigned_p)
14237 cvt = ufix_optab;
14238 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14240 else
14242 cvt = sfix_optab;
14243 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14245 break;
14247 case E_DImode: is not reached here -- see default below.
14247 default:
14248 gcc_unreachable ();
14252 /* Both IBM format. */
14253 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14254 do_move = true;
14256 else
14257 gcc_unreachable ();
14259 /* Handle conversion between TFmode/KFmode/IFmode. */
14260 if (do_move)
14261 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14263 /* Handle conversion if we have hardware support. */
14264 else if (TARGET_FLOAT128_HW && hw_convert)
14265 emit_insn ((hw_convert) (dest, src));
14267 /* Call an external function to do the conversion. */
14268 else if (cvt != unknown_optab)
14270 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14271 gcc_assert (libfunc != NULL_RTX);
14273 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14274 src, src_mode);
14276 gcc_assert (dest2 != NULL_RTX);
14277 if (!rtx_equal_p (dest, dest2))
14278 emit_move_insn (dest, dest2);
14281 else
14282 gcc_unreachable ();
14284 return;
14288 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14289 can be used as that dest register. Return the dest register. */
/* The returned register is zero exactly when OP1 == OP2: computed as
   OP1 XOR OP2 when OP2 fits a logical immediate, else as OP1 - OP2.
   The caller compares the result against zero.  */
14292 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
/* Comparing against zero needs no extra instruction -- OP1 itself is
   the value to test.  */
14294 if (op2 == const0_rtx)
14295 return op1;
14297 if (GET_CODE (scratch) == SCRATCH)
14298 scratch = gen_reg_rtx (mode);
14300 if (logical_operand (op2, mode))
14301 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14302 else
14303 emit_insn (gen_rtx_SET (scratch,
14304 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14306 return scratch;
14309 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14310 requires this. The result is mode MODE. */
14312 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
/* X is the condition register holding the FP compare result.  Each
   supported CODE is the OR of exactly two of the four CR bits
   LT/GT/EQ/UN, collected into cond[] below.  */
14314 rtx cond[2];
14315 int n = 0;
14316 if (code == LTGT || code == LE || code == UNLT)
14317 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14318 if (code == LTGT || code == GE || code == UNGT)
14319 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14320 if (code == LE || code == GE || code == UNEQ)
14321 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14322 if (code == UNLT || code == UNGT || code == UNEQ)
14323 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
/* Any supported CODE must have selected exactly two bits above.  */
14325 gcc_assert (n == 2);
14327 rtx cc = gen_reg_rtx (CCEQmode);
14328 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14329 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14331 return cc;
/* Emit a store-flag sequence: set operands[0] to the 0/1 result of
   comparing per operands[1], whose operands have mode MODE.  */
14334 void
14335 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14337 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14338 rtx_code cond_code = GET_CODE (condition_rtx);
14340 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14341 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14343 else if (cond_code == NE
14344 || cond_code == GE || cond_code == LE
14345 || cond_code == GEU || cond_code == LEU
14346 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
/* These codes cannot be stored directly; materialize the reversed
   condition into a CCEQ register and test that for equality with 0
   instead.  */
14348 rtx not_result = gen_reg_rtx (CCEQmode);
14349 rtx not_op, rev_cond_rtx;
14350 machine_mode cc_mode;
14352 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14354 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14355 SImode, XEXP (condition_rtx, 0), const0_rtx);
14356 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14357 emit_insn (gen_rtx_SET (not_result, not_op))
14358 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14361 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14362 if (op_mode == VOIDmode)
14363 op_mode = GET_MODE (XEXP (operands[1], 1));
/* Store the flag in DImode when the comparison was 64-bit (or FP on a
   64-bit target), otherwise SImode.  */
14365 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14367 PUT_MODE (condition_rtx, DImode);
14368 convert_move (operands[0], condition_rtx, 0);
14370 else
14372 PUT_MODE (condition_rtx, SImode);
14373 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14377 /* Emit a branch of kind CODE to location LOC. */
14379 void
14380 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14382 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14383 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14384 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14385 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14388 /* Return the string to output a conditional branch to LABEL, which is
14389 the operand template of the label, or NULL if the branch is really a
14390 conditional return.
14392 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14393 condition code register and its mode specifies what kind of
14394 comparison we made.
14396 REVERSED is nonzero if we should reverse the sense of the comparison.
14398 INSN is the insn. */
14400 char *
14401 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
/* Returns a pointer into a static buffer, so the result must be
   consumed before the next call.  */
14403 static char string[64];
14404 enum rtx_code code = GET_CODE (op);
14405 rtx cc_reg = XEXP (op, 0);
14406 machine_mode mode = GET_MODE (cc_reg);
14407 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14408 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
/* A long branch conditionally skips over an unconditional "b", which
   itself inverts the sense; the condition is really reversed when
   exactly one of REVERSED and NEED_LONGBRANCH applies.  */
14409 int really_reversed = reversed ^ need_longbranch;
14410 char *s = string;
14411 const char *ccode;
14412 const char *pred;
14413 rtx note;
14415 validate_condition_mode (code, mode);
14417 /* Work out which way this really branches. We could use
14418 reverse_condition_maybe_unordered here always but this
14419 makes the resulting assembler clearer. */
14420 if (really_reversed)
14422 /* Reversal of FP compares takes care -- an ordered compare
14423 becomes an unordered compare and vice versa. */
14424 if (mode == CCFPmode)
14425 code = reverse_condition_maybe_unordered (code);
14426 else
14427 code = reverse_condition (code);
14430 switch (code)
14432 /* Not all of these are actually distinct opcodes, but
14433 we distinguish them for clarity of the resulting assembler. */
14434 case NE: case LTGT:
14435 ccode = "ne"; break;
14436 case EQ: case UNEQ:
14437 ccode = "eq"; break;
14438 case GE: case GEU:
14439 ccode = "ge"; break;
14440 case GT: case GTU: case UNGT:
14441 ccode = "gt"; break;
14442 case LE: case LEU:
14443 ccode = "le"; break;
14444 case LT: case LTU: case UNLT:
14445 ccode = "lt"; break;
14446 case UNORDERED: ccode = "un"; break;
14447 case ORDERED: ccode = "nu"; break;
14448 case UNGE: ccode = "nl"; break;
14449 case UNLE: ccode = "ng"; break;
14450 default:
14451 gcc_unreachable ();
14454 /* Maybe we have a guess as to how likely the branch is. */
14455 pred = "";
14456 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14457 if (note != NULL_RTX)
14459 /* PROB is the difference from 50%. */
14460 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14461 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14463 /* Only hint for highly probable/improbable branches on newer cpus when
14464 we have real profile data, as static prediction overrides processor
14465 dynamic prediction. For older cpus we may as well always hint, but
14466 assume not taken for branches that are very close to 50% as a
14467 mispredicted taken branch is more expensive than a
14468 mispredicted not-taken branch. */
14469 if (rs6000_always_hint
14470 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14471 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14472 && br_prob_note_reliable_p (note)))
14474 if (abs (prob) > REG_BR_PROB_BASE / 20
14475 && ((prob > 0) ^ need_longbranch))
14476 pred = "+";
14477 else
14478 pred = "-";
/* NULL label means a conditional return (b<cc>lr).  */
14482 if (label == NULL)
14483 s += sprintf (s, "b%slr%s ", ccode, pred);
14484 else
14485 s += sprintf (s, "b%s%s ", ccode, pred);
14487 /* We need to escape any '%' characters in the reg_names string.
14488 Assume they'd only be the first character.... */
14489 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14490 *s++ = '%';
14491 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14493 if (label != NULL)
14495 /* If the branch distance was too far, we may have to use an
14496 unconditional branch to go the distance. */
14497 if (need_longbranch)
14498 s += sprintf (s, ",$+8\n\tb %s", label);
14499 else
14500 s += sprintf (s, ",%s", label);
14503 return string;
14506 /* Return insn for VSX or Altivec comparisons. */
14508 static rtx
14509 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
/* Returns the mask register when CODE has a direct machine pattern,
   or NULL_RTX so the caller (rs6000_emit_vector_compare) can
   synthesize it from other comparisons.  */
14511 rtx mask;
14512 machine_mode mode = GET_MODE (op0);
14514 switch (code)
14516 default:
14517 break;
/* Integer vector GE has no direct pattern; the caller builds it as
   GT-or-EQ.  FP GE falls through and is emitted directly.  */
14519 case GE:
14520 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14521 return NULL_RTX;
14522 /* FALLTHRU */
14524 case EQ:
14525 case GT:
14526 case GTU:
14527 case ORDERED:
14528 case UNORDERED:
14529 case UNEQ:
14530 case LTGT:
14531 mask = gen_reg_rtx (mode);
14532 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14533 return mask;
14536 return NULL_RTX;
14539 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14540 DMODE is expected destination mode. This is a recursive function. */
14542 static rtx
14543 rs6000_emit_vector_compare (enum rtx_code rcode,
14544 rtx op0, rtx op1,
14545 machine_mode dmode)
/* Returns the mask register, or NULL_RTX if RCODE cannot be emitted
   or synthesized for DMODE.  */
14547 rtx mask;
14548 bool swap_operands = false;
14549 bool try_again = false;
14551 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14552 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14554 /* See if the comparison works as is. */
14555 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14556 if (mask)
14557 return mask;
14559 switch (rcode)
/* LT/LTU become GT/GTU with the operands swapped.  */
14561 case LT:
14562 rcode = GT;
14563 swap_operands = true;
14564 try_again = true;
14565 break;
14566 case LTU:
14567 rcode = GTU;
14568 swap_operands = true;
14569 try_again = true;
14570 break;
14571 case NE:
14572 case UNLE:
14573 case UNLT:
14574 case UNGE:
14575 case UNGT:
14576 /* Invert condition and try again.
14577 e.g., A != B becomes ~(A==B). */
14579 enum rtx_code rev_code;
14580 enum insn_code nor_code;
14581 rtx mask2;
14583 rev_code = reverse_condition_maybe_unordered (rcode);
14584 if (rev_code == UNKNOWN)
14585 return NULL_RTX;
14587 nor_code = optab_handler (one_cmpl_optab, dmode);
14588 if (nor_code == CODE_FOR_nothing)
14589 return NULL_RTX;
/* Recursive call: build the reversed comparison, then complement.  */
14591 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14592 if (!mask2)
14593 return NULL_RTX;
14595 mask = gen_reg_rtx (dmode);
14596 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14597 return mask;
14599 break;
14600 case GE:
14601 case GEU:
14602 case LE:
14603 case LEU:
14604 /* Try GT/GTU/LT/LTU OR EQ */
14606 rtx c_rtx, eq_rtx;
14607 enum insn_code ior_code;
14608 enum rtx_code new_code;
14610 switch (rcode)
14612 case GE:
14613 new_code = GT;
14614 break;
14616 case GEU:
14617 new_code = GTU;
14618 break;
14620 case LE:
14621 new_code = LT;
14622 break;
14624 case LEU:
14625 new_code = LTU;
14626 break;
14628 default:
14629 gcc_unreachable ();
14632 ior_code = optab_handler (ior_optab, dmode);
14633 if (ior_code == CODE_FOR_nothing)
14634 return NULL_RTX;
14636 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14637 if (!c_rtx)
14638 return NULL_RTX;
14640 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14641 if (!eq_rtx)
14642 return NULL_RTX;
14644 mask = gen_reg_rtx (dmode);
14645 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14646 return mask;
14648 break;
14649 default:
14650 return NULL_RTX;
14653 if (try_again)
14655 if (swap_operands)
14656 std::swap (op0, op1);
14658 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14659 if (mask)
14660 return mask;
14663 /* You only get two chances. */
14664 return NULL_RTX;
14667 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14668 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14669 operands for the relation operation COND. */
/* Returns 1 on success; 0 when the target cannot emit the conditional
   move (no vector unit, or the comparison cannot be synthesized).  */
14672 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14673 rtx cond, rtx cc_op0, rtx cc_op1)
14675 machine_mode dest_mode = GET_MODE (dest);
14676 machine_mode mask_mode = GET_MODE (cc_op0);
14677 enum rtx_code rcode = GET_CODE (cond);
14678 machine_mode cc_mode = CCmode;
14679 rtx mask;
14680 rtx cond2;
14681 bool invert_move = false;
14683 if (VECTOR_UNIT_NONE_P (dest_mode))
14684 return 0;
14686 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14687 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode))
14689 switch (rcode)
14691 /* Swap operands if we can, and fall back to doing the operation as
14692 specified, and doing a NOR to invert the test. */
14693 case NE:
14694 case UNLE:
14695 case UNLT:
14696 case UNGE:
14697 case UNGT:
14698 /* Invert condition and try again.
14699 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14700 invert_move = true;
14701 rcode = reverse_condition_maybe_unordered (rcode);
14702 if (rcode == UNKNOWN)
14703 return 0;
14704 break;
14706 case GE:
14707 case LE:
14708 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14710 /* Invert condition to avoid compound test. */
14711 invert_move = true;
14712 rcode = reverse_condition (rcode);
14714 break;
14716 case GTU:
14717 case GEU:
14718 case LTU:
14719 case LEU:
14720 /* Mark unsigned tests with CCUNSmode. */
14721 cc_mode = CCUNSmode;
14723 /* Invert condition to avoid compound test if necessary. */
14724 if (rcode == GEU || rcode == LEU)
14726 invert_move = true;
14727 rcode = reverse_condition (rcode);
14729 break;
14731 default:
14732 break;
14735 /* Get the vector mask for the given relational operations. */
14736 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14738 if (!mask)
14739 return 0;
/* If the condition was inverted above, compensate by swapping the
   two move arms.  */
14741 if (invert_move)
14742 std::swap (op_true, op_false);
14744 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
14745 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14746 && (GET_CODE (op_true) == CONST_VECTOR
14747 || GET_CODE (op_false) == CONST_VECTOR))
14749 rtx constant_0 = CONST0_RTX (dest_mode);
14750 rtx constant_m1 = CONSTM1_RTX (dest_mode);
/* Selecting all-ones vs all-zeros is just the mask itself (or its
   complement), so no conditional move is needed.  */
14752 if (op_true == constant_m1 && op_false == constant_0)
14754 emit_move_insn (dest, mask);
14755 return 1;
14758 else if (op_true == constant_0 && op_false == constant_m1)
14760 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14761 return 1;
14764 /* If we can't use the vector comparison directly, perhaps we can use
14765 the mask for the true or false fields, instead of loading up a
14766 constant. */
14767 if (op_true == constant_m1)
14768 op_true = mask;
14770 if (op_false == constant_0)
14771 op_false = mask;
14774 if (!REG_P (op_true) && !SUBREG_P (op_true))
14775 op_true = force_reg (dest_mode, op_true);
14777 if (!REG_P (op_false) && !SUBREG_P (op_false))
14778 op_false = force_reg (dest_mode, op_false);
14780 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14781 CONST0_RTX (dest_mode));
14782 emit_insn (gen_rtx_SET (dest,
14783 gen_rtx_IF_THEN_ELSE (dest_mode,
14784 cond2,
14785 op_true,
14786 op_false)));
14787 return 1;
14790 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
14791 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
14792 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14793 hardware has no such operation. */
14795 static int
14796 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14798 enum rtx_code code = GET_CODE (op);
14799 rtx op0 = XEXP (op, 0);
14800 rtx op1 = XEXP (op, 1);
14801 machine_mode compare_mode = GET_MODE (op0);
14802 machine_mode result_mode = GET_MODE (dest);
14803 bool max_p = false;
14805 if (result_mode != compare_mode)
14806 return 0;
14808 if (code == GE || code == GT)
14809 max_p = true;
14810 else if (code == LE || code == LT)
14811 max_p = false;
14812 else
14813 return 0;
14815 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14818 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
14819 max_p = !max_p;
14821 else
14822 return 0;
14824 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14825 return 1;
14828 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14829 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
14830 operands of the last comparison is nonzero/true, FALSE_COND if it is
14831 zero/false. Return 0 if the hardware has no such operation. */
14833 static int
14834 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14836 enum rtx_code code = GET_CODE (op);
14837 rtx op0 = XEXP (op, 0);
14838 rtx op1 = XEXP (op, 1);
14839 machine_mode result_mode = GET_MODE (dest);
14840 rtx compare_rtx;
14841 rtx cmove_rtx;
14842 rtx clobber_rtx;
14844 if (!can_create_pseudo_p ())
14845 return 0;
14847 switch (code)
14849 case EQ:
14850 case GE:
14851 case GT:
14852 break;
14854 case NE:
14855 case LT:
14856 case LE:
14857 code = swap_condition (code);
14858 std::swap (op0, op1);
14859 break;
14861 default:
14862 return 0;
14865 /* Generate: [(parallel [(set (dest)
14866 (if_then_else (op (cmp1) (cmp2))
14867 (true)
14868 (false)))
14869 (clobber (scratch))])]. */
14871 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14872 cmove_rtx = gen_rtx_SET (dest,
14873 gen_rtx_IF_THEN_ELSE (result_mode,
14874 compare_rtx,
14875 true_cond,
14876 false_cond));
14878 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14879 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14880 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14882 return 1;
14885 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
14886 operands of the last comparison is nonzero/true, FALSE_COND if it
14887 is zero/false. Return 0 if the hardware has no such operation. */
14890 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14892 enum rtx_code code = GET_CODE (op);
14893 rtx op0 = XEXP (op, 0);
14894 rtx op1 = XEXP (op, 1);
14895 machine_mode compare_mode = GET_MODE (op0);
14896 machine_mode result_mode = GET_MODE (dest);
14897 rtx temp;
14898 bool is_against_zero;
14900 /* These modes should always match. */
14901 if (GET_MODE (op1) != compare_mode
14902 /* In the isel case however, we can use a compare immediate, so
14903 op1 may be a small constant. */
14904 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14905 return 0;
14906 if (GET_MODE (true_cond) != result_mode)
14907 return 0;
14908 if (GET_MODE (false_cond) != result_mode)
14909 return 0;
14911 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14912 if (TARGET_P9_MINMAX
14913 && (compare_mode == SFmode || compare_mode == DFmode)
14914 && (result_mode == SFmode || result_mode == DFmode))
14916 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14917 return 1;
14919 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14920 return 1;
14923 /* Don't allow using floating point comparisons for integer results for
14924 now. */
14925 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14926 return 0;
14928 /* First, work out if the hardware can do this at all, or
14929 if it's too slow.... */
14930 if (!FLOAT_MODE_P (compare_mode))
14932 if (TARGET_ISEL)
14933 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14934 return 0;
14937 is_against_zero = op1 == CONST0_RTX (compare_mode);
14939 /* A floating-point subtract might overflow, underflow, or produce
14940 an inexact result, thus changing the floating-point flags, so it
14941 can't be generated if we care about that. It's safe if one side
14942 of the construct is zero, since then no subtract will be
14943 generated. */
14944 if (SCALAR_FLOAT_MODE_P (compare_mode)
14945 && flag_trapping_math && ! is_against_zero)
14946 return 0;
14948 /* Eliminate half of the comparisons by switching operands, this
14949 makes the remaining code simpler. */
14950 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
14951 || code == LTGT || code == LT || code == UNLE)
14953 code = reverse_condition_maybe_unordered (code);
14954 temp = true_cond;
14955 true_cond = false_cond;
14956 false_cond = temp;
14959 /* UNEQ and LTGT take four instructions for a comparison with zero,
14960 it'll probably be faster to use a branch here too. */
14961 if (code == UNEQ && HONOR_NANS (compare_mode))
14962 return 0;
14964 /* We're going to try to implement comparisons by performing
14965 a subtract, then comparing against zero. Unfortunately,
14966 Inf - Inf is NaN which is not zero, and so if we don't
14967 know that the operand is finite and the comparison
14968 would treat EQ different to UNORDERED, we can't do it. */
14969 if (HONOR_INFINITIES (compare_mode)
14970 && code != GT && code != UNGE
14971 && (!CONST_DOUBLE_P (op1)
14972 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
14973 /* Constructs of the form (a OP b ? a : b) are safe. */
14974 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
14975 || (! rtx_equal_p (op0, true_cond)
14976 && ! rtx_equal_p (op1, true_cond))))
14977 return 0;
14979 /* At this point we know we can use fsel. */
14981 /* Reduce the comparison to a comparison against zero. */
14982 if (! is_against_zero)
14984 temp = gen_reg_rtx (compare_mode);
14985 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
14986 op0 = temp;
14987 op1 = CONST0_RTX (compare_mode);
14990 /* If we don't care about NaNs we can reduce some of the comparisons
14991 down to faster ones. */
14992 if (! HONOR_NANS (compare_mode))
14993 switch (code)
14995 case GT:
14996 code = LE;
14997 temp = true_cond;
14998 true_cond = false_cond;
14999 false_cond = temp;
15000 break;
15001 case UNGE:
15002 code = GE;
15003 break;
15004 case UNEQ:
15005 code = EQ;
15006 break;
15007 default:
15008 break;
15011 /* Now, reduce everything down to a GE. */
15012 switch (code)
15014 case GE:
15015 break;
15017 case LE:
15018 temp = gen_reg_rtx (compare_mode);
15019 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15020 op0 = temp;
15021 break;
15023 case ORDERED:
15024 temp = gen_reg_rtx (compare_mode);
15025 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15026 op0 = temp;
15027 break;
15029 case EQ:
15030 temp = gen_reg_rtx (compare_mode);
15031 emit_insn (gen_rtx_SET (temp,
15032 gen_rtx_NEG (compare_mode,
15033 gen_rtx_ABS (compare_mode, op0))));
15034 op0 = temp;
15035 break;
15037 case UNGE:
15038 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15039 temp = gen_reg_rtx (result_mode);
15040 emit_insn (gen_rtx_SET (temp,
15041 gen_rtx_IF_THEN_ELSE (result_mode,
15042 gen_rtx_GE (VOIDmode,
15043 op0, op1),
15044 true_cond, false_cond)));
15045 false_cond = true_cond;
15046 true_cond = temp;
15048 temp = gen_reg_rtx (compare_mode);
15049 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15050 op0 = temp;
15051 break;
15053 case GT:
15054 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15055 temp = gen_reg_rtx (result_mode);
15056 emit_insn (gen_rtx_SET (temp,
15057 gen_rtx_IF_THEN_ELSE (result_mode,
15058 gen_rtx_GE (VOIDmode,
15059 op0, op1),
15060 true_cond, false_cond)));
15061 true_cond = false_cond;
15062 false_cond = temp;
15064 temp = gen_reg_rtx (compare_mode);
15065 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15066 op0 = temp;
15067 break;
15069 default:
15070 gcc_unreachable ();
15073 emit_insn (gen_rtx_SET (dest,
15074 gen_rtx_IF_THEN_ELSE (result_mode,
15075 gen_rtx_GE (VOIDmode,
15076 op0, op1),
15077 true_cond, false_cond)));
15078 return 1;
15081 /* Same as above, but for ints (isel). */
15084 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15086 rtx condition_rtx, cr;
15087 machine_mode mode = GET_MODE (dest);
15088 enum rtx_code cond_code;
15089 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15090 bool signedp;
15092 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15093 return 0;
15095 /* We still have to do the compare, because isel doesn't do a
15096 compare, it just looks at the CRx bits set by a previous compare
15097 instruction. */
15098 condition_rtx = rs6000_generate_compare (op, mode);
15099 cond_code = GET_CODE (condition_rtx);
15100 cr = XEXP (condition_rtx, 0);
15101 signedp = GET_MODE (cr) == CCmode;
15103 isel_func = (mode == SImode
15104 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15105 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15107 switch (cond_code)
15109 case LT: case GT: case LTU: case GTU: case EQ:
15110 /* isel handles these directly. */
15111 break;
15113 default:
15114 /* We need to swap the sense of the comparison. */
15116 std::swap (false_cond, true_cond);
15117 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15119 break;
15122 false_cond = force_reg (mode, false_cond);
15123 if (true_cond != const0_rtx)
15124 true_cond = force_reg (mode, true_cond);
15126 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15128 return 1;
15131 void
15132 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15134 machine_mode mode = GET_MODE (op0);
15135 enum rtx_code c;
15136 rtx target;
15138 /* VSX/altivec have direct min/max insns. */
15139 if ((code == SMAX || code == SMIN)
15140 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15141 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15143 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15144 return;
15147 if (code == SMAX || code == SMIN)
15148 c = GE;
15149 else
15150 c = GEU;
15152 if (code == SMAX || code == UMAX)
15153 target = emit_conditional_move (dest, c, op0, op1, mode,
15154 op0, op1, mode, 0);
15155 else
15156 target = emit_conditional_move (dest, c, op0, op1, mode,
15157 op1, op0, mode, 0);
15158 gcc_assert (target);
15159 if (target != dest)
15160 emit_move_insn (dest, target);
15163 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15164 COND is true. Mark the jump as unlikely to be taken. */
15166 static void
15167 emit_unlikely_jump (rtx cond, rtx label)
15169 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15170 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15171 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15174 /* A subroutine of the atomic operation splitters. Emit a load-locked
15175 instruction in MODE. For QI/HImode, possibly use a pattern than includes
15176 the zero_extend operation. */
15178 static void
15179 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15181 rtx (*fn) (rtx, rtx) = NULL;
15183 switch (mode)
15185 case E_QImode:
15186 fn = gen_load_lockedqi;
15187 break;
15188 case E_HImode:
15189 fn = gen_load_lockedhi;
15190 break;
15191 case E_SImode:
15192 if (GET_MODE (mem) == QImode)
15193 fn = gen_load_lockedqi_si;
15194 else if (GET_MODE (mem) == HImode)
15195 fn = gen_load_lockedhi_si;
15196 else
15197 fn = gen_load_lockedsi;
15198 break;
15199 case E_DImode:
15200 fn = gen_load_lockeddi;
15201 break;
15202 case E_TImode:
15203 fn = gen_load_lockedti;
15204 break;
15205 default:
15206 gcc_unreachable ();
15208 emit_insn (fn (reg, mem));
15211 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15212 instruction in MODE. */
15214 static void
15215 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15217 rtx (*fn) (rtx, rtx, rtx) = NULL;
15219 switch (mode)
15221 case E_QImode:
15222 fn = gen_store_conditionalqi;
15223 break;
15224 case E_HImode:
15225 fn = gen_store_conditionalhi;
15226 break;
15227 case E_SImode:
15228 fn = gen_store_conditionalsi;
15229 break;
15230 case E_DImode:
15231 fn = gen_store_conditionaldi;
15232 break;
15233 case E_TImode:
15234 fn = gen_store_conditionalti;
15235 break;
15236 default:
15237 gcc_unreachable ();
15240 /* Emit sync before stwcx. to address PPC405 Erratum. */
15241 if (PPC405_ERRATUM77)
15242 emit_insn (gen_hwsync ());
15244 emit_insn (fn (res, mem, val));
15247 /* Expand barriers before and after a load_locked/store_cond sequence. */
15249 static rtx
15250 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15252 rtx addr = XEXP (mem, 0);
15254 if (!legitimate_indirect_address_p (addr, reload_completed)
15255 && !legitimate_indexed_address_p (addr, reload_completed))
15257 addr = force_reg (Pmode, addr);
15258 mem = replace_equiv_address_nv (mem, addr);
15261 switch (model)
15263 case MEMMODEL_RELAXED:
15264 case MEMMODEL_CONSUME:
15265 case MEMMODEL_ACQUIRE:
15266 break;
15267 case MEMMODEL_RELEASE:
15268 case MEMMODEL_ACQ_REL:
15269 emit_insn (gen_lwsync ());
15270 break;
15271 case MEMMODEL_SEQ_CST:
15272 emit_insn (gen_hwsync ());
15273 break;
15274 default:
15275 gcc_unreachable ();
15277 return mem;
15280 static void
15281 rs6000_post_atomic_barrier (enum memmodel model)
15283 switch (model)
15285 case MEMMODEL_RELAXED:
15286 case MEMMODEL_CONSUME:
15287 case MEMMODEL_RELEASE:
15288 break;
15289 case MEMMODEL_ACQUIRE:
15290 case MEMMODEL_ACQ_REL:
15291 case MEMMODEL_SEQ_CST:
15292 emit_insn (gen_isync ());
15293 break;
15294 default:
15295 gcc_unreachable ();
15299 /* A subroutine of the various atomic expanders. For sub-word operations,
15300 we must adjust things to operate on SImode. Given the original MEM,
15301 return a new aligned memory. Also build and return the quantities by
15302 which to shift and mask. */
15304 static rtx
15305 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15307 rtx addr, align, shift, mask, mem;
15308 HOST_WIDE_INT shift_mask;
15309 machine_mode mode = GET_MODE (orig_mem);
15311 /* For smaller modes, we have to implement this via SImode. */
15312 shift_mask = (mode == QImode ? 0x18 : 0x10);
15314 addr = XEXP (orig_mem, 0);
15315 addr = force_reg (GET_MODE (addr), addr);
15317 /* Aligned memory containing subword. Generate a new memory. We
15318 do not want any of the existing MEM_ATTR data, as we're now
15319 accessing memory outside the original object. */
15320 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15321 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15322 mem = gen_rtx_MEM (SImode, align);
15323 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15324 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15325 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15327 /* Shift amount for subword relative to aligned word. */
15328 shift = gen_reg_rtx (SImode);
15329 addr = gen_lowpart (SImode, addr);
15330 rtx tmp = gen_reg_rtx (SImode);
15331 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15332 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15333 if (BYTES_BIG_ENDIAN)
15334 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15335 shift, 1, OPTAB_LIB_WIDEN);
15336 *pshift = shift;
15338 /* Mask for insertion. */
15339 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15340 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15341 *pmask = mask;
15343 return mem;
15346 /* A subroutine of the various atomic expanders. For sub-word operands,
15347 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
15349 static rtx
15350 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15352 rtx x;
15354 x = gen_reg_rtx (SImode);
15355 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15356 gen_rtx_NOT (SImode, mask),
15357 oldval)));
15359 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15361 return x;
15364 /* A subroutine of the various atomic expanders. For sub-word operands,
15365 extract WIDE to NARROW via SHIFT. */
15367 static void
15368 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15370 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15371 wide, 1, OPTAB_LIB_WIDEN);
15372 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15375 /* Expand an atomic compare and swap operation. */
15377 void
15378 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15380 rtx boolval, retval, mem, oldval, newval, cond;
15381 rtx label1, label2, x, mask, shift;
15382 machine_mode mode, orig_mode;
15383 enum memmodel mod_s, mod_f;
15384 bool is_weak;
15386 boolval = operands[0];
15387 retval = operands[1];
15388 mem = operands[2];
15389 oldval = operands[3];
15390 newval = operands[4];
15391 is_weak = (INTVAL (operands[5]) != 0);
15392 mod_s = memmodel_base (INTVAL (operands[6]));
15393 mod_f = memmodel_base (INTVAL (operands[7]));
15394 orig_mode = mode = GET_MODE (mem);
15396 mask = shift = NULL_RTX;
15397 if (mode == QImode || mode == HImode)
15399 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15400 lwarx and shift/mask operations. With power8, we need to do the
15401 comparison in SImode, but the store is still done in QI/HImode. */
15402 oldval = convert_modes (SImode, mode, oldval, 1);
15404 if (!TARGET_SYNC_HI_QI)
15406 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15408 /* Shift and mask OLDVAL into position with the word. */
15409 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15410 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15412 /* Shift and mask NEWVAL into position within the word. */
15413 newval = convert_modes (SImode, mode, newval, 1);
15414 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15415 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15418 /* Prepare to adjust the return value. */
15419 retval = gen_reg_rtx (SImode);
15420 mode = SImode;
15422 else if (reg_overlap_mentioned_p (retval, oldval))
15423 oldval = copy_to_reg (oldval);
15425 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15426 oldval = copy_to_mode_reg (mode, oldval);
15428 if (reg_overlap_mentioned_p (retval, newval))
15429 newval = copy_to_reg (newval);
15431 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15433 label1 = NULL_RTX;
15434 if (!is_weak)
15436 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15437 emit_label (XEXP (label1, 0));
15439 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15441 emit_load_locked (mode, retval, mem);
15443 x = retval;
15444 if (mask)
15445 x = expand_simple_binop (SImode, AND, retval, mask,
15446 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15448 cond = gen_reg_rtx (CCmode);
15449 /* If we have TImode, synthesize a comparison. */
15450 if (mode != TImode)
15451 x = gen_rtx_COMPARE (CCmode, x, oldval);
15452 else
15454 rtx xor1_result = gen_reg_rtx (DImode);
15455 rtx xor2_result = gen_reg_rtx (DImode);
15456 rtx or_result = gen_reg_rtx (DImode);
15457 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15458 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15459 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15460 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15462 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15463 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15464 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15465 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15468 emit_insn (gen_rtx_SET (cond, x));
15470 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15471 emit_unlikely_jump (x, label2);
15473 x = newval;
15474 if (mask)
15475 x = rs6000_mask_atomic_subword (retval, newval, mask);
15477 emit_store_conditional (orig_mode, cond, mem, x);
15479 if (!is_weak)
15481 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15482 emit_unlikely_jump (x, label1);
15485 if (!is_mm_relaxed (mod_f))
15486 emit_label (XEXP (label2, 0));
15488 rs6000_post_atomic_barrier (mod_s);
15490 if (is_mm_relaxed (mod_f))
15491 emit_label (XEXP (label2, 0));
15493 if (shift)
15494 rs6000_finish_atomic_subword (operands[1], retval, shift);
15495 else if (mode != GET_MODE (operands[1]))
15496 convert_move (operands[1], retval, 1);
15498 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15499 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15500 emit_insn (gen_rtx_SET (boolval, x));
15503 /* Expand an atomic exchange operation. */
15505 void
15506 rs6000_expand_atomic_exchange (rtx operands[])
15508 rtx retval, mem, val, cond;
15509 machine_mode mode;
15510 enum memmodel model;
15511 rtx label, x, mask, shift;
15513 retval = operands[0];
15514 mem = operands[1];
15515 val = operands[2];
15516 model = memmodel_base (INTVAL (operands[3]));
15517 mode = GET_MODE (mem);
15519 mask = shift = NULL_RTX;
15520 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15522 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15524 /* Shift and mask VAL into position with the word. */
15525 val = convert_modes (SImode, mode, val, 1);
15526 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15527 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15529 /* Prepare to adjust the return value. */
15530 retval = gen_reg_rtx (SImode);
15531 mode = SImode;
15534 mem = rs6000_pre_atomic_barrier (mem, model);
15536 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15537 emit_label (XEXP (label, 0));
15539 emit_load_locked (mode, retval, mem);
15541 x = val;
15542 if (mask)
15543 x = rs6000_mask_atomic_subword (retval, val, mask);
15545 cond = gen_reg_rtx (CCmode);
15546 emit_store_conditional (mode, cond, mem, x);
15548 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15549 emit_unlikely_jump (x, label);
15551 rs6000_post_atomic_barrier (model);
15553 if (shift)
15554 rs6000_finish_atomic_subword (operands[0], retval, shift);
15557 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15558 to perform. MEM is the memory on which to operate. VAL is the second
15559 operand of the binary operator. BEFORE and AFTER are optional locations to
15560 return the value of MEM either before of after the operation. MODEL_RTX
15561 is a CONST_INT containing the memory model to use. */
15563 void
15564 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15565 rtx orig_before, rtx orig_after, rtx model_rtx)
15567 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15568 machine_mode mode = GET_MODE (mem);
15569 machine_mode store_mode = mode;
15570 rtx label, x, cond, mask, shift;
15571 rtx before = orig_before, after = orig_after;
15573 mask = shift = NULL_RTX;
15574 /* On power8, we want to use SImode for the operation. On previous systems,
15575 use the operation in a subword and shift/mask to get the proper byte or
15576 halfword. */
15577 if (mode == QImode || mode == HImode)
15579 if (TARGET_SYNC_HI_QI)
15581 val = convert_modes (SImode, mode, val, 1);
15583 /* Prepare to adjust the return value. */
15584 before = gen_reg_rtx (SImode);
15585 if (after)
15586 after = gen_reg_rtx (SImode);
15587 mode = SImode;
15589 else
15591 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15593 /* Shift and mask VAL into position with the word. */
15594 val = convert_modes (SImode, mode, val, 1);
15595 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15596 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15598 switch (code)
15600 case IOR:
15601 case XOR:
15602 /* We've already zero-extended VAL. That is sufficient to
15603 make certain that it does not affect other bits. */
15604 mask = NULL;
15605 break;
15607 case AND:
15608 /* If we make certain that all of the other bits in VAL are
15609 set, that will be sufficient to not affect other bits. */
15610 x = gen_rtx_NOT (SImode, mask);
15611 x = gen_rtx_IOR (SImode, x, val);
15612 emit_insn (gen_rtx_SET (val, x));
15613 mask = NULL;
15614 break;
15616 case NOT:
15617 case PLUS:
15618 case MINUS:
15619 /* These will all affect bits outside the field and need
15620 adjustment via MASK within the loop. */
15621 break;
15623 default:
15624 gcc_unreachable ();
15627 /* Prepare to adjust the return value. */
15628 before = gen_reg_rtx (SImode);
15629 if (after)
15630 after = gen_reg_rtx (SImode);
15631 store_mode = mode = SImode;
15635 mem = rs6000_pre_atomic_barrier (mem, model);
15637 label = gen_label_rtx ();
15638 emit_label (label);
15639 label = gen_rtx_LABEL_REF (VOIDmode, label);
15641 if (before == NULL_RTX)
15642 before = gen_reg_rtx (mode);
15644 emit_load_locked (mode, before, mem);
15646 if (code == NOT)
15648 x = expand_simple_binop (mode, AND, before, val,
15649 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15650 after = expand_simple_unop (mode, NOT, x, after, 1);
15652 else
15654 after = expand_simple_binop (mode, code, before, val,
15655 after, 1, OPTAB_LIB_WIDEN);
15658 x = after;
15659 if (mask)
15661 x = expand_simple_binop (SImode, AND, after, mask,
15662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15663 x = rs6000_mask_atomic_subword (before, x, mask);
15665 else if (store_mode != mode)
15666 x = convert_modes (store_mode, mode, x, 1);
15668 cond = gen_reg_rtx (CCmode);
15669 emit_store_conditional (store_mode, cond, mem, x);
15671 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15672 emit_unlikely_jump (x, label);
15674 rs6000_post_atomic_barrier (model);
15676 if (shift)
15678 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15679 then do the calcuations in a SImode register. */
15680 if (orig_before)
15681 rs6000_finish_atomic_subword (orig_before, before, shift);
15682 if (orig_after)
15683 rs6000_finish_atomic_subword (orig_after, after, shift);
15685 else if (store_mode != mode)
15687 /* QImode/HImode on machines with lbarx/lharx where we do the native
15688 operation and then do the calcuations in a SImode register. */
15689 if (orig_before)
15690 convert_move (orig_before, before, 1);
15691 if (orig_after)
15692 convert_move (orig_after, after, 1);
15694 else if (orig_after && after != orig_after)
15695 emit_move_insn (orig_after, after);
15698 /* Emit instructions to move SRC to DST. Called by splitters for
15699 multi-register moves. It will emit at most one instruction for
15700 each register that is accessed; that is, it won't emit li/lis pairs
15701 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15702 register. */
15704 void
15705 rs6000_split_multireg_move (rtx dst, rtx src)
15707 /* The register number of the first register being moved. */
15708 int reg;
15709 /* The mode that is to be moved. */
15710 machine_mode mode;
15711 /* The mode that the move is being done in, and its size. */
15712 machine_mode reg_mode;
15713 int reg_mode_size;
15714 /* The number of registers that will be moved. */
15715 int nregs;
15717 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15718 mode = GET_MODE (dst);
15719 nregs = hard_regno_nregs (reg, mode);
15720 if (FP_REGNO_P (reg))
15721 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15722 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15723 else if (ALTIVEC_REGNO_P (reg))
15724 reg_mode = V16QImode;
15725 else
15726 reg_mode = word_mode;
15727 reg_mode_size = GET_MODE_SIZE (reg_mode);
15729 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15731 /* TDmode residing in FP registers is special, since the ISA requires that
15732 the lower-numbered word of a register pair is always the most significant
15733 word, even in little-endian mode. This does not match the usual subreg
15734 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
15735 the appropriate constituent registers "by hand" in little-endian mode.
15737 Note we do not need to check for destructive overlap here since TDmode
15738 can only reside in even/odd register pairs. */
15739 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15741 rtx p_src, p_dst;
15742 int i;
15744 for (i = 0; i < nregs; i++)
15746 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15747 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15748 else
15749 p_src = simplify_gen_subreg (reg_mode, src, mode,
15750 i * reg_mode_size);
15752 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15753 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15754 else
15755 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15756 i * reg_mode_size);
15758 emit_insn (gen_rtx_SET (p_dst, p_src));
15761 return;
15764 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15766 /* Move register range backwards, if we might have destructive
15767 overlap. */
15768 int i;
15769 for (i = nregs - 1; i >= 0; i--)
15770 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15771 i * reg_mode_size),
15772 simplify_gen_subreg (reg_mode, src, mode,
15773 i * reg_mode_size)));
15775 else
15777 int i;
15778 int j = -1;
15779 bool used_update = false;
15780 rtx restore_basereg = NULL_RTX;
15782 if (MEM_P (src) && INT_REGNO_P (reg))
15784 rtx breg;
15786 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15787 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15789 rtx delta_rtx;
15790 breg = XEXP (XEXP (src, 0), 0);
15791 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15792 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15793 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15794 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15795 src = replace_equiv_address (src, breg);
15797 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15799 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15801 rtx basereg = XEXP (XEXP (src, 0), 0);
15802 if (TARGET_UPDATE)
15804 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15805 emit_insn (gen_rtx_SET (ndst,
15806 gen_rtx_MEM (reg_mode,
15807 XEXP (src, 0))));
15808 used_update = true;
15810 else
15811 emit_insn (gen_rtx_SET (basereg,
15812 XEXP (XEXP (src, 0), 1)));
15813 src = replace_equiv_address (src, basereg);
15815 else
15817 rtx basereg = gen_rtx_REG (Pmode, reg);
15818 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15819 src = replace_equiv_address (src, basereg);
15823 breg = XEXP (src, 0);
15824 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15825 breg = XEXP (breg, 0);
15827 /* If the base register we are using to address memory is
15828 also a destination reg, then change that register last. */
15829 if (REG_P (breg)
15830 && REGNO (breg) >= REGNO (dst)
15831 && REGNO (breg) < REGNO (dst) + nregs)
15832 j = REGNO (breg) - REGNO (dst);
15834 else if (MEM_P (dst) && INT_REGNO_P (reg))
15836 rtx breg;
15838 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15839 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15841 rtx delta_rtx;
15842 breg = XEXP (XEXP (dst, 0), 0);
15843 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15844 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15845 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15847 /* We have to update the breg before doing the store.
15848 Use store with update, if available. */
15850 if (TARGET_UPDATE)
15852 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15853 emit_insn (TARGET_32BIT
15854 ? (TARGET_POWERPC64
15855 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15856 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15857 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15858 used_update = true;
15860 else
15861 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15862 dst = replace_equiv_address (dst, breg);
15864 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15865 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15867 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15869 rtx basereg = XEXP (XEXP (dst, 0), 0);
15870 if (TARGET_UPDATE)
15872 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15873 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15874 XEXP (dst, 0)),
15875 nsrc));
15876 used_update = true;
15878 else
15879 emit_insn (gen_rtx_SET (basereg,
15880 XEXP (XEXP (dst, 0), 1)));
15881 dst = replace_equiv_address (dst, basereg);
15883 else
15885 rtx basereg = XEXP (XEXP (dst, 0), 0);
15886 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15887 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15888 && REG_P (basereg)
15889 && REG_P (offsetreg)
15890 && REGNO (basereg) != REGNO (offsetreg));
15891 if (REGNO (basereg) == 0)
15893 rtx tmp = offsetreg;
15894 offsetreg = basereg;
15895 basereg = tmp;
15897 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15898 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15899 dst = replace_equiv_address (dst, basereg);
15902 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15903 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15906 for (i = 0; i < nregs; i++)
15908 /* Calculate index to next subword. */
15909 ++j;
15910 if (j == nregs)
15911 j = 0;
15913 /* If compiler already emitted move of first word by
15914 store with update, no need to do anything. */
15915 if (j == 0 && used_update)
15916 continue;
15918 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15919 j * reg_mode_size),
15920 simplify_gen_subreg (reg_mode, src, mode,
15921 j * reg_mode_size)));
15923 if (restore_basereg != NULL_RTX)
15924 emit_insn (restore_basereg);
/* Cached alias set used for TOC memory references; -1 until created.  */
15928 static GTY(()) alias_set_type TOC_alias_set = -1;

/* Return the alias set for the TOC, creating it lazily on first use.  */
15930 alias_set_type
15931 get_TOC_alias_set (void)
15933 if (TOC_alias_set == -1)
15934 TOC_alias_set = new_alias_set ();
15935 return TOC_alias_set;
15938 /* The mode the ABI uses for a word. This is not the same as word_mode
15939 for -m32 -mpowerpc64. This is used to implement various target hooks. */
15941 static scalar_int_mode
15942 rs6000_abi_word_mode (void)
15944 return TARGET_32BIT ? SImode : DImode;
15947 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15948 static char *
15949 rs6000_offload_options (void)
15951 if (TARGET_64BIT)
15952 return xstrdup ("-foffload-abi=lp64");
15953 else
15954 return xstrdup ("-foffload-abi=ilp32");
15958 /* A quick summary of the various types of 'constant-pool tables'
15959 under PowerPC:
15961 Target Flags Name One table per
15962 AIX (none) AIX TOC object file
15963 AIX -mfull-toc AIX TOC object file
15964 AIX -mminimal-toc AIX minimal TOC translation unit
15965 SVR4/EABI (none) SVR4 SDATA object file
15966 SVR4/EABI -fpic SVR4 pic object file
15967 SVR4/EABI -fPIC SVR4 PIC translation unit
15968 SVR4/EABI -mrelocatable EABI TOC function
15969 SVR4/EABI -maix AIX TOC object file
15970 SVR4/EABI -maix -mminimal-toc
15971 AIX minimal TOC translation unit
15973 Name Reg. Set by entries contains:
15974 made by addrs? fp? sum?
15976 AIX TOC 2 crt0 as Y option option
15977 AIX minimal TOC 30 prolog gcc Y Y option
15978 SVR4 SDATA 13 crt0 gcc N Y N
15979 SVR4 pic 30 prolog ld Y not yet N
15980 SVR4 PIC 30 prolog gcc Y option option
15981 EABI TOC 30 prolog gcc Y option option
15985 /* Hash functions for the hash table. */

/* Return a hash value for constant rtx K, mixing its code, its mode,
   and every operand (recursing into nested rtxes).  Used by the TOC
   hash table to detect duplicate entries.  */
15987 static unsigned
15988 rs6000_hash_constant (rtx k)
15990 enum rtx_code code = GET_CODE (k);
15991 machine_mode mode = GET_MODE (k);
15992 unsigned result = (code << 3) ^ mode;
15993 const char *format;
15994 int flen, fidx;
15996 format = GET_RTX_FORMAT (code);
15997 flen = strlen (format);
15998 fidx = 0;
16000 switch (code)
/* A label reference hashes by the UID of the insn it points at.  */
16002 case LABEL_REF:
16003 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
/* Wide ints: fold in each HOST_WIDE_INT element.  */
16005 case CONST_WIDE_INT:
16007 int i;
16008 flen = CONST_WIDE_INT_NUNITS (k);
16009 for (i = 0; i < flen; i++)
16010 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16011 return result;
16014 case CONST_DOUBLE:
16015 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
/* For CODE_LABELs, start the generic operand walk below at
   operand 3 instead of 0.  */
16017 case CODE_LABEL:
16018 fidx = 3;
16019 break;
16021 default:
16022 break;
/* Generic walk over the remaining operands, driven by the rtx
   format string.  */
16025 for (; fidx < flen; fidx++)
16026 switch (format[fidx])
16028 case 's':
16030 unsigned i, len;
16031 const char *str = XSTR (k, fidx);
16032 len = strlen (str);
16033 result = result * 613 + len;
16034 for (i = 0; i < len; i++)
16035 result = result * 613 + (unsigned) str[i];
16036 break;
16038 case 'u':
16039 case 'e':
16040 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16041 break;
16042 case 'i':
16043 case 'n':
16044 result = result * 613 + (unsigned) XINT (k, fidx);
16045 break;
/* A HOST_WIDE_INT can be wider than unsigned; if so, fold it in
   one unsigned-sized chunk at a time.  */
16046 case 'w':
16047 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16048 result = result * 613 + (unsigned) XWINT (k, fidx);
16049 else
16051 size_t i;
16052 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16053 result = result * 613 + (unsigned) (XWINT (k, fidx)
16054 >> CHAR_BIT * i);
16056 break;
16057 case '0':
16058 break;
16059 default:
16060 gcc_unreachable ();
16063 return result;
/* Hash a TOC entry: the constant's hash value xor'ed with its mode.  */
16066 hashval_t
16067 toc_hasher::hash (toc_hash_struct *thc)
16069 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16072 /* Compare H1 and H2 for equivalence. */

/* Two TOC entries are equal only when both the key mode and the
   key rtx compare equal.  */
16074 bool
16075 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16077 rtx r1 = h1->key;
16078 rtx r2 = h2->key;
16080 if (h1->key_mode != h2->key_mode)
16081 return 0;
16083 return rtx_equal_p (r1, r2);
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.

   NAME is a NUL-terminated assembler name; the macro is true when it
   carries one of the vtable/VTT/typeinfo prefixes.  Use the macro
   parameter (parenthesized) instead of capturing a variable called
   `name' from the call site, which made the macro unhygienic.  */

#define VTABLE_NAME_P(NAME) \
  (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
  || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
  || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
  || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
  || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
16098 #ifdef NO_DOLLAR_IN_LABEL
16099 /* Return a GGC-allocated character string translating dollar signs in
16100 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16102 const char *
16103 rs6000_xcoff_strip_dollar (const char *name)
16105 char *strip, *p;
16106 const char *q;
16107 size_t len;
16109 q = (const char *) strchr (name, '$');
/* Return NAME unchanged when it has no '$', or when the '$' is the
   very first character (that one is deliberately left alone).  */
16111 if (q == 0 || q == name)
16112 return name;
16114 len = strlen (name);
16115 strip = XALLOCAVEC (char, len + 1);
16116 strcpy (strip, name);
/* Replace every '$' from the first match onward with '_'.  */
16117 p = strip + (q - name);
16118 while (p)
16120 *p = '_';
16121 p = strchr (p + 1, '$');
16124 return ggc_alloc_string (strip, len);
16126 #endif
/* Output the assembler name of symbol reference X to FILE, using the
   bare identifier for vtable names (see the comment below) and
   assemble_name for everything else.  */
16128 void
16129 rs6000_output_symbol_ref (FILE *file, rtx x)
16131 const char *name = XSTR (x, 0);
16133 /* Currently C++ toc references to vtables can be emitted before it
16134 is decided whether the vtable is public or private. If this is
16135 the case, then the linker will eventually complain that there is
16136 a reference to an unknown section. Thus, for vtables only,
16137 we emit the TOC reference to reference the identifier and not the
16138 symbol. */
16139 if (VTABLE_NAME_P (name))
16141 RS6000_OUTPUT_BASENAME (file, name);
16143 else
16144 assemble_name (file, name);
16147 /* Output a TOC entry. We derive the entry name from what is being
16148 written. */

/* FILE receives the assembly, X is the constant being placed in the
   TOC, LABELNO is the internal label number to define for it, and
   MODE is the constant's machine mode.  */
16150 void
16151 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16153 char buf[256];
16154 const char *name = buf;
16155 rtx base = x;
16156 HOST_WIDE_INT offset = 0;
16158 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16160 /* When the linker won't eliminate them, don't output duplicate
16161 TOC entries (this happens on AIX if there is any kind of TOC,
16162 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16163 CODE_LABELs. */
16164 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16166 struct toc_hash_struct *h;
16168 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16169 time because GGC is not initialized at that point. */
16170 if (toc_hash_table == NULL)
16171 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16173 h = ggc_alloc<toc_hash_struct> ();
16174 h->key = x;
16175 h->key_mode = mode;
16176 h->labelno = labelno;
16178 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16179 if (*found == NULL)
16180 *found = h;
16181 else /* This is indeed a duplicate.
16182 Set this label equal to that label. */
16184 fputs ("\t.set ", file);
16185 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16186 fprintf (file, "%d,", labelno);
16187 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16188 fprintf (file, "%d\n", ((*found)->labelno));
16190 #ifdef HAVE_AS_TLS
16191 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16192 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16193 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16195 fputs ("\t.set ", file);
16196 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16197 fprintf (file, "%d,", labelno);
16198 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16199 fprintf (file, "%d\n", ((*found)->labelno));
16201 #endif
16202 return;
16206 /* If we're going to put a double constant in the TOC, make sure it's
16207 aligned properly when strict alignment is on. */
16208 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16209 && STRICT_ALIGNMENT
16210 && GET_MODE_BITSIZE (mode) >= 64
16211 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
16212 ASM_OUTPUT_ALIGN (file, 3);
16215 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16217 /* Handle FP constants specially. Note that if we have a minimal
16218 TOC, things we put here aren't actually in the TOC, so we can allow
16219 FP constants. */
16220 if (CONST_DOUBLE_P (x)
16221 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16222 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16224 long k[4];
16226 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16227 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16228 else
16229 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16231 if (TARGET_64BIT)
16233 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16234 fputs (DOUBLE_INT_ASM_OP, file);
16235 else
16236 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16237 k[0] & 0xffffffff, k[1] & 0xffffffff,
16238 k[2] & 0xffffffff, k[3] & 0xffffffff);
16239 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16240 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16241 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16242 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16243 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16244 return;
16246 else
16248 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16249 fputs ("\t.long ", file);
16250 else
16251 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16252 k[0] & 0xffffffff, k[1] & 0xffffffff,
16253 k[2] & 0xffffffff, k[3] & 0xffffffff);
16254 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16255 k[0] & 0xffffffff, k[1] & 0xffffffff,
16256 k[2] & 0xffffffff, k[3] & 0xffffffff);
16257 return;
16260 else if (CONST_DOUBLE_P (x)
16261 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16263 long k[2];
16265 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16266 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16267 else
16268 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16270 if (TARGET_64BIT)
16272 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16273 fputs (DOUBLE_INT_ASM_OP, file);
16274 else
16275 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16276 k[0] & 0xffffffff, k[1] & 0xffffffff);
16277 fprintf (file, "0x%lx%08lx\n",
16278 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16279 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16280 return;
16282 else
16284 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16285 fputs ("\t.long ", file);
16286 else
16287 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16288 k[0] & 0xffffffff, k[1] & 0xffffffff);
16289 fprintf (file, "0x%lx,0x%lx\n",
16290 k[0] & 0xffffffff, k[1] & 0xffffffff);
16291 return;
16294 else if (CONST_DOUBLE_P (x)
16295 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16297 long l;
16299 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16300 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16301 else
16302 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16304 if (TARGET_64BIT)
16306 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16307 fputs (DOUBLE_INT_ASM_OP, file);
16308 else
16309 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16310 if (WORDS_BIG_ENDIAN)
16311 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16312 else
16313 fprintf (file, "0x%lx\n", l & 0xffffffff);
16314 return;
16316 else
16318 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16319 fputs ("\t.long ", file);
16320 else
16321 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16322 fprintf (file, "0x%lx\n", l & 0xffffffff);
16323 return;
/* Integer constants (CONST_INTs are always VOIDmode).  */
16326 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16328 unsigned HOST_WIDE_INT low;
16329 HOST_WIDE_INT high;
16331 low = INTVAL (x) & 0xffffffff;
16332 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16334 /* TOC entries are always Pmode-sized, so when big-endian
16335 smaller integer constants in the TOC need to be padded.
16336 (This is still a win over putting the constants in
16337 a separate constant pool, because then we'd have
16338 to have both a TOC entry _and_ the actual constant.)
16340 For a 32-bit target, CONST_INT values are loaded and shifted
16341 entirely within `low' and can be stored in one TOC entry. */
16343 /* It would be easy to make this work, but it doesn't now. */
16344 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16346 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16348 low |= high << 32;
16349 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16350 high = (HOST_WIDE_INT) low >> 32;
16351 low &= 0xffffffff;
16354 if (TARGET_64BIT)
16356 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16357 fputs (DOUBLE_INT_ASM_OP, file);
16358 else
16359 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16360 (long) high & 0xffffffff, (long) low & 0xffffffff);
16361 fprintf (file, "0x%lx%08lx\n",
16362 (long) high & 0xffffffff, (long) low & 0xffffffff);
16363 return;
16365 else
16367 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16369 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16370 fputs ("\t.long ", file);
16371 else
16372 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16373 (long) high & 0xffffffff, (long) low & 0xffffffff);
16374 fprintf (file, "0x%lx,0x%lx\n",
16375 (long) high & 0xffffffff, (long) low & 0xffffffff);
16377 else
16379 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16380 fputs ("\t.long ", file);
16381 else
16382 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16383 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16385 return;
/* Symbolic references: peel off any constant offset first.  */
16389 if (GET_CODE (x) == CONST)
16391 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16392 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16394 base = XEXP (XEXP (x, 0), 0);
16395 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16398 switch (GET_CODE (base))
16400 case SYMBOL_REF:
16401 name = XSTR (base, 0);
16402 break;
16404 case LABEL_REF:
16405 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16406 CODE_LABEL_NUMBER (XEXP (base, 0)));
16407 break;
16409 case CODE_LABEL:
16410 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16411 break;
16413 default:
16414 gcc_unreachable ();
16417 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16418 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16419 else
16421 fputs ("\t.tc ", file);
16422 RS6000_OUTPUT_BASENAME (file, name);
/* Encode a nonzero offset into the entry name: .N for negative,
   .P for positive.  */
16424 if (offset < 0)
16425 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16426 else if (offset)
16427 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16429 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16430 after other TOC symbols, reducing overflow of small TOC access
16431 to [TC] symbols. */
16432 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16433 ? "[TE]," : "[TC],", file);
16436 /* Currently C++ toc references to vtables can be emitted before it
16437 is decided whether the vtable is public or private. If this is
16438 the case, then the linker will eventually complain that there is
16439 a TOC reference to an unknown section. Thus, for vtables only,
16440 we emit the TOC reference to reference the symbol and not the
16441 section. */
16442 if (VTABLE_NAME_P (name))
16444 RS6000_OUTPUT_BASENAME (file, name);
16445 if (offset < 0)
16446 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16447 else if (offset > 0)
16448 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16450 else
16451 output_addr_const (file, x);
/* Append the TLS access-model suffix on XCOFF when needed.  */
16453 #if HAVE_AS_TLS
16454 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16456 switch (SYMBOL_REF_TLS_MODEL (base))
16458 case 0:
16459 break;
16460 case TLS_MODEL_LOCAL_EXEC:
16461 fputs ("@le", file);
16462 break;
16463 case TLS_MODEL_INITIAL_EXEC:
16464 fputs ("@ie", file);
16465 break;
16466 /* Use global-dynamic for local-dynamic. */
16467 case TLS_MODEL_GLOBAL_DYNAMIC:
16468 case TLS_MODEL_LOCAL_DYNAMIC:
16469 putc ('\n', file);
16470 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16471 fputs ("\t.tc .", file);
16472 RS6000_OUTPUT_BASENAME (file, name);
16473 fputs ("[TC],", file);
16474 output_addr_const (file, x);
16475 fputs ("@m", file);
16476 break;
16477 default:
16478 gcc_unreachable ();
16481 #endif
16483 putc ('\n', file);
16486 /* Output an assembler pseudo-op to write an ASCII string of N characters
16487 starting at P to FILE.
16489 On the RS/6000, we have to do this using the .byte operation and
16490 write out special characters outside the quoted string.
16491 Also, the assembler is broken; very long strings are truncated,
16492 so we must artificially break them up early. */
16494 void
16495 output_ascii (FILE *file, const char *p, int n)
16497 char c;
16498 int i, count_string;
16499 const char *for_string = "\t.byte \"";
16500 const char *for_decimal = "\t.byte ";
16501 const char *to_close = NULL;
16503 count_string = 0;
16504 for (i = 0; i < n; i++)
16506 c = *p++;
/* Printable characters (space..0176) go inside a quoted string.  */
16507 if (c >= ' ' && c < 0177)
16509 if (for_string)
16510 fputs (for_string, file);
16511 putc (c, file);
16513 /* Write two quotes to get one. */
16514 if (c == '"')
16516 putc (c, file);
16517 ++count_string;
16520 for_string = NULL;
16521 for_decimal = "\"\n\t.byte ";
16522 to_close = "\"\n";
16523 ++count_string;
/* Chunk quoted strings at 512 characters to stay within the
   assembler's limits (see the header comment).  */
16525 if (count_string >= 512)
16527 fputs (to_close, file);
16529 for_string = "\t.byte \"";
16530 for_decimal = "\t.byte ";
16531 to_close = NULL;
16532 count_string = 0;
/* Non-printable characters are emitted as decimal .byte values.  */
16535 else
16537 if (for_decimal)
16538 fputs (for_decimal, file);
16539 fprintf (file, "%d", c);
16541 for_string = "\n\t.byte \"";
16542 for_decimal = ", ";
16543 to_close = "\n";
16544 count_string = 0;
16548 /* Now close the string if we have written one. Then end the line. */
16549 if (to_close)
16550 fputs (to_close, file);
16553 /* Generate a unique section name for FILENAME for a section type
16554 represented by SECTION_DESC. Output goes into BUF.
16556 SECTION_DESC can be any string, as long as it is different for each
16557 possible section type.
16559 We name the section in the same manner as xlc. The name begins with an
16560 underscore followed by the filename (after stripping any leading directory
16561 names) with the last period replaced by the string SECTION_DESC. If
16562 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16563 the name. */
16565 void
16566 rs6000_gen_section_name (char **buf, const char *filename,
16567 const char *section_desc)
16569 const char *q, *after_last_slash, *last_period = 0;
16570 char *p;
16571 int len;
/* Find the start of the basename and the last '.' in FILENAME.  */
16573 after_last_slash = filename;
16574 for (q = filename; *q; q++)
16576 if (*q == '/')
16577 after_last_slash = q + 1;
16578 else if (*q == '.')
16579 last_period = q;
/* NOTE(review): last_period may point into the directory part of
   FILENAME (e.g. "a.b/c"); the substitution loop below then never
   reaches it and SECTION_DESC is silently dropped -- confirm whether
   callers can pass such names.  */
/* One extra byte for the leading '_' and one for the NUL.  */
16582 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16583 *buf = (char *) xmalloc (len);
16585 p = *buf;
16586 *p++ = '_';
/* Copy the basename, keeping only alphanumerics, substituting
   SECTION_DESC at the last period.  */
16588 for (q = after_last_slash; *q; q++)
16590 if (q == last_period)
16592 strcpy (p, section_desc);
16593 p += strlen (section_desc);
16594 break;
16597 else if (ISALNUM (*q))
16598 *p++ = *q;
16601 if (last_period == 0)
16602 strcpy (p, section_desc);
16603 else
16604 *p = '\0';
16607 /* Emit profile function. */

/* Emit the RTL for the profiling call (_mcount via RS6000_MCOUNT),
   dispatching on DEFAULT_ABI.  LABELNO numbers the per-call-site
   counter label when profile counters are in use.  */
16609 void
16610 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16612 /* Non-standard profiling for kernels, which just saves LR then calls
16613 _mcount without worrying about arg saves. The idea is to change
16614 the function prologue as little as possible as it isn't easy to
16615 account for arg save/restore code added just for _mcount. */
16616 if (TARGET_PROFILE_KERNEL)
16617 return;
16619 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16621 #ifndef NO_PROFILE_COUNTERS
16622 # define NO_PROFILE_COUNTERS 0
16623 #endif
16624 if (NO_PROFILE_COUNTERS)
16625 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16626 LCT_NORMAL, VOIDmode);
16627 else
/* Pass the address of the "LP" counter label to _mcount.  */
16629 char buf[30];
16630 const char *label_name;
16631 rtx fun;
16633 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16634 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16635 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16637 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16638 LCT_NORMAL, VOIDmode, fun, Pmode);
16641 else if (DEFAULT_ABI == ABI_DARWIN)
16643 const char *mcount_name = RS6000_MCOUNT;
16644 int caller_addr_regno = LR_REGNO;
16646 /* Be conservative and always set this, at least for now. */
16647 crtl->uses_pic_offset_table = 1;
16649 #if TARGET_MACHO
16650 /* For PIC code, set up a stub and collect the caller's address
16651 from r0, which is where the prologue puts it. */
16652 if (MACHOPIC_INDIRECT
16653 && crtl->uses_pic_offset_table)
16654 caller_addr_regno = 0;
16655 #endif
16656 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16657 LCT_NORMAL, VOIDmode,
16658 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16662 /* Write function profiler code. */

/* Emit the textual assembly for the ABI_V4 profiling prologue to
   FILE; LABELNO numbers the per-function "LP" counter label.  The
   AIX/ELFv2/Darwin ABIs are handled in output_profile_hook instead.  */
16664 void
16665 output_function_profiler (FILE *file, int labelno)
16667 char buf[100];
16669 switch (DEFAULT_ABI)
16671 default:
16672 gcc_unreachable ();
16674 case ABI_V4:
16675 if (!TARGET_32BIT)
16677 warning (0, "no profiling of 64-bit code for this ABI");
16678 return;
16680 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16681 fprintf (file, "\tmflr %s\n", reg_names[0]);
/* With no counters, just save LR at 4(r1).  */
16682 if (NO_PROFILE_COUNTERS)
16684 asm_fprintf (file, "\tstw %s,4(%s)\n",
16685 reg_names[0], reg_names[1]);
/* Secure-PLT PIC: materialize the counter address PC-relatively.  */
16687 else if (TARGET_SECURE_PLT && flag_pic)
16689 if (TARGET_LINK_STACK)
16691 char name[32];
16692 get_ppc476_thunk_name (name);
16693 asm_fprintf (file, "\tbl %s\n", name);
16695 else
16696 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16697 asm_fprintf (file, "\tstw %s,4(%s)\n",
16698 reg_names[0], reg_names[1]);
16699 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16700 asm_fprintf (file, "\taddis %s,%s,",
16701 reg_names[12], reg_names[12]);
16702 assemble_name (file, buf);
16703 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16704 assemble_name (file, buf);
16705 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
/* Small-model PIC: load the counter address from the GOT.  */
16707 else if (flag_pic == 1)
16709 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16710 asm_fprintf (file, "\tstw %s,4(%s)\n",
16711 reg_names[0], reg_names[1]);
16712 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16713 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16714 assemble_name (file, buf);
16715 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16717 else if (flag_pic > 1)
16719 asm_fprintf (file, "\tstw %s,4(%s)\n",
16720 reg_names[0], reg_names[1]);
16721 /* Now, we need to get the address of the label. */
16722 if (TARGET_LINK_STACK)
16724 char name[32];
16725 get_ppc476_thunk_name (name);
16726 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16727 assemble_name (file, buf);
16728 fputs ("-.\n1:", file);
16729 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16730 asm_fprintf (file, "\taddi %s,%s,4\n",
16731 reg_names[11], reg_names[11]);
16733 else
16735 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16736 assemble_name (file, buf);
16737 fputs ("-.\n1:", file);
16738 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16740 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16741 reg_names[0], reg_names[11]);
16742 asm_fprintf (file, "\tadd %s,%s,%s\n",
16743 reg_names[0], reg_names[0], reg_names[11]);
/* Non-PIC: address the counter label directly via @ha/@l.  */
16745 else
16747 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16748 assemble_name (file, buf);
16749 fputs ("@ha\n", file);
16750 asm_fprintf (file, "\tstw %s,4(%s)\n",
16751 reg_names[0], reg_names[1]);
16752 asm_fprintf (file, "\tla %s,", reg_names[0]);
16753 assemble_name (file, buf);
16754 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16757 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16758 fprintf (file, "\tbl %s%s\n",
16759 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16760 break;
16762 case ABI_AIX:
16763 case ABI_ELFv2:
16764 case ABI_DARWIN:
16765 /* Don't do anything, done in output_profile_hook (). */
16766 break;
/* State shared by the instruction-scheduling hooks below.  */

16772 /* The following variable value is the last issued insn. */
16774 static rtx_insn *last_scheduled_insn;
16776 /* The following variable helps to balance issuing of load and
16777 store instructions */
16779 static int load_store_pendulum;
16781 /* The following variable helps pair divide insns during scheduling. */
16782 static int divide_cnt;
16783 /* The following variable helps pair and alternate vector and vector load
16784 insns during scheduling. */
16785 static int vec_pairing;
16788 /* Power4 load update and store update instructions are cracked into a
16789 load or store and an integer insn which are executed in the same cycle.
16790 Branches have their own dispatch slot which does not count against the
16791 GCC issue rate, but it changes the program flow so there are no other
16792 instructions to issue in this cycle. */

/* Worker for rs6000_variable_issue: record INSN as the last scheduled
   insn and return how many more insns may issue in the current cycle
   (starting from MORE), caching the result in cached_can_issue_more.  */
16794 static int
16795 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16797 last_scheduled_insn = insn;
/* USEs and CLOBBERs do not consume an issue slot.  */
16798 if (GET_CODE (PATTERN (insn)) == USE
16799 || GET_CODE (PATTERN (insn)) == CLOBBER)
16801 cached_can_issue_more = more;
16802 return cached_can_issue_more;
/* An insn that terminates the current group ends the cycle.  */
16805 if (insn_terminates_group_p (insn, current_group))
16807 cached_can_issue_more = 0;
16808 return cached_can_issue_more;
16811 /* If no reservation, but reach here */
16812 if (recog_memoized (insn) < 0)
16813 return more;
16815 if (rs6000_sched_groups)
/* Microcoded insns allow nothing more this cycle; cracked insns
   consume two slots; everything else consumes one.  */
16817 if (is_microcoded_insn (insn))
16818 cached_can_issue_more = 0;
16819 else if (is_cracked_insn (insn))
16820 cached_can_issue_more = more > 2 ? more - 2 : 0;
16821 else
16822 cached_can_issue_more = more - 1;
16824 return cached_can_issue_more;
/* On Cell, a non-pipelined insn blocks further issue this cycle.  */
16827 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16828 return 0;
16830 cached_can_issue_more = more - 1;
16831 return cached_can_issue_more;
/* Wrapper around rs6000_variable_issue_1 that also prints a trace
   line to STREAM when VERBOSE scheduling dumps are enabled.  */
16834 static int
16835 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16837 int r = rs6000_variable_issue_1 (insn, more);
16838 if (verbose)
16839 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16840 return r;
16843 /* Adjust the cost of a scheduling dependency. Return the new cost of
16844 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16846 static int
16847 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16848 unsigned int)
16850 enum attr_type attr_type;
16852 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16853 return cost;
16855 switch (dep_type)
16857 case REG_DEP_TRUE:
16859 /* Data dependency; DEP_INSN writes a register that INSN reads
16860 some cycles later. */
16862 /* Separate a load from a narrower, dependent store. */
16863 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16864 || rs6000_tune == PROCESSOR_FUTURE)
16865 && GET_CODE (PATTERN (insn)) == SET
16866 && GET_CODE (PATTERN (dep_insn)) == SET
16867 && MEM_P (XEXP (PATTERN (insn), 1))
16868 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16869 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16870 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16871 return cost + 14;
16873 attr_type = get_attr_type (insn);
16875 switch (attr_type)
16877 case TYPE_JMPREG:
16878 /* Tell the first scheduling pass about the latency between
16879 a mtctr and bctr (and mtlr and br/blr). The first
16880 scheduling pass will not know about this latency since
16881 the mtctr instruction, which has the latency associated
16882 to it, will be generated by reload. */
16883 return 4;
16884 case TYPE_BRANCH:
16885 /* Leave some extra cycles between a compare and its
16886 dependent branch, to inhibit expensive mispredicts. */
16887 if ((rs6000_tune == PROCESSOR_PPC603
16888 || rs6000_tune == PROCESSOR_PPC604
16889 || rs6000_tune == PROCESSOR_PPC604e
16890 || rs6000_tune == PROCESSOR_PPC620
16891 || rs6000_tune == PROCESSOR_PPC630
16892 || rs6000_tune == PROCESSOR_PPC750
16893 || rs6000_tune == PROCESSOR_PPC7400
16894 || rs6000_tune == PROCESSOR_PPC7450
16895 || rs6000_tune == PROCESSOR_PPCE5500
16896 || rs6000_tune == PROCESSOR_PPCE6500
16897 || rs6000_tune == PROCESSOR_POWER4
16898 || rs6000_tune == PROCESSOR_POWER5
16899 || rs6000_tune == PROCESSOR_POWER7
16900 || rs6000_tune == PROCESSOR_POWER8
16901 || rs6000_tune == PROCESSOR_POWER9
16902 || rs6000_tune == PROCESSOR_FUTURE
16903 || rs6000_tune == PROCESSOR_CELL)
16904 && recog_memoized (dep_insn)
16905 && (INSN_CODE (dep_insn) >= 0))
16907 switch (get_attr_type (dep_insn))
16909 case TYPE_CMP:
16910 case TYPE_FPCOMPARE:
16911 case TYPE_CR_LOGICAL:
16912 return cost + 2;
16913 case TYPE_EXTS:
16914 case TYPE_MUL:
16915 if (get_attr_dot (dep_insn) == DOT_YES)
16916 return cost + 2;
16917 else
16918 break;
16919 case TYPE_SHIFT:
16920 if (get_attr_dot (dep_insn) == DOT_YES
16921 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16922 return cost + 2;
16923 else
16924 break;
16925 default:
16926 break;
16928 break;
16930 case TYPE_STORE:
16931 case TYPE_FPSTORE:
16932 if ((rs6000_tune == PROCESSOR_POWER6)
16933 && recog_memoized (dep_insn)
16934 && (INSN_CODE (dep_insn) >= 0))
16937 if (GET_CODE (PATTERN (insn)) != SET)
16938 /* If this happens, we have to extend this to schedule
16939 optimally. Return default for now. */
16940 return cost;
16942 /* Adjust the cost for the case where the value written
16943 by a fixed point operation is used as the address
16944 gen value on a store. */
16945 switch (get_attr_type (dep_insn))
16947 case TYPE_LOAD:
16948 case TYPE_CNTLZ:
16950 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16951 return get_attr_sign_extend (dep_insn)
16952 == SIGN_EXTEND_YES ? 6 : 4;
16953 break;
16955 case TYPE_SHIFT:
16957 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16958 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16959 6 : 3;
16960 break;
16962 case TYPE_INTEGER:
16963 case TYPE_ADD:
16964 case TYPE_LOGICAL:
16965 case TYPE_EXTS:
16966 case TYPE_INSERT:
16968 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16969 return 3;
16970 break;
16972 case TYPE_STORE:
16973 case TYPE_FPLOAD:
16974 case TYPE_FPSTORE:
16976 if (get_attr_update (dep_insn) == UPDATE_YES
16977 && ! rs6000_store_data_bypass_p (dep_insn, insn))
16978 return 3;
16979 break;
16981 case TYPE_MUL:
16983 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16984 return 17;
16985 break;
16987 case TYPE_DIV:
16989 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16990 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
16991 break;
16993 default:
16994 break;
16997 break;
16999 case TYPE_LOAD:
17000 if ((rs6000_tune == PROCESSOR_POWER6)
17001 && recog_memoized (dep_insn)
17002 && (INSN_CODE (dep_insn) >= 0))
17005 /* Adjust the cost for the case where the value written
17006 by a fixed point instruction is used within the address
17007 gen portion of a subsequent load(u)(x) */
17008 switch (get_attr_type (dep_insn))
17010 case TYPE_LOAD:
17011 case TYPE_CNTLZ:
17013 if (set_to_load_agen (dep_insn, insn))
17014 return get_attr_sign_extend (dep_insn)
17015 == SIGN_EXTEND_YES ? 6 : 4;
17016 break;
17018 case TYPE_SHIFT:
17020 if (set_to_load_agen (dep_insn, insn))
17021 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17022 6 : 3;
17023 break;
17025 case TYPE_INTEGER:
17026 case TYPE_ADD:
17027 case TYPE_LOGICAL:
17028 case TYPE_EXTS:
17029 case TYPE_INSERT:
17031 if (set_to_load_agen (dep_insn, insn))
17032 return 3;
17033 break;
17035 case TYPE_STORE:
17036 case TYPE_FPLOAD:
17037 case TYPE_FPSTORE:
17039 if (get_attr_update (dep_insn) == UPDATE_YES
17040 && set_to_load_agen (dep_insn, insn))
17041 return 3;
17042 break;
17044 case TYPE_MUL:
17046 if (set_to_load_agen (dep_insn, insn))
17047 return 17;
17048 break;
17050 case TYPE_DIV:
17052 if (set_to_load_agen (dep_insn, insn))
17053 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17054 break;
17056 default:
17057 break;
17060 break;
17062 case TYPE_FPLOAD:
17063 if ((rs6000_tune == PROCESSOR_POWER6)
17064 && get_attr_update (insn) == UPDATE_NO
17065 && recog_memoized (dep_insn)
17066 && (INSN_CODE (dep_insn) >= 0)
17067 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17068 return 2;
17070 default:
17071 break;
17074 /* Fall out to return default cost. */
17076 break;
17078 case REG_DEP_OUTPUT:
17079 /* Output dependency; DEP_INSN writes a register that INSN writes some
17080 cycles later. */
17081 if ((rs6000_tune == PROCESSOR_POWER6)
17082 && recog_memoized (dep_insn)
17083 && (INSN_CODE (dep_insn) >= 0))
17085 attr_type = get_attr_type (insn);
17087 switch (attr_type)
17089 case TYPE_FP:
17090 case TYPE_FPSIMPLE:
17091 if (get_attr_type (dep_insn) == TYPE_FP
17092 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17093 return 1;
17094 break;
17095 case TYPE_FPLOAD:
17096 if (get_attr_update (insn) == UPDATE_NO
17097 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17098 return 2;
17099 break;
17100 default:
17101 break;
17104 /* Fall through, no cost for output dependency. */
17105 /* FALLTHRU */
17107 case REG_DEP_ANTI:
17108 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17109 cycles later. */
17110 return 0;
17112 default:
17113 gcc_unreachable ();
17116 return cost;
17119 /* Debug version of rs6000_adjust_cost. */
17121 static int
17122 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17123 int cost, unsigned int dw)
17125 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17127 if (ret != cost)
17129 const char *dep;
17131 switch (dep_type)
17133 default: dep = "unknown depencency"; break;
17134 case REG_DEP_TRUE: dep = "data dependency"; break;
17135 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17136 case REG_DEP_ANTI: dep = "anti depencency"; break;
17139 fprintf (stderr,
17140 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17141 "%s, insn:\n", ret, cost, dep);
17143 debug_rtx (insn);
17146 return ret;
17149 /* The function returns a true if INSN is microcoded.
17150 Return false otherwise. */
17152 static bool
17153 is_microcoded_insn (rtx_insn *insn)
17155 if (!insn || !NONDEBUG_INSN_P (insn)
17156 || GET_CODE (PATTERN (insn)) == USE
17157 || GET_CODE (PATTERN (insn)) == CLOBBER)
17158 return false;
17160 if (rs6000_tune == PROCESSOR_CELL)
17161 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17163 if (rs6000_sched_groups
17164 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17166 enum attr_type type = get_attr_type (insn);
17167 if ((type == TYPE_LOAD
17168 && get_attr_update (insn) == UPDATE_YES
17169 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17170 || ((type == TYPE_LOAD || type == TYPE_STORE)
17171 && get_attr_update (insn) == UPDATE_YES
17172 && get_attr_indexed (insn) == INDEXED_YES)
17173 || type == TYPE_MFCR)
17174 return true;
17177 return false;
/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  /* Ignore anything that is not a real, recognizable instruction.  */
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  /* Cracking is only modeled for the dispatch-group targets
     (POWER4/POWER5).  */
  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      /* The cracked forms, per the machine-description attributes:
	 sign-extending load, non-indexed load/store with update,
	 FP load/store with update, three-operand CR-logical, recording
	 (dot-form) extend/shift/multiply, any divide, and 32-bit
	 insert.  */
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || (type == TYPE_CR_LOGICAL
	      && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}
17225 /* The function returns true if INSN can be issued only from
17226 the branch slot. */
17228 static bool
17229 is_branch_slot_insn (rtx_insn *insn)
17231 if (!insn || !NONDEBUG_INSN_P (insn)
17232 || GET_CODE (PATTERN (insn)) == USE
17233 || GET_CODE (PATTERN (insn)) == CLOBBER)
17234 return false;
17236 if (rs6000_sched_groups)
17238 enum attr_type type = get_attr_type (insn);
17239 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17240 return true;
17241 return false;
17244 return false;
17247 /* The function returns true if out_inst sets a value that is
17248 used in the address generation computation of in_insn */
17249 static bool
17250 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17252 rtx out_set, in_set;
17254 /* For performance reasons, only handle the simple case where
17255 both loads are a single_set. */
17256 out_set = single_set (out_insn);
17257 if (out_set)
17259 in_set = single_set (in_insn);
17260 if (in_set)
17261 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17264 return false;
17267 /* Try to determine base/offset/size parts of the given MEM.
17268 Return true if successful, false if all the values couldn't
17269 be determined.
17271 This function only looks for REG or REG+CONST address forms.
17272 REG+REG address form will return false. */
17274 static bool
17275 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17276 HOST_WIDE_INT *size)
17278 rtx addr_rtx;
17279 if MEM_SIZE_KNOWN_P (mem)
17280 *size = MEM_SIZE (mem);
17281 else
17282 return false;
17284 addr_rtx = (XEXP (mem, 0));
17285 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17286 addr_rtx = XEXP (addr_rtx, 1);
17288 *offset = 0;
17289 while (GET_CODE (addr_rtx) == PLUS
17290 && CONST_INT_P (XEXP (addr_rtx, 1)))
17292 *offset += INTVAL (XEXP (addr_rtx, 1));
17293 addr_rtx = XEXP (addr_rtx, 0);
17295 if (!REG_P (addr_rtx))
17296 return false;
17298 *base = addr_rtx;
17299 return true;
17302 /* The function returns true if the target storage location of
17303 mem1 is adjacent to the target storage location of mem2 */
17304 /* Return 1 if memory locations are adjacent. */
17306 static bool
17307 adjacent_mem_locations (rtx mem1, rtx mem2)
17309 rtx reg1, reg2;
17310 HOST_WIDE_INT off1, size1, off2, size2;
17312 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17313 && get_memref_parts (mem2, &reg2, &off2, &size2))
17314 return ((REGNO (reg1) == REGNO (reg2))
17315 && ((off1 + size1 == off2)
17316 || (off2 + size2 == off1)));
17318 return false;
17321 /* This function returns true if it can be determined that the two MEM
17322 locations overlap by at least 1 byte based on base reg/offset/size. */
17324 static bool
17325 mem_locations_overlap (rtx mem1, rtx mem2)
17327 rtx reg1, reg2;
17328 HOST_WIDE_INT off1, size1, off2, size2;
17330 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17331 && get_memref_parts (mem2, &reg2, &off2, &size2))
17332 return ((REGNO (reg1) == REGNO (reg2))
17333 && (((off1 <= off2) && (off1 + size1 > off2))
17334 || ((off2 <= off1) && (off2 + size2 > off1))));
17336 return false;
/* A C statement (sans semicolon) to update the integer scheduling
   priority INSN_PRIORITY (INSN).  Increase the priority to execute the
   INSN earlier, reduce the priority to execute INSN later.  Do not
   define this macro if you do not need to adjust the scheduling
   priorities of insns.  */

static int
rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
  rtx load_mem, str_mem;
  /* On machines (like the 750) which have asymmetric integer units,
     where one integer unit can do multiply and divides and the other
     can't, reduce the priority of multiply/divide so it is scheduled
     before other integer operations.  */

#if 0
  /* NOTE(review): long-disabled PPC750 experiment kept for reference;
     never compiled.  */
  if (! INSN_P (insn))
    return priority;

  if (GET_CODE (PATTERN (insn)) == USE)
    return priority;

  switch (rs6000_tune) {
  case PROCESSOR_PPC750:
    switch (get_attr_type (insn))
      {
      default:
	break;

      case TYPE_MUL:
      case TYPE_DIV:
	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
		 priority, priority);
	if (priority >= 0 && priority < 0x01000000)
	  priority >>= 3;
	break;
      }
  }
#endif

  if (insn_must_be_first_in_group (insn)
      && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
    {
      /* Prioritize insns that can be dispatched only in the first
	 dispatch slot.  */
      if (rs6000_sched_restricted_insns_priority == 1)
	/* Attach highest priority to insn.  This means that in
	   haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
	   precede 'priority' (critical path) considerations.  */
	return current_sched_info->sched_max_insns_priority;
      else if (rs6000_sched_restricted_insns_priority == 2)
	/* Increase priority of insn by a minimal amount.  This means that in
	   haifa-sched.c:ready_sort(), only 'priority' (critical path)
	   considerations precede dispatch-slot restriction considerations.  */
	return (priority + 1);
    }

  if (rs6000_tune == PROCESSOR_POWER6
      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
	  || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this instruction
       is a store.  Power6 wants loads and stores scheduled alternately
       when possible */
    return current_sched_info->sched_max_insns_priority;

  return priority;
}
17411 /* Return true if the instruction is nonpipelined on the Cell. */
17412 static bool
17413 is_nonpipeline_insn (rtx_insn *insn)
17415 enum attr_type type;
17416 if (!insn || !NONDEBUG_INSN_P (insn)
17417 || GET_CODE (PATTERN (insn)) == USE
17418 || GET_CODE (PATTERN (insn)) == CLOBBER)
17419 return false;
17421 type = get_attr_type (insn);
17422 if (type == TYPE_MUL
17423 || type == TYPE_DIV
17424 || type == TYPE_SDIV
17425 || type == TYPE_DDIV
17426 || type == TYPE_SSQRT
17427 || type == TYPE_DSQRT
17428 || type == TYPE_MFCR
17429 || type == TYPE_MFCRF
17430 || type == TYPE_MFJMPR)
17432 return true;
17434 return false;
17438 /* Return how many instructions the machine can issue per cycle. */
17440 static int
17441 rs6000_issue_rate (void)
17443 /* Unless scheduling for register pressure, use issue rate of 1 for
17444 first scheduling pass to decrease degradation. */
17445 if (!reload_completed && !flag_sched_pressure)
17446 return 1;
17448 switch (rs6000_tune) {
17449 case PROCESSOR_RS64A:
17450 case PROCESSOR_PPC601: /* ? */
17451 case PROCESSOR_PPC7450:
17452 return 3;
17453 case PROCESSOR_PPC440:
17454 case PROCESSOR_PPC603:
17455 case PROCESSOR_PPC750:
17456 case PROCESSOR_PPC7400:
17457 case PROCESSOR_PPC8540:
17458 case PROCESSOR_PPC8548:
17459 case PROCESSOR_CELL:
17460 case PROCESSOR_PPCE300C2:
17461 case PROCESSOR_PPCE300C3:
17462 case PROCESSOR_PPCE500MC:
17463 case PROCESSOR_PPCE500MC64:
17464 case PROCESSOR_PPCE5500:
17465 case PROCESSOR_PPCE6500:
17466 case PROCESSOR_TITAN:
17467 return 2;
17468 case PROCESSOR_PPC476:
17469 case PROCESSOR_PPC604:
17470 case PROCESSOR_PPC604e:
17471 case PROCESSOR_PPC620:
17472 case PROCESSOR_PPC630:
17473 return 4;
17474 case PROCESSOR_POWER4:
17475 case PROCESSOR_POWER5:
17476 case PROCESSOR_POWER6:
17477 case PROCESSOR_POWER7:
17478 return 5;
17479 case PROCESSOR_POWER8:
17480 return 7;
17481 case PROCESSOR_POWER9:
17482 case PROCESSOR_FUTURE:
17483 return 6;
17484 default:
17485 return 1;
17489 /* Return how many instructions to look ahead for better insn
17490 scheduling. */
17492 static int
17493 rs6000_use_sched_lookahead (void)
17495 switch (rs6000_tune)
17497 case PROCESSOR_PPC8540:
17498 case PROCESSOR_PPC8548:
17499 return 4;
17501 case PROCESSOR_CELL:
17502 return (reload_completed ? 8 : 0);
17504 default:
17505 return 0;
17509 /* We are choosing insn from the ready queue. Return zero if INSN can be
17510 chosen. */
17511 static int
17512 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17514 if (ready_index == 0)
17515 return 0;
17517 if (rs6000_tune != PROCESSOR_CELL)
17518 return 0;
17520 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17522 if (!reload_completed
17523 || is_nonpipeline_insn (insn)
17524 || is_microcoded_insn (insn))
17525 return 1;
17527 return 0;
17530 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17531 and return true. */
17533 static bool
17534 find_mem_ref (rtx pat, rtx *mem_ref)
17536 const char * fmt;
17537 int i, j;
17539 /* stack_tie does not produce any real memory traffic. */
17540 if (tie_operand (pat, VOIDmode))
17541 return false;
17543 if (MEM_P (pat))
17545 *mem_ref = pat;
17546 return true;
17549 /* Recursively process the pattern. */
17550 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17552 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17554 if (fmt[i] == 'e')
17556 if (find_mem_ref (XEXP (pat, i), mem_ref))
17557 return true;
17559 else if (fmt[i] == 'E')
17560 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17562 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17563 return true;
17567 return false;
17570 /* Determine if PAT is a PATTERN of a load insn. */
17572 static bool
17573 is_load_insn1 (rtx pat, rtx *load_mem)
17575 if (!pat || pat == NULL_RTX)
17576 return false;
17578 if (GET_CODE (pat) == SET)
17579 return find_mem_ref (SET_SRC (pat), load_mem);
17581 if (GET_CODE (pat) == PARALLEL)
17583 int i;
17585 for (i = 0; i < XVECLEN (pat, 0); i++)
17586 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17587 return true;
17590 return false;
17593 /* Determine if INSN loads from memory. */
17595 static bool
17596 is_load_insn (rtx insn, rtx *load_mem)
17598 if (!insn || !INSN_P (insn))
17599 return false;
17601 if (CALL_P (insn))
17602 return false;
17604 return is_load_insn1 (PATTERN (insn), load_mem);
17607 /* Determine if PAT is a PATTERN of a store insn. */
17609 static bool
17610 is_store_insn1 (rtx pat, rtx *str_mem)
17612 if (!pat || pat == NULL_RTX)
17613 return false;
17615 if (GET_CODE (pat) == SET)
17616 return find_mem_ref (SET_DEST (pat), str_mem);
17618 if (GET_CODE (pat) == PARALLEL)
17620 int i;
17622 for (i = 0; i < XVECLEN (pat, 0); i++)
17623 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17624 return true;
17627 return false;
17630 /* Determine if INSN stores to memory. */
17632 static bool
17633 is_store_insn (rtx insn, rtx *str_mem)
17635 if (!insn || !INSN_P (insn))
17636 return false;
17638 return is_store_insn1 (PATTERN (insn), str_mem);
17641 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17643 static bool
17644 is_power9_pairable_vec_type (enum attr_type type)
17646 switch (type)
17648 case TYPE_VECSIMPLE:
17649 case TYPE_VECCOMPLEX:
17650 case TYPE_VECDIV:
17651 case TYPE_VECCMP:
17652 case TYPE_VECPERM:
17653 case TYPE_VECFLOAT:
17654 case TYPE_VECFDIV:
17655 case TYPE_VECDOUBLE:
17656 return true;
17657 default:
17658 break;
17660 return false;
17663 /* Returns whether the dependence between INSN and NEXT is considered
17664 costly by the given target. */
17666 static bool
17667 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17669 rtx insn;
17670 rtx next;
17671 rtx load_mem, str_mem;
17673 /* If the flag is not enabled - no dependence is considered costly;
17674 allow all dependent insns in the same group.
17675 This is the most aggressive option. */
17676 if (rs6000_sched_costly_dep == no_dep_costly)
17677 return false;
17679 /* If the flag is set to 1 - a dependence is always considered costly;
17680 do not allow dependent instructions in the same group.
17681 This is the most conservative option. */
17682 if (rs6000_sched_costly_dep == all_deps_costly)
17683 return true;
17685 insn = DEP_PRO (dep);
17686 next = DEP_CON (dep);
17688 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17689 && is_load_insn (next, &load_mem)
17690 && is_store_insn (insn, &str_mem))
17691 /* Prevent load after store in the same group. */
17692 return true;
17694 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17695 && is_load_insn (next, &load_mem)
17696 && is_store_insn (insn, &str_mem)
17697 && DEP_TYPE (dep) == REG_DEP_TRUE
17698 && mem_locations_overlap(str_mem, load_mem))
17699 /* Prevent load after store in the same group if it is a true
17700 dependence. */
17701 return true;
17703 /* The flag is set to X; dependences with latency >= X are considered costly,
17704 and will not be scheduled in the same group. */
17705 if (rs6000_sched_costly_dep <= max_dep_latency
17706 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17707 return true;
17709 return false;
17712 /* Return the next insn after INSN that is found before TAIL is reached,
17713 skipping any "non-active" insns - insns that will not actually occupy
17714 an issue slot. Return NULL_RTX if such an insn is not found. */
17716 static rtx_insn *
17717 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17719 if (insn == NULL_RTX || insn == tail)
17720 return NULL;
17722 while (1)
17724 insn = NEXT_INSN (insn);
17725 if (insn == NULL_RTX || insn == tail)
17726 return NULL;
17728 if (CALL_P (insn)
17729 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17730 || (NONJUMP_INSN_P (insn)
17731 && GET_CODE (PATTERN (insn)) != USE
17732 && GET_CODE (PATTERN (insn)) != CLOBBER
17733 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17734 break;
17736 return insn;
17739 /* Move instruction at POS to the end of the READY list. */
17741 static void
17742 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17744 rtx_insn *tmp;
17745 int i;
17747 tmp = ready[pos];
17748 for (i = pos; i < lastpos; i++)
17749 ready[i] = ready[i + 1];
17750 ready[lastpos] = tmp;
/* Do Power6 specific sched_reorder2 reordering of ready list.
   LASTPOS is the index of the last entry in READY.  Returns the
   cached can_issue_more value.  */

static int
power6_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
	 issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
	 cycle and we attempt to locate another load in the ready list to
	 issue with it.

       - If the pendulum is -2, then two stores have already been
	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase it's likelihood of being chosen first
	 in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
	 cycle and we attempt to locate another store in the ready list to
	 issue with it, preferring a store to an adjacent memory location to
	 facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase it's likelihood of being chosen first
	 in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
	   load/store instructions which make use of the LSU and which
	   would need to be accounted for to strictly model the behavior
	   of the machine.  Those instructions are currently unaccounted
	   for to help minimize compile time overhead of this code.  */
  int pos;
  rtx load_mem, str_mem;

  if (is_store_insn (last_scheduled_insn, &str_mem))
    /* Issuing a store, swing the load_store_pendulum to the left */
    load_store_pendulum--;
  else if (is_load_insn (last_scheduled_insn, &load_mem))
    /* Issuing a load, swing the load_store_pendulum to the right */
    load_store_pendulum++;
  else
    return cached_can_issue_more;

  /* If the pendulum is balanced, or there is only one instruction on
     the ready list, then all is well, so return.  */
  if ((load_store_pendulum == 0) || (lastpos <= 0))
    return cached_can_issue_more;

  if (load_store_pendulum == 1)
    {
      /* A load has been issued in this cycle.  Scan the ready list
	 for another load to issue with it */
      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_load_insn (ready[pos], &load_mem))
	    {
	      /* Found a load.  Move it to the head of the ready list,
		 and adjust it's priority so that it is more likely to
		 stay there */
	      move_to_end_of_ready (ready, pos, lastpos);

	      if (!sel_sched_p ()
		  && INSN_PRIORITY_KNOWN (ready[lastpos]))
		INSN_PRIORITY (ready[lastpos])++;
	      break;
	    }
	  pos--;
	}
    }
  else if (load_store_pendulum == -2)
    {
      /* Two stores have been issued in this cycle.  Increase the
	 priority of the first load in the ready list to favor it for
	 issuing in the next cycle. */
      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_load_insn (ready[pos], &load_mem)
	      && !sel_sched_p ()
	      && INSN_PRIORITY_KNOWN (ready[pos]))
	    {
	      INSN_PRIORITY (ready[pos])++;

	      /* Adjust the pendulum to account for the fact that a load
		 was found and increased in priority.  This is to prevent
		 increasing the priority of multiple loads */
	      load_store_pendulum--;

	      break;
	    }
	  pos--;
	}
    }
  else if (load_store_pendulum == -1)
    {
      /* A store has been issued in this cycle.  Scan the ready list for
	 another store to issue with it, preferring a store to an adjacent
	 memory location */
      int first_store_pos = -1;

      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_store_insn (ready[pos], &str_mem))
	    {
	      rtx str_mem2;
	      /* Maintain the index of the first store found on the
		 list */
	      if (first_store_pos == -1)
		first_store_pos = pos;

	      if (is_store_insn (last_scheduled_insn, &str_mem2)
		  && adjacent_mem_locations (str_mem, str_mem2))
		{
		  /* Found an adjacent store.  Move it to the head of the
		     ready list, and adjust it's priority so that it is
		     more likely to stay there */
		  move_to_end_of_ready (ready, pos, lastpos);

		  if (!sel_sched_p ()
		      && INSN_PRIORITY_KNOWN (ready[lastpos]))
		    INSN_PRIORITY (ready[lastpos])++;

		  first_store_pos = -1;

		  break;
		}
	    }
	  pos--;
	}

      if (first_store_pos >= 0)
	{
	  /* An adjacent store wasn't found, but a non-adjacent store was,
	     so move the non-adjacent store to the front of the ready
	     list, and adjust its priority so that it is more likely to
	     stay there. */
	  move_to_end_of_ready (ready, first_store_pos, lastpos);
	  if (!sel_sched_p ()
	      && INSN_PRIORITY_KNOWN (ready[lastpos]))
	    INSN_PRIORITY (ready[lastpos])++;
	}
    }
  else if (load_store_pendulum == 2)
    {
      /* Two loads have been issued in this cycle.  Increase the priority
	 of the first store in the ready list to favor it for issuing in
	 the next cycle. */
      pos = lastpos;

      while (pos >= 0)
	{
	  if (is_store_insn (ready[pos], &str_mem)
	      && !sel_sched_p ()
	      && INSN_PRIORITY_KNOWN (ready[pos]))
	    {
	      INSN_PRIORITY (ready[pos])++;

	      /* Adjust the pendulum to account for the fact that a store
		 was found and increased in priority.  This is to prevent
		 increasing the priority of multiple stores */
	      load_store_pendulum++;

	      break;
	    }
	  pos--;
	}
    }

  return cached_can_issue_more;
}
/* Do Power9 specific sched_reorder2 reordering of ready list.
   LASTPOS is the index of the last entry in READY.  Returns the
   cached can_issue_more value.  */

static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  int pos;
  enum attr_type type, type2;

  type = get_attr_type (last_scheduled_insn);

  /* Try to issue fixed point divides back-to-back in pairs so they will be
     routed to separate execution units and execute in parallel.  */
  if (type == TYPE_DIV && divide_cnt == 0)
    {
      /* First divide has been scheduled.  */
      divide_cnt = 1;

      /* Scan the ready list looking for another divide, if found move it
	 to the end of the list so it is chosen next.  */
      pos = lastpos;
      while (pos >= 0)
	{
	  if (recog_memoized (ready[pos]) >= 0
	      && get_attr_type (ready[pos]) == TYPE_DIV)
	    {
	      move_to_end_of_ready (ready, pos, lastpos);
	      break;
	    }
	  pos--;
	}
    }
  else
    {
      /* Last insn was the 2nd divide or not a divide, reset the counter.  */
      divide_cnt = 0;

      /* The best dispatch throughput for vector and vector load insns can be
	 achieved by interleaving a vector and vector load such that they'll
	 dispatch to the same superslice.  If this pairing cannot be achieved
	 then it is best to pair vector insns together and vector load insns
	 together.

	 To aid in this pairing, vec_pairing maintains the current state with
	 the following values:

	    0  : Initial state, no vecload/vector pairing has been started.

	    1  : A vecload or vector insn has been issued and a candidate for
		 pairing has been found and moved to the end of the ready
		 list.  */
      if (type == TYPE_VECLOAD)
	{
	  /* Issued a vecload.  */
	  if (vec_pairing == 0)
	    {
	      int vecload_pos = -1;
	      /* We issued a single vecload, look for a vector insn to pair it
		 with.  If one isn't found, try to pair another vecload.  */
	      pos = lastpos;
	      while (pos >= 0)
		{
		  if (recog_memoized (ready[pos]) >= 0)
		    {
		      type2 = get_attr_type (ready[pos]);
		      if (is_power9_pairable_vec_type (type2))
			{
			  /* Found a vector insn to pair with, move it to the
			     end of the ready list so it is scheduled next.  */
			  move_to_end_of_ready (ready, pos, lastpos);
			  vec_pairing = 1;
			  return cached_can_issue_more;
			}
		      else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
			/* Remember position of first vecload seen.  */
			vecload_pos = pos;
		    }
		  pos--;
		}
	      if (vecload_pos >= 0)
		{
		  /* Didn't find a vector to pair with but did find a vecload,
		     move it to the end of the ready list.  */
		  move_to_end_of_ready (ready, vecload_pos, lastpos);
		  vec_pairing = 1;
		  return cached_can_issue_more;
		}
	    }
	}
      else if (is_power9_pairable_vec_type (type))
	{
	  /* Issued a vector operation.  */
	  if (vec_pairing == 0)
	    {
	      int vec_pos = -1;
	      /* We issued a single vector insn, look for a vecload to pair it
		 with.  If one isn't found, try to pair another vector.  */
	      pos = lastpos;
	      while (pos >= 0)
		{
		  if (recog_memoized (ready[pos]) >= 0)
		    {
		      type2 = get_attr_type (ready[pos]);
		      if (type2 == TYPE_VECLOAD)
			{
			  /* Found a vecload insn to pair with, move it to the
			     end of the ready list so it is scheduled next.  */
			  move_to_end_of_ready (ready, pos, lastpos);
			  vec_pairing = 1;
			  return cached_can_issue_more;
			}
		      else if (is_power9_pairable_vec_type (type2)
			       && vec_pos == -1)
			/* Remember position of first vector insn seen.  */
			vec_pos = pos;
		    }
		  pos--;
		}
	      if (vec_pos >= 0)
		{
		  /* Didn't find a vecload to pair with but did find a vector
		     insn, move it to the end of the ready list.  */
		  move_to_end_of_ready (ready, vec_pos, lastpos);
		  vec_pairing = 1;
		  return cached_can_issue_more;
		}
	    }
	}

      /* We've either finished a vec/vecload pair, couldn't find an insn to
	 continue the current pair, or the last insn had nothing to do with
	 with pairing.  In any case, reset the state.  */
      vec_pairing = 0;
    }

  return cached_can_issue_more;
}
18080 /* We are about to begin issuing insns for this clock cycle. */
18082 static int
18083 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18084 rtx_insn **ready ATTRIBUTE_UNUSED,
18085 int *pn_ready ATTRIBUTE_UNUSED,
18086 int clock_var ATTRIBUTE_UNUSED)
18088 int n_ready = *pn_ready;
18090 if (sched_verbose)
18091 fprintf (dump, "// rs6000_sched_reorder :\n");
18093 /* Reorder the ready list, if the second to last ready insn
18094 is a nonepipeline insn. */
18095 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18097 if (is_nonpipeline_insn (ready[n_ready - 1])
18098 && (recog_memoized (ready[n_ready - 2]) > 0))
18099 /* Simply swap first two insns. */
18100 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18103 if (rs6000_tune == PROCESSOR_POWER6)
18104 load_store_pendulum = 0;
18106 return rs6000_issue_rate ();
18109 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18111 static int
18112 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18113 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18115 if (sched_verbose)
18116 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18118 /* Do Power6 dependent reordering if necessary. */
18119 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18120 return power6_sched_reorder2 (ready, *pn_ready - 1);
18122 /* Do Power9 dependent reordering if necessary. */
18123 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18124 && recog_memoized (last_scheduled_insn) >= 0)
18125 return power9_sched_reorder2 (ready, *pn_ready - 1);
18127 return cached_can_issue_more;
18130 /* Return whether the presence of INSN causes a dispatch group termination
18131 of group WHICH_GROUP.
18133 If WHICH_GROUP == current_group, this function will return true if INSN
18134 causes the termination of the current group (i.e, the dispatch group to
18135 which INSN belongs). This means that INSN will be the last insn in the
18136 group it belongs to.
18138 If WHICH_GROUP == previous_group, this function will return true if INSN
18139 causes the termination of the previous group (i.e, the dispatch group that
18140 precedes the group to which INSN belongs). This means that INSN will be
18141 the first insn in the group it belongs to). */
18143 static bool
18144 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18146 bool first, last;
18148 if (! insn)
18149 return false;
18151 first = insn_must_be_first_in_group (insn);
18152 last = insn_must_be_last_in_group (insn);
18154 if (first && last)
18155 return true;
18157 if (which_group == current_group)
18158 return last;
18159 else if (which_group == previous_group)
18160 return first;
18162 return false;
/* Return true if INSN must begin a new dispatch group on the current
   tuning target, based on its insn attributes.  */

static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  /* Notes, debug insns and bare USE/CLOBBER patterns never occupy a
     dispatch slot.  */
  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
	return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
	return true;

      if (!rs6000_sched_groups)
	return false;

      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_CR_LOGICAL:
	case TYPE_MTJMPR:
	case TYPE_MFJMPR:
	case TYPE_DIV:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	  return true;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_EXTS:
	case TYPE_CNTLZ:
	case TYPE_TRAP:
	case TYPE_MUL:
	case TYPE_INSERT:
	case TYPE_FPCOMPARE:
	case TYPE_MFCR:
	case TYPE_MTCR:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_SHIFT:
	  if (get_attr_dot (insn) == DOT_NO
	      || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	    return true;
	  else
	    break;
	case TYPE_DIV:
	  if (get_attr_size (insn) == SIZE_32)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  /* Update forms occupy extra resources and must lead a group.  */
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_DIV:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_MUL:
	case TYPE_SHIFT:
	case TYPE_EXTS:
	  /* Record-form (dot) insns must lead a group.  */
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_SYNC:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_VECSTORE:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_SHIFT:
	case TYPE_EXTS:
	case TYPE_MUL:
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
/* Return true if INSN must end its dispatch group on the current tuning
   target, based on its insn attributes.  Counterpart of
   insn_must_be_first_in_group.  */

static bool
insn_must_be_last_in_group (rtx_insn *insn)
{
  enum attr_type type;

  /* Notes, debug insns and bare USE/CLOBBER patterns never occupy a
     dispatch slot.  */
  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER4:
    case PROCESSOR_POWER5:
      if (is_microcoded_insn (insn))
	return true;

      if (is_branch_slot_insn (insn))
	return true;

      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_EXTS:
	case TYPE_CNTLZ:
	case TYPE_TRAP:
	case TYPE_MUL:
	case TYPE_FPCOMPARE:
	case TYPE_MFCR:
	case TYPE_MTCR:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_SHIFT:
	  if (get_attr_dot (insn) == DOT_NO
	      || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	    return true;
	  else
	    break;
	case TYPE_DIV:
	  if (get_attr_size (insn) == SIZE_32)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      && get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MFCR:
	case TYPE_MTCR:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      && get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.
   GROUP_INSNS holds up to issue_rate insns of the current group (NULL
   entries are vacant slots); a group is costly when any of its insns has
   a resolved forward dependence on NEXT_INSN that
   rs6000_is_costly_dependence considers expensive.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      /* Skip vacant slots.  */
      if (!insn)
	continue;

      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
	{
	  rtx next = DEP_CON (dep);

	  if (next == next_insn
	      && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
	    return true;
	}
    }

  return false;
}
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert-sched-nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).
   Returns the updated can_issue_more value.  */

static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
		 rtx_insn *next_insn, bool *group_end, int can_issue_more,
		 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  /* Nop insertion disabled?  */
  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
	     *group_count ,can_issue_more);

  /* Scheme (1): insert exactly as many nops as needed.  */
  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
	can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
	 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
	 a branch.  If next_insn is a branch, we insert 'can_issue_more' nops;
	 in this case the last nop will start a new group and the branch
	 will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
	can_issue_more--;

      /* Do we have a special group ending nop?  */
      if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
	  || rs6000_tune == PROCESSOR_POWER8)
	{
	  nop = gen_group_ending_nop ();
	  emit_insn_before (nop, next_insn);
	  can_issue_more = 0;
	}
      else
	while (can_issue_more > 0)
	  {
	    nop = gen_nop ();
	    emit_insn_before (nop, next_insn);
	    can_issue_more--;
	  }

      *group_end = true;
      return 0;
    }

  /* Scheme (2): insert exactly rs6000_sched_insert_nops nops.  */
  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
	 issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
	{
	  can_issue_more = issue_rate - 1;
	  (*group_count)++;
	  end = true;
	  for (i = 0; i < issue_rate; i++)
	    {
	      group_insns[i] = 0;
	    }
	}

      while (n_nops > 0)
	{
	  nop = gen_nop ();
	  emit_insn_before (nop, next_insn);
	  if (can_issue_more == issue_rate - 1) /* new group begins */
	    end = false;
	  can_issue_more--;
	  if (can_issue_more == 0)
	    {
	      can_issue_more = issue_rate - 1;
	      (*group_count)++;
	      end = true;
	      for (i = 0; i < issue_rate; i++)
		{
		  group_insns[i] = 0;
		}
	    }
	  n_nops--;
	}

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
	= (end
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate &&
	       insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
	(*group_count)--;

      if (sched_verbose > 6)
	fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
		 *group_count, can_issue_more);
      return can_issue_more;
    }

  return can_issue_more;
}
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_groups' which tries to force the scheduler's grouping on the
   processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form
   as follows: It keeps track of how many vacant issue slots are available
   after each insn.  A subsequent insn will start a new group if one of the
   following 4 cases applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or less issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - less than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.

   Returns the number of dispatch groups found.  */

static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
		 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    {
      group_insns[i] = 0;
    }
  can_issue_more = issue_rate;
  slot = 0;
  insn = get_next_active_insn (prev_head_insn, tail);
  group_end = false;

  while (insn != NULL_RTX)
    {
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
	can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
	return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
	= (can_issue_more == 0
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate &&
	       insn_terminates_group_p (next_insn, previous_group)));

      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
					next_insn, &group_end, can_issue_more,
					&group_count);

      if (group_end)
	{
	  group_count++;
	  can_issue_more = 0;
	  for (i = 0; i < issue_rate; i++)
	    {
	      group_insns[i] = 0;
	    }
	}

      /* TImode on an insn marks the start of a dispatch group; keep the
	 marking consistent with our estimated grouping.  */
      if (GET_MODE (next_insn) == TImode && can_issue_more)
	PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
	PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
    } /* while */

  return group_count;
}
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
	    rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      /* TImode on next_insn marks a scheduler group boundary.  */
      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
	break;

      if (group_end)
	{
	  /* If the scheduler had marked group termination at this location
	     (between insn and next_insn), and neither insn nor next_insn will
	     force group termination, pad the group with nops to force group
	     termination.  */
	  if (can_issue_more
	      && (rs6000_sched_insert_nops == sched_finish_pad_groups)
	      && !insn_terminates_group_p (insn, current_group)
	      && !insn_terminates_group_p (next_insn, previous_group))
	    {
	      /* The branch slot cannot hold a nop.  */
	      if (!is_branch_slot_insn (next_insn))
		can_issue_more--;

	      while (can_issue_more)
		{
		  nop = gen_nop ();
		  emit_insn_before (nop, next_insn);
		  can_issue_more--;
		}
	    }

	  can_issue_more = issue_rate;
	  group_count++;
	}

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
/* Implement TARGET_SCHED_INIT.  We're beginning a new block.  Initialize
   the per-block scheduling state as necessary.  */

static void
rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		   int sched_verbose ATTRIBUTE_UNUSED,
		   int max_ready ATTRIBUTE_UNUSED)
{
  last_scheduled_insn = NULL;
  load_store_pendulum = 0;
  divide_cnt = 0;
  vec_pairing = 0;
}
/* Implement TARGET_SCHED_FINISH.  The following function is called at the
   end of scheduling BB.  After reload, it inserts nops at insn group
   bundling, either padding the scheduler's groups (pad_groups) or
   re-estimating the processor's grouping (redefine_groups), per
   -minsert-sched-nops.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
	return;

      if (rs6000_sched_insert_nops == sched_finish_none)
	return;

      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
	n_groups = pad_groups (dump, sched_verbose,
			       current_sched_info->prev_head,
			       current_sched_info->next_tail);
      else
	n_groups = redefine_groups (dump, sched_verbose,
				    current_sched_info->prev_head,
				    current_sched_info->next_tail);

      if (sched_verbose >= 6)
	{
	  fprintf (dump, "ngroups = %d\n", n_groups);
	  print_rtl (dump, current_sched_info->prev_head);
	  fprintf (dump, "Done finish_sched\n");
	}
    }
}
/* Snapshot of the global scheduling state, saved/restored around
   selective scheduling (see rs6000_init_sched_context and
   rs6000_set_sched_context, which mirror these fields against the
   file-scope globals of the same names).  */
struct rs6000_sched_context
{
  short cached_can_issue_more;	  /* Mirror of global cached_can_issue_more.  */
  rtx_insn *last_scheduled_insn;  /* Mirror of global last_scheduled_insn.  */
  int load_store_pendulum;	  /* Mirror of global load_store_pendulum.  */
  int divide_cnt;		  /* Mirror of global divide_cnt.  */
  int vec_pairing;		  /* Mirror of global vec_pairing.  */
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;
/* Allocate store for new scheduling context.  Caller owns the returned
   storage and releases it via rs6000_free_sched_context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}
/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      /* Snapshot the current global scheduling state.  */
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}
/* Free _SC, a context allocated by rs6000_alloc_sched_context.  */
static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
18908 static bool
18909 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18911 switch (get_attr_type (insn))
18913 case TYPE_DIV:
18914 case TYPE_SDIV:
18915 case TYPE_DDIV:
18916 case TYPE_VECDIV:
18917 case TYPE_SSQRT:
18918 case TYPE_DSQRT:
18919 return false;
18921 default:
18922 return true;
18926 /* Length in units of the trampoline for entering a nested function. */
18929 rs6000_trampoline_size (void)
18931 int ret = 0;
18933 switch (DEFAULT_ABI)
18935 default:
18936 gcc_unreachable ();
18938 case ABI_AIX:
18939 ret = (TARGET_32BIT) ? 12 : 24;
18940 break;
18942 case ABI_ELFv2:
18943 gcc_assert (!TARGET_32BIT);
18944 ret = 32;
18945 break;
18947 case ABI_DARWIN:
18948 case ABI_V4:
18949 ret = (TARGET_32BIT) ? 40 : 48;
18950 break;
18953 return ret;
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.
   M_TRAMP is the memory block holding the trampoline.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3 word function descriptor */
    case ABI_AIX:
      {
	rtx fnmem, fn_reg, toc_reg;

	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
	  error ("you cannot take the address of a nested function if you use "
		 "the %qs option", "-mno-pointers-to-nested-functions");

	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
	fn_reg = gen_reg_rtx (Pmode);
	toc_reg = gen_reg_rtx (Pmode);

	/* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

	m_tramp = replace_equiv_address (m_tramp, addr);

	/* Copy the code address and TOC pointer from the source
	   descriptor, then store the static chain in the third word.  */
	emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
	emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
	emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
	emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
	emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);

# undef MEM_PLUS
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
			 LCT_NORMAL, VOIDmode,
			 addr, Pmode,
			 GEN_INT (rs6000_trampoline_size ()), SImode,
			 fnaddr, Pmode,
			 ctx_reg, Pmode);
      break;
    }
}
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}
/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

     __attribute__((altivec(vector__)))
     __attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
     __attribute__((altivec(bool__)))	(always followed by 'unsigned')

   and may appear more than once (e.g., 'vector bool char') in a
   given declaration.  NODE is the type being modified; the remaining
   arguments follow the struct attribute_spec.handler convention.  */

static tree
rs6000_handle_altivec_attribute (tree *node,
				 tree name ATTRIBUTE_UNUSED,
				 tree args,
				 int flags ATTRIBUTE_UNUSED,
				 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  /* First letter of the attribute argument: 'v', 'p' or 'b'.  */
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
	&& TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  /* Strip pointers, functions and arrays down to the element type.  */
  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
	{
	  if (TARGET_64BIT)
	    error ("use of %<long%> in AltiVec types is invalid for "
		   "64-bit code without %qs", "-mvsx");
	  else if (rs6000_warn_altivec_long)
	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
		     "use %<int%>");
	}
      else if (type == long_long_unsigned_type_node
	       || type == long_long_integer_type_node)
	error ("use of %<long long%> in AltiVec types is invalid without %qs",
	       "-mvsx");
      else if (type == double_type_node)
	error ("use of %<double%> in AltiVec types is invalid without %qs",
	       "-mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
	{
	case E_TImode:
	  result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
	  break;
	case E_DImode:
	  result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
	  break;
	case E_SImode:
	  result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
	  break;
	case E_HImode:
	  result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
	  break;
	case E_QImode:
	  result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
	  break;
	case E_SFmode: result = V4SF_type_node; break;
	case E_DFmode: result = V2DF_type_node; break;
	  /* If the user says 'vector int bool', we may be handed the 'bool'
	     attribute _before_ the 'vector' attribute, and so select the
	     proper type in the 'b' case below.  */
	case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
	case E_V2DImode: case E_V2DFmode:
	  result = type;
	  /* FALLTHRU */
	default: break;
	}
      break;
    case 'b':
      switch (mode)
	{
	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
	  /* FALLTHRU */
	default: break;
	}
      break;
    case 'p':
      switch (mode)
	{
	case E_V8HImode: result = pixel_V8HI_type_node;
	  /* FALLTHRU */
	default: break;
	}
      /* FALLTHRU */
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* AltiVec defines five built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  The 128-bit
   floating point mangling is target-specific as well.  Returns the
   mangling string, or NULL to use the default mangling.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  /* Only scalar void/bool/integer/real types get special mangling.  */
  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_long_type_node) return "U6__boolx";

  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
    return "g";
  if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
    /* Keep the GCC 8.1 mangling when compatibility mode is requested.  */
    return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";

  /* For all other types, use the default mangling.  */
  return NULL;
}
/* Handle a "longcall" or "shortcall" attribute; arguments as in
   struct attribute_spec.handler.  Warn and drop the attribute when it is
   applied to something other than a function type or declaration.  */

static tree
rs6000_handle_longcall_attribute (tree *node, tree name,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */
static void
rs6000_set_default_type_attributes (tree type)
{
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
	  || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
					NULL_TREE,
					TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  darwin_set_default_type_attributes (type);
#endif
}
/* Return a reference suitable for calling a function with the
   longcall attribute.  CALL_REF is the SYMBOL_REF of the callee; ARG is
   an extra operand threaded through the PLT unspecs.  Returns a register
   holding the callee address.  */

static rtx
rs6000_longcall_ref (rtx call_ref, rtx arg)
{
  /* System V adds '.' to the internal name, so skip them.  */
  const char *call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
	call_name++;

      tree node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  if (TARGET_PLTSEQ)
    {
      rtx base = const0_rtx;
      int regno = 12;
      if (rs6000_pcrel_p (cfun))
	{
	  /* PC-relative PLT call sequence.  */
	  rtx reg = gen_rtx_REG (Pmode, regno);
	  rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
				  UNSPEC_PLT_PCREL);
	  emit_insn (gen_rtx_SET (reg, u));
	  return reg;
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	base = gen_rtx_REG (Pmode, TOC_REGISTER);
      else
	{
	  if (flag_pic)
	    base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  regno = 11;
	}
      /* Reg must match that used by linker PLT stubs.  For ELFv2, r12
	 may be used by a function global entry point.  For SysV4, r11
	 is used by __glink_PLTresolve lazy resolver entry.  */
      rtx reg = gen_rtx_REG (Pmode, regno);
      rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
			       UNSPEC_PLT16_HA);
      rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
			       UNSPEC_PLT16_LO);
      emit_insn (gen_rtx_SET (reg, hi));
      emit_insn (gen_rtx_SET (reg, lo));
      return reg;
    }

  return force_reg (Pmode, call_ref);
}
19272 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19273 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19274 #endif
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  Warn and drop the attribute unless it
   is applied to a record/union type (or a TYPE_DECL thereof), or if the
   opposite layout attribute is already present.  */
static tree
rs6000_handle_struct_attribute (tree *node, tree name,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
19312 static bool
19313 rs6000_ms_bitfield_layout_p (const_tree record_type)
19315 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
19316 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19317 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19320 #ifdef USING_ELFOS_H
19322 /* A get_unnamed_section callback, used for switching to toc_section. */
19324 static void
19325 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19327 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19328 && TARGET_MINIMAL_TOC)
19330 if (!toc_initialized)
19332 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19333 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19334 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19335 fprintf (asm_out_file, "\t.tc ");
19336 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19337 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19338 fprintf (asm_out_file, "\n");
19340 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19341 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19342 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19343 fprintf (asm_out_file, " = .+32768\n");
19344 toc_initialized = 1;
19346 else
19347 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19349 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19351 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19352 if (!toc_initialized)
19354 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19355 toc_initialized = 1;
19358 else
19360 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19361 if (!toc_initialized)
19363 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19364 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19365 fprintf (asm_out_file, " = .+32768\n");
19366 toc_initialized = 1;
19371 /* Implement TARGET_ASM_INIT_SECTIONS. */
19373 static void
19374 rs6000_elf_asm_init_sections (void)
19376 toc_section
19377 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19379 sdata2_section
19380 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19381 SDATA2_SECTION_ASM_OP);
19384 /* Implement TARGET_SELECT_RTX_SECTION. */
19386 static section *
19387 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19388 unsigned HOST_WIDE_INT align)
19390 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19391 return toc_section;
19392 else
19393 return default_elf_select_rtx_section (mode, x, align);
19396 /* For a SYMBOL_REF, set generic flags and then perform some
19397 target-specific processing.
19399 When the AIX ABI is requested on a non-AIX system, replace the
19400 function name with the real name (with a leading .) rather than the
19401 function descriptor name. This saves a lot of overriding code to
19402 read the prefixes. */
19404 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19405 static void
19406 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19408 default_encode_section_info (decl, rtl, first);
19410 if (first
19411 && TREE_CODE (decl) == FUNCTION_DECL
19412 && !TARGET_AIX
19413 && DEFAULT_ABI == ABI_AIX)
19415 rtx sym_ref = XEXP (rtl, 0);
19416 size_t len = strlen (XSTR (sym_ref, 0));
19417 char *str = XALLOCAVEC (char, len + 2);
19418 str[0] = '.';
19419 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19420 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19424 static inline bool
19425 compare_section_name (const char *section, const char *templ)
19427 int len;
19429 len = strlen (templ);
19430 return (strncmp (section, templ, len) == 0
19431 && (section[len] == 0 || section[len] == '.'));
19434 bool
19435 rs6000_elf_in_small_data_p (const_tree decl)
19437 if (rs6000_sdata == SDATA_NONE)
19438 return false;
19440 /* We want to merge strings, so we never consider them small data. */
19441 if (TREE_CODE (decl) == STRING_CST)
19442 return false;
19444 /* Functions are never in the small data area. */
19445 if (TREE_CODE (decl) == FUNCTION_DECL)
19446 return false;
19448 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19450 const char *section = DECL_SECTION_NAME (decl);
19451 if (compare_section_name (section, ".sdata")
19452 || compare_section_name (section, ".sdata2")
19453 || compare_section_name (section, ".gnu.linkonce.s")
19454 || compare_section_name (section, ".sbss")
19455 || compare_section_name (section, ".sbss2")
19456 || compare_section_name (section, ".gnu.linkonce.sb")
19457 || strcmp (section, ".PPC.EMB.sdata0") == 0
19458 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19459 return true;
19461 else
19463 /* If we are told not to put readonly data in sdata, then don't. */
19464 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19465 && !rs6000_readonly_in_sdata)
19466 return false;
19468 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19470 if (size > 0
19471 && size <= g_switch_value
19472 /* If it's not public, and we're not going to reference it there,
19473 there's no need to put it in the small data section. */
19474 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19475 return true;
19478 return false;
19481 #endif /* USING_ELFOS_H */
19483 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19485 static bool
19486 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19488 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19491 /* Do not place thread-local symbols refs in the object blocks. */
19493 static bool
19494 rs6000_use_blocks_for_decl_p (const_tree decl)
19496 return !DECL_THREAD_LOCAL_P (decl);
19499 /* Return a REG that occurs in ADDR with coefficient 1.
19500 ADDR can be effectively incremented by incrementing REG.
19502 r0 is special and we must not select it as an address
19503 register by this routine since our caller will try to
19504 increment the returned register via an "la" instruction. */
19507 find_addr_reg (rtx addr)
19509 while (GET_CODE (addr) == PLUS)
19511 if (REG_P (XEXP (addr, 0))
19512 && REGNO (XEXP (addr, 0)) != 0)
19513 addr = XEXP (addr, 0);
19514 else if (REG_P (XEXP (addr, 1))
19515 && REGNO (XEXP (addr, 1)) != 0)
19516 addr = XEXP (addr, 1);
19517 else if (CONSTANT_P (XEXP (addr, 0)))
19518 addr = XEXP (addr, 1);
19519 else if (CONSTANT_P (XEXP (addr, 1)))
19520 addr = XEXP (addr, 0);
19521 else
19522 gcc_unreachable ();
19524 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19525 return addr;
19528 void
19529 rs6000_fatal_bad_address (rtx op)
19531 fatal_insn ("bad address", op);
19534 #if TARGET_MACHO
19536 vec<branch_island, va_gc> *branch_islands;
19538 /* Remember to generate a branch island for far calls to the given
19539 function. */
19541 static void
19542 add_compiler_branch_island (tree label_name, tree function_name,
19543 int line_number)
19545 branch_island bi = {function_name, label_name, line_number};
19546 vec_safe_push (branch_islands, bi);
19549 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
19550 already there or not. */
19552 static int
19553 no_previous_def (tree function_name)
19555 branch_island *bi;
19556 unsigned ix;
19558 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19559 if (function_name == bi->function_name)
19560 return 0;
19561 return 1;
19564 /* GET_PREV_LABEL gets the label name from the previous definition of
19565 the function. */
19567 static tree
19568 get_prev_label (tree function_name)
19570 branch_island *bi;
19571 unsigned ix;
19573 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19574 if (function_name == bi->function_name)
19575 return bi->label_name;
19576 return NULL_TREE;
19579 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19581 void
19582 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19584 unsigned int length;
19585 char *symbol_name, *lazy_ptr_name;
19586 char *local_label_0;
19587 static unsigned label = 0;
19589 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19590 symb = (*targetm.strip_name_encoding) (symb);
19592 length = strlen (symb);
19593 symbol_name = XALLOCAVEC (char, length + 32);
19594 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19596 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19597 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19599 if (MACHOPIC_PURE)
19601 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19602 fprintf (file, "\t.align 5\n");
19604 fprintf (file, "%s:\n", stub);
19605 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19607 label++;
19608 local_label_0 = XALLOCAVEC (char, 16);
19609 sprintf (local_label_0, "L%u$spb", label);
19611 fprintf (file, "\tmflr r0\n");
19612 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19613 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19614 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19615 lazy_ptr_name, local_label_0);
19616 fprintf (file, "\tmtlr r0\n");
19617 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19618 (TARGET_64BIT ? "ldu" : "lwzu"),
19619 lazy_ptr_name, local_label_0);
19620 fprintf (file, "\tmtctr r12\n");
19621 fprintf (file, "\tbctr\n");
19623 else /* mdynamic-no-pic or mkernel. */
19625 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19626 fprintf (file, "\t.align 4\n");
19628 fprintf (file, "%s:\n", stub);
19629 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19631 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19632 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19633 (TARGET_64BIT ? "ldu" : "lwzu"),
19634 lazy_ptr_name);
19635 fprintf (file, "\tmtctr r12\n");
19636 fprintf (file, "\tbctr\n");
19639 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19640 fprintf (file, "%s:\n", lazy_ptr_name);
19641 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19642 fprintf (file, "%sdyld_stub_binding_helper\n",
19643 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19646 /* Legitimize PIC addresses. If the address is already
19647 position-independent, we return ORIG. Newly generated
19648 position-independent addresses go into a reg. This is REG if non
19649 zero, otherwise we allocate register(s) as necessary. */
19651 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
19654 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19655 rtx reg)
19657 rtx base, offset;
19659 if (reg == NULL && !reload_completed)
19660 reg = gen_reg_rtx (Pmode);
19662 if (GET_CODE (orig) == CONST)
19664 rtx reg_temp;
19666 if (GET_CODE (XEXP (orig, 0)) == PLUS
19667 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19668 return orig;
19670 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19672 /* Use a different reg for the intermediate value, as
19673 it will be marked UNCHANGING. */
19674 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19675 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19676 Pmode, reg_temp);
19677 offset =
19678 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19679 Pmode, reg);
19681 if (CONST_INT_P (offset))
19683 if (SMALL_INT (offset))
19684 return plus_constant (Pmode, base, INTVAL (offset));
19685 else if (!reload_completed)
19686 offset = force_reg (Pmode, offset);
19687 else
19689 rtx mem = force_const_mem (Pmode, orig);
19690 return machopic_legitimize_pic_address (mem, Pmode, reg);
19693 return gen_rtx_PLUS (Pmode, base, offset);
19696 /* Fall back on generic machopic code. */
19697 return machopic_legitimize_pic_address (orig, mode, reg);
19700 /* Output a .machine directive for the Darwin assembler, and call
19701 the generic start_file routine. */
19703 static void
19704 rs6000_darwin_file_start (void)
19706 static const struct
19708 const char *arg;
19709 const char *name;
19710 HOST_WIDE_INT if_set;
19711 } mapping[] = {
19712 { "ppc64", "ppc64", MASK_64BIT },
19713 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19714 { "power4", "ppc970", 0 },
19715 { "G5", "ppc970", 0 },
19716 { "7450", "ppc7450", 0 },
19717 { "7400", "ppc7400", MASK_ALTIVEC },
19718 { "G4", "ppc7400", 0 },
19719 { "750", "ppc750", 0 },
19720 { "740", "ppc750", 0 },
19721 { "G3", "ppc750", 0 },
19722 { "604e", "ppc604e", 0 },
19723 { "604", "ppc604", 0 },
19724 { "603e", "ppc603", 0 },
19725 { "603", "ppc603", 0 },
19726 { "601", "ppc601", 0 },
19727 { NULL, "ppc", 0 } };
19728 const char *cpu_id = "";
19729 size_t i;
19731 rs6000_file_start ();
19732 darwin_file_start ();
19734 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19736 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19737 cpu_id = rs6000_default_cpu;
19739 if (global_options_set.x_rs6000_cpu_index)
19740 cpu_id = processor_target_table[rs6000_cpu_index].name;
19742 /* Look through the mapping array. Pick the first name that either
19743 matches the argument, has a bit set in IF_SET that is also set
19744 in the target flags, or has a NULL name. */
19746 i = 0;
19747 while (mapping[i].arg != NULL
19748 && strcmp (mapping[i].arg, cpu_id) != 0
19749 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19750 i++;
19752 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19755 #endif /* TARGET_MACHO */
19757 #if TARGET_ELF
19758 static int
19759 rs6000_elf_reloc_rw_mask (void)
19761 if (flag_pic)
19762 return 3;
19763 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19764 return 2;
19765 else
19766 return 0;
19769 /* Record an element in the table of global constructors. SYMBOL is
19770 a SYMBOL_REF of the function to be called; PRIORITY is a number
19771 between 0 and MAX_INIT_PRIORITY.
19773 This differs from default_named_section_asm_out_constructor in
19774 that we have special handling for -mrelocatable. */
19776 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19777 static void
19778 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19780 const char *section = ".ctors";
19781 char buf[18];
19783 if (priority != DEFAULT_INIT_PRIORITY)
19785 sprintf (buf, ".ctors.%.5u",
19786 /* Invert the numbering so the linker puts us in the proper
19787 order; constructors are run from right to left, and the
19788 linker sorts in increasing order. */
19789 MAX_INIT_PRIORITY - priority);
19790 section = buf;
19793 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19794 assemble_align (POINTER_SIZE);
19796 if (DEFAULT_ABI == ABI_V4
19797 && (TARGET_RELOCATABLE || flag_pic > 1))
19799 fputs ("\t.long (", asm_out_file);
19800 output_addr_const (asm_out_file, symbol);
19801 fputs (")@fixup\n", asm_out_file);
19803 else
19804 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19807 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19808 static void
19809 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19811 const char *section = ".dtors";
19812 char buf[18];
19814 if (priority != DEFAULT_INIT_PRIORITY)
19816 sprintf (buf, ".dtors.%.5u",
19817 /* Invert the numbering so the linker puts us in the proper
19818 order; constructors are run from right to left, and the
19819 linker sorts in increasing order. */
19820 MAX_INIT_PRIORITY - priority);
19821 section = buf;
19824 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19825 assemble_align (POINTER_SIZE);
19827 if (DEFAULT_ABI == ABI_V4
19828 && (TARGET_RELOCATABLE || flag_pic > 1))
19830 fputs ("\t.long (", asm_out_file);
19831 output_addr_const (asm_out_file, symbol);
19832 fputs (")@fixup\n", asm_out_file);
19834 else
19835 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19838 void
19839 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19841 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19843 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19844 ASM_OUTPUT_LABEL (file, name);
19845 fputs (DOUBLE_INT_ASM_OP, file);
19846 rs6000_output_function_entry (file, name);
19847 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19848 if (DOT_SYMBOLS)
19850 fputs ("\t.size\t", file);
19851 assemble_name (file, name);
19852 fputs (",24\n\t.type\t.", file);
19853 assemble_name (file, name);
19854 fputs (",@function\n", file);
19855 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19857 fputs ("\t.globl\t.", file);
19858 assemble_name (file, name);
19859 putc ('\n', file);
19862 else
19863 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19864 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19865 rs6000_output_function_entry (file, name);
19866 fputs (":\n", file);
19867 return;
19870 int uses_toc;
19871 if (DEFAULT_ABI == ABI_V4
19872 && (TARGET_RELOCATABLE || flag_pic > 1)
19873 && !TARGET_SECURE_PLT
19874 && (!constant_pool_empty_p () || crtl->profile)
19875 && (uses_toc = uses_TOC ()))
19877 char buf[256];
19879 if (uses_toc == 2)
19880 switch_to_other_text_partition ();
19881 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19883 fprintf (file, "\t.long ");
19884 assemble_name (file, toc_label_name);
19885 need_toc_init = 1;
19886 putc ('-', file);
19887 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19888 assemble_name (file, buf);
19889 putc ('\n', file);
19890 if (uses_toc == 2)
19891 switch_to_other_text_partition ();
19894 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19895 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19897 if (TARGET_CMODEL == CMODEL_LARGE
19898 && rs6000_global_entry_point_prologue_needed_p ())
19900 char buf[256];
19902 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19904 fprintf (file, "\t.quad .TOC.-");
19905 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19906 assemble_name (file, buf);
19907 putc ('\n', file);
19910 if (DEFAULT_ABI == ABI_AIX)
19912 const char *desc_name, *orig_name;
19914 orig_name = (*targetm.strip_name_encoding) (name);
19915 desc_name = orig_name;
19916 while (*desc_name == '.')
19917 desc_name++;
19919 if (TREE_PUBLIC (decl))
19920 fprintf (file, "\t.globl %s\n", desc_name);
19922 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19923 fprintf (file, "%s:\n", desc_name);
19924 fprintf (file, "\t.long %s\n", orig_name);
19925 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19926 fputs ("\t.long 0\n", file);
19927 fprintf (file, "\t.previous\n");
19929 ASM_OUTPUT_LABEL (file, name);
19932 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19933 static void
19934 rs6000_elf_file_end (void)
19936 #ifdef HAVE_AS_GNU_ATTRIBUTE
19937 /* ??? The value emitted depends on options active at file end.
19938 Assume anyone using #pragma or attributes that might change
19939 options knows what they are doing. */
19940 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
19941 && rs6000_passes_float)
19943 int fp;
19945 if (TARGET_HARD_FLOAT)
19946 fp = 1;
19947 else
19948 fp = 2;
19949 if (rs6000_passes_long_double)
19951 if (!TARGET_LONG_DOUBLE_128)
19952 fp |= 2 * 4;
19953 else if (TARGET_IEEEQUAD)
19954 fp |= 3 * 4;
19955 else
19956 fp |= 1 * 4;
19958 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
19960 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
19962 if (rs6000_passes_vector)
19963 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
19964 (TARGET_ALTIVEC_ABI ? 2 : 1));
19965 if (rs6000_returns_struct)
19966 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
19967 aix_struct_return ? 2 : 1);
19969 #endif
19970 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
19971 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
19972 file_end_indicate_exec_stack ();
19973 #endif
19975 if (flag_split_stack)
19976 file_end_indicate_split_stack ();
19978 if (cpu_builtin_p)
19980 /* We have expanded a CPU builtin, so we need to emit a reference to
19981 the special symbol that LIBC uses to declare it supports the
19982 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
19983 switch_to_section (data_section);
19984 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
19985 fprintf (asm_out_file, "\t%s %s\n",
19986 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
19989 #endif
19991 #if TARGET_XCOFF
19993 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19994 #define HAVE_XCOFF_DWARF_EXTRAS 0
19995 #endif
19997 static enum unwind_info_type
19998 rs6000_xcoff_debug_unwind_info (void)
20000 return UI_NONE;
20003 static void
20004 rs6000_xcoff_asm_output_anchor (rtx symbol)
20006 char buffer[100];
20008 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20009 SYMBOL_REF_BLOCK_OFFSET (symbol));
20010 fprintf (asm_out_file, "%s", SET_ASM_OP);
20011 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20012 fprintf (asm_out_file, ",");
20013 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20014 fprintf (asm_out_file, "\n");
20017 static void
20018 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20020 fputs (GLOBAL_ASM_OP, stream);
20021 RS6000_OUTPUT_BASENAME (stream, name);
20022 putc ('\n', stream);
20025 /* A get_unnamed_decl callback, used for read-only sections. PTR
20026 points to the section string variable. */
20028 static void
20029 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20031 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20032 *(const char *const *) directive,
20033 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20036 /* Likewise for read-write sections. */
20038 static void
20039 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20041 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20042 *(const char *const *) directive,
20043 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20046 static void
20047 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20049 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20050 *(const char *const *) directive,
20051 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20054 /* A get_unnamed_section callback, used for switching to toc_section. */
20056 static void
20057 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20059 if (TARGET_MINIMAL_TOC)
20061 /* toc_section is always selected at least once from
20062 rs6000_xcoff_file_start, so this is guaranteed to
20063 always be defined once and only once in each file. */
20064 if (!toc_initialized)
20066 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20067 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20068 toc_initialized = 1;
20070 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20071 (TARGET_32BIT ? "" : ",3"));
20073 else
20074 fputs ("\t.toc\n", asm_out_file);
20077 /* Implement TARGET_ASM_INIT_SECTIONS. */
20079 static void
20080 rs6000_xcoff_asm_init_sections (void)
20082 read_only_data_section
20083 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20084 &xcoff_read_only_section_name);
20086 private_data_section
20087 = get_unnamed_section (SECTION_WRITE,
20088 rs6000_xcoff_output_readwrite_section_asm_op,
20089 &xcoff_private_data_section_name);
20091 read_only_private_data_section
20092 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20093 &xcoff_private_rodata_section_name);
20095 tls_data_section
20096 = get_unnamed_section (SECTION_TLS,
20097 rs6000_xcoff_output_tls_section_asm_op,
20098 &xcoff_tls_data_section_name);
20100 tls_private_data_section
20101 = get_unnamed_section (SECTION_TLS,
20102 rs6000_xcoff_output_tls_section_asm_op,
20103 &xcoff_private_data_section_name);
20105 toc_section
20106 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20108 readonly_data_section = read_only_data_section;
/* Section attributes.  AIX is always PIC, so never allow runtime
   relocations in read-only sections.  */

static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
20117 static void
20118 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20119 tree decl ATTRIBUTE_UNUSED)
20121 int smclass;
20122 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20124 if (flags & SECTION_EXCLUDE)
20125 smclass = 4;
20126 else if (flags & SECTION_DEBUG)
20128 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20129 return;
20131 else if (flags & SECTION_CODE)
20132 smclass = 0;
20133 else if (flags & SECTION_TLS)
20134 smclass = 3;
20135 else if (flags & SECTION_WRITE)
20136 smclass = 2;
20137 else
20138 smclass = 1;
20140 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20141 (flags & SECTION_CODE) ? "." : "",
20142 name, suffix[smclass], flags & SECTION_ENTSIZE);
20145 #define IN_NAMED_SECTION(DECL) \
20146 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20147 && DECL_SECTION_NAME (DECL) != NULL)
20149 static section *
20150 rs6000_xcoff_select_section (tree decl, int reloc,
20151 unsigned HOST_WIDE_INT align)
20153 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20154 named section. */
20155 if (align > BIGGEST_ALIGNMENT)
20157 resolve_unique_section (decl, reloc, true);
20158 if (IN_NAMED_SECTION (decl))
20159 return get_named_section (decl, NULL, reloc);
20162 if (decl_readonly_section (decl, reloc))
20164 if (TREE_PUBLIC (decl))
20165 return read_only_data_section;
20166 else
20167 return read_only_private_data_section;
20169 else
20171 #if HAVE_AS_TLS
20172 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20174 if (TREE_PUBLIC (decl))
20175 return tls_data_section;
20176 else if (bss_initializer_p (decl))
20178 /* Convert to COMMON to emit in BSS. */
20179 DECL_COMMON (decl) = 1;
20180 return tls_comm_section;
20182 else
20183 return tls_private_data_section;
20185 else
20186 #endif
20187 if (TREE_PUBLIC (decl))
20188 return data_section;
20189 else
20190 return private_data_section;
20194 static void
20195 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20197 const char *name;
20199 /* Use select_section for private data and uninitialized data with
20200 alignment <= BIGGEST_ALIGNMENT. */
20201 if (!TREE_PUBLIC (decl)
20202 || DECL_COMMON (decl)
20203 || (DECL_INITIAL (decl) == NULL_TREE
20204 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20205 || DECL_INITIAL (decl) == error_mark_node
20206 || (flag_zero_initialized_in_bss
20207 && initializer_zerop (DECL_INITIAL (decl))))
20208 return;
20210 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20211 name = (*targetm.strip_name_encoding) (name);
20212 set_decl_section_name (decl, name);
20215 /* Select section for constant in constant pool.
20217 On RS/6000, all constants are in the private read-only data area.
20218 However, if this is being placed in the TOC it must be output as a
20219 toc entry. */
20221 static section *
20222 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20223 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20225 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20226 return toc_section;
20227 else
20228 return read_only_private_data_section;
20231 /* Remove any trailing [DS] or the like from the symbol name. */
/* Remove any trailing [DS] or the like from the symbol name.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  /* Drop a leading '*' marker, if present.  */
  if (*name == '*')
    name++;
  len = strlen (name);
  /* A trailing ']' means a four-character "[XX]" mapping-class suffix;
     strip it off.  */
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
20246 /* Section attributes. AIX is always PIC. */
20248 static unsigned int
20249 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20251 unsigned int align;
20252 unsigned int flags = default_section_type_flags (decl, name, reloc);
20254 /* Align to at least UNIT size. */
20255 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20256 align = MIN_UNITS_PER_WORD;
20257 else
20258 /* Increase alignment of large objects if not already stricter. */
20259 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20260 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20261 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20263 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20266 /* Output at beginning of assembler file.
20268 Initialize the section names for the RS/6000 at this point.
20270 Specify filename, including full path, to assembler.
20272 We want to go into the TOC section so at least one .toc will be emitted.
20273 Also, in order to output proper .bs/.es pairs, we need at least one static
20274 [RW] section emitted.
20276 Finally, declare mcount when profiling to make the assembler happy. */
20278 static void
20279 rs6000_xcoff_file_start (void)
20281 rs6000_gen_section_name (&xcoff_bss_section_name,
20282 main_input_filename, ".bss_");
20283 rs6000_gen_section_name (&xcoff_private_data_section_name,
20284 main_input_filename, ".rw_");
20285 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20286 main_input_filename, ".rop_");
20287 rs6000_gen_section_name (&xcoff_read_only_section_name,
20288 main_input_filename, ".ro_");
20289 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20290 main_input_filename, ".tls_");
20291 rs6000_gen_section_name (&xcoff_tbss_section_name,
20292 main_input_filename, ".tbss_[UL]");
20294 fputs ("\t.file\t", asm_out_file);
20295 output_quoted_string (asm_out_file, main_input_filename);
20296 fputc ('\n', asm_out_file);
20297 if (write_symbols != NO_DEBUG)
20298 switch_to_section (private_data_section);
20299 switch_to_section (toc_section);
20300 switch_to_section (text_section);
20301 if (profile_flag)
20302 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20303 rs6000_file_start ();
20306 /* Output at end of assembler file.
20307 On the RS/6000, referencing data should automatically pull in text. */
20309 static void
20310 rs6000_xcoff_file_end (void)
20312 switch_to_section (text_section);
20313 fputs ("_section_.text:\n", asm_out_file);
20314 switch_to_section (data_section);
20315 fputs (TARGET_32BIT
20316 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20317 asm_out_file);
20320 struct declare_alias_data
20322 FILE *file;
20323 bool function_descriptor;
20326 /* Declare alias N.  A callback for symtab_node::call_for_symbol_and_aliases:
   emits an alternative label (and .globl/.lglobl/.rename bookkeeping) for each
   alias of the symbol being defined.  Always returns false so the walk
   continues over all aliases.  D points at a declare_alias_data giving the
   output FILE and whether we are emitting the '.'-prefixed code symbols. */
20328 static bool
20329 rs6000_declare_alias (struct symtab_node *n, void *d)
20331 struct declare_alias_data *data = (struct declare_alias_data *)d;
20332 /* Main symbol is output specially, because varasm machinery does part of
20333 the job for us - we do not need to declare .globl/lglobs and such. */
20334 if (!n->alias || n->weakref)
20335 return false;
20337 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20338 return false;
20340 /* Prevent assemble_alias from trying to use .set pseudo operation
20341 that does not behave as expected by the middle-end. */
20342 TREE_ASM_WRITTEN (n->decl) = true;
20344 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20345 char *buffer = (char *) alloca (strlen (name) + 2);
20346 char *p;
20347 int dollar_inside = 0;
/* The AIX assembler rejects '$' in symbol names: rewrite each to '_' and
   count them so .rename directives below can map back to the real name. */
20349 strcpy (buffer, name);
20350 p = strchr (buffer, '$');
20351 while (p) {
20352 *p = '_';
20353 dollar_inside++;
20354 p = strchr (p + 1, '$');
/* Public aliases get .globl (or ASM_WEAKEN_DECL when weak); local
   aliases get .lglobl instead. */
20356 if (TREE_PUBLIC (n->decl))
20358 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20360 if (dollar_inside) {
20361 if (data->function_descriptor)
20362 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20363 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20365 if (data->function_descriptor)
20367 fputs ("\t.globl .", data->file);
20368 RS6000_OUTPUT_BASENAME (data->file, buffer);
20369 putc ('\n', data->file);
20371 fputs ("\t.globl ", data->file);
20372 RS6000_OUTPUT_BASENAME (data->file, buffer);
20373 putc ('\n', data->file);
20375 #ifdef ASM_WEAKEN_DECL
20376 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20377 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20378 #endif
20380 else
20382 if (dollar_inside)
20384 if (data->function_descriptor)
20385 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20386 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20388 if (data->function_descriptor)
20390 fputs ("\t.lglobl .", data->file);
20391 RS6000_OUTPUT_BASENAME (data->file, buffer);
20392 putc ('\n', data->file);
20394 fputs ("\t.lglobl ", data->file);
20395 RS6000_OUTPUT_BASENAME (data->file, buffer);
20396 putc ('\n', data->file);
/* Finally emit the alternative label itself; on the pass that handles
   the code entry symbols it carries the '.' prefix. */
20398 if (data->function_descriptor)
20399 fputs (".", data->file);
20400 RS6000_OUTPUT_BASENAME (data->file, buffer);
20401 fputs (":\n", data->file);
20402 return false;
#ifdef HAVE_GAS_HIDDEN
/* Map the visibility of DECL to the assembler suffix appended to symbol
   directives on XCOFF (e.g. ",hidden").  Default visibility yields the
   empty string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  static const char * const suffixes[] = {
    "", ",protected", ",hidden", ",internal"
  };

  return suffixes[(int) DECL_VISIBILITY (decl)];
}
#endif
20423 /* This macro produces the initial definition of a function name.
20424 On the RS/6000, we need to place an extra '.' in the function name and
20425 output the function descriptor.
20426 Dollar signs are converted to underscores.
20428 The csect for the function will have already been created when
20429 text_section was selected. We do have to go back to that csect, however.
20431 The third and fourth parameters to the .function pseudo-op (16 and 044)
20432 are placeholders which no longer have any use.
20434 Because AIX assembler's .set command has unexpected semantics, we output
20435 all aliases as alternative labels in front of the definition. */
20437 void
20438 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20440 char *buffer = (char *) alloca (strlen (name) + 1);
20441 char *p;
20442 int dollar_inside = 0;
20443 struct declare_alias_data data = {file, false};
/* Rewrite '$' to '_' (counting occurrences) -- the AIX assembler rejects
   '$' -- and emit .rename directives mapping back to the original name. */
20445 strcpy (buffer, name);
20446 p = strchr (buffer, '$');
20447 while (p) {
20448 *p = '_';
20449 dollar_inside++;
20450 p = strchr (p + 1, '$');
20452 if (TREE_PUBLIC (decl))
20454 if (!RS6000_WEAK || !DECL_WEAK (decl))
20456 if (dollar_inside) {
20457 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20458 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20460 fputs ("\t.globl .", file);
20461 RS6000_OUTPUT_BASENAME (file, buffer);
20462 #ifdef HAVE_GAS_HIDDEN
20463 fputs (rs6000_xcoff_visibility (decl), file);
20464 #endif
20465 putc ('\n', file);
20468 else
20470 if (dollar_inside) {
20471 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20472 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20474 fputs ("\t.lglobl .", file);
20475 RS6000_OUTPUT_BASENAME (file, buffer);
20476 putc ('\n', file);
/* Emit the function descriptor csect NAME[DS]: it holds a pointer to the
   '.NAME' code entry plus the TOC anchor.  Alias labels for the
   descriptor symbol come first. */
20478 fputs ("\t.csect ", file);
20479 RS6000_OUTPUT_BASENAME (file, buffer);
20480 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20481 RS6000_OUTPUT_BASENAME (file, buffer);
20482 fputs (":\n", file);
20483 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20484 &data, true);
20485 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20486 RS6000_OUTPUT_BASENAME (file, buffer);
20487 fputs (", TOC[tc0], 0\n", file);
/* Force re-selection of the text csect, then emit the '.NAME' code label
   and a second alias pass for the '.'-prefixed code symbols. */
20488 in_section = NULL;
20489 switch_to_section (function_section (decl));
20490 putc ('.', file);
20491 RS6000_OUTPUT_BASENAME (file, buffer);
20492 fputs (":\n", file);
20493 data.function_descriptor = true;
20494 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20495 &data, true);
20496 if (!DECL_IGNORED_P (decl))
20498 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20499 xcoffout_declare_function (file, decl, buffer);
20500 else if (write_symbols == DWARF2_DEBUG)
20502 name = (*targetm.strip_name_encoding) (name);
20503 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20506 return;
20510 /* Output assembly language to globalize a symbol from a DECL,
20511 possibly with visibility. */
20513 void
20514 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20516 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20517 fputs (GLOBAL_ASM_OP, stream);
20518 RS6000_OUTPUT_BASENAME (stream, name);
20519 #ifdef HAVE_GAS_HIDDEN
20520 fputs (rs6000_xcoff_visibility (decl), stream);
20521 #endif
20522 putc ('\n', stream);
20525 /* Output assembly language to define a symbol as COMMON from a DECL,
20526 possibly with visibility. */
20528 void
20529 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20530 tree decl ATTRIBUTE_UNUSED,
20531 const char *name,
20532 unsigned HOST_WIDE_INT size,
20533 unsigned HOST_WIDE_INT align)
20535 unsigned HOST_WIDE_INT align2 = 2;
20537 if (align > 32)
20538 align2 = floor_log2 (align / BITS_PER_UNIT);
20539 else if (size > 4)
20540 align2 = 3;
20542 fputs (COMMON_ASM_OP, stream);
20543 RS6000_OUTPUT_BASENAME (stream, name);
20545 fprintf (stream,
20546 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20547 size, align2);
20549 #ifdef HAVE_GAS_HIDDEN
20550 if (decl != NULL)
20551 fputs (rs6000_xcoff_visibility (decl), stream);
20552 #endif
20553 putc ('\n', stream);
20556 /* This macro produces the initial definition of a object (variable) name.
20557 Because AIX assembler's .set command has unexpected semantics, we output
20558 all aliases as alternative labels in front of the definition. */
20560 void
20561 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20563 struct declare_alias_data data = {file, false};
20564 RS6000_OUTPUT_BASENAME (file, name);
20565 fputs (":\n", file);
20566 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20567 &data, true);
20570 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
20572 void
20573 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20575 fputs (integer_asm_op (size, FALSE), file);
20576 assemble_name (file, label);
20577 fputs ("-$", file);
20580 /* Output a symbol offset relative to the dbase for the current object.
20581 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20582 signed offsets.
20584 __gcc_unwind_dbase is embedded in all executables/libraries through
20585 libgcc/config/rs6000/crtdbase.S. */
20587 void
20588 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20590 fputs (integer_asm_op (size, FALSE), file);
20591 assemble_name (file, label);
20592 fputs("-__gcc_unwind_dbase", file);
#ifdef HAVE_AS_TLS
/* Encode section information for DECL's symbol: strip block info from
   thread-local variables and append an XCOFF mapping class ([DS]/[UA])
   to extern declarations that lack one.  */
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;

  rtx symbol = XEXP (rtl, 0);
  if (!SYMBOL_REF_P (symbol))
    return;

  /* Thread-local variables do not carry block info.  */
  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    SYMBOL_REF_FLAGS (symbol) &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  /* Append mapping class to extern decls.  */
  const char *symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      const char *suffix
	= (TREE_CODE (decl) == FUNCTION_DECL ? "[DS]" : "[UA]");
      char *newname = (char *) alloca (strlen (symname) + 5);

      strcpy (newname, symname);
      strcat (newname, suffix);
      XSTR (symbol, 0) = ggc_strdup (newname);
    }
}
#endif /* HAVE_AS_TLS */
20635 #endif /* TARGET_XCOFF */
/* Emit a .weak directive for NAME -- plus its '.'-prefixed dot-symbol when
   DECL is a function under the AIX ABI with dot symbols -- and, when VAL is
   given, define NAME (and the dot-symbol) as an alias of VAL.  */
20637 void
20638 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20639 const char *name, const char *val)
20641 fputs ("\t.weak\t", stream);
20642 RS6000_OUTPUT_BASENAME (stream, name);
20643 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20644 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20646 if (TARGET_XCOFF)
20647 fputs ("[DS]", stream);
20648 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20649 if (TARGET_XCOFF)
20650 fputs (rs6000_xcoff_visibility (decl), stream);
20651 #endif
/* Also weaken the code entry point symbol '.NAME'. */
20652 fputs ("\n\t.weak\t.", stream);
20653 RS6000_OUTPUT_BASENAME (stream, name);
20655 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20656 if (TARGET_XCOFF)
20657 fputs (rs6000_xcoff_visibility (decl), stream);
20658 #endif
20659 fputc ('\n', stream);
20660 if (val)
20662 #ifdef ASM_OUTPUT_DEF
20663 ASM_OUTPUT_DEF (stream, name, val);
20664 #endif
/* Tie the dot-symbol to the target's dot-symbol as well. */
20665 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20666 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20668 fputs ("\t.set\t.", stream);
20669 RS6000_OUTPUT_BASENAME (stream, name);
20670 fputs (",.", stream);
20671 RS6000_OUTPUT_BASENAME (stream, val);
20672 fputc ('\n', stream);
20678 /* Return true if INSN should not be copied. */
20680 static bool
20681 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20683 return recog_memoized (insn) >= 0
20684 && get_attr_cannot_copy (insn);
20687 /* Compute a (partial) cost for rtx X. Return true if the complete
20688 cost has been computed, and false if subexpressions should be
20689 scanned. In either case, *TOTAL contains the cost result. */
20691 static bool
20692 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20693 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20695 int code = GET_CODE (x);
20697 switch (code)
20699 /* On the RS/6000, if it is valid in the insn, it is free. */
20700 case CONST_INT:
20701 if (((outer_code == SET
20702 || outer_code == PLUS
20703 || outer_code == MINUS)
20704 && (satisfies_constraint_I (x)
20705 || satisfies_constraint_L (x)))
20706 || (outer_code == AND
20707 && (satisfies_constraint_K (x)
20708 || (mode == SImode
20709 ? satisfies_constraint_L (x)
20710 : satisfies_constraint_J (x))))
20711 || ((outer_code == IOR || outer_code == XOR)
20712 && (satisfies_constraint_K (x)
20713 || (mode == SImode
20714 ? satisfies_constraint_L (x)
20715 : satisfies_constraint_J (x))))
20716 || outer_code == ASHIFT
20717 || outer_code == ASHIFTRT
20718 || outer_code == LSHIFTRT
20719 || outer_code == ROTATE
20720 || outer_code == ROTATERT
20721 || outer_code == ZERO_EXTRACT
20722 || (outer_code == MULT
20723 && satisfies_constraint_I (x))
20724 || ((outer_code == DIV || outer_code == UDIV
20725 || outer_code == MOD || outer_code == UMOD)
20726 && exact_log2 (INTVAL (x)) >= 0)
20727 || (outer_code == COMPARE
20728 && (satisfies_constraint_I (x)
20729 || satisfies_constraint_K (x)))
20730 || ((outer_code == EQ || outer_code == NE)
20731 && (satisfies_constraint_I (x)
20732 || satisfies_constraint_K (x)
20733 || (mode == SImode
20734 ? satisfies_constraint_L (x)
20735 : satisfies_constraint_J (x))))
20736 || (outer_code == GTU
20737 && satisfies_constraint_I (x))
20738 || (outer_code == LTU
20739 && satisfies_constraint_P (x)))
20741 *total = 0;
20742 return true;
/* Constants needing one extra insn to materialize. */
20744 else if ((outer_code == PLUS
20745 && reg_or_add_cint_operand (x, VOIDmode))
20746 || (outer_code == MINUS
20747 && reg_or_sub_cint_operand (x, VOIDmode))
20748 || ((outer_code == SET
20749 || outer_code == IOR
20750 || outer_code == XOR)
20751 && (INTVAL (x)
20752 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20754 *total = COSTS_N_INSNS (1);
20755 return true;
20757 /* FALLTHRU */
20759 case CONST_DOUBLE:
20760 case CONST_WIDE_INT:
20761 case CONST:
20762 case HIGH:
20763 case SYMBOL_REF:
20764 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20765 return true;
20767 case MEM:
20768 /* When optimizing for size, MEM should be slightly more expensive
20769 than generating address, e.g., (plus (reg) (const)).
20770 L1 cache latency is about two instructions. */
20771 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20772 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20773 *total += COSTS_N_INSNS (100);
20774 return true;
20776 case LABEL_REF:
20777 *total = 0;
20778 return true;
20780 case PLUS:
20781 case MINUS:
20782 if (FLOAT_MODE_P (mode))
20783 *total = rs6000_cost->fp;
20784 else
20785 *total = COSTS_N_INSNS (1);
20786 return false;
20788 case MULT:
20789 if (CONST_INT_P (XEXP (x, 1))
20790 && satisfies_constraint_I (XEXP (x, 1)))
20792 if (INTVAL (XEXP (x, 1)) >= -256
20793 && INTVAL (XEXP (x, 1)) <= 255)
20794 *total = rs6000_cost->mulsi_const9;
20795 else
20796 *total = rs6000_cost->mulsi_const;
20798 else if (mode == SFmode)
20799 *total = rs6000_cost->fp;
20800 else if (FLOAT_MODE_P (mode))
20801 *total = rs6000_cost->dmul;
20802 else if (mode == DImode)
20803 *total = rs6000_cost->muldi;
20804 else
20805 *total = rs6000_cost->mulsi;
20806 return false;
20808 case FMA:
20809 if (mode == SFmode)
20810 *total = rs6000_cost->fp;
20811 else
20812 *total = rs6000_cost->dmul;
/* break, not return: fall to the bottom so operands are also costed. */
20813 break;
20815 case DIV:
20816 case MOD:
20817 if (FLOAT_MODE_P (mode))
20819 *total = mode == DFmode ? rs6000_cost->ddiv
20820 : rs6000_cost->sdiv;
20821 return false;
20823 /* FALLTHRU */
20825 case UDIV:
20826 case UMOD:
20827 if (CONST_INT_P (XEXP (x, 1))
20828 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20830 if (code == DIV || code == MOD)
20831 /* Shift, addze */
20832 *total = COSTS_N_INSNS (2);
20833 else
20834 /* Shift */
20835 *total = COSTS_N_INSNS (1);
20837 else
20839 if (GET_MODE (XEXP (x, 1)) == DImode)
20840 *total = rs6000_cost->divdi;
20841 else
20842 *total = rs6000_cost->divsi;
20844 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20845 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20846 *total += COSTS_N_INSNS (2);
20847 return false;
20849 case CTZ:
20850 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20851 return false;
20853 case FFS:
20854 *total = COSTS_N_INSNS (4);
20855 return false;
20857 case POPCOUNT:
20858 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20859 return false;
20861 case PARITY:
20862 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20863 return false;
20865 case NOT:
20866 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20867 *total = 0;
20868 else
20869 *total = COSTS_N_INSNS (1);
20870 return false;
20872 case AND:
20873 if (CONST_INT_P (XEXP (x, 1)))
20875 rtx left = XEXP (x, 0);
20876 rtx_code left_code = GET_CODE (left);
20878 /* rotate-and-mask: 1 insn. */
20879 if ((left_code == ROTATE
20880 || left_code == ASHIFT
20881 || left_code == LSHIFTRT)
20882 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20884 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20885 if (!CONST_INT_P (XEXP (left, 1)))
20886 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20887 *total += COSTS_N_INSNS (1);
20888 return true;
20891 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20892 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20893 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20894 || (val & 0xffff) == val
20895 || (val & 0xffff0000) == val
20896 || ((val & 0xffff) == 0 && mode == SImode))
20898 *total = rtx_cost (left, mode, AND, 0, speed);
20899 *total += COSTS_N_INSNS (1);
20900 return true;
20903 /* 2 insns. */
20904 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20906 *total = rtx_cost (left, mode, AND, 0, speed);
20907 *total += COSTS_N_INSNS (2);
20908 return true;
20912 *total = COSTS_N_INSNS (1);
20913 return false;
20915 case IOR:
20916 /* FIXME */
20917 *total = COSTS_N_INSNS (1);
20918 return true;
20920 case CLZ:
20921 case XOR:
20922 case ZERO_EXTRACT:
20923 *total = COSTS_N_INSNS (1);
20924 return false;
20926 case ASHIFT:
20927 /* The EXTSWSLI instruction is a combined instruction. Don't count both
20928 the sign extend and shift separately within the insn. */
20929 if (TARGET_EXTSWSLI && mode == DImode
20930 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20931 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20933 *total = 0;
20934 return false;
20936 /* fall through */
20938 case ASHIFTRT:
20939 case LSHIFTRT:
20940 case ROTATE:
20941 case ROTATERT:
20942 /* Handle mul_highpart. */
20943 if (outer_code == TRUNCATE
20944 && GET_CODE (XEXP (x, 0)) == MULT)
20946 if (mode == DImode)
20947 *total = rs6000_cost->muldi;
20948 else
20949 *total = rs6000_cost->mulsi;
20950 return true;
20952 else if (outer_code == AND)
20953 *total = 0;
20954 else
20955 *total = COSTS_N_INSNS (1);
20956 return false;
20958 case SIGN_EXTEND:
20959 case ZERO_EXTEND:
20960 if (MEM_P (XEXP (x, 0)))
20961 *total = 0;
20962 else
20963 *total = COSTS_N_INSNS (1);
20964 return false;
20966 case COMPARE:
20967 case NEG:
20968 case ABS:
20969 if (!FLOAT_MODE_P (mode))
20971 *total = COSTS_N_INSNS (1);
20972 return false;
20974 /* FALLTHRU */
20976 case FLOAT:
20977 case UNSIGNED_FLOAT:
20978 case FIX:
20979 case UNSIGNED_FIX:
20980 case FLOAT_TRUNCATE:
20981 *total = rs6000_cost->fp;
20982 return false;
20984 case FLOAT_EXTEND:
20985 if (mode == DFmode)
20986 *total = rs6000_cost->sfdf_convert;
20987 else
20988 *total = rs6000_cost->fp;
20989 return false;
20991 case CALL:
20992 case IF_THEN_ELSE:
20993 if (!speed)
20995 *total = COSTS_N_INSNS (1);
20996 return true;
20998 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21000 *total = rs6000_cost->fp;
21001 return false;
21003 break;
21005 case NE:
21006 case EQ:
21007 case GTU:
21008 case LTU:
21009 /* Carry bit requires mode == Pmode.
21010 NEG or PLUS already counted so only add one. */
21011 if (mode == Pmode
21012 && (outer_code == NEG || outer_code == PLUS))
21014 *total = COSTS_N_INSNS (1);
21015 return true;
21017 /* FALLTHRU */
21019 case GT:
21020 case LT:
21021 case UNORDERED:
21022 if (outer_code == SET)
21024 if (XEXP (x, 1) == const0_rtx)
21026 *total = COSTS_N_INSNS (2);
21027 return true;
21029 else
21031 *total = COSTS_N_INSNS (3);
21032 return false;
21035 /* CC COMPARE. */
21036 if (outer_code == COMPARE)
21038 *total = 0;
21039 return true;
21041 break;
21043 default:
21044 break;
/* Anything that reached a 'break' above: let the caller scan the
   subexpressions. */
21047 return false;
21050 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
21052 static bool
21053 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21054 int opno, int *total, bool speed)
21056 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21058 fprintf (stderr,
21059 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21060 "opno = %d, total = %d, speed = %s, x:\n",
21061 ret ? "complete" : "scan inner",
21062 GET_MODE_NAME (mode),
21063 GET_RTX_NAME (outer_code),
21064 opno,
21065 *total,
21066 speed ? "true" : "false");
21068 debug_rtx (x);
21070 return ret;
/* Estimate the cost of INSN.  Unrecognizable insns cost 0.  When optimizing
   for size, the raw insn length is used.  Otherwise, take the "cost"
   attribute when set; else derive a count N of machine insns (from the
   "num_insns" attribute, or length/4 after stripping the prefixed-insn
   length adjustment) and scale it by the insn type's unit cost.  */
21073 static int
21074 rs6000_insn_cost (rtx_insn *insn, bool speed)
21076 if (recog_memoized (insn) < 0)
21077 return 0;
21079 /* If we are optimizing for size, just use the length. */
21080 if (!speed)
21081 return get_attr_length (insn);
21083 /* Use the cost if provided. */
21084 int cost = get_attr_cost (insn);
21085 if (cost > 0)
21086 return cost;
21088 /* If the insn tells us how many insns there are, use that. Otherwise use
21089 the length/4. Adjust the insn length to remove the extra size that
21090 prefixed instructions take. */
21091 int n = get_attr_num_insns (insn);
21092 if (n == 0)
21094 int length = get_attr_length (insn);
21095 if (get_attr_prefixed (insn) == PREFIXED_YES)
21097 int adjust = 0;
21098 ADJUST_INSN_LENGTH (insn, adjust);
21099 length -= adjust;
21102 n = length / 4;
21105 enum attr_type type = get_attr_type (insn);
21107 switch (type)
/* Loads: one extra unit over the bare insn count. */
21109 case TYPE_LOAD:
21110 case TYPE_FPLOAD:
21111 case TYPE_VECLOAD:
21112 cost = COSTS_N_INSNS (n + 1);
21113 break;
/* Multiplies/divides: one insn is priced at the tuned unit cost,
   the remaining n-1 at one unit each. */
21115 case TYPE_MUL:
21116 switch (get_attr_size (insn))
21118 case SIZE_8:
21119 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21120 break;
21121 case SIZE_16:
21122 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21123 break;
21124 case SIZE_32:
21125 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21126 break;
21127 case SIZE_64:
21128 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21129 break;
21130 default:
21131 gcc_unreachable ();
21133 break;
21134 case TYPE_DIV:
21135 switch (get_attr_size (insn))
21137 case SIZE_32:
21138 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21139 break;
21140 case SIZE_64:
21141 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21142 break;
21143 default:
21144 gcc_unreachable ();
21146 break;
21148 case TYPE_FP:
21149 cost = n * rs6000_cost->fp;
21150 break;
21151 case TYPE_DMUL:
21152 cost = n * rs6000_cost->dmul;
21153 break;
21154 case TYPE_SDIV:
21155 cost = n * rs6000_cost->sdiv;
21156 break;
21157 case TYPE_DDIV:
21158 cost = n * rs6000_cost->ddiv;
21159 break;
/* Synchronization and CR reads: two extra units. */
21161 case TYPE_SYNC:
21162 case TYPE_LOAD_L:
21163 case TYPE_MFCR:
21164 case TYPE_MFCRF:
21165 cost = COSTS_N_INSNS (n + 2);
21166 break;
21168 default:
21169 cost = COSTS_N_INSNS (n);
21172 return cost;
21175 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21177 static int
21178 rs6000_debug_address_cost (rtx x, machine_mode mode,
21179 addr_space_t as, bool speed)
21181 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21183 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21184 ret, speed ? "true" : "false");
21185 debug_rtx (x);
21187 return ret;
21191 /* A C expression returning the cost of moving data from a register of class
21192 CLASS1 to one of CLASS2. */
21194 static int
21195 rs6000_register_move_cost (machine_mode mode,
21196 reg_class_t from, reg_class_t to)
21198 int ret;
21199 reg_class_t rclass;
/* dbg_cost_ctrl tracks recursion depth so only the outermost call logs. */
21201 if (TARGET_DEBUG_COST)
21202 dbg_cost_ctrl++;
21204 /* If we have VSX, we can easily move between FPR or Altivec registers,
21205 otherwise we can only easily move within classes.
21206 Do this first so we give best-case answers for union classes
21207 containing both gprs and vsx regs. */
21208 HARD_REG_SET to_vsx, from_vsx;
21209 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21210 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21211 if (!hard_reg_set_empty_p (to_vsx)
21212 && !hard_reg_set_empty_p (from_vsx)
21213 && (TARGET_VSX
21214 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21216 int reg = FIRST_FPR_REGNO;
21217 if (TARGET_VSX
21218 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21219 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21220 reg = FIRST_ALTIVEC_REGNO;
21221 ret = 2 * hard_regno_nregs (reg, mode);
21224 /* Moves from/to GENERAL_REGS. */
/* Note the comma operator: rclass captures whichever side is NOT the
   GENERAL_REGS end of the move. */
21225 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21226 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21228 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21230 if (TARGET_DIRECT_MOVE)
21232 /* Keep the cost for direct moves above that for within
21233 a register class even if the actual processor cost is
21234 comparable. We do this because a direct move insn
21235 can't be a nop, whereas with ideal register
21236 allocation a move within the same class might turn
21237 out to be a nop. */
21238 if (rs6000_tune == PROCESSOR_POWER9
21239 || rs6000_tune == PROCESSOR_FUTURE)
21240 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21241 else
21242 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21243 /* SFmode requires a conversion when moving between gprs
21244 and vsx. */
21245 if (mode == SFmode)
21246 ret += 2;
/* Without direct moves, a GPR<->VSX transfer bounces through memory. */
21248 else
21249 ret = (rs6000_memory_move_cost (mode, rclass, false)
21250 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21253 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21254 shift. */
21255 else if (rclass == CR_REGS)
21256 ret = 4;
21258 /* For those processors that have slow LR/CTR moves, make them more
21259 expensive than memory in order to bias spills to memory .*/
21260 else if ((rs6000_tune == PROCESSOR_POWER6
21261 || rs6000_tune == PROCESSOR_POWER7
21262 || rs6000_tune == PROCESSOR_POWER8
21263 || rs6000_tune == PROCESSOR_POWER9)
21264 && reg_class_subset_p (rclass, SPECIAL_REGS))
21265 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21267 else
21268 /* A move will cost one instruction per GPR moved. */
21269 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21272 /* Everything else has to go through GENERAL_REGS. */
21273 else
21274 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21275 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21277 if (TARGET_DEBUG_COST)
21279 if (dbg_cost_ctrl == 1)
21280 fprintf (stderr,
21281 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21282 ret, GET_MODE_NAME (mode), reg_class_names[from],
21283 reg_class_names[to]);
21284 dbg_cost_ctrl--;
21287 return ret;
21290 /* A C expressions returning the cost of moving data of MODE from a register to
21291 or from memory. */
21293 static int
21294 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21295 bool in ATTRIBUTE_UNUSED)
21297 int ret;
21299 if (TARGET_DEBUG_COST)
21300 dbg_cost_ctrl++;
21302 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21303 ret = 4 * hard_regno_nregs (0, mode);
21304 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21305 || reg_classes_intersect_p (rclass, VSX_REGS)))
21306 ret = 4 * hard_regno_nregs (32, mode);
21307 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21308 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21309 else
21310 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21312 if (TARGET_DEBUG_COST)
21314 if (dbg_cost_ctrl == 1)
21315 fprintf (stderr,
21316 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21317 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21318 dbg_cost_ctrl--;
21321 return ret;
21324 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21326 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21327 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21328 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21329 move cost between GENERAL_REGS and VSX_REGS low.
21331 It might seem reasonable to use a union class. After all, if usage
21332 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21333 rather than memory. However, in cases where register pressure of
21334 both is high, like the cactus_adm spec test, allowing
21335 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21336 the first scheduling pass. This is partly due to an allocno of
21337 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21338 class, which gives too high a pressure for GENERAL_REGS and too low
21339 for VSX_REGS. So, force a choice of the subclass here.
21341 The best class is also the union if GENERAL_REGS and VSX_REGS have
21342 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21343 allocno class, since trying to narrow down the class by regno mode
21344 is prone to error. For example, SImode is allowed in VSX regs and
21345 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21346 it would be wrong to choose an allocno of GENERAL_REGS based on
21347 SImode. */
21349 static reg_class_t
21350 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21351 reg_class_t allocno_class,
21352 reg_class_t best_class)
21354 switch (allocno_class)
21356 case GEN_OR_VSX_REGS:
21357 /* best_class must be a subset of allocno_class. */
21358 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21359 || best_class == GEN_OR_FLOAT_REGS
21360 || best_class == VSX_REGS
21361 || best_class == ALTIVEC_REGS
21362 || best_class == FLOAT_REGS
21363 || best_class == GENERAL_REGS
21364 || best_class == BASE_REGS);
21365 /* Use best_class but choose wider classes when copying from the
21366 wider class to best_class is cheap. This mimics IRA choice
21367 of allocno class. */
21368 if (best_class == BASE_REGS)
21369 return GENERAL_REGS;
21370 if (TARGET_VSX
21371 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21372 return VSX_REGS;
21373 return best_class;
21375 default:
21376 break;
21379 return allocno_class;
21382 /* Returns a code for a target-specific builtin that implements
21383 reciprocal of the function, or NULL_TREE if not available. */
21385 static tree
21386 rs6000_builtin_reciprocal (tree fndecl)
21388 switch (DECL_MD_FUNCTION_CODE (fndecl))
21390 case VSX_BUILTIN_XVSQRTDP:
21391 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21392 return NULL_TREE;
21394 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21396 case VSX_BUILTIN_XVSQRTSP:
21397 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21398 return NULL_TREE;
21400 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21402 default:
21403 return NULL_TREE;
21407 /* Load up a constant. If the mode is a vector mode, splat the value across
21408 all of the vector elements. */
21410 static rtx
21411 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21413 rtx reg;
21415 if (mode == SFmode || mode == DFmode)
21417 rtx d = const_double_from_real_value (dconst, mode);
21418 reg = force_reg (mode, d);
21420 else if (mode == V4SFmode)
21422 rtx d = const_double_from_real_value (dconst, SFmode);
21423 rtvec v = gen_rtvec (4, d, d, d, d);
21424 reg = gen_reg_rtx (mode);
21425 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21427 else if (mode == V2DFmode)
21429 rtx d = const_double_from_real_value (dconst, DFmode);
21430 rtvec v = gen_rtvec (2, d, d);
21431 reg = gen_reg_rtx (mode);
21432 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21434 else
21435 gcc_unreachable ();
21437 return reg;
21440 /* Generate an FMA instruction. */
21442 static void
21443 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21445 machine_mode mode = GET_MODE (target);
21446 rtx dst;
21448 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21449 gcc_assert (dst != NULL);
21451 if (dst != target)
21452 emit_move_insn (target, dst);
21455 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21457 static void
21458 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21460 machine_mode mode = GET_MODE (dst);
21461 rtx r;
21463 /* This is a tad more complicated, since the fnma_optab is for
21464 a different expression: fma(-m1, m2, a), which is the same
21465 thing except in the case of signed zeros.
21467 Fortunately we know that if FMA is supported that FNMSUB is
21468 also supported in the ISA. Just expand it directly. */
21470 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21472 r = gen_rtx_NEG (mode, a);
21473 r = gen_rtx_FMA (mode, m1, m2, r);
21474 r = gen_rtx_NEG (mode, r);
21475 emit_insn (gen_rtx_SET (dst, r));
21478 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21479 add a reg_note saying that this was a division. Support both scalar and
21480 vector divide. Assumes no trapping math and finite arguments. */
21482 void
21483 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21485 machine_mode mode = GET_MODE (dst);
21486 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21487 int i;
21489 /* Low precision estimates guarantee 5 bits of accuracy. High
21490 precision estimates guarantee 14 bits of accuracy. SFmode
21491 requires 23 bits of accuracy. DFmode requires 52 bits of
21492 accuracy. Each pass at least doubles the accuracy, leading
21493 to the following. */
21494 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21495 if (mode == DFmode || mode == V2DFmode)
21496 passes++;
/* The mode's multiply pattern, used for the refinement steps below. */
21498 enum insn_code code = optab_handler (smul_optab, mode);
21499 insn_gen_fn gen_mul = GEN_FCN (code);
21501 gcc_assert (code != CODE_FOR_nothing);
21503 one = rs6000_load_constant_and_splat (mode, dconst1);
21505 /* x0 = 1./d estimate */
21506 x0 = gen_reg_rtx (mode);
21507 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21508 UNSPEC_FRES)));
21510 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21511 if (passes > 1) {
21513 /* e0 = 1. - d * x0 */
21514 e0 = gen_reg_rtx (mode);
21515 rs6000_emit_nmsub (e0, d, x0, one);
21517 /* x1 = x0 + e0 * x0 */
21518 x1 = gen_reg_rtx (mode);
21519 rs6000_emit_madd (x1, e0, x0, x0);
21521 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21522 ++i, xprev = xnext, eprev = enext) {
21524 /* enext = eprev * eprev */
21525 enext = gen_reg_rtx (mode);
21526 emit_insn (gen_mul (enext, eprev, eprev));
21528 /* xnext = xprev + enext * xprev */
21529 xnext = gen_reg_rtx (mode);
21530 rs6000_emit_madd (xnext, enext, xprev, xprev);
21533 } else
21534 xprev = x0;
21536 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21538 /* u = n * xprev */
21539 u = gen_reg_rtx (mode);
21540 emit_insn (gen_mul (u, n, xprev));
21542 /* v = n - (d * u) */
21543 v = gen_reg_rtx (mode);
21544 rs6000_emit_nmsub (v, d, u, n);
21546 /* dst = (v * xprev) + u */
21547 rs6000_emit_madd (dst, v, xprev, u);
/* Attach REG_EQUAL so later passes can still see this as a division. */
21549 if (note_p)
21550 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21553 /* Goldschmidt's Algorithm for single/double-precision floating point
21554 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21556 void
21557 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21559 machine_mode mode = GET_MODE (src);
21560 rtx e = gen_reg_rtx (mode);
21561 rtx g = gen_reg_rtx (mode);
21562 rtx h = gen_reg_rtx (mode);
21564 /* Low precision estimates guarantee 5 bits of accuracy. High
21565 precision estimates guarantee 14 bits of accuracy. SFmode
21566 requires 23 bits of accuracy. DFmode requires 52 bits of
21567 accuracy. Each pass at least doubles the accuracy, leading
21568 to the following. */
21569 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21570 if (mode == DFmode || mode == V2DFmode)
21571 passes++;
21573 int i;
21574 rtx mhalf;
21575 enum insn_code code = optab_handler (smul_optab, mode);
21576 insn_gen_fn gen_mul = GEN_FCN (code);
21578 gcc_assert (code != CODE_FOR_nothing);
21580 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21582 /* e = rsqrt estimate */
21583 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21584 UNSPEC_RSQRT)));
21586 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21587 if (!recip)
21589 rtx zero = force_reg (mode, CONST0_RTX (mode));
21591 if (mode == SFmode)
21593 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21594 e, zero, mode, 0);
21595 if (target != e)
21596 emit_move_insn (e, target);
21598 else
21600 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21601 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21605 /* g = sqrt estimate. */
21606 emit_insn (gen_mul (g, e, src));
21607 /* h = 1/(2*sqrt) estimate. */
21608 emit_insn (gen_mul (h, e, mhalf));
21610 if (recip)
21612 if (passes == 1)
21614 rtx t = gen_reg_rtx (mode);
21615 rs6000_emit_nmsub (t, g, h, mhalf);
21616 /* Apply correction directly to 1/rsqrt estimate. */
21617 rs6000_emit_madd (dst, e, t, e);
21619 else
21621 for (i = 0; i < passes; i++)
21623 rtx t1 = gen_reg_rtx (mode);
21624 rtx g1 = gen_reg_rtx (mode);
21625 rtx h1 = gen_reg_rtx (mode);
21627 rs6000_emit_nmsub (t1, g, h, mhalf);
21628 rs6000_emit_madd (g1, g, t1, g);
21629 rs6000_emit_madd (h1, h, t1, h);
21631 g = g1;
21632 h = h1;
21634 /* Multiply by 2 for 1/rsqrt. */
21635 emit_insn (gen_add3_insn (dst, h, h));
21638 else
21640 rtx t = gen_reg_rtx (mode);
21641 rs6000_emit_nmsub (t, g, h, mhalf);
21642 rs6000_emit_madd (dst, g, t, g);
21645 return;
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  /* Otherwise use popcntb, which counts bits within each byte; the
     multiply below sums the per-byte counts into the top byte, and the
     shift extracts that byte as the final population count.  */
  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

  /* No prty insn: either fold the per-byte popcounts together with a
     shift/xor reduction, or take the full popcount; in both cases the
     low bit of the result is the parity.  Pick whichever is cheaper
     on this CPU.  */
  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  rtx tmp1, tmp2, tmp3, tmp4;

	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
/* Expand an Altivec constant permutation for little endian mode.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.

   There are two issues: First, the two input operands must be
   swapped so that together they form a double-wide array in LE
   order.  Second, the vperm instruction has surprising behavior
   in LE mode: it interprets the elements of the source vectors
   in BE mode ("left to right") and interprets the elements of
   the destination vector in LE mode ("right to left").  To
   correct for this, we must subtract each element of the permute
   control vector from 31.

   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
   serve as the permute control vector.  Then, in BE mode,

     vperm 9,10,11,12

   places the desired result in vr9.  However, in LE mode the
   vector contents will be

     vr10 = 00000003 00000002 00000001 00000000
     vr11 = 00000007 00000006 00000005 00000004

   The result of the vperm using the same permute control vector is

     vr9  = 05000000 07000000 01000000 03000000

   That is, the leftmost 4 bytes of vr10 are interpreted as the
   source for the rightmost 4 bytes of vr9, and so on.

   If we change the permute control vector to

     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}

   and issue

     vperm 9,11,10,12

   we get the desired

   vr9  = 00000006 00000004 00000002 00000000.  */

static void
altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
				  const vec_perm_indices &sel)
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;

  /* Unpack and adjust the constant selector: subtract each index
     from 31 to compensate for vperm's BE element numbering.  */
  for (i = 0; i < 16; ++i)
    {
      unsigned int elt = 31 - (sel[i] & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  /* Note the reversed operand order (op1, op0) -- see the function
     comment above for why LE requires the swap.  */
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
			   UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.  (NOT of the
   5-bit selector field is equivalent to subtracting it from 31.)  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    {
      /* ISA 3.0 vpermr does the selector complement itself.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
			       UNSPEC_VPERMR);
    }
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
	 The VNAND is preferred for future fusion opportunities.
	 (~sel | ~sel is NAND (sel, sel); ~sel & ~sel is NOR (sel, sel);
	 both compute ~sel.)  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
	      ? gen_rtx_IOR (V16QImode, notx, notx)
	      : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
			       UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  /* Table of single-instruction pack/merge patterns.  Each entry gives
     the ISA flag required, the insn to emit, and the byte permutation
     (BE numbering, indices 0..31 over the op0:op1 concatenation) that
     the insn implements.  */
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
      {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
      {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
       : CODE_FOR_altivec_vmrglb_direct),
      {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
       : CODE_FOR_altivec_vmrglh_direct),
      {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
       : CODE_FOR_altivec_vmrglw_direct),
      {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
       : CODE_FOR_altivec_vmrghb_direct),
      {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
       : CODE_FOR_altivec_vmrghh_direct),
      {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
       : CODE_FOR_altivec_vmrghw_direct),
      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
    { OPTION_MASK_P8_VECTOR,
      (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
       : CODE_FOR_p8_vmrgow_v4sf_direct),
      {  0,  1,  2,  3, 16, 17, 18, 19,  8,  9, 10, 11, 24, 25, 26, 27 } },
    { OPTION_MASK_P8_VECTOR,
      (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
       : CODE_FOR_p8_vmrgew_v4sf_direct),
      {  4,  5,  6,  7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
  };

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  WHICH tracks whether any index
     selects from op0 (bit 0) and/or op1 (bit 1).  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      /* Indices from both operands: only a single-vector case if the
	 operands are in fact the same.  */
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      /* Everything comes from op1: reduce indices to 0..15.  */
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      /* Everything comes from op0.  */
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns (byte, halfword, word).  */
  if (one_vec)
    {
      elt = perm[0];

      /* Byte splat: all 16 indices identical.  */
      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      /* Halfword splat: pairs (elt, elt+1) repeated.  */
      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      /* Word splat: quadruples (elt..elt+3) repeated.  */
      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      /* The pattern may match either as written or with the two
	 operands exchanged (indices offset by 16).  */
      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V4SImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V8HImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     from 0 to 2N-1.  */
	  if (swapped ^ !BYTES_BIG_ENDIAN)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  /* On LE we can still always expand via an adjusted vperm.  */
  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
      return true;
    }

  return false;
}
/* Expand a VSX Permute Doubleword constant permutation.
   Return true if we match an efficient implementation.

   PERM0 and PERM1 are the two selector indices (0..3) into the
   op0:op1 doubleword concatenation; bit 1 of each says which operand
   the element comes from, bit 0 which half of that operand.  A NULL
   TARGET performs a dry-run validity check without emitting RTL.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success!  Emit vec_select (vec_concat (op0, op1)), which matches
     the xxpermdi pattern.  */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  A NULL TARGET means the
   middle end is only asking whether the permutation is supported
   (testing), not to emit code.  */

static bool
rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
				 rtx op1, const vec_perm_indices &sel)
{
  bool testing_p = !target;

  /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
  if (TARGET_ALTIVEC && testing_p)
    return true;

  /* Check for ps_merge* or xxpermdi insns.  */
  if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
    {
      if (testing_p)
	{
	  /* Use placeholder raw registers so the expander has operands
	     to inspect; nothing is emitted when TARGET is NULL.  */
	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
	}
      if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
	return true;
    }

  if (TARGET_ALTIVEC)
    {
      /* Force the target-independent code to lower to V16QImode.  */
      if (vmode != V16QImode)
	return false;
      if (altivec_expand_vec_perm_const (target, op0, op1, sel))
	return true;
    }

  return false;
}
22221 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22222 OP0 and OP1 are the input vectors and TARGET is the output vector.
22223 PERM specifies the constant permutation vector. */
22225 static void
22226 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22227 machine_mode vmode, const vec_perm_builder &perm)
22229 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22230 if (x != target)
22231 emit_move_insn (target, x);
22234 /* Expand an extract even operation. */
22236 void
22237 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22239 machine_mode vmode = GET_MODE (target);
22240 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22241 vec_perm_builder perm (nelt, nelt, 1);
22243 for (i = 0; i < nelt; i++)
22244 perm.quick_push (i * 2);
22246 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22249 /* Expand a vector interleave operation. */
22251 void
22252 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22254 machine_mode vmode = GET_MODE (target);
22255 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22256 vec_perm_builder perm (nelt, nelt, 1);
22258 high = (highp ? 0 : nelt / 2);
22259 for (i = 0; i < nelt / 2; i++)
22261 perm.quick_push (i + high);
22262 perm.quick_push (i + nelt + high);
22265 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22268 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22269 void
22270 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22272 HOST_WIDE_INT hwi_scale (scale);
22273 REAL_VALUE_TYPE r_pow;
22274 rtvec v = rtvec_alloc (2);
22275 rtx elt;
22276 rtx scale_vec = gen_reg_rtx (V2DFmode);
22277 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22278 elt = const_double_from_real_value (r_pow, DFmode);
22279 RTVEC_ELT (v, 0) = elt;
22280 RTVEC_ELT (v, 1) = elt;
22281 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22282 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
/* Return an RTX representing where to find the function value of a
   function returning MODE, where MODE is a complex mode.  */
static rtx
rs6000_complex_function_value (machine_mode mode)
{
  unsigned int regno;
  rtx r1, r2;
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);

  /* IEEE 128-bit complex values return in vector registers.  */
  if (TARGET_FLOAT128_TYPE
      && (mode == KCmode
	  || (mode == TCmode && TARGET_IEEEQUAD)))
    regno = ALTIVEC_ARG_RETURN;

  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;

  else
    {
      regno = GP_ARG_RETURN;

      /* 32-bit is OK since it'll go in r3/r4.  */
      if (TARGET_32BIT && inner_bytes >= 4)
	return gen_rtx_REG (mode, regno);
    }

  /* Parts of at least 8 bytes fit consecutively in REGNO onward, so a
     plain REG suffices.  */
  if (inner_bytes >= 8)
    return gen_rtx_REG (mode, regno);

  /* Otherwise describe the real part in REGNO and the imaginary part
     in REGNO+1 explicitly via a PARALLEL.  */
  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
			  const0_rtx);
  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
			  GEN_INT (inner_bytes));
  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}
22322 /* Return an rtx describing a return value of MODE as a PARALLEL
22323 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22324 stride REG_STRIDE. */
22326 static rtx
22327 rs6000_parallel_return (machine_mode mode,
22328 int n_elts, machine_mode elt_mode,
22329 unsigned int regno, unsigned int reg_stride)
22331 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22333 int i;
22334 for (i = 0; i < n_elts; i++)
22336 rtx r = gen_rtx_REG (elt_mode, regno);
22337 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22338 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22339 regno += reg_stride;
22342 return par;
/* Target hook for TARGET_FUNCTION_VALUE.

   An integer value is in r3 and a floating-point value is in fp1,
   unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    switch (mode)
      {
      default:
	break;
      case E_DImode:
      case E_SCmode:
      case E_DCmode:
      case E_TCmode:
	int count = GET_MODE_SIZE (mode) / 4;
	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
      }

  /* Small integers and pointers promote to full register width.  */
  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
	   && !FLOAT128_VECTOR_P (mode))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  Mirrors rs6000_function_value,
   but libcalls have no tree type to inspect, only a mode.  */
rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
22471 /* Compute register pressure classes. We implement the target hook to avoid
22472 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22473 lead to incorrect estimates of number of available registers and therefor
22474 increased register pressure/spill. */
22475 static int
22476 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22478 int n;
22480 n = 0;
22481 pressure_classes[n++] = GENERAL_REGS;
22482 if (TARGET_VSX)
22483 pressure_classes[n++] = VSX_REGS;
22484 else
22486 if (TARGET_ALTIVEC)
22487 pressure_classes[n++] = ALTIVEC_REGS;
22488 if (TARGET_HARD_FLOAT)
22489 pressure_classes[n++] = FLOAT_REGS;
22491 pressure_classes[n++] = CR_REGS;
22492 pressure_classes[n++] = SPECIAL_REGS;
22494 return n;
22497 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22498 Frame pointer elimination is automatically handled.
22500 For the RS/6000, if frame pointer elimination is being done, we would like
22501 to convert ap into fp, not sp.
22503 We need r30 if -mminimal-toc was specified, and there are constant pool
22504 references. */
22506 static bool
22507 rs6000_can_eliminate (const int from, const int to)
22509 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22510 ? ! frame_pointer_needed
22511 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22512 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22513 || constant_pool_empty_p ()
22514 : true);
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  Offsets are derived from
   the frame layout computed by rs6000_stack_info.  */
HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
	offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
	     ? info->fixed_size + info->vars_size + info->parm_size
	     : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
/* Fill in sizes of registers used by unwinder.  ADDRESS is a tree
   expression for the base of the reg-size table being initialized.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.
	 Record 16 bytes for each Altivec register's unwind column even
	 though this compilation has Altivec disabled.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}
/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      if (regno <= 31)
	return regno;
      if (FP_REGNO_P (regno))
	return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
	return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
	return 108;
      if (regno == CTR_REGNO)
	return 109;
      if (regno == CA_REGNO)
	return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
	 to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
	return 64;
      if (CR_REGNO_P (regno))
	return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
	return 356;
      if (regno == VSCR_REGNO)
	return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
	return 111;
      if (regno == ARG_POINTER_REGNUM)
	return 67;
      if (regno == 64)
	return 100;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 111;
  if (regno == ARG_POINTER_REGNUM)
    return 67;
  if (regno == 64)
    return 64;

  gcc_unreachable ();
}
22659 /* target hook eh_return_filter_mode */
22660 static scalar_int_mode
22661 rs6000_eh_return_filter_mode (void)
22663 return TARGET_32BIT ? SImode : word_mode;
22666 /* Target hook for translate_mode_attribute. */
22667 static machine_mode
22668 rs6000_translate_mode_attribute (machine_mode mode)
22670 if ((FLOAT128_IEEE_P (mode)
22671 && ieee128_float_type_node == long_double_type_node)
22672 || (FLOAT128_IBM_P (mode)
22673 && ibm128_float_type_node == long_double_type_node))
22674 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22675 return mode;
22678 /* Target hook for scalar_mode_supported_p. */
22679 static bool
22680 rs6000_scalar_mode_supported_p (scalar_mode mode)
22682 /* -m32 does not support TImode. This is the default, from
22683 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22684 same ABI as for -m32. But default_scalar_mode_supported_p allows
22685 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22686 for -mpowerpc64. */
22687 if (TARGET_32BIT && mode == TImode)
22688 return false;
22690 if (DECIMAL_FLOAT_MODE_P (mode))
22691 return default_decimal_float_supported_p ();
22692 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22693 return true;
22694 else
22695 return default_scalar_mode_supported_p (mode);
22698 /* Target hook for vector_mode_supported_p. */
22699 static bool
22700 rs6000_vector_mode_supported_p (machine_mode mode)
22702 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22703 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22704 double-double. */
22705 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22706 return true;
22708 else
22709 return false;
22712 /* Target hook for floatn_mode. */
22713 static opt_scalar_float_mode
22714 rs6000_floatn_mode (int n, bool extended)
22716 if (extended)
22718 switch (n)
22720 case 32:
22721 return DFmode;
22723 case 64:
22724 if (TARGET_FLOAT128_TYPE)
22725 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22726 else
22727 return opt_scalar_float_mode ();
22729 case 128:
22730 return opt_scalar_float_mode ();
22732 default:
22733 /* Those are the only valid _FloatNx types. */
22734 gcc_unreachable ();
22737 else
22739 switch (n)
22741 case 32:
22742 return SFmode;
22744 case 64:
22745 return DFmode;
22747 case 128:
22748 if (TARGET_FLOAT128_TYPE)
22749 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22750 else
22751 return opt_scalar_float_mode ();
22753 default:
22754 return opt_scalar_float_mode ();
22760 /* Target hook for c_mode_for_suffix. */
22761 static machine_mode
22762 rs6000_c_mode_for_suffix (char suffix)
22764 if (TARGET_FLOAT128_TYPE)
22766 if (suffix == 'q' || suffix == 'Q')
22767 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22769 /* At the moment, we are not defining a suffix for IBM extended double.
22770 If/when the default for -mabi=ieeelongdouble is changed, and we want
22771 to support __ibm128 constants in legacy library code, we may need to
22772 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
22773 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22774 __float80 constants. */
22777 return VOIDmode;
22780 /* Target hook for invalid_arg_for_unprototyped_fn. */
22781 static const char *
22782 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22784 return (!rs6000_darwin64_abi
22785 && typelist == 0
22786 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22787 && (funcdecl == NULL_TREE
22788 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22789 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22790 ? N_("AltiVec argument passed to unprototyped function")
22791 : NULL;
22794 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22795 setup by using __stack_chk_fail_local hidden function instead of
22796 calling __stack_chk_fail directly. Otherwise it is better to call
22797 __stack_chk_fail directly. */
22799 static tree ATTRIBUTE_UNUSED
22800 rs6000_stack_protect_fail (void)
22802 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22803 ? default_hidden_stack_protect_fail ()
22804 : default_external_stack_protect_fail ();
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

#if TARGET_ELF
static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  /* Shadow memory starts at 1 << 41 for 64-bit, 1 << 29 for 32-bit.  */
  unsigned int shift = TARGET_64BIT ? 41 : 29;
  return (unsigned HOST_WIDE_INT) 1 << shift;
}
#endif
22817 /* Mask options that we want to support inside of attribute((target)) and
22818 #pragma GCC target operations. Note, we do not include things like
22819 64/32-bit, endianness, hard/soft floating point, etc. that would have
22820 different calling sequences. */
22822 struct rs6000_opt_mask {
22823 const char *name; /* option name */
22824 HOST_WIDE_INT mask; /* mask to set */
22825 bool invert; /* invert sense of mask */
22826 bool valid_target; /* option is a target option */
/* Table of the -m option names recognized inside attribute((target)) and
   #pragma GCC target, and the rs6000_isa_flags bit each one controls.
   Entries with valid_target false are only used for printing flags
   (rs6000_print_isa_options), not accepted in attributes/pragmas.  */
22829 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22831 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22832 { "cmpb", OPTION_MASK_CMPB, false, true },
22833 { "crypto", OPTION_MASK_CRYPTO, false, true },
22834 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22835 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22836 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22837 false, true },
22838 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22839 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22840 { "fprnd", OPTION_MASK_FPRND, false, true },
22841 { "future", OPTION_MASK_FUTURE, false, true },
22842 { "hard-dfp", OPTION_MASK_DFP, false, true },
22843 { "htm", OPTION_MASK_HTM, false, true },
22844 { "isel", OPTION_MASK_ISEL, false, true },
22845 { "mfcrf", OPTION_MASK_MFCRF, false, true },
/* Mask 0: accepted but a no-op -- presumably retained so old code using
   this option name keeps compiling; confirm against option history.  */
22846 { "mfpgpr", 0, false, true },
22847 { "modulo", OPTION_MASK_MODULO, false, true },
22848 { "mulhw", OPTION_MASK_MULHW, false, true },
22849 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22850 { "pcrel", OPTION_MASK_PCREL, false, true },
22851 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22852 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22853 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22854 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22855 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22856 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22857 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22858 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22859 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22860 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22861 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
22862 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22863 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22864 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22865 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
/* Mask 0 no-op, like "mfpgpr" above; also listed again (valid_target
   false) at the end of the table.  */
22866 { "string", 0, false, true },
/* invert == true: "update" sets a NO_UPDATE mask, so -mno-update turns
   the mask on.  */
22867 { "update", OPTION_MASK_NO_UPDATE, true , true },
22868 { "vsx", OPTION_MASK_VSX, false, true },
22869 #ifdef OPTION_MASK_64BIT
22870 #if TARGET_AIX_OS
22871 { "aix64", OPTION_MASK_64BIT, false, false },
22872 { "aix32", OPTION_MASK_64BIT, true, false },
22873 #else
22874 { "64", OPTION_MASK_64BIT, false, false },
22875 { "32", OPTION_MASK_64BIT, true, false },
22876 #endif
22877 #endif
22878 #ifdef OPTION_MASK_EABI
22879 { "eabi", OPTION_MASK_EABI, false, false },
22880 #endif
22881 #ifdef OPTION_MASK_LITTLE_ENDIAN
22882 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22883 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22884 #endif
22885 #ifdef OPTION_MASK_RELOCATABLE
22886 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22887 #endif
22888 #ifdef OPTION_MASK_STRICT_ALIGN
22889 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22890 #endif
22891 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22892 { "string", 0, false, false },
22895 /* Builtin mask mapping for printing the flags. */
/* Used only by rs6000_print_builtin_options; the invert/valid_target
   fields are unused here (all false).  */
22896 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22898 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22899 { "vsx", RS6000_BTM_VSX, false, false },
22900 { "fre", RS6000_BTM_FRE, false, false },
22901 { "fres", RS6000_BTM_FRES, false, false },
22902 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22903 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22904 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22905 { "cell", RS6000_BTM_CELL, false, false },
22906 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22907 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22908 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22909 { "crypto", RS6000_BTM_CRYPTO, false, false },
22910 { "htm", RS6000_BTM_HTM, false, false },
22911 { "hard-dfp", RS6000_BTM_DFP, false, false },
22912 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22913 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22914 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22915 { "float128", RS6000_BTM_FLOAT128, false, false },
22916 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22919 /* Option variables that we want to support inside attribute((target)) and
22920 #pragma GCC target operations. */
22922 struct rs6000_opt_var {
22923 const char *name; /* option name */
22924 size_t global_offset; /* offset of the option in global_options. */
22925 size_t target_offset; /* offset of the option in target options. */
/* Boolean option variables (not ISA mask bits) settable via
   attribute((target)) / #pragma GCC target.  rs6000_inner_target_options
   writes through global_offset; "no-" prefix inverts the value.  */
22928 static struct rs6000_opt_var const rs6000_opt_vars[] =
22930 { "friz",
22931 offsetof (struct gcc_options, x_TARGET_FRIZ),
22932 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22933 { "avoid-indexed-addresses",
22934 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22935 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
22936 { "longcall",
22937 offsetof (struct gcc_options, x_rs6000_default_long_calls),
22938 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
22939 { "optimize-swaps",
22940 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
22941 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
22942 { "allow-movmisalign",
22943 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
22944 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
22945 { "sched-groups",
22946 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
22947 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
22948 { "always-hint",
22949 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
22950 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
22951 { "align-branch-targets",
22952 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
22953 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
/* NOTE(review): both "sched-prolog" and "sched-epilog" map to
   x_TARGET_SCHED_PROLOG -- presumably intentional aliasing of the two
   options to one variable; confirm against rs6000.opt.  */
22954 { "sched-prolog",
22955 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22956 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22957 { "sched-epilog",
22958 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22959 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22960 { "speculate-indirect-jumps",
22961 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
22962 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
22965 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
22966 parsing. Return true if there were no errors. */
/* ARGS is either a STRING_CST of comma-separated option names or a
   TREE_LIST of such strings; ATTR_P selects the attribute vs. pragma
   wording of error messages.  Updates rs6000_isa_flags (and the
   rs6000_opt_vars variables) in place; all errors are reported but
   parsing continues, so several diagnostics may be emitted.  */
22968 static bool
22969 rs6000_inner_target_options (tree args, bool attr_p)
22971 bool ret = true;
22973 if (args == NULL_TREE)
22976 else if (TREE_CODE (args) == STRING_CST)
22978 char *p = ASTRDUP (TREE_STRING_POINTER (args));
22979 char *q;
/* strtok: first call scans P, later calls (P == NULL) continue.  */
22981 while ((q = strtok (p, ",")) != NULL)
22983 bool error_p = false;
22984 bool not_valid_p = false;
22985 const char *cpu_opt = NULL;
22987 p = NULL;
22988 if (strncmp (q, "cpu=", 4) == 0)
22990 int cpu_index = rs6000_cpu_name_lookup (q+4);
22991 if (cpu_index >= 0)
22992 rs6000_cpu_index = cpu_index;
22993 else
22995 error_p = true;
22996 cpu_opt = q+4;
22999 else if (strncmp (q, "tune=", 5) == 0)
23001 int tune_index = rs6000_cpu_name_lookup (q+5);
23002 if (tune_index >= 0)
23003 rs6000_tune_index = tune_index;
23004 else
23006 error_p = true;
23007 cpu_opt = q+5;
23010 else
/* Not cpu=/tune=: look the name up in the mask table, honoring an
   optional "no-" prefix.  */
23012 size_t i;
23013 bool invert = false;
23014 char *r = q;
23016 error_p = true;
23017 if (strncmp (r, "no-", 3) == 0)
23019 invert = true;
23020 r += 3;
23023 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23024 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23026 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23028 if (!rs6000_opt_masks[i].valid_target)
23029 not_valid_p = true;
23030 else
23032 error_p = false;
23033 rs6000_isa_flags_explicit |= mask;
23035 /* VSX needs altivec, so -mvsx automagically sets
23036 altivec and disables -mavoid-indexed-addresses. */
23037 if (!invert)
23039 if (mask == OPTION_MASK_VSX)
23041 mask |= OPTION_MASK_ALTIVEC;
23042 TARGET_AVOID_XFORM = 0;
23046 if (rs6000_opt_masks[i].invert)
23047 invert = !invert;
23049 if (invert)
23050 rs6000_isa_flags &= ~mask;
23051 else
23052 rs6000_isa_flags |= mask;
23054 break;
/* Not in the mask table: try the boolean option variables.  */
23057 if (error_p && !not_valid_p)
23059 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23060 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23062 size_t j = rs6000_opt_vars[i].global_offset;
23063 *((int *) ((char *)&global_options + j)) = !invert;
23064 error_p = false;
23065 not_valid_p = false;
23066 break;
23071 if (error_p)
23073 const char *eprefix, *esuffix;
23075 ret = false;
23076 if (attr_p)
23078 eprefix = "__attribute__((__target__(";
23079 esuffix = ")))";
23081 else
23083 eprefix = "#pragma GCC target ";
23084 esuffix = "";
23087 if (cpu_opt)
23088 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23089 q, esuffix);
23090 else if (not_valid_p)
23091 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23092 else
23093 error ("%s%qs%s is invalid", eprefix, q, esuffix);
/* TREE_LIST: recurse over each element, accumulating failures.  */
23098 else if (TREE_CODE (args) == TREE_LIST)
23102 tree value = TREE_VALUE (args);
23103 if (value)
23105 bool ret2 = rs6000_inner_target_options (value, attr_p);
23106 if (!ret2)
23107 ret = false;
23109 args = TREE_CHAIN (args);
23111 while (args != NULL_TREE);
23114 else
23116 error ("attribute %<target%> argument not a string");
23117 return false;
23120 return ret;
23123 /* Print out the target options as a list for -mdebug=target. */
23125 static void
23126 rs6000_debug_target_options (tree args, const char *prefix)
23128 if (args == NULL_TREE)
23129 fprintf (stderr, "%s<NULL>", prefix);
23131 else if (TREE_CODE (args) == STRING_CST)
23133 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23134 char *q;
23136 while ((q = strtok (p, ",")) != NULL)
23138 p = NULL;
23139 fprintf (stderr, "%s\"%s\"", prefix, q);
23140 prefix = ", ";
23144 else if (TREE_CODE (args) == TREE_LIST)
23148 tree value = TREE_VALUE (args);
23149 if (value)
23151 rs6000_debug_target_options (value, prefix);
23152 prefix = ", ";
23154 args = TREE_CHAIN (args);
23156 while (args != NULL_TREE);
23159 else
23160 gcc_unreachable ();
23162 return;
23166 /* Hook to validate attribute((target("..."))). */
/* Parse ARGS against the current options, rebuild the target/optimize
   option nodes for FNDECL, and restore the caller's global options before
   returning.  Returns false if ARGS is invalid.  FLAGS is only used for
   debug output.  */
23168 static bool
23169 rs6000_valid_attribute_p (tree fndecl,
23170 tree ARG_UNUSED (name),
23171 tree args,
23172 int flags)
23174 struct cl_target_option cur_target;
23175 bool ret;
23176 tree old_optimize;
23177 tree new_target, new_optimize;
23178 tree func_optimize;
23180 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23182 if (TARGET_DEBUG_TARGET)
23184 tree tname = DECL_NAME (fndecl);
23185 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23186 if (tname)
23187 fprintf (stderr, "function: %.*s\n",
23188 (int) IDENTIFIER_LENGTH (tname),
23189 IDENTIFIER_POINTER (tname));
23190 else
23191 fprintf (stderr, "function: unknown\n");
23193 fprintf (stderr, "args:");
23194 rs6000_debug_target_options (args, " ");
23195 fprintf (stderr, "\n");
23197 if (flags)
23198 fprintf (stderr, "flags: 0x%x\n", flags);
23200 fprintf (stderr, "--------------------\n");
23203 /* attribute((target("default"))) does nothing, beyond
23204 affecting multi-versioning. */
23205 if (TREE_VALUE (args)
23206 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23207 && TREE_CHAIN (args) == NULL_TREE
23208 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23209 return true;
23211 old_optimize = build_optimization_node (&global_options);
23212 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23214 /* If the function changed the optimization levels as well as setting target
23215 options, start with the optimizations specified. */
23216 if (func_optimize && func_optimize != old_optimize)
23217 cl_optimization_restore (&global_options,
23218 TREE_OPTIMIZATION (func_optimize));
23220 /* The target attributes may also change some optimization flags, so update
23221 the optimization options if necessary. */
/* Save the current target state so it can be restored below -- the
   attribute parsing mutates global_options in place.  */
23222 cl_target_option_save (&cur_target, &global_options);
23223 rs6000_cpu_index = rs6000_tune_index = -1;
23224 ret = rs6000_inner_target_options (args, true);
23226 /* Set up any additional state. */
23227 if (ret)
23229 ret = rs6000_option_override_internal (false);
23230 new_target = build_target_option_node (&global_options);
23232 else
23233 new_target = NULL;
23235 new_optimize = build_optimization_node (&global_options);
23237 if (!new_target)
23238 ret = false;
23240 else if (fndecl)
23242 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23244 if (old_optimize != new_optimize)
23245 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Restore the caller's options regardless of success or failure.  */
23248 cl_target_option_restore (&global_options, &cur_target);
23250 if (old_optimize != new_optimize)
23251 cl_optimization_restore (&global_options,
23252 TREE_OPTIMIZATION (old_optimize));
23254 return ret;
23258 /* Hook to validate the current #pragma GCC target and set the state, and
23259 update the macros based on what was changed. If ARGS is NULL, then
23260 POP_TARGET is used to reset the options. */
23262 bool
23263 rs6000_pragma_target_parse (tree args, tree pop_target)
/* Snapshot the options in effect before the pragma, so the macro-update
   code below can diff old vs. new flags.  */
23265 tree prev_tree = build_target_option_node (&global_options);
23266 tree cur_tree;
23267 struct cl_target_option *prev_opt, *cur_opt;
23268 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23269 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23271 if (TARGET_DEBUG_TARGET)
23273 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23274 fprintf (stderr, "args:");
23275 rs6000_debug_target_options (args, " ");
23276 fprintf (stderr, "\n");
23278 if (pop_target)
23280 fprintf (stderr, "pop_target:\n");
23281 debug_tree (pop_target);
23283 else
23284 fprintf (stderr, "pop_target: <NULL>\n");
23286 fprintf (stderr, "--------------------\n");
/* NULL args == "#pragma GCC pop_options" / reset: restore POP_TARGET or
   the command-line defaults.  */
23289 if (! args)
23291 cur_tree = ((pop_target)
23292 ? pop_target
23293 : target_option_default_node);
23294 cl_target_option_restore (&global_options,
23295 TREE_TARGET_OPTION (cur_tree));
23297 else
23299 rs6000_cpu_index = rs6000_tune_index = -1;
23300 if (!rs6000_inner_target_options (args, false)
23301 || !rs6000_option_override_internal (false)
23302 || (cur_tree = build_target_option_node (&global_options))
23303 == NULL_TREE)
23305 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23306 fprintf (stderr, "invalid pragma\n");
23308 return false;
23312 target_option_current_node = cur_tree;
23313 rs6000_activate_target_options (target_option_current_node);
23315 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23316 change the macros that are defined. */
23317 if (rs6000_target_modify_macros_ptr)
23319 prev_opt = TREE_TARGET_OPTION (prev_tree);
23320 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23321 prev_flags = prev_opt->x_rs6000_isa_flags;
23323 cur_opt = TREE_TARGET_OPTION (cur_tree);
23324 cur_flags = cur_opt->x_rs6000_isa_flags;
23325 cur_bumask = cur_opt->x_rs6000_builtin_mask;
/* Only touch macros for bits that actually changed.  */
23327 diff_bumask = (prev_bumask ^ cur_bumask);
23328 diff_flags = (prev_flags ^ cur_flags);
23330 if ((diff_flags != 0) || (diff_bumask != 0))
23332 /* Delete old macros. */
23333 rs6000_target_modify_macros_ptr (false,
23334 prev_flags & diff_flags,
23335 prev_bumask & diff_bumask);
23337 /* Define new macros. */
23338 rs6000_target_modify_macros_ptr (true,
23339 cur_flags & diff_flags,
23340 cur_bumask & diff_bumask);
23344 return true;
23348 /* Remember the last target of rs6000_set_current_function. */
/* Cached so rs6000_set_current_function can return early when re-invoked
   for the same function; invalidated by rs6000_activate_target_options.  */
23349 static GTY(()) tree rs6000_previous_fndecl;
23351 /* Restore target's globals from NEW_TREE and invalidate the
23352 rs6000_previous_fndecl cache. */
23354 void
23355 rs6000_activate_target_options (tree new_tree)
23357 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23358 if (TREE_TARGET_GLOBALS (new_tree))
23359 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23360 else if (new_tree == target_option_default_node)
23361 restore_target_globals (&default_target_globals);
23362 else
23363 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23364 rs6000_previous_fndecl = NULL_TREE;
23367 /* Establish appropriate back-end context for processing the function
23368 FNDECL. The argument might be NULL to indicate processing at top
23369 level, outside of any function scope. */
23370 static void
23371 rs6000_set_current_function (tree fndecl)
23373 if (TARGET_DEBUG_TARGET)
23375 fprintf (stderr, "\n==================== rs6000_set_current_function");
23377 if (fndecl)
23378 fprintf (stderr, ", fndecl %s (%p)",
23379 (DECL_NAME (fndecl)
23380 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23381 : "<unknown>"), (void *)fndecl);
23383 if (rs6000_previous_fndecl)
23384 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23386 fprintf (stderr, "\n");
23389 /* Only change the context if the function changes. This hook is called
23390 several times in the course of compiling a function, and we don't want to
23391 slow things down too much or call target_reinit when it isn't safe. */
23392 if (fndecl == rs6000_previous_fndecl)
23393 return;
/* Determine the option node currently in effect.  */
23395 tree old_tree;
23396 if (rs6000_previous_fndecl == NULL_TREE)
23397 old_tree = target_option_current_node;
23398 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23399 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23400 else
23401 old_tree = target_option_default_node;
/* Determine the option node the new context needs; NULL means "no
   change required".  */
23403 tree new_tree;
23404 if (fndecl == NULL_TREE)
23406 if (old_tree != target_option_current_node)
23407 new_tree = target_option_current_node;
23408 else
23409 new_tree = NULL_TREE;
23411 else
23413 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23414 if (new_tree == NULL_TREE)
23415 new_tree = target_option_default_node;
23418 if (TARGET_DEBUG_TARGET)
23420 if (new_tree)
23422 fprintf (stderr, "\nnew fndecl target specific options:\n");
23423 debug_tree (new_tree);
23426 if (old_tree)
23428 fprintf (stderr, "\nold fndecl target specific options:\n");
23429 debug_tree (old_tree);
23432 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23433 fprintf (stderr, "--------------------\n");
23436 if (new_tree && old_tree != new_tree)
23437 rs6000_activate_target_options (new_tree);
/* Note: rs6000_activate_target_options clears rs6000_previous_fndecl,
   so this assignment must come after the call above.  */
23439 if (fndecl)
23440 rs6000_previous_fndecl = fndecl;
23444 /* Save the current options */
23446 static void
23447 rs6000_function_specific_save (struct cl_target_option *ptr,
23448 struct gcc_options *opts)
23450 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23451 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23454 /* Restore the current options */
23456 static void
23457 rs6000_function_specific_restore (struct gcc_options *opts,
23458 struct cl_target_option *ptr)
23461 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23462 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23463 (void) rs6000_option_override_internal (false);
23466 /* Print the current options */
23468 static void
23469 rs6000_function_specific_print (FILE *file, int indent,
23470 struct cl_target_option *ptr)
23472 rs6000_print_isa_options (file, indent, "Isa options set",
23473 ptr->x_rs6000_isa_flags);
23475 rs6000_print_isa_options (file, indent, "Isa options explicit",
23476 ptr->x_rs6000_isa_flags_explicit);
23479 /* Helper function to print the current isa or misc options on a line. */
23481 static void
23482 rs6000_print_options_internal (FILE *file,
23483 int indent,
23484 const char *string,
23485 HOST_WIDE_INT flags,
23486 const char *prefix,
23487 const struct rs6000_opt_mask *opts,
23488 size_t num_elements)
23490 size_t i;
23491 size_t start_column = 0;
23492 size_t cur_column;
23493 size_t max_column = 120;
23494 size_t prefix_len = strlen (prefix);
23495 size_t comma_len = 0;
23496 const char *comma = "";
23498 if (indent)
23499 start_column += fprintf (file, "%*s", indent, "");
23501 if (!flags)
23503 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
23504 return;
23507 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
23509 /* Print the various mask options. */
23510 cur_column = start_column;
23511 for (i = 0; i < num_elements; i++)
23513 bool invert = opts[i].invert;
23514 const char *name = opts[i].name;
23515 const char *no_str = "";
23516 HOST_WIDE_INT mask = opts[i].mask;
23517 size_t len = comma_len + prefix_len + strlen (name);
23519 if (!invert)
23521 if ((flags & mask) == 0)
23523 no_str = "no-";
23524 len += sizeof ("no-") - 1;
23527 flags &= ~mask;
23530 else
23532 if ((flags & mask) != 0)
23534 no_str = "no-";
23535 len += sizeof ("no-") - 1;
23538 flags |= mask;
23541 cur_column += len;
23542 if (cur_column > max_column)
23544 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
23545 cur_column = start_column + len;
23546 comma = "";
23549 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23550 comma = ", ";
23551 comma_len = sizeof (", ") - 1;
23554 fputs ("\n", file);
23557 /* Helper function to print the current isa options on a line. */
23559 static void
23560 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23561 HOST_WIDE_INT flags)
23563 rs6000_print_options_internal (file, indent, string, flags, "-m",
23564 &rs6000_opt_masks[0],
23565 ARRAY_SIZE (rs6000_opt_masks));
23568 static void
23569 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23570 HOST_WIDE_INT flags)
23572 rs6000_print_options_internal (file, indent, string, flags, "",
23573 &rs6000_builtin_mask_names[0],
23574 ARRAY_SIZE (rs6000_builtin_mask_names));
23577 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
23578 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23579 -mupper-regs-df, etc.).
23581 If the user used -mno-power8-vector, we need to turn off all of the implicit
23582 ISA 2.07 and 3.0 options that relate to the vector unit.
23584 If the user used -mno-power9-vector, we need to turn off all of the implicit
23585 ISA 3.0 options that relate to the vector unit.
23587 This function does not handle explicit options such as the user specifying
23588 -mdirect-move. These are handled in rs6000_option_override_internal, and
23589 the appropriate error is given if needed.
23591 We return a mask of all of the implicit options that should not be enabled
23592 by default. */
23594 static HOST_WIDE_INT
23595 rs6000_disable_incompatible_switches (void)
23597 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23598 size_t i, j;
/* Table ordered from newest to oldest ISA feature; each entry lists the
   dependent option masks that must be cleared when the feature is
   explicitly disabled.  */
23600 static const struct {
23601 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23602 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23603 const char *const name; /* name of the switch. */
23604 } flags[] = {
23605 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23606 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23607 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23608 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23611 for (i = 0; i < ARRAY_SIZE (flags); i++)
23613 HOST_WIDE_INT no_flag = flags[i].no_flag;
/* Only act when the option is off AND the user said so explicitly.  */
23615 if ((rs6000_isa_flags & no_flag) == 0
23616 && (rs6000_isa_flags_explicit & no_flag) != 0)
23618 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23619 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23620 & rs6000_isa_flags
23621 & dep_flags);
/* Dependent options the user explicitly enabled conflict with the
   explicit disable: report each by name.  */
23623 if (set_flags)
23625 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23626 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23628 set_flags &= ~rs6000_opt_masks[j].mask;
23629 error ("%<-mno-%s%> turns off %<-m%s%>",
23630 flags[i].name,
23631 rs6000_opt_masks[j].name);
23634 gcc_assert (!set_flags);
23637 rs6000_isa_flags &= ~dep_flags;
23638 ignore_masks |= no_flag | dep_flags;
23642 return ignore_masks;
23646 /* Helper function for printing the function name when debugging. */
23648 static const char *
23649 get_decl_name (tree fn)
23651 tree name;
23653 if (!fn)
23654 return "<null>";
23656 name = DECL_NAME (fn);
23657 if (!name)
23658 return "<no-name>";
23660 return IDENTIFIER_POINTER (name);
23663 /* Return the clone id of the target we are compiling code for in a target
23664 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23665 the priority list for the target clones (ordered from lowest to
23666 highest). */
23668 static int
23669 rs6000_clone_priority (tree fndecl)
23671 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23672 HOST_WIDE_INT isa_masks;
23673 int ret = CLONE_DEFAULT;
23674 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23675 const char *attrs_str = NULL;
23677 attrs = TREE_VALUE (TREE_VALUE (attrs));
23678 attrs_str = TREE_STRING_POINTER (attrs);
23680 /* Return priority zero for default function. Return the ISA needed for the
23681 function if it is not the default. */
23682 if (strcmp (attrs_str, "default") != 0)
23684 if (fn_opts == NULL_TREE)
23685 fn_opts = target_option_default_node;
23687 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23688 isa_masks = rs6000_isa_flags;
23689 else
23690 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23692 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23693 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23694 break;
23697 if (TARGET_DEBUG_TARGET)
23698 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
23699 get_decl_name (fndecl), ret);
23701 return ret;
23704 /* This compares the priority of target features in function DECL1 and DECL2.
23705 It returns positive value if DECL1 is higher priority, negative value if
23706 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23707 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23709 static int
23710 rs6000_compare_version_priority (tree decl1, tree decl2)
23712 int priority1 = rs6000_clone_priority (decl1);
23713 int priority2 = rs6000_clone_priority (decl2);
23714 int ret = priority1 - priority2;
23716 if (TARGET_DEBUG_TARGET)
23717 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23718 get_decl_name (decl1), get_decl_name (decl2), ret);
23720 return ret;
23723 /* Make a dispatcher declaration for the multi-versioned function DECL.
23724 Calls to DECL function will be replaced with calls to the dispatcher
23725 by the front-end. Returns the decl of the dispatcher function. */
23727 static tree
23728 rs6000_get_function_versions_dispatcher (void *decl)
23730 tree fn = (tree) decl;
23731 struct cgraph_node *node = NULL;
23732 struct cgraph_node *default_node = NULL;
23733 struct cgraph_function_version_info *node_v = NULL;
23734 struct cgraph_function_version_info *first_v = NULL;
23736 tree dispatch_decl = NULL;
23738 struct cgraph_function_version_info *default_version_info = NULL;
23739 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23741 if (TARGET_DEBUG_TARGET)
23742 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23743 get_decl_name (fn));
23745 node = cgraph_node::get (fn);
23746 gcc_assert (node != NULL);
23748 node_v = node->function_version ();
23749 gcc_assert (node_v != NULL);
23751 if (node_v->dispatcher_resolver != NULL)
23752 return node_v->dispatcher_resolver;
23754 /* Find the default version and make it the first node. */
23755 first_v = node_v;
23756 /* Go to the beginning of the chain. */
23757 while (first_v->prev != NULL)
23758 first_v = first_v->prev;
23760 default_version_info = first_v;
23761 while (default_version_info != NULL)
23763 const tree decl2 = default_version_info->this_node->decl;
23764 if (is_function_default_version (decl2))
23765 break;
23766 default_version_info = default_version_info->next;
23769 /* If there is no default node, just return NULL. */
23770 if (default_version_info == NULL)
23771 return NULL;
23773 /* Make default info the first node. */
23774 if (first_v != default_version_info)
23776 default_version_info->prev->next = default_version_info->next;
23777 if (default_version_info->next)
23778 default_version_info->next->prev = default_version_info->prev;
23779 first_v->prev = default_version_info;
23780 default_version_info->next = first_v;
23781 default_version_info->prev = NULL;
23784 default_node = default_version_info->this_node;
23786 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23787 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23788 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23789 "exports hardware capability bits");
23790 #else
23792 if (targetm.has_ifunc_p ())
23794 struct cgraph_function_version_info *it_v = NULL;
23795 struct cgraph_node *dispatcher_node = NULL;
23796 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23798 /* Right now, the dispatching is done via ifunc. */
23799 dispatch_decl = make_dispatcher_decl (default_node->decl);
23801 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23802 gcc_assert (dispatcher_node != NULL);
23803 dispatcher_node->dispatcher_function = 1;
23804 dispatcher_version_info
23805 = dispatcher_node->insert_new_function_version ();
23806 dispatcher_version_info->next = default_version_info;
23807 dispatcher_node->definition = 1;
23809 /* Set the dispatcher for all the versions. */
23810 it_v = default_version_info;
23811 while (it_v != NULL)
23813 it_v->dispatcher_resolver = dispatch_decl;
23814 it_v = it_v->next;
23817 else
23819 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23820 "multiversioning needs ifunc which is not supported "
23821 "on this target");
23823 #endif
23825 return dispatch_decl;
23828 /* Make the resolver function decl to dispatch the versions of a multi-
23829 versioned function, DEFAULT_DECL. Create an empty basic block in the
23830 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23831 function. */
23833 static tree
23834 make_resolver_func (const tree default_decl,
23835 const tree dispatch_decl,
23836 basic_block *empty_bb)
23838 /* Make the resolver function static. The resolver function returns
23839 void *. */
23840 tree decl_name = clone_function_name (default_decl, "resolver");
23841 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23842 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23843 tree decl = build_fn_decl (resolver_name, type);
23844 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23846 DECL_NAME (decl) = decl_name;
23847 TREE_USED (decl) = 1;
23848 DECL_ARTIFICIAL (decl) = 1;
23849 DECL_IGNORED_P (decl) = 0;
23850 TREE_PUBLIC (decl) = 0;
23851 DECL_UNINLINABLE (decl) = 1;
23853 /* Resolver is not external, body is generated. */
23854 DECL_EXTERNAL (decl) = 0;
23855 DECL_EXTERNAL (dispatch_decl) = 0;
23857 DECL_CONTEXT (decl) = NULL_TREE;
23858 DECL_INITIAL (decl) = make_node (BLOCK);
23859 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23861 /* Build result decl and add to function_decl. */
23862 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23863 DECL_CONTEXT (t) = decl;
23864 DECL_ARTIFICIAL (t) = 1;
23865 DECL_IGNORED_P (t) = 1;
23866 DECL_RESULT (decl) = t;
23868 gimplify_function_tree (decl);
23869 push_cfun (DECL_STRUCT_FUNCTION (decl));
23870 *empty_bb = init_lowered_empty_function (decl, false,
23871 profile_count::uninitialized ());
23873 cgraph_node::add_new_function (decl, true);
23874 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23876 pop_cfun ();
23878 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23879 DECL_ATTRIBUTES (dispatch_decl)
23880 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23882 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23884 return decl;
23887 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23888 return a pointer to VERSION_DECL if we are running on a machine that
23889 supports the index CLONE_ISA hardware architecture bits. This function will
23890 be called during version dispatch to decide which function version to
23891 execute. It returns the basic block at the end, to which more conditions
23892 can be added. */
23894 static basic_block
23895 add_condition_to_bb (tree function_decl, tree version_decl,
23896 int clone_isa, basic_block new_bb)
23898 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23900 gcc_assert (new_bb != NULL);
23901 gimple_seq gseq = bb_seq (new_bb);
23904 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23905 build_fold_addr_expr (version_decl));
23906 tree result_var = create_tmp_var (ptr_type_node);
23907 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23908 gimple *return_stmt = gimple_build_return (result_var);
23910 if (clone_isa == CLONE_DEFAULT)
23912 gimple_seq_add_stmt (&gseq, convert_stmt);
23913 gimple_seq_add_stmt (&gseq, return_stmt);
23914 set_bb_seq (new_bb, gseq);
23915 gimple_set_bb (convert_stmt, new_bb);
23916 gimple_set_bb (return_stmt, new_bb);
23917 pop_cfun ();
23918 return new_bb;
23921 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23922 tree cond_var = create_tmp_var (bool_int_type_node);
23923 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
23924 const char *arg_str = rs6000_clone_map[clone_isa].name;
23925 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
23926 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
23927 gimple_call_set_lhs (call_cond_stmt, cond_var);
23929 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
23930 gimple_set_bb (call_cond_stmt, new_bb);
23931 gimple_seq_add_stmt (&gseq, call_cond_stmt);
23933 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
23934 NULL_TREE, NULL_TREE);
23935 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
23936 gimple_set_bb (if_else_stmt, new_bb);
23937 gimple_seq_add_stmt (&gseq, if_else_stmt);
23939 gimple_seq_add_stmt (&gseq, convert_stmt);
23940 gimple_seq_add_stmt (&gseq, return_stmt);
23941 set_bb_seq (new_bb, gseq);
23943 basic_block bb1 = new_bb;
23944 edge e12 = split_block (bb1, if_else_stmt);
23945 basic_block bb2 = e12->dest;
23946 e12->flags &= ~EDGE_FALLTHRU;
23947 e12->flags |= EDGE_TRUE_VALUE;
23949 edge e23 = split_block (bb2, return_stmt);
23950 gimple_set_bb (convert_stmt, bb2);
23951 gimple_set_bb (return_stmt, bb2);
23953 basic_block bb3 = e23->dest;
23954 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
23956 remove_edge (e23);
23957 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
23959 pop_cfun ();
23960 return bb3;
23963 /* This function generates the dispatch function for multi-versioned functions.
23964 DISPATCH_DECL is the function which will contain the dispatch logic.
23965 FNDECLS are the function choices for dispatch, and is a tree chain.
23966 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23967 code is generated. */
23969 static int
23970 dispatch_function_versions (tree dispatch_decl,
23971 void *fndecls_p,
23972 basic_block *empty_bb)
23974 int ix;
23975 tree ele;
23976 vec<tree> *fndecls;
23977 tree clones[CLONE_MAX];
23979 if (TARGET_DEBUG_TARGET)
23980 fputs ("dispatch_function_versions, top\n", stderr);
23982 gcc_assert (dispatch_decl != NULL
23983 && fndecls_p != NULL
23984 && empty_bb != NULL);
23986 /* fndecls_p is actually a vector. */
23987 fndecls = static_cast<vec<tree> *> (fndecls_p);
23989 /* At least one more version other than the default. */
23990 gcc_assert (fndecls->length () >= 2);
23992 /* The first version in the vector is the default decl. */
23993 memset ((void *) clones, '\0', sizeof (clones));
23994 clones[CLONE_DEFAULT] = (*fndecls)[0];
23996 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23997 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
23998 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
23999 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24000 to insert the code here to do the call. */
24002 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24004 int priority = rs6000_clone_priority (ele);
24005 if (!clones[priority])
24006 clones[priority] = ele;
24009 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24010 if (clones[ix])
24012 if (TARGET_DEBUG_TARGET)
24013 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24014 ix, get_decl_name (clones[ix]));
24016 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24017 *empty_bb);
24020 return 0;
24023 /* Generate the dispatching code body to dispatch multi-versioned function
24024 DECL. The target hook is called to process the "target" attributes and
24025 provide the code to dispatch the right function at run-time. NODE points
24026 to the dispatcher decl whose body will be created. */
24028 static tree
24029 rs6000_generate_version_dispatcher_body (void *node_p)
24031 tree resolver;
24032 basic_block empty_bb;
24033 struct cgraph_node *node = (cgraph_node *) node_p;
24034 struct cgraph_function_version_info *ninfo = node->function_version ();
24036 if (ninfo->dispatcher_resolver)
24037 return ninfo->dispatcher_resolver;
24039 /* node is going to be an alias, so remove the finalized bit. */
24040 node->definition = false;
24042 /* The first version in the chain corresponds to the default version. */
24043 ninfo->dispatcher_resolver = resolver
24044 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24046 if (TARGET_DEBUG_TARGET)
24047 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24048 get_decl_name (resolver));
24050 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24051 auto_vec<tree, 2> fn_ver_vec;
24053 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24054 vinfo;
24055 vinfo = vinfo->next)
24057 struct cgraph_node *version = vinfo->this_node;
24058 /* Check for virtual functions here again, as by this time it should
24059 have been determined if this function needs a vtable index or
24060 not. This happens for methods in derived classes that override
24061 virtual methods in base classes but are not explicitly marked as
24062 virtual. */
24063 if (DECL_VINDEX (version->decl))
24064 sorry ("Virtual function multiversioning not supported");
24066 fn_ver_vec.safe_push (version->decl);
24069 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24070 cgraph_edge::rebuild_edges ();
24071 pop_cfun ();
24072 return resolver;
24076 /* Hook to determine if one function can safely inline another. */
24078 static bool
24079 rs6000_can_inline_p (tree caller, tree callee)
24081 bool ret = false;
24082 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24083 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24085 /* If the callee has no option attributes, then it is ok to inline. */
24086 if (!callee_tree)
24087 ret = true;
24089 else
24091 HOST_WIDE_INT caller_isa;
24092 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24093 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24094 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24096 /* If the caller has option attributes, then use them.
24097 Otherwise, use the command line options. */
24098 if (caller_tree)
24099 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24100 else
24101 caller_isa = rs6000_isa_flags;
24103 /* The callee's options must be a subset of the caller's options, i.e.
24104 a vsx function may inline an altivec function, but a no-vsx function
24105 must not inline a vsx function. However, for those options that the
24106 callee has explicitly enabled or disabled, then we must enforce that
24107 the callee's and caller's options match exactly; see PR70010. */
24108 if (((caller_isa & callee_isa) == callee_isa)
24109 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24110 ret = true;
24113 if (TARGET_DEBUG_TARGET)
24114 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24115 get_decl_name (caller), get_decl_name (callee),
24116 (ret ? "can" : "cannot"));
24118 return ret;
24121 /* Allocate a stack temp and fixup the address so it meets the particular
24122 memory requirements (either offetable or REG+REG addressing). */
24125 rs6000_allocate_stack_temp (machine_mode mode,
24126 bool offsettable_p,
24127 bool reg_reg_p)
24129 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24130 rtx addr = XEXP (stack, 0);
24131 int strict_p = reload_completed;
24133 if (!legitimate_indirect_address_p (addr, strict_p))
24135 if (offsettable_p
24136 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24137 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24139 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24140 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24143 return stack;
24146 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24147 convert to such a form to deal with memory reference instructions
24148 like STFIWX and LDBRX that only take reg+reg addressing. */
24151 rs6000_force_indexed_or_indirect_mem (rtx x)
24153 machine_mode mode = GET_MODE (x);
24155 gcc_assert (MEM_P (x));
24156 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24158 rtx addr = XEXP (x, 0);
24159 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24161 rtx reg = XEXP (addr, 0);
24162 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24163 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24164 gcc_assert (REG_P (reg));
24165 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24166 addr = reg;
24168 else if (GET_CODE (addr) == PRE_MODIFY)
24170 rtx reg = XEXP (addr, 0);
24171 rtx expr = XEXP (addr, 1);
24172 gcc_assert (REG_P (reg));
24173 gcc_assert (GET_CODE (expr) == PLUS);
24174 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24175 addr = reg;
24178 if (GET_CODE (addr) == PLUS)
24180 rtx op0 = XEXP (addr, 0);
24181 rtx op1 = XEXP (addr, 1);
24182 op0 = force_reg (Pmode, op0);
24183 op1 = force_reg (Pmode, op1);
24184 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24186 else
24187 x = replace_equiv_address (x, force_reg (Pmode, addr));
24190 return x;
24193 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24195 On the RS/6000, all integer constants are acceptable, most won't be valid
24196 for particular insns, though. Only easy FP constants are acceptable. */
24198 static bool
24199 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24201 if (TARGET_ELF && tls_referenced_p (x))
24202 return false;
24204 if (CONST_DOUBLE_P (x))
24205 return easy_fp_constant (x, mode);
24207 if (GET_CODE (x) == CONST_VECTOR)
24208 return easy_vector_constant (x, mode);
24210 return true;
24214 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24216 static bool
24217 chain_already_loaded (rtx_insn *last)
24219 for (; last != NULL; last = PREV_INSN (last))
24221 if (NONJUMP_INSN_P (last))
24223 rtx patt = PATTERN (last);
24225 if (GET_CODE (patt) == SET)
24227 rtx lhs = XEXP (patt, 0);
24229 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24230 return true;
24234 return false;
24237 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24239 void
24240 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24242 rtx func = func_desc;
24243 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24244 rtx toc_load = NULL_RTX;
24245 rtx toc_restore = NULL_RTX;
24246 rtx func_addr;
24247 rtx abi_reg = NULL_RTX;
24248 rtx call[4];
24249 int n_call;
24250 rtx insn;
24251 bool is_pltseq_longcall;
24253 if (global_tlsarg)
24254 tlsarg = global_tlsarg;
24256 /* Handle longcall attributes. */
24257 is_pltseq_longcall = false;
24258 if ((INTVAL (cookie) & CALL_LONG) != 0
24259 && GET_CODE (func_desc) == SYMBOL_REF)
24261 func = rs6000_longcall_ref (func_desc, tlsarg);
24262 if (TARGET_PLTSEQ)
24263 is_pltseq_longcall = true;
24266 /* Handle indirect calls. */
24267 if (!SYMBOL_REF_P (func)
24268 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24270 if (!rs6000_pcrel_p (cfun))
24272 /* Save the TOC into its reserved slot before the call,
24273 and prepare to restore it after the call. */
24274 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24275 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24276 gen_rtvec (1, stack_toc_offset),
24277 UNSPEC_TOCSLOT);
24278 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24280 /* Can we optimize saving the TOC in the prologue or
24281 do we need to do it at every call? */
24282 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24283 cfun->machine->save_toc_in_prologue = true;
24284 else
24286 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24287 rtx stack_toc_mem = gen_frame_mem (Pmode,
24288 gen_rtx_PLUS (Pmode, stack_ptr,
24289 stack_toc_offset));
24290 MEM_VOLATILE_P (stack_toc_mem) = 1;
24291 if (is_pltseq_longcall)
24293 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24294 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24295 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24297 else
24298 emit_move_insn (stack_toc_mem, toc_reg);
24302 if (DEFAULT_ABI == ABI_ELFv2)
24304 /* A function pointer in the ELFv2 ABI is just a plain address, but
24305 the ABI requires it to be loaded into r12 before the call. */
24306 func_addr = gen_rtx_REG (Pmode, 12);
24307 if (!rtx_equal_p (func_addr, func))
24308 emit_move_insn (func_addr, func);
24309 abi_reg = func_addr;
24310 /* Indirect calls via CTR are strongly preferred over indirect
24311 calls via LR, so move the address there. Needed to mark
24312 this insn for linker plt sequence editing too. */
24313 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24314 if (is_pltseq_longcall)
24316 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24317 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24318 emit_insn (gen_rtx_SET (func_addr, mark_func));
24319 v = gen_rtvec (2, func_addr, func_desc);
24320 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24322 else
24323 emit_move_insn (func_addr, abi_reg);
24325 else
24327 /* A function pointer under AIX is a pointer to a data area whose
24328 first word contains the actual address of the function, whose
24329 second word contains a pointer to its TOC, and whose third word
24330 contains a value to place in the static chain register (r11).
24331 Note that if we load the static chain, our "trampoline" need
24332 not have any executable code. */
24334 /* Load up address of the actual function. */
24335 func = force_reg (Pmode, func);
24336 func_addr = gen_reg_rtx (Pmode);
24337 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24339 /* Indirect calls via CTR are strongly preferred over indirect
24340 calls via LR, so move the address there. */
24341 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24342 emit_move_insn (ctr_reg, func_addr);
24343 func_addr = ctr_reg;
24345 /* Prepare to load the TOC of the called function. Note that the
24346 TOC load must happen immediately before the actual call so
24347 that unwinding the TOC registers works correctly. See the
24348 comment in frob_update_context. */
24349 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24350 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24351 gen_rtx_PLUS (Pmode, func,
24352 func_toc_offset));
24353 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24355 /* If we have a static chain, load it up. But, if the call was
24356 originally direct, the 3rd word has not been written since no
24357 trampoline has been built, so we ought not to load it, lest we
24358 override a static chain value. */
24359 if (!(GET_CODE (func_desc) == SYMBOL_REF
24360 && SYMBOL_REF_FUNCTION_P (func_desc))
24361 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24362 && !chain_already_loaded (get_current_sequence ()->next->last))
24364 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24365 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24366 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24367 gen_rtx_PLUS (Pmode, func,
24368 func_sc_offset));
24369 emit_move_insn (sc_reg, func_sc_mem);
24370 abi_reg = sc_reg;
24374 else
24376 /* No TOC register needed for calls from PC-relative callers. */
24377 if (!rs6000_pcrel_p (cfun))
24378 /* Direct calls use the TOC: for local calls, the callee will
24379 assume the TOC register is set; for non-local calls, the
24380 PLT stub needs the TOC register. */
24381 abi_reg = toc_reg;
24382 func_addr = func;
24385 /* Create the call. */
24386 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24387 if (value != NULL_RTX)
24388 call[0] = gen_rtx_SET (value, call[0]);
24389 n_call = 1;
24391 if (toc_load)
24392 call[n_call++] = toc_load;
24393 if (toc_restore)
24394 call[n_call++] = toc_restore;
24396 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24398 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24399 insn = emit_call_insn (insn);
24401 /* Mention all registers defined by the ABI to hold information
24402 as uses in CALL_INSN_FUNCTION_USAGE. */
24403 if (abi_reg)
24404 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24407 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24409 void
24410 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24412 rtx call[2];
24413 rtx insn;
24415 gcc_assert (INTVAL (cookie) == 0);
24417 if (global_tlsarg)
24418 tlsarg = global_tlsarg;
24420 /* Create the call. */
24421 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24422 if (value != NULL_RTX)
24423 call[0] = gen_rtx_SET (value, call[0]);
24425 call[1] = simple_return_rtx;
24427 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24428 insn = emit_call_insn (insn);
24430 /* Note use of the TOC register. */
24431 if (!rs6000_pcrel_p (cfun))
24432 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24433 gen_rtx_REG (Pmode, TOC_REGNUM));
24436 /* Expand code to perform a call under the SYSV4 ABI. */
24438 void
24439 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24441 rtx func = func_desc;
24442 rtx func_addr;
24443 rtx call[4];
24444 rtx insn;
24445 rtx abi_reg = NULL_RTX;
24446 int n;
24448 if (global_tlsarg)
24449 tlsarg = global_tlsarg;
24451 /* Handle longcall attributes. */
24452 if ((INTVAL (cookie) & CALL_LONG) != 0
24453 && GET_CODE (func_desc) == SYMBOL_REF)
24455 func = rs6000_longcall_ref (func_desc, tlsarg);
24456 /* If the longcall was implemented as an inline PLT call using
24457 PLT unspecs then func will be REG:r11. If not, func will be
24458 a pseudo reg. The inline PLT call sequence supports lazy
24459 linking (and longcalls to functions in dlopen'd libraries).
24460 The other style of longcalls don't. The lazy linking entry
24461 to the dynamic symbol resolver requires r11 be the function
24462 address (as it is for linker generated PLT stubs). Ensure
24463 r11 stays valid to the bctrl by marking r11 used by the call. */
24464 if (TARGET_PLTSEQ)
24465 abi_reg = func;
24468 /* Handle indirect calls. */
24469 if (GET_CODE (func) != SYMBOL_REF)
24471 func = force_reg (Pmode, func);
24473 /* Indirect calls via CTR are strongly preferred over indirect
24474 calls via LR, so move the address there. That can't be left
24475 to reload because we want to mark every instruction in an
24476 inline PLT call sequence with a reloc, enabling the linker to
24477 edit the sequence back to a direct call when that makes sense. */
24478 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24479 if (abi_reg)
24481 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24482 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24483 emit_insn (gen_rtx_SET (func_addr, mark_func));
24484 v = gen_rtvec (2, func_addr, func_desc);
24485 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24487 else
24488 emit_move_insn (func_addr, func);
24490 else
24491 func_addr = func;
24493 /* Create the call. */
24494 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24495 if (value != NULL_RTX)
24496 call[0] = gen_rtx_SET (value, call[0]);
24498 call[1] = gen_rtx_USE (VOIDmode, cookie);
24499 n = 2;
24500 if (TARGET_SECURE_PLT
24501 && flag_pic
24502 && GET_CODE (func_addr) == SYMBOL_REF
24503 && !SYMBOL_REF_LOCAL_P (func_addr))
24504 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24506 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24508 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24509 insn = emit_call_insn (insn);
24510 if (abi_reg)
24511 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24514 /* Expand code to perform a sibling call under the SysV4 ABI. */
24516 void
24517 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24519 rtx func = func_desc;
24520 rtx func_addr;
24521 rtx call[3];
24522 rtx insn;
24523 rtx abi_reg = NULL_RTX;
24525 if (global_tlsarg)
24526 tlsarg = global_tlsarg;
24528 /* Handle longcall attributes. */
24529 if ((INTVAL (cookie) & CALL_LONG) != 0
24530 && GET_CODE (func_desc) == SYMBOL_REF)
24532 func = rs6000_longcall_ref (func_desc, tlsarg);
24533 /* If the longcall was implemented as an inline PLT call using
24534 PLT unspecs then func will be REG:r11. If not, func will be
24535 a pseudo reg. The inline PLT call sequence supports lazy
24536 linking (and longcalls to functions in dlopen'd libraries).
24537 The other style of longcalls don't. The lazy linking entry
24538 to the dynamic symbol resolver requires r11 be the function
24539 address (as it is for linker generated PLT stubs). Ensure
24540 r11 stays valid to the bctr by marking r11 used by the call. */
24541 if (TARGET_PLTSEQ)
24542 abi_reg = func;
24545 /* Handle indirect calls. */
24546 if (GET_CODE (func) != SYMBOL_REF)
24548 func = force_reg (Pmode, func);
24550 /* Indirect sibcalls must go via CTR. That can't be left to
24551 reload because we want to mark every instruction in an inline
24552 PLT call sequence with a reloc, enabling the linker to edit
24553 the sequence back to a direct call when that makes sense. */
24554 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24555 if (abi_reg)
24557 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24558 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24559 emit_insn (gen_rtx_SET (func_addr, mark_func));
24560 v = gen_rtvec (2, func_addr, func_desc);
24561 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24563 else
24564 emit_move_insn (func_addr, func);
24566 else
24567 func_addr = func;
24569 /* Create the call. */
24570 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24571 if (value != NULL_RTX)
24572 call[0] = gen_rtx_SET (value, call[0]);
24574 call[1] = gen_rtx_USE (VOIDmode, cookie);
24575 call[2] = simple_return_rtx;
24577 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24578 insn = emit_call_insn (insn);
24579 if (abi_reg)
24580 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
#if TARGET_MACHO

/* Expand code to perform a call under the Darwin ABI.
   Modulo handling of mlongcall, this is much the same as sysv.
   if/when the longcall optimisation is removed, we could drop this
   code and use the sysv case (taking care to avoid the tls stuff).

   We can use this for sibcalls too, if needed.  */

void
rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
		      rtx cookie, bool sibcall)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  int cookie_val = INTVAL (cookie);
  bool make_island = false;

  /* Handle longcall attributes, there are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it's is an optimisation, we could just indirect as sysv does..
     ... however, backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
  if ((cookie_val & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      /* FIXME: the longcall opt should not hang off this flag, it is most
	 likely incorrect for kernel-mode code-generation.  */
      if (darwin_symbol_stubs && TARGET_32BIT)
	make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
      else
	{
	  /* The linker is capable of doing this, but the user explicitly
	     asked for -mlongcall, so we'll do the 'normal' version.  */
	  func = rs6000_longcall_ref (func_desc, NULL_RTX);
	  cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
	}
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, and are required for indirect sibcalls, so move
	 the address there.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));

  if (sibcall)
    call[2] = simple_return_rtx;
  else
    call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  /* Now we have the debug info in the insn, we can set up the branch island
     if we're using one.  */
  if (make_island)
    {
      tree funname = get_identifier (XSTR (func_desc, 0));

      if (no_previous_def (funname))
	{
	  rtx label_rtx = gen_label_rtx ();
	  char *label_buf, temp_buf[256];
	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
				       CODE_LABEL_NUMBER (label_rtx));
	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
	  tree labelname = get_identifier (label_buf);
	  add_compiler_branch_island (labelname, funname,
				      insn_line ((const rtx_insn*) insn));
	}
    }
}
#endif
24676 void
24677 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24678 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24680 #if TARGET_MACHO
24681 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24682 #else
24683 gcc_unreachable();
24684 #endif
24688 void
24689 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24690 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24692 #if TARGET_MACHO
24693 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24694 #else
24695 gcc_unreachable();
24696 #endif
24699 /* Return whether we should generate PC-relative code for FNDECL. */
24700 bool
24701 rs6000_fndecl_pcrel_p (const_tree fndecl)
24703 if (DEFAULT_ABI != ABI_ELFv2)
24704 return false;
24706 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24708 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24709 && TARGET_CMODEL == CMODEL_MEDIUM);
24712 /* Return whether we should generate PC-relative code for *FN. */
24713 bool
24714 rs6000_pcrel_p (struct function *fn)
24716 if (DEFAULT_ABI != ABI_ELFv2)
24717 return false;
24719 /* Optimize usual case. */
24720 if (fn == cfun)
24721 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24722 && TARGET_CMODEL == CMODEL_MEDIUM);
24724 return rs6000_fndecl_pcrel_p (fn->decl);
24728 /* Given an address (ADDR), a mode (MODE), and what the format of the
24729 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24730 for the address.
Returns one of the INSN_FORM_* values (BASE_REG, X, D, DS, DQ, UPDATE,
LO_SUM, PCREL_LOCAL, PCREL_EXTERNAL, PREFIXED_NUMERIC) or INSN_FORM_BAD
if the address does not fit any supported instruction format.  */
24732 enum insn_form
24733 address_to_insn_form (rtx addr,
24734 machine_mode mode,
24735 enum non_prefixed_form non_prefixed_format)
24737 /* Single register is easy.  */
24738 if (REG_P (addr) || SUBREG_P (addr))
24739 return INSN_FORM_BASE_REG;
24741 /* If the non prefixed instruction format doesn't support offset addressing,
24742 make sure only indexed addressing is allowed.
24744 We special case SDmode so that the register allocator does not try to move
24745 SDmode through GPR registers, but instead uses the 32-bit integer load and
24746 store instructions for the floating point registers.  */
24747 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24749 if (GET_CODE (addr) != PLUS)
24750 return INSN_FORM_BAD;
24752 rtx op0 = XEXP (addr, 0);
24753 rtx op1 = XEXP (addr, 1);
24754 if (!REG_P (op0) && !SUBREG_P (op0))
24755 return INSN_FORM_BAD;
24757 if (!REG_P (op1) && !SUBREG_P (op1))
24758 return INSN_FORM_BAD;
24760 return INSN_FORM_X;
24763 /* Deal with update forms.  */
24764 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24765 return INSN_FORM_UPDATE;
24767 /* Handle PC-relative symbols and labels.  Check for both local and external
24768 symbols.  Assume labels are always local.  */
24769 if (TARGET_PCREL)
24771 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
24772 return INSN_FORM_PCREL_EXTERNAL;
24774 if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
24775 return INSN_FORM_PCREL_LOCAL;
/* Strip a CONST wrapper so a (const (plus sym offset)) address is
   classified by the PLUS handling below.  */
24778 if (GET_CODE (addr) == CONST)
24779 addr = XEXP (addr, 0);
24781 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
24782 if (GET_CODE (addr) == LO_SUM)
24783 return INSN_FORM_LO_SUM;
24785 /* Everything below must be an offset address of some form.  */
24786 if (GET_CODE (addr) != PLUS)
24787 return INSN_FORM_BAD;
24789 rtx op0 = XEXP (addr, 0);
24790 rtx op1 = XEXP (addr, 1);
24792 /* Check for indexed addresses.  */
24793 if (REG_P (op1) || SUBREG_P (op1))
24795 if (REG_P (op0) || SUBREG_P (op0))
24796 return INSN_FORM_X;
24798 return INSN_FORM_BAD;
24801 if (!CONST_INT_P (op1))
24802 return INSN_FORM_BAD;
/* The prefixed D-form offset field is 34 bits; anything larger cannot be
   encoded at all.  */
24804 HOST_WIDE_INT offset = INTVAL (op1);
24805 if (!SIGNED_INTEGER_34BIT_P (offset))
24806 return INSN_FORM_BAD;
24808 /* Check for local and external PC-relative addresses.  Labels are always
24809 local.  */
24810 if (TARGET_PCREL)
24812 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
24813 return INSN_FORM_PCREL_EXTERNAL;
24815 if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
24816 return INSN_FORM_PCREL_LOCAL;
24819 /* If it isn't PC-relative, the address must use a base register.  */
24820 if (!REG_P (op0) && !SUBREG_P (op0))
24821 return INSN_FORM_BAD;
24823 /* Large offsets must be prefixed.  */
24824 if (!SIGNED_INTEGER_16BIT_P (offset))
24826 if (TARGET_PREFIXED_ADDR)
24827 return INSN_FORM_PREFIXED_NUMERIC;
24829 return INSN_FORM_BAD;
24832 /* We have a 16-bit offset, see what default instruction format to use.  */
24833 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24835 unsigned size = GET_MODE_SIZE (mode);
24837 /* On 64-bit systems, assume 64-bit integers need to use DS form
24838 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
24839 (for LXV and STXV).  TImode is problematical in that its normal usage
24840 is expected to be GPRs where it wants a DS instruction format, but if
24841 it goes into the vector registers, it wants a DQ instruction
24842 format.  */
24843 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24844 non_prefixed_format = NON_PREFIXED_DS;
24846 else if (TARGET_VSX && size >= 16
24847 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24848 non_prefixed_format = NON_PREFIXED_DQ;
24850 else
24851 non_prefixed_format = NON_PREFIXED_D;
24854 /* Classify the D/DS/DQ-form addresses.  */
24855 switch (non_prefixed_format)
24857 /* Instruction format D, all 16 bits are valid.  */
24858 case NON_PREFIXED_D:
24859 return INSN_FORM_D;
24861 /* Instruction format DS, bottom 2 bits must be 0.  */
24862 case NON_PREFIXED_DS:
24863 if ((offset & 3) == 0)
24864 return INSN_FORM_DS;
24866 else if (TARGET_PREFIXED_ADDR)
24867 return INSN_FORM_PREFIXED_NUMERIC;
24869 else
24870 return INSN_FORM_BAD;
24872 /* Instruction format DQ, bottom 4 bits must be 0.  */
24873 case NON_PREFIXED_DQ:
24874 if ((offset & 15) == 0)
24875 return INSN_FORM_DQ;
24877 else if (TARGET_PREFIXED_ADDR)
24878 return INSN_FORM_PREFIXED_NUMERIC;
24880 else
24881 return INSN_FORM_BAD;
24883 default:
24884 break;
24887 return INSN_FORM_BAD;
24890 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24891 instruction format (D/DS/DQ) used for offset memory.
The choice depends on which hard register class REG lands in, since FPRs,
Altivec registers, and GPRs use different load/store encodings for the
same mode.  Pseudos and non-registers get NON_PREFIXED_DEFAULT so the
caller applies mode-based defaults instead.  */
24893 static enum non_prefixed_form
24894 reg_to_non_prefixed (rtx reg, machine_mode mode)
24896 /* If it isn't a register, use the defaults.  */
24897 if (!REG_P (reg) && !SUBREG_P (reg))
24898 return NON_PREFIXED_DEFAULT;
24900 unsigned int r = reg_or_subregno (reg);
24902 /* If we have a pseudo, use the default instruction format.  */
24903 if (!HARD_REGISTER_NUM_P (r))
24904 return NON_PREFIXED_DEFAULT;
24906 unsigned size = GET_MODE_SIZE (mode);
24908 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
24909 128-bit floating point, and 128-bit integers.  */
24910 if (FP_REGNO_P (r))
24912 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24913 return NON_PREFIXED_D;
24915 else if (size < 8)
24916 return NON_PREFIXED_X;
24918 else if (TARGET_VSX && size >= 16
24919 && (VECTOR_MODE_P (mode)
24920 || FLOAT128_VECTOR_P (mode)
24921 || mode == TImode || mode == CTImode))
24922 return NON_PREFIXED_DQ;
24924 else
24925 return NON_PREFIXED_DEFAULT;
24928 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
24929 128-bit floating point, and 128-bit integers.  */
24930 else if (ALTIVEC_REGNO_P (r))
24932 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24933 return NON_PREFIXED_DS;
24935 else if (size < 8)
24936 return NON_PREFIXED_X;
24938 else if (TARGET_VSX && size >= 16
24939 && (VECTOR_MODE_P (mode)
24940 || FLOAT128_VECTOR_P (mode)
24941 || mode == TImode || mode == CTImode))
24942 return NON_PREFIXED_DQ;
24944 else
24945 return NON_PREFIXED_DEFAULT;
24948 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
24949 otherwise.  Assume that any other register, such as LR, CRs, etc. will go
24950 through the GPR registers for memory operations.  */
24951 else if (TARGET_POWERPC64 && size >= 8)
24952 return NON_PREFIXED_DS;
24954 return NON_PREFIXED_D;
24958 /* Whether a load instruction is a prefixed instruction. This is called from
24959 the prefixed attribute processing. */
24961 bool
24962 prefixed_load_p (rtx_insn *insn)
24964 /* Validate the insn to make sure it is a normal load insn. */
24965 extract_insn_cached (insn);
24966 if (recog_data.n_operands < 2)
24967 return false;
24969 rtx reg = recog_data.operand[0];
24970 rtx mem = recog_data.operand[1];
24972 if (!REG_P (reg) && !SUBREG_P (reg))
24973 return false;
24975 if (!MEM_P (mem))
24976 return false;
24978 /* Prefixed load instructions do not support update or indexed forms. */
24979 if (get_attr_indexed (insn) == INDEXED_YES
24980 || get_attr_update (insn) == UPDATE_YES)
24981 return false;
24983 /* LWA uses the DS format instead of the D format that LWZ uses. */
24984 enum non_prefixed_form non_prefixed;
24985 machine_mode reg_mode = GET_MODE (reg);
24986 machine_mode mem_mode = GET_MODE (mem);
24988 if (mem_mode == SImode && reg_mode == DImode
24989 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
24990 non_prefixed = NON_PREFIXED_DS;
24992 else
24993 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24995 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24998 /* Whether a store instruction is a prefixed instruction. This is called from
24999 the prefixed attribute processing. */
25001 bool
25002 prefixed_store_p (rtx_insn *insn)
25004 /* Validate the insn to make sure it is a normal store insn. */
25005 extract_insn_cached (insn);
25006 if (recog_data.n_operands < 2)
25007 return false;
25009 rtx mem = recog_data.operand[0];
25010 rtx reg = recog_data.operand[1];
25012 if (!REG_P (reg) && !SUBREG_P (reg))
25013 return false;
25015 if (!MEM_P (mem))
25016 return false;
25018 /* Prefixed store instructions do not support update or indexed forms. */
25019 if (get_attr_indexed (insn) == INDEXED_YES
25020 || get_attr_update (insn) == UPDATE_YES)
25021 return false;
25023 machine_mode mem_mode = GET_MODE (mem);
25024 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25025 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25028 /* Whether a load immediate or add instruction is a prefixed instruction.  This
25029 is called from the prefixed attribute processing.
Returns true for constants that need PADDI/PLI (i.e. do not fit the
16-bit ADDI/ADDIS immediates) and for loads of PC-relative addresses
(PLA), which are also prefixed.  */
25031 bool
25032 prefixed_paddi_p (rtx_insn *insn)
25034 rtx set = single_set (insn);
25035 if (!set)
25036 return false;
25038 rtx dest = SET_DEST (set);
25039 rtx src = SET_SRC (set);
25041 if (!REG_P (dest) && !SUBREG_P (dest))
25042 return false;
25044 /* Is this a load immediate that can't be done with a simple ADDI or
25045 ADDIS?  */
25046 if (CONST_INT_P (src))
25047 return (satisfies_constraint_eI (src)
25048 && !satisfies_constraint_I (src)
25049 && !satisfies_constraint_L (src));
25051 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25052 ADDIS?  */
25053 if (GET_CODE (src) == PLUS)
25055 rtx op1 = XEXP (src, 1);
25057 return (CONST_INT_P (op1)
25058 && satisfies_constraint_eI (op1)
25059 && !satisfies_constraint_I (op1)
25060 && !satisfies_constraint_L (op1));
25063 /* If not, is it a load of a PC-relative address?  */
25064 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25065 return false;
25067 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25068 return false;
25070 enum insn_form iform = address_to_insn_form (src, Pmode,
25071 NON_PREFIXED_DEFAULT);
25073 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25076 /* Whether the next instruction needs a 'p' prefix issued before the
25077 instruction is printed out.
Set in rs6000_final_prescan_insn and consumed by rs6000_asm_output_opcode.  */
25078 static bool next_insn_prefixed_p;
25080 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25081 outputting the assembler code.  On the PowerPC, we remember if the current
25082 insn is a prefixed insn where we need to emit a 'p' before the insn.
25084 In addition, if the insn is part of a PC-relative reference to an external
25085 label optimization, this is recorded also.  */
25086 void
25087 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25089 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25090 return;
25093 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25094 We use it to emit a 'p' for prefixed insns that is set in
25095 FINAL_PRESCAN_INSN. */
25096 void
25097 rs6000_asm_output_opcode (FILE *stream)
25099 if (next_insn_prefixed_p)
25100 fprintf (stream, "p");
25102 return;
25105 /* Adjust the length of an INSN.  LENGTH is the currently-computed length and
25106 should be adjusted to reflect any required changes.  This macro is used when
25107 there is some systematic length adjustment required that would be difficult
25108 to express in the length attribute.
25110 In the PowerPC, we use this to adjust the length of an instruction if one or
25111 more prefixed instructions are generated, using the attribute
25112 num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
25113 hardware requires that a prefixed instruction does not cross a 64-byte
25114 boundary.  This means the compiler has to assume the length of the first
25115 prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
25116 already set for the non-prefixed instruction, we just need to update for the
25117 difference.  */
25120 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25122 if (TARGET_PREFIXED_ADDR && NONJUMP_INSN_P (insn))
25124 rtx pattern = PATTERN (insn);
25125 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25126 && get_attr_prefixed (insn) == PREFIXED_YES)
/* Each prefixed insn adds 4 bytes over its non-prefixed length, plus 4
   more for a possible alignment NOP before the first one.  */
25128 int num_prefixed = get_attr_max_prefixed_insns (insn);
25129 length += 4 * (num_prefixed + 1);
25133 return length;
25137 #ifdef HAVE_GAS_HIDDEN
25138 # define USE_HIDDEN_LINKONCE 1
25139 #else
25140 # define USE_HIDDEN_LINKONCE 0
25141 #endif
25143 /* Fills in the label name that should be used for a 476 link stack thunk. */
25145 void
25146 get_ppc476_thunk_name (char name[32])
25148 gcc_assert (TARGET_LINK_STACK);
25150 if (USE_HIDDEN_LINKONCE)
25151 sprintf (name, "__ppc476.get_thunk");
25152 else
25153 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25156 /* This function emits the simple thunk routine that is used to preserve
25157 the link stack on the 476 cpu.
Runs at end of compilation (TARGET_ASM_CODE_END); it builds a dummy
FUNCTION_DECL, switches to an appropriate (possibly comdat) section,
and emits a bare "blr" body.  */
25159 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25160 static void
25161 rs6000_code_end (void)
25163 char name[32];
25164 tree decl;
25166 if (!TARGET_LINK_STACK)
25167 return;
25169 get_ppc476_thunk_name (name);
25171 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25172 build_function_type_list (void_type_node, NULL_TREE));
25173 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25174 NULL_TREE, void_type_node);
25175 TREE_PUBLIC (decl) = 1;
25176 TREE_STATIC (decl) = 1;
25178 #if RS6000_WEAK
25179 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
/* Emit the thunk as a weak, hidden comdat so multiple objects can each
   carry a copy that the linker deduplicates.  */
25181 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25182 targetm.asm_out.unique_section (decl, 0);
25183 switch_to_section (get_named_section (decl, NULL, 0));
25184 DECL_WEAK (decl) = 1;
25185 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25186 targetm.asm_out.globalize_label (asm_out_file, name);
25187 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25188 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25190 else
25191 #endif
25193 switch_to_section (text_section);
25194 ASM_OUTPUT_LABEL (asm_out_file, name);
25197 DECL_INITIAL (decl) = make_node (BLOCK);
25198 current_function_decl = decl;
25199 allocate_struct_function (decl, false);
25200 init_function_start (decl);
25201 first_function_block_is_cold = false;
25202 /* Make sure unwind info is emitted for the thunk if needed.  */
25203 final_start_function (emit_barrier (), asm_out_file, 1);
25205 fputs ("\tblr\n", asm_out_file);
25207 final_end_function ();
25208 init_insn_lengths ();
25209 free_after_compilation (cfun);
25210 set_cfun (NULL);
25211 current_function_decl = NULL;
25214 /* Add r30 to hard reg set if the prologue sets it up and it is not
25215 pic_offset_table_rtx. */
25217 static void
25218 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25220 if (!TARGET_SINGLE_PIC_BASE
25221 && TARGET_TOC
25222 && TARGET_MINIMAL_TOC
25223 && !constant_pool_empty_p ())
25224 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25225 if (cfun->machine->split_stack_argp_used)
25226 add_to_hard_reg_set (&set->set, Pmode, 12);
25228 /* Make sure the hard reg set doesn't include r2, which was possibly added
25229 via PIC_OFFSET_TABLE_REGNUM. */
25230 if (TARGET_TOC)
25231 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25235 /* Helper function for rs6000_split_logical to emit a logical instruction after
25236 splitting the operation to single GPR registers.
25238 DEST is the destination register.
25239 OP1 and OP2 are the input source registers.
25240 CODE is the base operation (AND, IOR, XOR, NOT).
25241 MODE is the machine mode.
25242 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25243 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25244 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
25246 static void
25247 rs6000_split_logical_inner (rtx dest,
25248 rtx op1,
25249 rtx op2,
25250 enum rtx_code code,
25251 machine_mode mode,
25252 bool complement_final_p,
25253 bool complement_op1_p,
25254 bool complement_op2_p)
25256 rtx bool_rtx;
25258 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
25259 if (op2 && CONST_INT_P (op2)
25260 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25261 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25263 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25264 HOST_WIDE_INT value = INTVAL (op2) & mask;
25266 /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
25267 if (code == AND)
25269 if (value == 0)
25271 emit_insn (gen_rtx_SET (dest, const0_rtx));
25272 return;
25275 else if (value == mask)
25277 if (!rtx_equal_p (dest, op1))
25278 emit_insn (gen_rtx_SET (dest, op1));
25279 return;
25283 /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
25284 into separate ORI/ORIS or XORI/XORIS instructions.  */
25285 else if (code == IOR || code == XOR)
25287 if (value == 0)
25289 if (!rtx_equal_p (dest, op1))
25290 emit_insn (gen_rtx_SET (dest, op1))
25291 return;
25296 if (code == AND && mode == SImode
25297 && !complement_final_p && !complement_op1_p && !complement_op2_p)
/* Use the named andsi3 pattern so the clobber of CR0 is generated.  */
25299 emit_insn (gen_andsi3 (dest, op1, op2));
25300 return;
25303 if (complement_op1_p)
25304 op1 = gen_rtx_NOT (mode, op1);
25306 if (complement_op2_p)
25307 op2 = gen_rtx_NOT (mode, op2);
25309 /* For canonical RTL, if only one arm is inverted it is the first.  */
25310 if (!complement_op1_p && complement_op2_p)
25311 std::swap (op1, op2);
25313 bool_rtx = ((code == NOT)
25314 ? gen_rtx_NOT (mode, op1)
25315 : gen_rtx_fmt_ee (code, mode, op1, op2));
25317 if (complement_final_p)
25318 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25320 emit_insn (gen_rtx_SET (dest, bool_rtx));
25323 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
25324 operations are split immediately during RTL generation to allow for more
25325 optimizations of the AND/IOR/XOR.
25327 OPERANDS is an array containing the destination and two input operands.
25328 CODE is the base operation (AND, IOR, XOR, NOT).
25329 MODE is the machine mode.
25330 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25331 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25332 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25333 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25334 formation of the AND instructions.  */
25336 static void
25337 rs6000_split_logical_di (rtx operands[3],
25338 enum rtx_code code,
25339 bool complement_final_p,
25340 bool complement_op1_p,
25341 bool complement_op2_p)
25343 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25344 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25345 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25346 enum hi_lo { hi = 0, lo = 1 };
25347 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25348 size_t i;
/* Split each DImode operand into its SImode high and low halves.  */
25350 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25351 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25352 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25353 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25355 if (code == NOT)
25356 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25357 else
25359 if (!CONST_INT_P (operands[2]))
25361 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25362 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25364 else
25366 HOST_WIDE_INT value = INTVAL (operands[2]);
25367 HOST_WIDE_INT value_hi_lo[2];
25369 gcc_assert (!complement_final_p);
25370 gcc_assert (!complement_op1_p);
25371 gcc_assert (!complement_op2_p);
25373 value_hi_lo[hi] = value >> 32;
25374 value_hi_lo[lo] = value & lower_32bits;
25376 for (i = 0; i < 2; i++)
25378 HOST_WIDE_INT sub_value = value_hi_lo[i];
/* Sign-extend the 32-bit half into the HOST_WIDE_INT constant so it
   is a canonical CONST_INT for SImode.  */
25380 if (sub_value & sign_bit)
25381 sub_value |= upper_32bits;
25383 op2_hi_lo[i] = GEN_INT (sub_value);
25385 /* If this is an AND instruction, check to see if we need to load
25386 the value in a register.  */
25387 if (code == AND && sub_value != -1 && sub_value != 0
25388 && !and_operand (op2_hi_lo[i], SImode))
25389 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25394 for (i = 0; i < 2; i++)
25396 /* Split large IOR/XOR operations.  */
25397 if ((code == IOR || code == XOR)
25398 && CONST_INT_P (op2_hi_lo[i])
25399 && !complement_final_p
25400 && !complement_op1_p
25401 && !complement_op2_p
25402 && !logical_const_operand (op2_hi_lo[i], SImode))
25404 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25405 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25406 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25407 rtx tmp = gen_reg_rtx (SImode);
25409 /* Make sure the constant is sign extended.  */
25410 if ((hi_16bits & sign_bit) != 0)
25411 hi_16bits |= upper_32bits;
25413 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25414 code, SImode, false, false, false);
25416 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25417 code, SImode, false, false, false);
25419 else
25420 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25421 code, SImode, complement_final_p,
25422 complement_op1_p, complement_op2_p);
25425 return;
25428 /* Split the insns that make up boolean operations operating on multiple GPR
25429 registers.  The boolean MD patterns ensure that the inputs either are
25430 exactly the same as the output registers, or there is no overlap.
25432 OPERANDS is an array containing the destination and two input operands.
25433 CODE is the base operation (AND, IOR, XOR, NOT).
25434 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25435 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25436 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */
25438 void
25439 rs6000_split_logical (rtx operands[3],
25440 enum rtx_code code,
25441 bool complement_final_p,
25442 bool complement_op1_p,
25443 bool complement_op2_p)
25445 machine_mode mode = GET_MODE (operands[0]);
25446 machine_mode sub_mode;
25447 rtx op0, op1, op2;
25448 int sub_size, regno0, regno1, nregs, i;
25450 /* If this is DImode, use the specialized version that can run before
25451 register allocation.  */
25452 if (mode == DImode && !TARGET_POWERPC64)
25454 rs6000_split_logical_di (operands, code, complement_final_p,
25455 complement_op1_p, complement_op2_p);
25456 return;
25459 op0 = operands[0];
25460 op1 = operands[1];
25461 op2 = (code == NOT) ? NULL_RTX : operands[2];
25462 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25463 sub_size = GET_MODE_SIZE (sub_mode);
25464 regno0 = REGNO (op0);
25465 regno1 = REGNO (op1);
/* This path runs after reload, so all operands must be hard GPRs.  */
25467 gcc_assert (reload_completed);
25468 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25469 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25471 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25472 gcc_assert (nregs > 1);
25474 if (op2 && REG_P (op2))
25475 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
/* Emit one word-sized logical op per constituent register.  */
25477 for (i = 0; i < nregs; i++)
25479 int offset = i * sub_size;
25480 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25481 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25482 rtx sub_op2 = ((code == NOT)
25483 ? NULL_RTX
25484 : simplify_subreg (sub_mode, op2, mode, offset));
25486 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25487 complement_final_p, complement_op1_p,
25488 complement_op2_p);
25491 return;
25495 /* Return true if the peephole2 can combine a load involving a combination of
25496 an addis instruction and a load with an offset that can be fused together on
25497 a power8.  */
25499 bool
25500 fusion_gpr_load_p (rtx addis_reg, /* register set via addis.  */
25501 rtx addis_value, /* addis value.  */
25502 rtx target, /* target register that is loaded.  */
25503 rtx mem) /* bottom part of the memory addr.  */
25505 rtx addr;
25506 rtx base_reg;
25508 /* Validate arguments.  */
25509 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25510 return false;
25512 if (!base_reg_operand (target, GET_MODE (target)))
25513 return false;
25515 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25516 return false;
25518 /* Allow sign/zero extension.  */
25519 if (GET_CODE (mem) == ZERO_EXTEND
25520 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25521 mem = XEXP (mem, 0);
25523 if (!MEM_P (mem))
25524 return false;
25526 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25527 return false;
25529 addr = XEXP (mem, 0); /* either PLUS or LO_SUM.  */
25530 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25531 return false;
25533 /* Validate that the register used to load the high value is either the
25534 register being loaded, or we can safely replace its use.
25536 This function is only called from the peephole2 pass and we assume that
25537 there are 2 instructions in the peephole (addis and load), so we want to
25538 check if the target register was not used in the memory address and the
25539 register to hold the addis result is dead after the peephole.  */
25540 if (REGNO (addis_reg) != REGNO (target))
25542 if (reg_mentioned_p (target, mem))
25543 return false;
25545 if (!peep2_reg_dead_p (2, addis_reg))
25546 return false;
25548 /* If the target register being loaded is the stack pointer, we must
25549 avoid loading any other value into it, even temporarily.  */
25550 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25551 return false;
/* The addis result must actually be the base of the low-part address.  */
25554 base_reg = XEXP (addr, 0);
25555 return REGNO (addis_reg) == REGNO (base_reg);
25558 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25559 sequence.  We adjust the addis register to use the target register.  If the
25560 load sign extends, we adjust the code to do the zero extending load, and an
25561 explicit sign extension later since the fusion only covers zero extending
25562 loads.
25564 The operands are:
25565 operands[0] register set with addis (to be replaced with target)
25566 operands[1] value set via addis
25567 operands[2] target register being loaded
25568 operands[3] D-form memory reference using operands[0].  */
25570 void
25571 expand_fusion_gpr_load (rtx *operands)
25573 rtx addis_value = operands[1];
25574 rtx target = operands[2];
25575 rtx orig_mem = operands[3];
25576 rtx new_addr, new_mem, orig_addr, offset;
25577 enum rtx_code plus_or_lo_sum;
25578 machine_mode target_mode = GET_MODE (target);
25579 machine_mode extend_mode = target_mode;
25580 machine_mode ptr_mode = Pmode;
25581 enum rtx_code extend = UNKNOWN;
/* Peel off a sign/zero extension wrapper; remember it so the extension
   can be re-applied (or emulated) after the fused load.  */
25583 if (GET_CODE (orig_mem) == ZERO_EXTEND
25584 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25586 extend = GET_CODE (orig_mem);
25587 orig_mem = XEXP (orig_mem, 0);
25588 target_mode = GET_MODE (orig_mem);
25591 gcc_assert (MEM_P (orig_mem));
25593 orig_addr = XEXP (orig_mem, 0);
25594 plus_or_lo_sum = GET_CODE (orig_addr);
25595 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
/* Rebuild the address with the addis value directly as the base.  */
25597 offset = XEXP (orig_addr, 1);
25598 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25599 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
/* The fused load always zero extends; a sign extension is emitted
   separately below.  */
25601 if (extend != UNKNOWN)
25602 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25604 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25605 UNSPEC_FUSION_GPR);
25606 emit_insn (gen_rtx_SET (target, new_mem));
25608 if (extend == SIGN_EXTEND)
25610 int sub_off = ((BYTES_BIG_ENDIAN)
25611 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25612 : 0);
25613 rtx sign_reg
25614 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25616 emit_insn (gen_rtx_SET (target,
25617 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25620 return;
25623 /* Emit the addis instruction that will be part of a fused instruction
25624 sequence.
TARGET is the register set by the addis; ADDIS_VALUE is the RTL for the
high part being loaded (L-constraint constant, reg+const PLUS, or a
HIGH of a TOC-relative or other constant).  Aborts via fatal_insn if no
asm template matches ADDIS_VALUE.  */
25626 void
25627 emit_fusion_addis (rtx target, rtx addis_value)
25629 rtx fuse_ops[10];
25630 const char *addis_str = NULL;
25632 /* Emit the addis instruction.  */
25633 fuse_ops[0] = target;
25634 if (satisfies_constraint_L (addis_value))
25636 fuse_ops[1] = addis_value;
25637 addis_str = "lis %0,%v1";
25640 else if (GET_CODE (addis_value) == PLUS)
25642 rtx op0 = XEXP (addis_value, 0);
25643 rtx op1 = XEXP (addis_value, 1);
25645 if (REG_P (op0) && CONST_INT_P (op1)
25646 && satisfies_constraint_L (op1))
25648 fuse_ops[1] = op0;
25649 fuse_ops[2] = op1;
25650 addis_str = "addis %0,%1,%v2";
25654 else if (GET_CODE (addis_value) == HIGH)
25656 rtx value = XEXP (addis_value, 0);
25657 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25659 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref.  */
25660 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register.  */
25661 if (TARGET_ELF)
25662 addis_str = "addis %0,%2,%1@toc@ha";
25664 else if (TARGET_XCOFF)
25665 addis_str = "addis %0,%1@u(%2)";
25667 else
25668 gcc_unreachable ();
25671 else if (GET_CODE (value) == PLUS)
25673 rtx op0 = XEXP (value, 0);
25674 rtx op1 = XEXP (value, 1);
25676 if (GET_CODE (op0) == UNSPEC
25677 && XINT (op0, 1) == UNSPEC_TOCREL
25678 && CONST_INT_P (op1))
25680 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref.  */
25681 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register.  */
25682 fuse_ops[3] = op1;
25683 if (TARGET_ELF)
25684 addis_str = "addis %0,%2,%1+%3@toc@ha";
25686 else if (TARGET_XCOFF)
25687 addis_str = "addis %0,%1+%3@u(%2)";
25689 else
25690 gcc_unreachable ();
25694 else if (satisfies_constraint_L (value))
25696 fuse_ops[1] = value;
25697 addis_str = "lis %0,%v1";
25700 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25702 fuse_ops[1] = value;
25703 addis_str = "lis %0,%1@ha";
25707 if (!addis_str)
25708 fatal_insn ("Could not generate addis value for fusion", addis_value);
25710 output_asm_insn (addis_str, fuse_ops);
25713 /* Emit a D-form load or store instruction that is the second instruction
25714 of a fusion sequence.
LOAD_REG is the data register, ADDIS_REG the base set by the earlier
addis, OFFSET the low part of the address (constant, TOC unspec, or
TOC unspec plus constant), and INSN_STR the mnemonic (e.g. "lwz").
Aborts via fatal_insn if OFFSET has an unsupported shape.  */
25716 static void
25717 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25719 rtx fuse_ops[10];
25720 char insn_template[80];
25722 fuse_ops[0] = load_reg;
25723 fuse_ops[1] = addis_reg;
25725 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25727 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25728 fuse_ops[2] = offset;
25729 output_asm_insn (insn_template, fuse_ops);
25732 else if (GET_CODE (offset) == UNSPEC
25733 && XINT (offset, 1) == UNSPEC_TOCREL)
25735 if (TARGET_ELF)
25736 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25738 else if (TARGET_XCOFF)
25739 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25741 else
25742 gcc_unreachable ();
25744 fuse_ops[2] = XVECEXP (offset, 0, 0);
25745 output_asm_insn (insn_template, fuse_ops);
25748 else if (GET_CODE (offset) == PLUS
25749 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25750 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25751 && CONST_INT_P (XEXP (offset, 1)))
25753 rtx tocrel_unspec = XEXP (offset, 0);
25754 if (TARGET_ELF)
25755 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25757 else if (TARGET_XCOFF)
25758 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25760 else
25761 gcc_unreachable ();
25763 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25764 fuse_ops[3] = XEXP (offset, 1);
25765 output_asm_insn (insn_template, fuse_ops);
25768 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25770 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25772 fuse_ops[2] = offset;
25773 output_asm_insn (insn_template, fuse_ops);
25776 else
25777 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25779 return;
25782 /* Given an address, convert it into the addis and load offset parts. Addresses
25783 created during the peephole2 process look like:
25784 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25785 (unspec [(...)] UNSPEC_TOCREL)) */
25787 static void
25788 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25790 rtx hi, lo;
25792 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25794 hi = XEXP (addr, 0);
25795 lo = XEXP (addr, 1);
25797 else
25798 gcc_unreachable ();
25800 *p_hi = hi;
25801 *p_lo = lo;
25804 /* Return a string to fuse an addis instruction with a gpr load to the same
25805 register that we loaded up the addis instruction. The address that is used
25806 is the logical address that was formed during peephole2:
25807 (lo_sum (high) (low-part))
25809 The code is complicated, so we call output_asm_insn directly, and just
25810 return "". */
25812 const char *
25813 emit_fusion_gpr_load (rtx target, rtx mem)
25815 rtx addis_value;
25816 rtx addr;
25817 rtx load_offset;
25818 const char *load_str = NULL;
25819 machine_mode mode;
25821 if (GET_CODE (mem) == ZERO_EXTEND)
25822 mem = XEXP (mem, 0);
25824 gcc_assert (REG_P (target) && MEM_P (mem));
25826 addr = XEXP (mem, 0);
25827 fusion_split_address (addr, &addis_value, &load_offset);
25829 /* Now emit the load instruction to the same register. */
25830 mode = GET_MODE (mem);
25831 switch (mode)
25833 case E_QImode:
25834 load_str = "lbz";
25835 break;
25837 case E_HImode:
25838 load_str = "lhz";
25839 break;
25841 case E_SImode:
25842 case E_SFmode:
25843 load_str = "lwz";
25844 break;
25846 case E_DImode:
25847 case E_DFmode:
25848 gcc_assert (TARGET_POWERPC64);
25849 load_str = "ld";
25850 break;
25852 default:
25853 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25856 /* Emit the addis instruction. */
25857 emit_fusion_addis (target, addis_value);
25859 /* Emit the D-form load instruction. */
25860 emit_fusion_load (target, target, load_offset, load_str);
25862 return "";
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  These decls
   are built lazily on first use by that hook (soft-float path) and refer to
   the glibc __atomic_fe* helper routines.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.

   Build the three tree expressions used to expand a C11 atomic compound
   assignment on a floating point object: *HOLD saves and masks the FP
   environment, *CLEAR resets the exception status flags, and *UPDATE
   restores the saved environment while merging in any newly raised
   exceptions.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      /* Soft float: there is no FPSCR to read via mffs, so emit calls to
	 the glibc helper routines instead.  The decls are built lazily and
	 marked public/external since they are defined in libc.  */
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      /* The saved environment lives in an addressable temporary that is
	 passed by address to all three helper calls.  */
      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

     *fenv_var = __builtin_mffs ();
     double fenv_hold;
     *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
     __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  /* View the mffs double image as a 64-bit integer so we can mask it.  */
  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask that zeroes the entire lower 32-bit word of the FPSCR image,
     clearing all the bits held there.  (Note this is broader than the
     hold mask above, which preserves the low three bits.)  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)&fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  /* FENV_LLU reuses the environment image saved by *HOLD (fenv_var).  */
  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
26031 void
26032 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26034 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26036 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26037 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26039 /* The destination of the vmrgew instruction layout is:
26040 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26041 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26042 vmrgew instruction will be correct. */
26043 if (BYTES_BIG_ENDIAN)
26045 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26046 GEN_INT (0)));
26047 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26048 GEN_INT (3)));
26050 else
26052 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26053 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26056 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26057 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26059 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26060 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26062 if (BYTES_BIG_ENDIAN)
26063 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26064 else
26065 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26068 void
26069 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26071 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26073 rtx_tmp0 = gen_reg_rtx (V2DImode);
26074 rtx_tmp1 = gen_reg_rtx (V2DImode);
26076 /* The destination of the vmrgew instruction layout is:
26077 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
26078 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26079 vmrgew instruction will be correct. */
26080 if (BYTES_BIG_ENDIAN)
26082 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26083 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26085 else
26087 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26088 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26091 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26092 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26094 if (signed_convert)
26096 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26097 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26099 else
26101 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26102 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26105 if (BYTES_BIG_ENDIAN)
26106 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26107 else
26108 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26111 void
26112 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26113 rtx src2)
26115 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26117 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26118 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26120 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26121 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26123 rtx_tmp2 = gen_reg_rtx (V4SImode);
26124 rtx_tmp3 = gen_reg_rtx (V4SImode);
26126 if (signed_convert)
26128 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26129 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26131 else
26133 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26134 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26137 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26140 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26142 static bool
26143 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26144 optimization_type opt_type)
26146 switch (op)
26148 case rsqrt_optab:
26149 return (opt_type == OPTIMIZE_FOR_SPEED
26150 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26152 default:
26153 return true;
26157 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26159 static HOST_WIDE_INT
26160 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26162 if (TREE_CODE (exp) == STRING_CST
26163 && (STRICT_ALIGNMENT || !optimize_size))
26164 return MAX (align, BITS_PER_WORD);
26165 return align;
26168 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26170 static HOST_WIDE_INT
26171 rs6000_starting_frame_offset (void)
26173 if (FRAME_GROWS_DOWNWARD)
26174 return 0;
26175 return RS6000_STARTING_FRAME_OFFSET;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* Only C++ mangled names (starting with "_Z") of functions involving
     IEEE 128-bit need a compatibility alias.  */
  if (!rs6000_passes_ieee128 || name[0] != '_' || name[1] != 'Z')
    return;

  tree saved_asm_name = DECL_ASSEMBLER_NAME (decl);

  /* Temporarily switch to the GCC 8.1 mangling to recompute the old
     assembler name, then restore the current one.  */
  ieee128_mangling_gcc_8_1 = true;
  lang_hooks.set_decl_assembler_name (decl);
  const char *gcc8_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  SET_DECL_ASSEMBLER_NAME (decl, saved_asm_name);
  ieee128_mangling_gcc_8_1 = false;

  /* If the manglings differ, emit a weak alias from the old name to the
     new one.  */
  if (strcmp (name, gcc8_name) != 0)
    {
      fprintf (stream, "\t.weak %s\n", gcc8_name);
      fprintf (stream, "\t.set %s,%s\n", gcc8_name, name);
    }
}
#endif
26212 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26213 function names from <foo>l to <foo>f128 if the default long double type is
26214 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26215 include file switches the names on systems that support long double as IEEE
26216 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26217 In the future, glibc will export names like __ieee128_sinf128 and we can
26218 switch to using those instead of using sinf128, which pollutes the user's
26219 namespace.
26221 This will switch the names for Fortran math functions as well (which doesn't
26222 use math.h). However, Fortran needs other changes to the compiler and
26223 library before you can switch the real*16 type at compile time.
26225 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26226 only do this if the default is that long double is IBM extended double, and
26227 the user asked for IEEE 128-bit. */
26229 static tree
26230 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26232 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26233 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
26235 size_t len = IDENTIFIER_LENGTH (id);
26236 const char *name = IDENTIFIER_POINTER (id);
26238 if (name[len - 1] == 'l')
26240 bool uses_ieee128_p = false;
26241 tree type = TREE_TYPE (decl);
26242 machine_mode ret_mode = TYPE_MODE (type);
26244 /* See if the function returns a IEEE 128-bit floating point type or
26245 complex type. */
26246 if (ret_mode == TFmode || ret_mode == TCmode)
26247 uses_ieee128_p = true;
26248 else
26250 function_args_iterator args_iter;
26251 tree arg;
26253 /* See if the function passes a IEEE 128-bit floating point type
26254 or complex type. */
26255 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26257 machine_mode arg_mode = TYPE_MODE (arg);
26258 if (arg_mode == TFmode || arg_mode == TCmode)
26260 uses_ieee128_p = true;
26261 break;
26266 /* If we passed or returned an IEEE 128-bit floating point type,
26267 change the name. */
26268 if (uses_ieee128_p)
26270 char *name2 = (char *) alloca (len + 4);
26271 memcpy (name2, name, len - 1);
26272 strcpy (name2 + len - 1, "f128");
26273 id = get_identifier (name2);
26278 return id;
26281 /* Predict whether the given loop in gimple will be transformed in the RTL
26282 doloop_optimize pass. */
26284 static bool
26285 rs6000_predict_doloop_p (struct loop *loop)
26287 gcc_assert (loop);
26289 /* On rs6000, targetm.can_use_doloop_p is actually
26290 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26291 if (loop->inner != NULL)
26293 if (dump_file && (dump_flags & TDF_DETAILS))
26294 fprintf (dump_file, "Predict doloop failure due to"
26295 " loop nesting.\n");
26296 return false;
26299 return true;
/* Instantiate the target hook vector from the TARGET_* overrides defined
   throughout this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"