Turn on -mpcrel by default for -mcpu=future
gcc/config/rs6000/rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
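
/* The commit this revision belongs to turns -mpcrel on by default for
   -mcpu=future.  A hedged sketch of that gating follows: OPTION_MASK_PCREL,
   rs6000_isa_flags and rs6000_isa_flags_explicit are real, but the helper
   name and the simplified condition are illustrative only; the actual logic
   lives in rs6000_option_override_internal.  */
#if 0
/* Hypothetical helper: default -mpcrel on when the OS supports it and the
   user did not give an explicit -mpcrel/-mno-pcrel.  */
static void
maybe_default_pcrel (void)
{
  if (PCREL_SUPPORTED_BY_OS
      && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
    rs6000_isa_flags |= OPTION_MASK_PCREL;
}
#endif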
/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;
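
/* A minimal sketch of how rs6000_pmode gets its value, assuming the usual
   64-/32-bit split this port uses; the real assignment happens during
   option override.  */
#if 0
rs6000_pmode = TARGET_64BIT ? DImode : SImode;
#endif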
#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;
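
/* For concreteness, the two manglings differ as follows for a hypothetical
   declaration (the function name is illustrative).  */
#if 0
void foo (__ieee128 x);
/* GCC 8.1 mangling:  _Z3fooU10__float128
   current mangling:  _Z3foou9__ieee128
   The weak alias lets binaries built against the old mangling keep
   linking.  */
#endif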
/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
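
/* A hedged sketch of how these flags are consumed: at the end of the file
   the port emits a .gnu_attribute directive, where tag 4 is
   Tag_GNU_Power_ABI_FP.  The value 1 (hard-float double) is shown only as
   an illustration; the real computation also folds in the long-double and
   IEEE-128 state tracked above.  */
#if 0
if (rs6000_passes_float)
  fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", 1);
#endif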
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
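
/* The nregs tables above follow the usual ceiling-division pattern.  A
   minimal sketch, with a hypothetical helper name and REG_SIZE standing in
   for the byte width of registers in the class.  */
#if 0
static unsigned char
nregs_for_mode (machine_mode mode, unsigned reg_size)
{
  /* A value of MODE needs this many registers of REG_SIZE bytes each.  */
  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
#endif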
static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE	   = 0,
  RECIP_ALL	   = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		      | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
		      | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;	/* option name.  */
  unsigned int mask;	/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* On PowerPC, we have a limited number of target clones that we care about,
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};
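
/* This table backs the target_clones attribute on PowerPC.  A short usage
   example in user code (the function is illustrative): the compiler emits
   one body per clone plus an ifunc resolver that picks among them using
   __builtin_cpu_supports.  */
#if 0
__attribute__ ((target_clones ("default", "cpu=power8", "cpu=power9")))
long
sum (const long *p, long n)
{
  long s = 0;
  for (long i = 0; i < n; i++)
    s += p[i];
  return s;
}
#endif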
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;
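
/* The CPU builtins referred to here are the user-visible __builtin_cpu_is
   and __builtin_cpu_supports.  An illustrative use (the two helper
   functions are hypothetical); every such expansion also emits a reference
   to __parse_hwcap_and_convert_at_platform, so linking against a libc that
   lacks the TCB fields fails loudly.  */
#if 0
if (__builtin_cpu_is ("power9") || __builtin_cpu_supports ("arch_3_00"))
  use_isa_3_00_path ();
else
  use_generic_path ();
#endif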
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about for secondary reload or when checking for
   a legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Masks of the valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	 /* INSN to reload for loading.  */
  enum insn_code reload_store;	 /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		 /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
/* Processor costs (relative to an add).  */
const struct processor_costs *rs6000_cost;
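
/* rs6000_cost is pointed at one of the tables below during option override.
   Entries are scaled by COSTS_N_INSNS, which rtl.h defines as (N) * 4, so
   all values are relative to a one-cycle add.  A hedged sketch of the
   selection, showing only two illustrative cases of the real switch in
   rs6000_option_override_internal.  */
#if 0
switch (rs6000_tune)
  {
  case PROCESSOR_POWER9:
    rs6000_cost = &power9_cost;
    break;
  case PROCESSOR_CELL:
    rs6000_cost = &ppccell_cost;
    break;
  default:
    /* Illustrative fallback only; the size32/size64 tables above are what
       the port uses when optimizing for size.  */
    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
    break;
  }
#endif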
/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
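
/* For reference, two of these attributes as they appear in user code (the
   declarations are illustrative): "longcall" forces an indirect call
   sequence so the callee may be out of range of a direct branch, and
   "altivec" selects an AltiVec flavor of a type.  */
#if 0
void far_away_function (void) __attribute__ ((longcall));
typedef int vec_int_t __attribute__ ((altivec (vector__)));
#endif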
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
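
/* To make the sharing concrete: with section anchors, both globals below
   can be reached from one base register that materializes the anchor.  The
   symbol names and the assembly in the comment are illustrative only.  */
#if 0
int a, b;

int
sum_ab (void)
{
  /* Plausible anchored code:
	addis 9,2,.LANCHOR0@toc@ha
	addi 9,9,.LANCHOR0@toc@l
	lwz 3,0(9)
	lwz 4,4(9)
     with a at .LANCHOR0 and b at .LANCHOR0+4.  */
  return a + b;
}
#endif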
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
1714 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1716 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1717 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1718 rs6000_hard_regno_call_part_clobbered
1720 #undef TARGET_SLOW_UNALIGNED_ACCESS
1721 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1723 #undef TARGET_CAN_CHANGE_MODE_CLASS
1724 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1726 #undef TARGET_CONSTANT_ALIGNMENT
1727 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1729 #undef TARGET_STARTING_FRAME_OFFSET
1730 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1732 #if TARGET_ELF && RS6000_WEAK
1733 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1734 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1735 #endif
1737 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1738 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1740 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1741 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1743 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1744 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1745 rs6000_cannot_substitute_mem_equiv_p
1748 /* Processor table. */
1749 struct rs6000_ptt
1751 const char *const name; /* Canonical processor name. */
1752 const enum processor_type processor; /* Processor type enum value. */
1753 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1756 static struct rs6000_ptt const processor_target_table[] =
1758 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1759 #include "rs6000-cpus.def"
1760 #undef RS6000_CPU
1763 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1764 name is invalid. */
1766 static int
1767 rs6000_cpu_name_lookup (const char *name)
1769 size_t i;
1771 if (name != NULL)
1773 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1774 if (! strcmp (name, processor_target_table[i].name))
1775 return (int)i;
1778 return -1;
1782 /* Return number of consecutive hard regs needed starting at reg REGNO
1783 to hold something of mode MODE.
1784 This is ordinarily the length in words of a value of mode MODE
1785 but can be less for certain modes in special long registers.
1787 POWER and PowerPC GPRs hold 32 bits worth;
1788 PowerPC64 GPRs and FPRs hold 64 bits worth. */
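/* For example, with 32-bit GPRs (reg_size == 4) a 16-byte vector mode needs
   (16 + 4 - 1) / 4 = 4 consecutive GPRs, while the same mode fits in a
   single 16-byte VSX register.  (Worked example added for illustration.)  */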
1790 static int
1791 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1793 unsigned HOST_WIDE_INT reg_size;
1795 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1796 128-bit floating point that can go in vector registers, which has VSX
1797 memory addressing. */
1798 if (FP_REGNO_P (regno))
1799 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1800 ? UNITS_PER_VSX_WORD
1801 : UNITS_PER_FP_WORD);
1803 else if (ALTIVEC_REGNO_P (regno))
1804 reg_size = UNITS_PER_ALTIVEC_WORD;
1806 else
1807 reg_size = UNITS_PER_WORD;
1809 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1812 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1813 MODE. */
1814 static int
1815 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1817 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1819 if (COMPLEX_MODE_P (mode))
1820 mode = GET_MODE_INNER (mode);
1822 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1823 register combinations; we use PTImode where we need to deal with quad
1824 word memory operations. Don't allow quad words in the argument or frame
1825 pointer registers, just registers 0..31. */
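/* For instance, a PTImode value may start at r10 (occupying r10/r11) but not
   at r11; the even-register check below rejects odd starting registers.
   (Illustrative example.)  */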
1826 if (mode == PTImode)
1827 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1828 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1829 && ((regno & 1) == 0));
1831 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1832 implementations. Don't allow an item to be split between a FP register
1833 and an Altivec register. Allow TImode in all VSX registers if the user
1834 asked for it. */
1835 if (TARGET_VSX && VSX_REGNO_P (regno)
1836 && (VECTOR_MEM_VSX_P (mode)
1837 || FLOAT128_VECTOR_P (mode)
1838 || reg_addr[mode].scalar_in_vmx_p
1839 || mode == TImode
1840 || (TARGET_VADDUQM && mode == V1TImode)))
1842 if (FP_REGNO_P (regno))
1843 return FP_REGNO_P (last_regno);
1845 if (ALTIVEC_REGNO_P (regno))
1847 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1848 return 0;
1850 return ALTIVEC_REGNO_P (last_regno);
1854 /* The GPRs can hold any mode, but values bigger than one register
1855 cannot go past R31. */
1856 if (INT_REGNO_P (regno))
1857 return INT_REGNO_P (last_regno);
1859 /* The float registers (except for VSX vector modes) can only hold floating
1860 modes and DImode. */
1861 if (FP_REGNO_P (regno))
1863 if (FLOAT128_VECTOR_P (mode))
1864 return false;
1866 if (SCALAR_FLOAT_MODE_P (mode)
1867 && (mode != TDmode || (regno % 2) == 0)
1868 && FP_REGNO_P (last_regno))
1869 return 1;
1871 if (GET_MODE_CLASS (mode) == MODE_INT)
1873 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1874 return 1;
1876 if (TARGET_P8_VECTOR && (mode == SImode))
1877 return 1;
1879 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1880 return 1;
1883 return 0;
1886 /* The CR register can only hold CC modes. */
1887 if (CR_REGNO_P (regno))
1888 return GET_MODE_CLASS (mode) == MODE_CC;
1890 if (CA_REGNO_P (regno))
1891 return mode == Pmode || mode == SImode;
1893 /* AltiVec modes go only in AltiVec registers. */
1894 if (ALTIVEC_REGNO_P (regno))
1895 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1896 || mode == V1TImode);
1898 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1899 registers, and the value must fit within the register set. */
1901 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1904 /* Implement TARGET_HARD_REGNO_NREGS. */
1906 static unsigned int
1907 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1909 return rs6000_hard_regno_nregs[mode][regno];
1912 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1914 static bool
1915 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1917 return rs6000_hard_regno_mode_ok_p[mode][regno];
1920 /* Implement TARGET_MODES_TIEABLE_P.
1922 PTImode cannot tie with other modes because PTImode is restricted to even
1923 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1924 57744).
1926 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1927 128-bit floating point on VSX systems ties with other vectors. */
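/* For example, under these rules V4SFmode ties with V2DFmode (both are
   AltiVec/VSX vector modes) and SFmode ties with DFmode (both scalar float),
   but DFmode does not tie with DImode, and PTImode ties only with itself.
   (Illustrative examples derived from the checks below.)  */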
1929 static bool
1930 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1932 if (mode1 == PTImode)
1933 return mode2 == PTImode;
1934 if (mode2 == PTImode)
1935 return false;
1937 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1938 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1939 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1940 return false;
1942 if (SCALAR_FLOAT_MODE_P (mode1))
1943 return SCALAR_FLOAT_MODE_P (mode2);
1944 if (SCALAR_FLOAT_MODE_P (mode2))
1945 return false;
1947 if (GET_MODE_CLASS (mode1) == MODE_CC)
1948 return GET_MODE_CLASS (mode2) == MODE_CC;
1949 if (GET_MODE_CLASS (mode2) == MODE_CC)
1950 return false;
1952 return true;
1955 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1957 static bool
1958 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1959 machine_mode mode)
1961 if (TARGET_32BIT
1962 && TARGET_POWERPC64
1963 && GET_MODE_SIZE (mode) > 4
1964 && INT_REGNO_P (regno))
1965 return true;
1967 if (TARGET_VSX
1968 && FP_REGNO_P (regno)
1969 && GET_MODE_SIZE (mode) > 8
1970 && !FLOAT128_2REG_P (mode))
1971 return true;
1973 return false;
1976 /* Print interesting facts about registers. */
1977 static void
1978 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1980 int r, m;
1982 for (r = first_regno; r <= last_regno; ++r)
1984 const char *comma = "";
1985 int len;
1987 if (first_regno == last_regno)
1988 fprintf (stderr, "%s:\t", reg_name);
1989 else
1990 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1992 len = 8;
1993 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1994 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1996 if (len > 70)
1998 fprintf (stderr, ",\n\t");
1999 len = 8;
2000 comma = "";
2003 if (rs6000_hard_regno_nregs[m][r] > 1)
2004 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2005 rs6000_hard_regno_nregs[m][r]);
2006 else
2007 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2009 comma = ", ";
2012 if (call_used_or_fixed_reg_p (r))
2014 if (len > 70)
2016 fprintf (stderr, ",\n\t");
2017 len = 8;
2018 comma = "";
2021 len += fprintf (stderr, "%s%s", comma, "call-used");
2022 comma = ", ";
2025 if (fixed_regs[r])
2027 if (len > 70)
2029 fprintf (stderr, ",\n\t");
2030 len = 8;
2031 comma = "";
2034 len += fprintf (stderr, "%s%s", comma, "fixed");
2035 comma = ", ";
2038 if (len > 70)
2040 fprintf (stderr, ",\n\t");
2041 comma = "";
2044 len += fprintf (stderr, "%sreg-class = %s", comma,
2045 reg_class_names[(int)rs6000_regno_regclass[r]]);
2046 comma = ", ";
2048 if (len > 70)
2050 fprintf (stderr, ",\n\t");
2051 comma = "";
2054 fprintf (stderr, "%sregno = %d\n", comma, r);
2058 static const char *
2059 rs6000_debug_vector_unit (enum rs6000_vector v)
2061 const char *ret;
2063 switch (v)
2065 case VECTOR_NONE: ret = "none"; break;
2066 case VECTOR_ALTIVEC: ret = "altivec"; break;
2067 case VECTOR_VSX: ret = "vsx"; break;
2068 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2069 default: ret = "unknown"; break;
2072 return ret;
2075 /* Inner function printing just the address mask for a particular reload
2076 register class. */
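/* The single-character flags emitted below are: 'v' = valid, 'm' = needs
   multiple registers, 'i' = REG+REG indexed, 'O'/'o' = quad/normal
   REG+OFFSET, the two '+' columns = PRE_INC/PRE_DEC and PRE_MODIFY, and
   '&' = AltiVec-style (REG & -16) masking.  (Summary added for
   illustration.)  */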
2077 DEBUG_FUNCTION char *
2078 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2080 static char ret[8];
2081 char *p = ret;
2083 if ((mask & RELOAD_REG_VALID) != 0)
2084 *p++ = 'v';
2085 else if (keep_spaces)
2086 *p++ = ' ';
2088 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2089 *p++ = 'm';
2090 else if (keep_spaces)
2091 *p++ = ' ';
2093 if ((mask & RELOAD_REG_INDEXED) != 0)
2094 *p++ = 'i';
2095 else if (keep_spaces)
2096 *p++ = ' ';
2098 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2099 *p++ = 'O';
2100 else if ((mask & RELOAD_REG_OFFSET) != 0)
2101 *p++ = 'o';
2102 else if (keep_spaces)
2103 *p++ = ' ';
2105 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2106 *p++ = '+';
2107 else if (keep_spaces)
2108 *p++ = ' ';
2110 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2111 *p++ = '+';
2112 else if (keep_spaces)
2113 *p++ = ' ';
2115 if ((mask & RELOAD_REG_AND_M16) != 0)
2116 *p++ = '&';
2117 else if (keep_spaces)
2118 *p++ = ' ';
2120 *p = '\0';
2122 return ret;
2125 /* Print the address masks in a human readable fashion. */
2126 DEBUG_FUNCTION void
2127 rs6000_debug_print_mode (ssize_t m)
2129 ssize_t rc;
2130 int spaces = 0;
2132 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2133 for (rc = 0; rc < N_RELOAD_REG; rc++)
2134 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2135 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2137 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2138 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2140 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2141 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2142 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2143 spaces = 0;
2145 else
2146 spaces += strlen (" Reload=sl");
2148 if (reg_addr[m].scalar_in_vmx_p)
2150 fprintf (stderr, "%*s Upper=y", spaces, "");
2151 spaces = 0;
2153 else
2154 spaces += strlen (" Upper=y");
2156 if (rs6000_vector_unit[m] != VECTOR_NONE
2157 || rs6000_vector_mem[m] != VECTOR_NONE)
2159 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2160 spaces, "",
2161 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2162 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2165 fputs ("\n", stderr);
2168 #define DEBUG_FMT_ID "%-32s= "
2169 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2170 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2171 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
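/* For example, fprintf (stderr, DEBUG_FMT_D, "tls_size", 16) left-justifies
   the label in a 32-character field and prints
     tls_size                        = 16
   (Illustrative expansion of the format macros above.)  */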
2173 /* Print various interesting information with -mdebug=reg. */
2174 static void
2175 rs6000_debug_reg_global (void)
2177 static const char *const tf[2] = { "false", "true" };
2178 const char *nl = (const char *)0;
2179 int m;
2180 size_t m1, m2, v;
2181 char costly_num[20];
2182 char nop_num[20];
2183 char flags_buffer[40];
2184 const char *costly_str;
2185 const char *nop_str;
2186 const char *trace_str;
2187 const char *abi_str;
2188 const char *cmodel_str;
2189 struct cl_target_option cl_opts;
2191 /* Modes we want tieable information on. */
2192 static const machine_mode print_tieable_modes[] = {
2193 QImode,
2194 HImode,
2195 SImode,
2196 DImode,
2197 TImode,
2198 PTImode,
2199 SFmode,
2200 DFmode,
2201 TFmode,
2202 IFmode,
2203 KFmode,
2204 SDmode,
2205 DDmode,
2206 TDmode,
2207 V16QImode,
2208 V8HImode,
2209 V4SImode,
2210 V2DImode,
2211 V1TImode,
2212 V32QImode,
2213 V16HImode,
2214 V8SImode,
2215 V4DImode,
2216 V2TImode,
2217 V4SFmode,
2218 V2DFmode,
2219 V8SFmode,
2220 V4DFmode,
2221 CCmode,
2222 CCUNSmode,
2223 CCEQmode,
2226 /* Virtual regs we are interested in. */
2227 const static struct {
2228 int regno; /* register number. */
2229 const char *name; /* register name. */
2230 } virtual_regs[] = {
2231 { STACK_POINTER_REGNUM, "stack pointer:" },
2232 { TOC_REGNUM, "toc: " },
2233 { STATIC_CHAIN_REGNUM, "static chain: " },
2234 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2235 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2236 { ARG_POINTER_REGNUM, "arg pointer: " },
2237 { FRAME_POINTER_REGNUM, "frame pointer:" },
2238 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2239 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2240 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2241 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2242 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2243 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2244 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2245 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2246 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2249 fputs ("\nHard register information:\n", stderr);
2250 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2251 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2252 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2253 LAST_ALTIVEC_REGNO,
2254 "vs");
2255 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2256 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2257 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2258 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2259 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2260 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2262 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2263 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2264 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2266 fprintf (stderr,
2267 "\n"
2268 "d reg_class = %s\n"
2269 "f reg_class = %s\n"
2270 "v reg_class = %s\n"
2271 "wa reg_class = %s\n"
2272 "we reg_class = %s\n"
2273 "wr reg_class = %s\n"
2274 "wx reg_class = %s\n"
2275 "wA reg_class = %s\n"
2276 "\n",
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2279 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2280 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2281 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2282 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2283 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2284 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2286 nl = "\n";
2287 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2288 rs6000_debug_print_mode (m);
2290 fputs ("\n", stderr);
2292 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2294 machine_mode mode1 = print_tieable_modes[m1];
2295 bool first_time = true;
2297 nl = (const char *)0;
2298 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2300 machine_mode mode2 = print_tieable_modes[m2];
2301 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2303 if (first_time)
2305 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2306 nl = "\n";
2307 first_time = false;
2310 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2314 if (!first_time)
2315 fputs ("\n", stderr);
2318 if (nl)
2319 fputs (nl, stderr);
2321 if (rs6000_recip_control)
2323 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2325 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2326 if (rs6000_recip_bits[m])
2328 fprintf (stderr,
2329 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2330 GET_MODE_NAME (m),
2331 (RS6000_RECIP_AUTO_RE_P (m)
2332 ? "auto"
2333 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2334 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2335 ? "auto"
2336 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2339 fputs ("\n", stderr);
2342 if (rs6000_cpu_index >= 0)
2344 const char *name = processor_target_table[rs6000_cpu_index].name;
2345 HOST_WIDE_INT flags
2346 = processor_target_table[rs6000_cpu_index].target_enable;
2348 sprintf (flags_buffer, "-mcpu=%s flags", name);
2349 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2351 else
2352 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2354 if (rs6000_tune_index >= 0)
2356 const char *name = processor_target_table[rs6000_tune_index].name;
2357 HOST_WIDE_INT flags
2358 = processor_target_table[rs6000_tune_index].target_enable;
2360 sprintf (flags_buffer, "-mtune=%s flags", name);
2361 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2363 else
2364 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2366 cl_target_option_save (&cl_opts, &global_options);
2367 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2368 rs6000_isa_flags);
2370 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2371 rs6000_isa_flags_explicit);
2373 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2374 rs6000_builtin_mask);
2376 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2378 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2379 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2381 switch (rs6000_sched_costly_dep)
2383 case max_dep_latency:
2384 costly_str = "max_dep_latency";
2385 break;
2387 case no_dep_costly:
2388 costly_str = "no_dep_costly";
2389 break;
2391 case all_deps_costly:
2392 costly_str = "all_deps_costly";
2393 break;
2395 case true_store_to_load_dep_costly:
2396 costly_str = "true_store_to_load_dep_costly";
2397 break;
2399 case store_to_load_dep_costly:
2400 costly_str = "store_to_load_dep_costly";
2401 break;
2403 default:
2404 costly_str = costly_num;
2405 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2406 break;
2409 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2411 switch (rs6000_sched_insert_nops)
2413 case sched_finish_regroup_exact:
2414 nop_str = "sched_finish_regroup_exact";
2415 break;
2417 case sched_finish_pad_groups:
2418 nop_str = "sched_finish_pad_groups";
2419 break;
2421 case sched_finish_none:
2422 nop_str = "sched_finish_none";
2423 break;
2425 default:
2426 nop_str = nop_num;
2427 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2428 break;
2431 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2433 switch (rs6000_sdata)
2435 default:
2436 case SDATA_NONE:
2437 break;
2439 case SDATA_DATA:
2440 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2441 break;
2443 case SDATA_SYSV:
2444 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2445 break;
2447 case SDATA_EABI:
2448 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2449 break;
2453 switch (rs6000_traceback)
2455 case traceback_default: trace_str = "default"; break;
2456 case traceback_none: trace_str = "none"; break;
2457 case traceback_part: trace_str = "part"; break;
2458 case traceback_full: trace_str = "full"; break;
2459 default: trace_str = "unknown"; break;
2462 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2464 switch (rs6000_current_cmodel)
2466 case CMODEL_SMALL: cmodel_str = "small"; break;
2467 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2468 case CMODEL_LARGE: cmodel_str = "large"; break;
2469 default: cmodel_str = "unknown"; break;
2472 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2474 switch (rs6000_current_abi)
2476 case ABI_NONE: abi_str = "none"; break;
2477 case ABI_AIX: abi_str = "aix"; break;
2478 case ABI_ELFv2: abi_str = "ELFv2"; break;
2479 case ABI_V4: abi_str = "V4"; break;
2480 case ABI_DARWIN: abi_str = "darwin"; break;
2481 default: abi_str = "unknown"; break;
2484 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2486 if (rs6000_altivec_abi)
2487 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2489 if (rs6000_darwin64_abi)
2490 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2492 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2493 (TARGET_SOFT_FLOAT ? "true" : "false"));
2495 if (TARGET_LINK_STACK)
2496 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2498 if (TARGET_P8_FUSION)
2500 char options[80];
2502 strcpy (options, "power8");
2503 if (TARGET_P8_FUSION_SIGN)
2504 strcat (options, ", sign");
2506 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2509 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2510 TARGET_SECURE_PLT ? "secure" : "bss");
2511 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2512 aix_struct_return ? "aix" : "sysv");
2513 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2514 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2515 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2516 tf[!!rs6000_align_branch_targets]);
2517 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2518 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2519 rs6000_long_double_type_size);
2520 if (rs6000_long_double_type_size > 64)
2522 fprintf (stderr, DEBUG_FMT_S, "long double type",
2523 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2524 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2525 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2527 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2528 (int)rs6000_sched_restricted_insns_priority);
2529 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2530 (int)END_BUILTINS);
2531 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2532 (int)RS6000_BUILTIN_COUNT);
2534 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2535 (int)TARGET_FLOAT128_ENABLE_TYPE);
2537 if (TARGET_VSX)
2538 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2539 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2541 if (TARGET_DIRECT_MOVE_128)
2542 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2543 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2547 /* Update the addr mask bits in reg_addr to help the secondary reload and
2548 legitimate-address support figure out the appropriate addressing to
2549 use. */
2551 static void
2552 rs6000_setup_reg_addr_masks (void)
2554 ssize_t rc, reg, m, nregs;
2555 addr_mask_type any_addr_mask, addr_mask;
2557 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2559 machine_mode m2 = (machine_mode) m;
2560 bool complex_p = false;
2561 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2562 size_t msize;
2564 if (COMPLEX_MODE_P (m2))
2566 complex_p = true;
2567 m2 = GET_MODE_INNER (m2);
2570 msize = GET_MODE_SIZE (m2);
2572 /* SDmode is special in that we want to access it only via REG+REG
2573 addressing on power7 and above, since we want to use the LFIWZX and
2574 STFIWZX instructions to load it. */
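/* E.g. "lfiwzx f1,r3,r4" is the indexed (REG+REG) form; these instructions
   have no D-form (REG+OFFSET) variant, hence the restriction.  (Illustrative
   instruction example.)  */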
2575 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2577 any_addr_mask = 0;
2578 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2580 addr_mask = 0;
2581 reg = reload_reg_map[rc].reg;
2583 /* Can mode values go in the GPR/FPR/Altivec registers? */
2584 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2586 bool small_int_vsx_p = (small_int_p
2587 && (rc == RELOAD_REG_FPR
2588 || rc == RELOAD_REG_VMX));
2590 nregs = rs6000_hard_regno_nregs[m][reg];
2591 addr_mask |= RELOAD_REG_VALID;
2593 /* Indicate if the mode takes more than 1 physical register. If
2594 it takes a single register, indicate it can do REG+REG
2595 addressing. Small integers in VSX registers can only do
2596 REG+REG addressing. */
2597 if (small_int_vsx_p)
2598 addr_mask |= RELOAD_REG_INDEXED;
2599 else if (nregs > 1 || m == BLKmode || complex_p)
2600 addr_mask |= RELOAD_REG_MULTIPLE;
2601 else
2602 addr_mask |= RELOAD_REG_INDEXED;
2604 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2605 addressing. If we allow scalars into Altivec registers,
2606 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2608 For VSX systems, we don't allow update addressing for
2609 DFmode/SFmode if those registers can go in both the
2610 traditional floating point registers and Altivec registers.
2611 The load/store instructions for the Altivec registers do not
2612 have update forms. If we allowed update addressing, it seems
2613 to break IV-OPT code using floating point if the index type is
2614 int instead of long (PR target/81550 and target/84042). */
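/* E.g. "lwzu r3,4(r4)" is an update form: it loads the word at 4(r4) and
   then writes the incremented address back into r4.  (Illustrative
   example.)  */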
2616 if (TARGET_UPDATE
2617 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2618 && msize <= 8
2619 && !VECTOR_MODE_P (m2)
2620 && !FLOAT128_VECTOR_P (m2)
2621 && !complex_p
2622 && (m != E_DFmode || !TARGET_VSX)
2623 && (m != E_SFmode || !TARGET_P8_VECTOR)
2624 && !small_int_vsx_p)
2626 addr_mask |= RELOAD_REG_PRE_INCDEC;
2628 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2629 we don't allow PRE_MODIFY for some multi-register
2630 operations. */
2631 switch (m)
2633 default:
2634 addr_mask |= RELOAD_REG_PRE_MODIFY;
2635 break;
2637 case E_DImode:
2638 if (TARGET_POWERPC64)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2642 case E_DFmode:
2643 case E_DDmode:
2644 if (TARGET_HARD_FLOAT)
2645 addr_mask |= RELOAD_REG_PRE_MODIFY;
2646 break;
2651 /* GPR and FPR registers can do REG+OFFSET addressing, except
2652 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2653 for 64-bit scalars and 32-bit SFmode to altivec registers. */
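/* E.g. REG+OFFSET (D-form) addressing is "lwz r3,8(r4)", while REG+REG
   (X-form) addressing is "lwzx r3,r4,r5".  (Illustrative examples.)  */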
2654 if ((addr_mask != 0) && !indexed_only_p
2655 && msize <= 8
2656 && (rc == RELOAD_REG_GPR
2657 || ((msize == 8 || m2 == SFmode)
2658 && (rc == RELOAD_REG_FPR
2659 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2660 addr_mask |= RELOAD_REG_OFFSET;
2662 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2663 instructions are enabled. The offset field for 128-bit VSX registers is
2664 only 12 bits. While GPRs can handle the full offset range, VSX
2665 registers can only handle the restricted range. */
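/* Concretely, the ISA 3.0 "lxv"/"stxv" instructions use a DQ-form
   displacement: a 12-bit field scaled by 16, so the offset must be a
   multiple of 16.  (Explanatory note added here; it is not part of the
   original comment.)  */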
2666 else if ((addr_mask != 0) && !indexed_only_p
2667 && msize == 16 && TARGET_P9_VECTOR
2668 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2669 || (m2 == TImode && TARGET_VSX)))
2671 addr_mask |= RELOAD_REG_OFFSET;
2672 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2673 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2676 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2677 addressing on 128-bit types. */
2678 if (rc == RELOAD_REG_VMX && msize == 16
2679 && (addr_mask & RELOAD_REG_VALID) != 0)
2680 addr_mask |= RELOAD_REG_AND_M16;
2682 reg_addr[m].addr_mask[rc] = addr_mask;
2683 any_addr_mask |= addr_mask;
2686 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2691 /* Initialize the various global tables that are based on register size. */
2692 static void
2693 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2695 ssize_t r, m, c;
2696 int align64;
2697 int align32;
2699 /* Precalculate REGNO_REG_CLASS. */
2700 rs6000_regno_regclass[0] = GENERAL_REGS;
2701 for (r = 1; r < 32; ++r)
2702 rs6000_regno_regclass[r] = BASE_REGS;
2704 for (r = 32; r < 64; ++r)
2705 rs6000_regno_regclass[r] = FLOAT_REGS;
2707 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2708 rs6000_regno_regclass[r] = NO_REGS;
2710 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2711 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2713 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2714 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2715 rs6000_regno_regclass[r] = CR_REGS;
2717 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2718 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2719 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2720 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2721 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2722 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2723 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2725 /* Precalculate register class to simpler reload register class. We don't
2726 need all of the register classes that are combinations of different
2727 classes, just the simple ones that have constraint letters. */
2728 for (c = 0; c < N_REG_CLASSES; c++)
2729 reg_class_to_reg_type[c] = NO_REG_TYPE;
2731 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2734 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2735 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2736 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2737 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2738 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2739 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2740 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2742 if (TARGET_VSX)
2744 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2745 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2747 else
2749 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2750 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2753 /* Precalculate the valid memory formats as well as the vector information;
2754 this must be set up before the rs6000_hard_regno_nregs_internal calls
2755 below. */
2756 gcc_assert ((int)VECTOR_NONE == 0);
2757 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2758 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2760 gcc_assert ((int)CODE_FOR_nothing == 0);
2761 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2763 gcc_assert ((int)NO_REGS == 0);
2764 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2766 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2767 controls whether the compiler uses native alignment or still assumes 128-bit alignment. */
2768 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2770 align64 = 64;
2771 align32 = 32;
2773 else
2775 align64 = 128;
2776 align32 = 128;
2779 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2780 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2781 if (TARGET_FLOAT128_TYPE)
2783 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2784 rs6000_vector_align[KFmode] = 128;
2786 if (FLOAT128_IEEE_P (TFmode))
2788 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2789 rs6000_vector_align[TFmode] = 128;
2793 /* V2DF mode, VSX only. */
2794 if (TARGET_VSX)
2796 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2797 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2798 rs6000_vector_align[V2DFmode] = align64;
2801 /* V4SF mode, either VSX or Altivec. */
2802 if (TARGET_VSX)
2804 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2806 rs6000_vector_align[V4SFmode] = align32;
2808 else if (TARGET_ALTIVEC)
2810 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2811 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2812 rs6000_vector_align[V4SFmode] = align32;
2815 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2816 and stores. */
2817 if (TARGET_ALTIVEC)
2819 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2820 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2821 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2822 rs6000_vector_align[V4SImode] = align32;
2823 rs6000_vector_align[V8HImode] = align32;
2824 rs6000_vector_align[V16QImode] = align32;
2826 if (TARGET_VSX)
2828 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2829 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2830 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2832 else
2834 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2835 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2836 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2840 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2841 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2842 if (TARGET_VSX)
2844 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2845 rs6000_vector_unit[V2DImode]
2846 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2847 rs6000_vector_align[V2DImode] = align64;
2849 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2850 rs6000_vector_unit[V1TImode]
2851 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2852 rs6000_vector_align[V1TImode] = 128;
2855 /* DFmode, see if we want to use the VSX unit. Memory is handled
2856 differently, so don't set rs6000_vector_mem. */
2857 if (TARGET_VSX)
2859 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2860 rs6000_vector_align[DFmode] = 64;
2863 /* SFmode, see if we want to use the VSX unit. */
2864 if (TARGET_P8_VECTOR)
2866 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2867 rs6000_vector_align[SFmode] = 32;
2870 /* Allow TImode in VSX register and set the VSX memory macros. */
2871 if (TARGET_VSX)
2873 rs6000_vector_mem[TImode] = VECTOR_VSX;
2874 rs6000_vector_align[TImode] = align64;
2877 /* Register class constraints for the constraints that depend on compile
2878 switches. When the VSX code was added, different constraints were added
2879 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2880 of the VSX registers are used. The register classes for scalar floating
2881 point types are set based on whether we allow that type into the upper
2882 (Altivec) registers. GCC has register classes to target the Altivec
2883 registers for load/store operations, to select using a VSX memory
2884 operation instead of the traditional floating point operation. The
2885 constraints are:
2887 d - Register class to use with traditional DFmode instructions.
2888 f - Register class to use with traditional SFmode instructions.
2889 v - Altivec register.
2890 wa - Any VSX register.
2891 wc - Reserved to represent individual CR bits (used in LLVM).
2892 wn - always NO_REGS.
2893 wr - GPR if 64-bit mode is permitted.
2894 wx - Float register if we can do 32-bit int stores. */
2896 if (TARGET_HARD_FLOAT)
2898 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2899 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2902 if (TARGET_VSX)
2903 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2905 /* Add conditional constraints based on various options, to allow us to
2906 collapse multiple insn patterns. */
2907 if (TARGET_ALTIVEC)
2908 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2910 if (TARGET_POWERPC64)
2912 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2913 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2916 if (TARGET_STFIWX)
2917 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2919 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2920 if (TARGET_DIRECT_MOVE_128)
2921 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2923 /* Set up the reload helper and direct move functions. */
2924 if (TARGET_VSX || TARGET_ALTIVEC)
2926 if (TARGET_64BIT)
2928 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2929 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2930 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2931 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2932 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2933 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2934 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2935 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2936 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2937 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2938 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2939 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2940 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2941 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2942 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2943 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2944 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2945 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2946 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2947 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2949 if (FLOAT128_VECTOR_P (KFmode))
2951 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2952 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2955 if (FLOAT128_VECTOR_P (TFmode))
2957 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2958 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2961 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2962 available. */
2963 if (TARGET_NO_SDMODE_STACK)
2965 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2966 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2969 if (TARGET_VSX)
2971 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2972 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2975 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2977 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2978 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2979 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2980 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2981 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2982 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2983 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2984 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2985 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2987 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2988 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2989 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2990 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2991 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2992 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2993 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2994 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2995 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2997 if (FLOAT128_VECTOR_P (KFmode))
2999 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3000 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3003 if (FLOAT128_VECTOR_P (TFmode))
3005 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3006 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3010 else
3012 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3013 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3014 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3015 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3016 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3017 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3018 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3019 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3020 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3021 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3022 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3023 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3024 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3025 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3026 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3027 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3028 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3029 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3030 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3031 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3033 if (FLOAT128_VECTOR_P (KFmode))
3035 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3036 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3039 if (FLOAT128_IEEE_P (TFmode))
3041 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3042 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3045 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3046 available. */
3047 if (TARGET_NO_SDMODE_STACK)
3049 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3050 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3053 if (TARGET_VSX)
3055 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3056 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3059 if (TARGET_DIRECT_MOVE)
3061 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3062 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3063 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3067 reg_addr[DFmode].scalar_in_vmx_p = true;
3068 reg_addr[DImode].scalar_in_vmx_p = true;
3070 if (TARGET_P8_VECTOR)
3072 reg_addr[SFmode].scalar_in_vmx_p = true;
3073 reg_addr[SImode].scalar_in_vmx_p = true;
3075 if (TARGET_P9_VECTOR)
3077 reg_addr[HImode].scalar_in_vmx_p = true;
3078 reg_addr[QImode].scalar_in_vmx_p = true;
3083 /* Precalculate HARD_REGNO_NREGS. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_nregs[m][r]
3087 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3089 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3090 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3091 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3092 rs6000_hard_regno_mode_ok_p[m][r]
3093 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3095 /* Precalculate CLASS_MAX_NREGS sizes. */
3096 for (c = 0; c < LIM_REG_CLASSES; ++c)
3098 int reg_size;
3100 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3101 reg_size = UNITS_PER_VSX_WORD;
3103 else if (c == ALTIVEC_REGS)
3104 reg_size = UNITS_PER_ALTIVEC_WORD;
3106 else if (c == FLOAT_REGS)
3107 reg_size = UNITS_PER_FP_WORD;
3109 else
3110 reg_size = UNITS_PER_WORD;
3112 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3114 machine_mode m2 = (machine_mode)m;
3115 int reg_size2 = reg_size;
3117 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3118 in VSX. */
3119 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3120 reg_size2 = UNITS_PER_FP_WORD;
3122 rs6000_class_max_nregs[m][c]
3123 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3127 /* Calculate the modes for which to automatically generate code using the
3128 reciprocal divide and square root estimate instructions. In the future,
3129 possibly generate the instructions even if the user did not specify
3130 -mrecip; the double-precision reciprocal square root estimate on older
3131 machines is not accurate enough to do so. */
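/* Sketch of the refinement such estimates rely on (added for illustration;
   this is not code from this file): a reciprocal estimate x of 1/d is
   improved by x' = x*(2 - d*x), and an rsqrt estimate x of 1/sqrt(d) by
   x' = (x/2)*(3 - d*x*x); each Newton-Raphson step roughly doubles the
   number of accurate bits.  */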
3132 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3133 if (TARGET_FRES)
3134 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3135 if (TARGET_FRE)
3136 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3137 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3138 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3139 if (VECTOR_UNIT_VSX_P (V2DFmode))
3140 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3142 if (TARGET_FRSQRTES)
3143 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3144 if (TARGET_FRSQRTE)
3145 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3146 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3147 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3148 if (VECTOR_UNIT_VSX_P (V2DFmode))
3149 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3151 if (rs6000_recip_control)
3153 if (!flag_finite_math_only)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3155 "-ffast-math");
3156 if (flag_trapping_math)
3157 warning (0, "%qs requires %qs or %qs", "-mrecip",
3158 "-fno-trapping-math", "-ffast-math");
3159 if (!flag_reciprocal_math)
3160 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3161 "-ffast-math");
3162 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3164 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3165 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3166 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3168 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3169 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3170 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3172 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3173 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3174 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3176 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3177 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3178 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3180 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3181 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3182 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3184 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3185 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3186 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3188 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3189 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3190 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3192 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3193 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3194 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3198 /* Update the addr mask bits in reg_addr to help the secondary reload and
3199 legitimate-address support figure out the appropriate addressing to
3200 use. */
3201 rs6000_setup_reg_addr_masks ();
3203 if (global_init_p || TARGET_DEBUG_TARGET)
3205 if (TARGET_DEBUG_REG)
3206 rs6000_debug_reg_global ();
3208 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3209 fprintf (stderr,
3210 "SImode variable mult cost = %d\n"
3211 "SImode constant mult cost = %d\n"
3212 "SImode short constant mult cost = %d\n"
3213 "DImode multipliciation cost = %d\n"
3214 "SImode division cost = %d\n"
3215 "DImode division cost = %d\n"
3216 "Simple fp operation cost = %d\n"
3217 "DFmode multiplication cost = %d\n"
3218 "SFmode division cost = %d\n"
3219 "DFmode division cost = %d\n"
3220 "cache line size = %d\n"
3221 "l1 cache size = %d\n"
3222 "l2 cache size = %d\n"
3223 "simultaneous prefetches = %d\n"
3224 "\n",
3225 rs6000_cost->mulsi,
3226 rs6000_cost->mulsi_const,
3227 rs6000_cost->mulsi_const9,
3228 rs6000_cost->muldi,
3229 rs6000_cost->divsi,
3230 rs6000_cost->divdi,
3231 rs6000_cost->fp,
3232 rs6000_cost->dmul,
3233 rs6000_cost->sdiv,
3234 rs6000_cost->ddiv,
3235 rs6000_cost->cache_line_size,
3236 rs6000_cost->l1_cache_size,
3237 rs6000_cost->l2_cache_size,
3238 rs6000_cost->simultaneous_prefetches);
3242 #if TARGET_MACHO
3243 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3245 static void
3246 darwin_rs6000_override_options (void)
3248 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3249 off. */
3250 rs6000_altivec_abi = 1;
3251 TARGET_ALTIVEC_VRSAVE = 1;
3252 rs6000_current_abi = ABI_DARWIN;
3254 if (DEFAULT_ABI == ABI_DARWIN
3255 && TARGET_64BIT)
3256 darwin_one_byte_bool = 1;
3258 if (TARGET_64BIT && ! TARGET_POWERPC64)
3260 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3261 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3264 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3265 optimisation, and it will not work in the most generic case (where the
3266 symbol is an undefined external, but there is no symbol stub). */
3267 if (TARGET_64BIT)
3268 rs6000_default_long_calls = 0;
3270 /* ld_classic is (so far) still used for kernel (static) code, and supports
3271 the JBSR longcall / branch islands. */
3272 if (flag_mkernel)
3274 rs6000_default_long_calls = 1;
3276 /* Allow a kext author to do -mkernel -mhard-float. */
3277 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3278 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3281 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3282 Altivec. */
3283 if (!flag_mkernel && !flag_apple_kext
3284 && TARGET_64BIT
3285 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3286 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3288 /* Unless the user (not the configurer) has explicitly overridden
3289 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3290 G4 unless targeting the kernel. */
3291 if (!flag_mkernel
3292 && !flag_apple_kext
3293 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3294 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3295 && ! global_options_set.x_rs6000_cpu_index)
3297 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3300 #endif
3302 /* If not otherwise specified by a target, make 'long double' equivalent to
3303 'double'. */
3305 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3306 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3307 #endif
3309 /* Return the builtin mask of the various options used that could affect which
3310 builtins were used. In the past we used target_flags, but we've run out of
3311 bits, and some options are no longer in target_flags. */
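/* For example, compiling with -maltivec -mvsx yields a mask with at least
   RS6000_BTM_ALTIVEC and RS6000_BTM_VSX set, which the builtin machinery
   checks before expanding the corresponding builtins.  (Illustrative
   note.)  */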
3313 HOST_WIDE_INT
3314 rs6000_builtin_mask_calculate (void)
3316 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3317 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3318 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3319 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3320 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3321 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3322 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3323 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3324 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3325 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3326 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3327 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3328 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3329 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3330 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3331 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3332 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3333 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3334 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3335 | ((TARGET_LONG_DOUBLE_128
3336 && TARGET_HARD_FLOAT
3337 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3338 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3339 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3342 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3343 to clobber the XER[CA] bit because clobbering that bit without telling
3344 the compiler worked just fine with versions of GCC before GCC 5, and
3345 breaking a lot of older code in ways that are hard to track down is
3346 not such a great idea. */
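/* For example, an old-style asm using the classic "is non-zero" carry idiom,
     asm ("addic %0,%1,-1\n\tsubfe %0,%0,%1" : "=&r" (out) : "r" (in));
   silently sets and then reads XER[CA]; the implicit clobber pushed below
   keeps such code working.  (Illustrative example; the idiom computes
   out = (in != 0).)  */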
3348 static rtx_insn *
3349 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3350 vec<const char *> &/*constraints*/,
3351 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3353 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3354 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3355 return NULL;
3358 /* Override command line options.
3360 Combine build-specific configuration information with options
3361 specified on the command line to set various state variables which
3362 influence code generation, optimization, and expansion of built-in
3363 functions. Assure that command-line configuration preferences are
3364 compatible with each other and with the build configuration; issue
3365 warnings while adjusting configuration or error messages while
3366 rejecting configuration.
3368 Upon entry to this function:
3370 This function is called once at the beginning of
3371 compilation, and then again at the start and end of compiling
3372 each section of code that has a different configuration, as
3373 indicated, for example, by adding the
3375 __attribute__((__target__("cpu=power9")))
3377 qualifier to a function definition or, for example, by bracketing
3378 code between
3380 #pragma GCC target("altivec")
3384 #pragma GCC reset_options
3386 directives. Parameter global_init_p is true for the initial
3387 invocation, which initializes global variables, and false for all
3388 subsequent invocations.
3391 Various global state information is assumed to be valid. This
3392 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3393 default CPU specified at build configure time, TARGET_DEFAULT,
3394 representing the default set of option flags for the default
3395 target, and global_options_set.x_rs6000_isa_flags, representing
3396 which options were requested on the command line.
3398 Upon return from this function:
3400 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3401 was set by name on the command line. Additionally, if certain
3402 attributes are automatically enabled or disabled by this function
3403 in order to assure compatibility between options and
3404 configuration, the flags associated with those attributes are
3405 also set. By setting these "explicit bits", we avoid the risk
3406 that other code might accidentally overwrite these particular
3407 attributes with "default values".
3409 The various bits of rs6000_isa_flags are set to indicate the
3410 target options that have been selected for the most current
3411 compilation efforts. This has the effect of also turning on the
3412 associated TARGET_XXX values since these are macros which are
3413 generally defined to test the corresponding bit of the
3414 rs6000_isa_flags variable.
3416 The variable rs6000_builtin_mask is set to represent the target
3417 options for the most current compilation efforts, consistent with
3418 the current contents of rs6000_isa_flags. This variable controls
3419 expansion of built-in functions.
3421 Various other global variables and fields of global structures
3422 (over 50 in all) are initialized to reflect the desired options
3423 for the most current compilation efforts. */
3425 static bool
3426 rs6000_option_override_internal (bool global_init_p)
3428 bool ret = true;
3430 HOST_WIDE_INT set_masks;
3431 HOST_WIDE_INT ignore_masks;
3432 int cpu_index = -1;
3433 int tune_index;
3434 struct cl_target_option *main_target_opt
3435 = ((global_init_p || target_option_default_node == NULL)
3436 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3438 /* Print defaults. */
3439 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3440 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3442 /* Remember the explicit arguments. */
3443 if (global_init_p)
3444 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3446 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3447 library functions, so warn about it. The flag may be useful for
3448 performance studies from time to time though, so don't disable it
3449 entirely. */
3450 if (global_options_set.x_rs6000_alignment_flags
3451 && rs6000_alignment_flags == MASK_ALIGN_POWER
3452 && DEFAULT_ABI == ABI_DARWIN
3453 && TARGET_64BIT)
3454 warning (0, "%qs is not supported for 64-bit Darwin;"
3455 " it is incompatible with the installed C and C++ libraries",
3456 "-malign-power");
3458 /* Numerous experiments show that IRA-based loop pressure
3459 calculation works better for RTL loop invariant motion on targets
3460 with enough (>= 32) registers. It is an expensive optimization,
3461 so it is enabled only when optimizing for peak performance. */
3462 if (optimize >= 3 && global_init_p
3463 && !global_options_set.x_flag_ira_loop_pressure)
3464 flag_ira_loop_pressure = 1;
3466 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3467 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3468 options were already specified. */
3469 if (flag_sanitize & SANITIZE_USER_ADDRESS
3470 && !global_options_set.x_flag_asynchronous_unwind_tables)
3471 flag_asynchronous_unwind_tables = 1;
3473 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3474 loop unroller is active. It is only checked during unrolling, so
3475 we can just set it on by default. */
3476 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3477 flag_variable_expansion_in_unroller = 1;
3479 /* Set the pointer size. */
3480 if (TARGET_64BIT)
3482 rs6000_pmode = DImode;
3483 rs6000_pointer_size = 64;
3485 else
3487 rs6000_pmode = SImode;
3488 rs6000_pointer_size = 32;
3491 /* Some OSs don't support saving the high part of 64-bit registers on context
3492 switch. Other OSs don't support saving Altivec registers. On those OSs,
3493 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3494 if the user wants either, the user must explicitly specify them and we
3495 won't interfere with the user's specification. */
3497 set_masks = POWERPC_MASKS;
3498 #ifdef OS_MISSING_POWERPC64
3499 if (OS_MISSING_POWERPC64)
3500 set_masks &= ~OPTION_MASK_POWERPC64;
3501 #endif
3502 #ifdef OS_MISSING_ALTIVEC
3503 if (OS_MISSING_ALTIVEC)
3504 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3505 | OTHER_VSX_VECTOR_MASKS);
3506 #endif
3508 /* Don't let the processor default override options given explicitly. */
3509 set_masks &= ~rs6000_isa_flags_explicit;
3511 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3512 the cpu in a target attribute or pragma, but did not specify a tuning
3513 option, use the cpu for the tuning option rather than the option specified
3514 with -mtune on the command line. Process a '--with-cpu' configuration
3515 request as an implicit --cpu. */
3516 if (rs6000_cpu_index >= 0)
3517 cpu_index = rs6000_cpu_index;
3518 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3519 cpu_index = main_target_opt->x_rs6000_cpu_index;
3520 else if (OPTION_TARGET_CPU_DEFAULT)
3521 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3523 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3524 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3525 with those from the cpu, except for options that were explicitly set. If
3526 we don't have a cpu, do not override the target bits set in
3527 TARGET_DEFAULT. */
3528 if (cpu_index >= 0)
3530 rs6000_cpu_index = cpu_index;
3531 rs6000_isa_flags &= ~set_masks;
3532 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3533 & set_masks);
3535 else
3537 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3538 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3539 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3540 to using rs6000_isa_flags, we need to do the initialization here.
3542 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3543 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3544 HOST_WIDE_INT flags;
3545 if (TARGET_DEFAULT)
3546 flags = TARGET_DEFAULT;
3547 else
3549 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3550 const char *default_cpu = (!TARGET_POWERPC64
3551 ? "powerpc"
3552 : (BYTES_BIG_ENDIAN
3553 ? "powerpc64"
3554 : "powerpc64le"));
3555 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3556 flags = processor_target_table[default_cpu_index].target_enable;
3558 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
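/* Worked sketch (not compiled; the flag values are hypothetical): how
   explicit options shield the defaults merged above.  */
#if 0
HOST_WIDE_INT dflt = OPTION_MASK_VSX | OPTION_MASK_ALTIVEC;
HOST_WIDE_INT expl = OPTION_MASK_VSX;   /* user passed -mno-vsx */
/* Only the AltiVec bit is added; the explicit -mno-vsx survives.  */
HOST_WIDE_INT added = dflt & ~expl;
#endif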
3561 if (rs6000_tune_index >= 0)
3562 tune_index = rs6000_tune_index;
3563 else if (cpu_index >= 0)
3564 rs6000_tune_index = tune_index = cpu_index;
3565 else
3567 size_t i;
3568 enum processor_type tune_proc
3569 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3571 tune_index = -1;
3572 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3573 if (processor_target_table[i].processor == tune_proc)
3575 tune_index = i;
3576 break;
3580 if (cpu_index >= 0)
3581 rs6000_cpu = processor_target_table[cpu_index].processor;
3582 else
3583 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3585 gcc_assert (tune_index >= 0);
3586 rs6000_tune = processor_target_table[tune_index].processor;
3588 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3589 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3590 || rs6000_cpu == PROCESSOR_PPCE5500)
3592 if (TARGET_ALTIVEC)
3593 error ("AltiVec not supported in this target");
3596 /* If we are optimizing big endian systems for space, use the load/store
3597 multiple instructions. */
3598 if (BYTES_BIG_ENDIAN && optimize_size)
3599 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3601 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3602 because the hardware doesn't support the instructions used in little
3603 endian mode, and using them causes an alignment trap. The 750 does not
3604 cause an alignment trap (except when the target is unaligned). */
3606 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3608 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3609 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3610 warning (0, "%qs is not supported on little endian systems",
3611 "-mmultiple");
3614 /* If little-endian, default to -mstrict-align on older processors.
3615 Testing for htm matches power8 and later. */
3616 if (!BYTES_BIG_ENDIAN
3617 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3618 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3620 if (!rs6000_fold_gimple)
3621 fprintf (stderr,
3622 "gimple folding of rs6000 builtins has been disabled.\n");
3624 /* Add some warnings for VSX. */
3625 if (TARGET_VSX)
3627 const char *msg = NULL;
3628 if (!TARGET_HARD_FLOAT)
3630 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3631 msg = N_("%<-mvsx%> requires hardware floating point");
3632 else
3634 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3635 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3638 else if (TARGET_AVOID_XFORM > 0)
3639 msg = N_("%<-mvsx%> needs indexed addressing");
3640 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3641 & OPTION_MASK_ALTIVEC))
3643 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3644 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3645 else
3646 msg = N_("%<-mno-altivec%> disables vsx");
3649 if (msg)
3651 warning (0, msg);
3652 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3653 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3657 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3658 the -mcpu setting to enable options that conflict. */
3659 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3660 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3661 | OPTION_MASK_ALTIVEC
3662 | OPTION_MASK_VSX)) != 0)
3663 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3664 | OPTION_MASK_DIRECT_MOVE)
3665 & ~rs6000_isa_flags_explicit);
3667 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3668 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3670 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3671 off all of the options that depend on those flags. */
3672 ignore_masks = rs6000_disable_incompatible_switches ();
3674 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3675 unless the user explicitly used the -mno-<option> to disable the code. */
3676 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3677 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3678 else if (TARGET_P9_MINMAX)
3680 if (cpu_index >= 0)
3682 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3684 /* legacy behavior: allow -mcpu=power9 with certain
3685 capabilities explicitly disabled. */
3686 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3688 else
3689 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3690 "for <xxx> less than power9", "-mcpu");
3692 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3693 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3694 & rs6000_isa_flags_explicit))
3695 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3696 were explicitly cleared. */
3697 error ("%qs incompatible with explicitly disabled options",
3698 "-mpower9-minmax");
3699 else
3700 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3702 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3703 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_VSX)
3705 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3706 else if (TARGET_POPCNTD)
3707 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3708 else if (TARGET_DFP)
3709 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3710 else if (TARGET_CMPB)
3711 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3712 else if (TARGET_FPRND)
3713 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3714 else if (TARGET_POPCNTB)
3715 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3716 else if (TARGET_ALTIVEC)
3717 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3719 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3721 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3722 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3723 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3726 if (!TARGET_FPRND && TARGET_VSX)
3728 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3729 /* TARGET_VSX = 1 implies Power7 and newer. */
3730 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3731 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3734 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3736 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3737 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3738 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3741 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3743 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3744 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3745 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3748 if (TARGET_P8_VECTOR && !TARGET_VSX)
3750 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3751 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3752 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3753 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3755 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3757 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3759 else
3761 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3762 not explicit. */
3763 rs6000_isa_flags |= OPTION_MASK_VSX;
3764 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3768 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3770 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3771 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3772 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3775 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3776 silently turn off quad memory mode. */
3777 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3779 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3780 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3782 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3783 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3785 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3786 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3789 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3790 the words are reversed, but atomic operations can still be done by
3791 swapping the words. */
3792 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3794 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3795 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3796 "mode"));
3798 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3801 /* Assume that if the user asked for normal quad memory instructions, they
3802 want the atomic versions as well, unless they explicitly told us not to
3803 use quad word atomic instructions. */
3804 if (TARGET_QUAD_MEMORY
3805 && !TARGET_QUAD_MEMORY_ATOMIC
3806 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3807 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3809 /* If we can shrink-wrap the TOC register save separately, then use
3810 -msave-toc-indirect unless explicitly disabled. */
3811 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3812 && flag_shrink_wrap_separate
3813 && optimize_function_for_speed_p (cfun))
3814 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3816 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3817 generating power8 instructions. Power9 does not optimize power8 fusion
3818 cases. */
3819 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3821 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3822 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3823 else
3824 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3827 /* Setting additional fusion flags turns on base fusion. */
3828 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3830 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3832 if (TARGET_P8_FUSION_SIGN)
3833 error ("%qs requires %qs", "-mpower8-fusion-sign",
3834 "-mpower8-fusion");
3836 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3838 else
3839 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3842 /* Power8 does not fuse sign-extended loads with the addis. If we are
3843 optimizing at high levels for speed, convert a sign-extended load into a
3844 zero-extending load and an explicit sign extension. */
3845 if (TARGET_P8_FUSION
3846 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3847 && optimize_function_for_speed_p (cfun)
3848 && optimize >= 3)
3849 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
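/* Illustrative example (schematic asm): with fusion sign conversion, a
   sign-extending halfword load such as
     addis 9,2,sym@toc@ha
     lha 9,sym@toc@l(9)    ; not a fusion candidate
   is emitted instead as a zero-extending load plus an explicit sign
   extension, so the addis/load pair can fuse:
     addis 9,2,sym@toc@ha
     lhz 9,sym@toc@l(9)    ; fuses with the addis
     extsh 9,9             ; explicit sign extension  */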
3851 /* ISA 3.0 vector instructions include ISA 2.07. */
3852 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3854 /* We prefer to not mention undocumented options in
3855 error messages. However, if users have managed to select
3856 power9-vector without selecting power8-vector, they
3857 already know about undocumented flags. */
3858 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3859 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3860 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3861 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3863 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3864 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3865 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3867 else
3869 /* OPTION_MASK_P9_VECTOR is explicit and
3870 OPTION_MASK_P8_VECTOR is not explicit. */
3871 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3872 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3876 /* Explicitly enable -mallow-movmisalign if we have full ISA 2.07
3877 support. If we only have ISA 2.06 support and the user did not specify
3878 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3879 but we don't enable the full vectorization support. */
3880 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3881 TARGET_ALLOW_MOVMISALIGN = 1;
3883 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3885 if (TARGET_ALLOW_MOVMISALIGN > 0
3886 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3887 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3889 TARGET_ALLOW_MOVMISALIGN = 0;
3892 /* Determine when unaligned vector accesses are permitted, and when
3893 they are preferred over masked Altivec loads. Note that if
3894 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3895 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3896 not true. */
3897 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3899 if (!TARGET_VSX)
3901 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3902 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3904 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3907 else if (!TARGET_ALLOW_MOVMISALIGN)
3909 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3910 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3911 "-mallow-movmisalign");
3913 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3917 /* Use long double size to select the appropriate long double. We use
3918 TYPE_PRECISION to differentiate the 3 different long double types. We map
3919 128 into the precision used for TFmode. */
3920 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3921 ? 64
3922 : FLOAT_PRECISION_TFmode);
3924 /* Set long double size before the IEEE 128-bit tests. */
3925 if (!global_options_set.x_rs6000_long_double_type_size)
3927 if (main_target_opt != NULL
3928 && (main_target_opt->x_rs6000_long_double_type_size
3929 != default_long_double_size))
3930 error ("target attribute or pragma changes %<long double%> size");
3931 else
3932 rs6000_long_double_type_size = default_long_double_size;
3934 else if (rs6000_long_double_type_size == 128)
3935 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3936 else if (global_options_set.x_rs6000_ieeequad)
3938 if (global_options.x_rs6000_ieeequad)
3939 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3940 else
3941 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3944 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3945 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3946 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3947 those systems will not pick up this default. Warn if the user changes the
3948 default unless -Wno-psabi. */
3949 if (!global_options_set.x_rs6000_ieeequad)
3950 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3952 else
3954 if (global_options.x_rs6000_ieeequad
3955 && (!TARGET_POPCNTD || !TARGET_VSX))
3956 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3958 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3960 static bool warned_change_long_double;
3961 if (!warned_change_long_double)
3963 warned_change_long_double = true;
3964 if (TARGET_IEEEQUAD)
3965 warning (OPT_Wpsabi, "Using IEEE extended precision "
3966 "%<long double%>");
3967 else
3968 warning (OPT_Wpsabi, "Using IBM extended precision "
3969 "%<long double%>");
3974 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3975 systems. In GCC 7, we would enable the IEEE 128-bit floating point
3976 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3977 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3978 the keyword as well as the type. */
3979 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3981 /* IEEE 128-bit floating point requires VSX support. */
3982 if (TARGET_FLOAT128_KEYWORD)
3984 if (!TARGET_VSX)
3986 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3987 error ("%qs requires VSX support", "-mfloat128");
3989 TARGET_FLOAT128_TYPE = 0;
3990 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3991 | OPTION_MASK_FLOAT128_HW);
3993 else if (!TARGET_FLOAT128_TYPE)
3995 TARGET_FLOAT128_TYPE = 1;
3996 warning (0, "the %<-mfloat128%> option may not be fully supported");
4000 /* Enable the __float128 keyword under Linux by default. */
4001 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4002 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4003 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4005 /* If we are supporting the float128 type and have full ISA 3.0 support,
4006 enable -mfloat128-hardware by default. However, don't enable the
4007 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4008 because sometimes the compiler wants to put things in an integer
4009 container, and if we don't have __int128 support, it is impossible. */
4010 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4011 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4012 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4013 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4015 if (TARGET_FLOAT128_HW
4016 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4018 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4019 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4021 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4024 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4026 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4027 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4029 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4032 /* Enable -mprefixed by default on 'future' systems. */
4033 if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4034 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4036 /* -mprefixed requires -mcpu=future. */
4037 else if (TARGET_PREFIXED && !TARGET_FUTURE)
4039 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4040 error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
4042 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4045 /* -mpcrel requires prefixed load/store addressing. */
4046 if (TARGET_PCREL && !TARGET_PREFIXED)
4048 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4049 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4051 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
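/* Illustrative summary of the option dependency chain enforced above:
     -mcpu=future                  prefixed enabled by default
     -mcpu=power9 -mprefixed       error: -mprefixed requires -mcpu=future
     -mcpu=future -mno-prefixed -mpcrel
                                   error: -mpcrel requires -mprefixed  */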
4054 /* Print the options after updating the defaults. */
4055 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4056 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4058 /* E500mc does "better" if we inline more aggressively. Respect the
4059 user's opinion, though. */
4060 if (rs6000_block_move_inline_limit == 0
4061 && (rs6000_tune == PROCESSOR_PPCE500MC
4062 || rs6000_tune == PROCESSOR_PPCE500MC64
4063 || rs6000_tune == PROCESSOR_PPCE5500
4064 || rs6000_tune == PROCESSOR_PPCE6500))
4065 rs6000_block_move_inline_limit = 128;
4067 /* store_one_arg depends on expand_block_move to handle at least the
4068 size of reg_parm_stack_space. */
4069 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4070 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4072 if (global_init_p)
4074 /* If the appropriate debug option is enabled, replace the target hooks
4075 with debug versions that call the real version and then print
4076 debugging information. */
4077 if (TARGET_DEBUG_COST)
4079 targetm.rtx_costs = rs6000_debug_rtx_costs;
4080 targetm.address_cost = rs6000_debug_address_cost;
4081 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4084 if (TARGET_DEBUG_ADDR)
4086 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4087 targetm.legitimize_address = rs6000_debug_legitimize_address;
4088 rs6000_secondary_reload_class_ptr
4089 = rs6000_debug_secondary_reload_class;
4090 targetm.secondary_memory_needed
4091 = rs6000_debug_secondary_memory_needed;
4092 targetm.can_change_mode_class
4093 = rs6000_debug_can_change_mode_class;
4094 rs6000_preferred_reload_class_ptr
4095 = rs6000_debug_preferred_reload_class;
4096 rs6000_mode_dependent_address_ptr
4097 = rs6000_debug_mode_dependent_address;
4100 if (rs6000_veclibabi_name)
4102 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4103 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4104 else
4106 error ("unknown vectorization library ABI type (%qs) for "
4107 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4108 ret = false;
4113 /* Disable VSX and AltiVec silently if the user switched cpus to power7 in a
4114 target attribute or pragma which automatically enables both options,
4115 unless the AltiVec ABI was set. The AltiVec ABI is set by default for
4116 64-bit, but not for 32-bit. */
4117 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4119 TARGET_FLOAT128_TYPE = 0;
4120 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4121 | OPTION_MASK_FLOAT128_KEYWORD)
4122 & ~rs6000_isa_flags_explicit);
4125 /* Enable Altivec ABI for AIX -maltivec. */
4126 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4128 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4129 error ("target attribute or pragma changes AltiVec ABI");
4130 else
4131 rs6000_altivec_abi = 1;
4134 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4135 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4136 be explicitly overridden in either case. */
4137 if (TARGET_ELF)
4139 if (!global_options_set.x_rs6000_altivec_abi
4140 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4142 if (main_target_opt != NULL
4143 && !main_target_opt->x_rs6000_altivec_abi)
4144 error ("target attribute or pragma changes AltiVec ABI");
4145 else
4146 rs6000_altivec_abi = 1;
4150 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4151 So far, the only darwin64 targets are also MACH-O. */
4152 if (TARGET_MACHO
4153 && DEFAULT_ABI == ABI_DARWIN
4154 && TARGET_64BIT)
4156 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4157 error ("target attribute or pragma changes darwin64 ABI");
4158 else
4160 rs6000_darwin64_abi = 1;
4161 /* Default to natural alignment, for better performance. */
4162 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4166 /* Place FP constants in the constant pool instead of the TOC
4167 if section anchors are enabled. */
4168 if (flag_section_anchors
4169 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4170 TARGET_NO_FP_IN_TOC = 1;
4172 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4173 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4175 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4176 SUBTARGET_OVERRIDE_OPTIONS;
4177 #endif
4178 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4179 SUBSUBTARGET_OVERRIDE_OPTIONS;
4180 #endif
4181 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4182 SUB3TARGET_OVERRIDE_OPTIONS;
4183 #endif
4185 /* If the ABI has support for PC-relative relocations, enable it by default.
4186 This test depends on the sub-target tests above setting the code model to
4187 medium for ELF v2 systems. */
4188 if (PCREL_SUPPORTED_BY_OS
4189 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4190 rs6000_isa_flags |= OPTION_MASK_PCREL;
4192 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4193 after the subtarget override options are done. */
4194 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4196 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4197 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4199 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4202 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4203 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4205 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4206 && rs6000_tune != PROCESSOR_POWER5
4207 && rs6000_tune != PROCESSOR_POWER6
4208 && rs6000_tune != PROCESSOR_POWER7
4209 && rs6000_tune != PROCESSOR_POWER8
4210 && rs6000_tune != PROCESSOR_POWER9
4211 && rs6000_tune != PROCESSOR_FUTURE
4212 && rs6000_tune != PROCESSOR_PPCA2
4213 && rs6000_tune != PROCESSOR_CELL
4214 && rs6000_tune != PROCESSOR_PPC476);
4215 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4216 || rs6000_tune == PROCESSOR_POWER5
4217 || rs6000_tune == PROCESSOR_POWER7
4218 || rs6000_tune == PROCESSOR_POWER8);
4219 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4220 || rs6000_tune == PROCESSOR_POWER5
4221 || rs6000_tune == PROCESSOR_POWER6
4222 || rs6000_tune == PROCESSOR_POWER7
4223 || rs6000_tune == PROCESSOR_POWER8
4224 || rs6000_tune == PROCESSOR_POWER9
4225 || rs6000_tune == PROCESSOR_FUTURE
4226 || rs6000_tune == PROCESSOR_PPCE500MC
4227 || rs6000_tune == PROCESSOR_PPCE500MC64
4228 || rs6000_tune == PROCESSOR_PPCE5500
4229 || rs6000_tune == PROCESSOR_PPCE6500);
4231 /* Allow debug switches to override the above settings. These are set to -1
4232 in rs6000.opt to indicate the user hasn't directly set the switch. */
4233 if (TARGET_ALWAYS_HINT >= 0)
4234 rs6000_always_hint = TARGET_ALWAYS_HINT;
4236 if (TARGET_SCHED_GROUPS >= 0)
4237 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4239 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4240 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4242 rs6000_sched_restricted_insns_priority
4243 = (rs6000_sched_groups ? 1 : 0);
4245 /* Handle -msched-costly-dep option. */
4246 rs6000_sched_costly_dep
4247 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4249 if (rs6000_sched_costly_dep_str)
4251 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4252 rs6000_sched_costly_dep = no_dep_costly;
4253 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4254 rs6000_sched_costly_dep = all_deps_costly;
4255 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4256 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4257 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4258 rs6000_sched_costly_dep = store_to_load_dep_costly;
4259 else
4260 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4261 atoi (rs6000_sched_costly_dep_str));
4264 /* Handle -minsert-sched-nops option. */
4265 rs6000_sched_insert_nops
4266 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4268 if (rs6000_sched_insert_nops_str)
4270 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4271 rs6000_sched_insert_nops = sched_finish_none;
4272 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4273 rs6000_sched_insert_nops = sched_finish_pad_groups;
4274 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4275 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4276 else
4277 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4278 atoi (rs6000_sched_insert_nops_str));
4281 /* Handle the stack protector. */
4282 if (!global_options_set.x_rs6000_stack_protector_guard)
4283 #ifdef TARGET_THREAD_SSP_OFFSET
4284 rs6000_stack_protector_guard = SSP_TLS;
4285 #else
4286 rs6000_stack_protector_guard = SSP_GLOBAL;
4287 #endif
4289 #ifdef TARGET_THREAD_SSP_OFFSET
4290 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4291 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4292 #endif
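/* Illustrative sketch (schematic asm): with SSP_TLS the canary access in
   the prologue is thread-pointer relative, roughly
     ld 0,OFFSET(13)    ; r13 is the thread pointer on 64-bit
   where OFFSET stands for TARGET_THREAD_SSP_OFFSET, whereas SSP_GLOBAL
   references the global __stack_chk_guard symbol instead.  */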
4294 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4296 char *endp;
4297 const char *str = rs6000_stack_protector_guard_offset_str;
4299 errno = 0;
4300 long offset = strtol (str, &endp, 0);
4301 if (!*str || *endp || errno)
4302 error ("%qs is not a valid number in %qs", str,
4303 "-mstack-protector-guard-offset=");
4305 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4306 || (TARGET_64BIT && (offset & 3)))
4307 error ("%qs is not a valid offset in %qs", str,
4308 "-mstack-protector-guard-offset=");
4310 rs6000_stack_protector_guard_offset = offset;
4313 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4315 const char *str = rs6000_stack_protector_guard_reg_str;
4316 int reg = decode_reg_name (str);
4318 if (!IN_RANGE (reg, 1, 31))
4319 error ("%qs is not a valid base register in %qs", str,
4320 "-mstack-protector-guard-reg=");
4322 rs6000_stack_protector_guard_reg = reg;
4325 if (rs6000_stack_protector_guard == SSP_TLS
4326 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4327 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4329 if (global_init_p)
4331 #ifdef TARGET_REGNAMES
4332 /* If the user desires alternate register names, copy in the
4333 alternate names now. */
4334 if (TARGET_REGNAMES)
4335 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4336 #endif
4338 /* Set aix_struct_return last, after the ABI is determined.
4339 If -maix-struct-return or -msvr4-struct-return was explicitly
4340 used, don't override with the ABI default. */
4341 if (!global_options_set.x_aix_struct_return)
4342 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4344 #if 0
4345 /* IBM XL compiler defaults to unsigned bitfields. */
4346 if (TARGET_XL_COMPAT)
4347 flag_signed_bitfields = 0;
4348 #endif
4350 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4351 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4353 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4355 /* We can only guarantee the availability of DI pseudo-ops when
4356 assembling for 64-bit targets. */
4357 if (!TARGET_64BIT)
4359 targetm.asm_out.aligned_op.di = NULL;
4360 targetm.asm_out.unaligned_op.di = NULL;
4364 /* Set branch target alignment, if not optimizing for size. */
4365 if (!optimize_size)
4367 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4368 8-byte aligned to avoid misprediction by the branch predictor. */
4369 if (rs6000_tune == PROCESSOR_TITAN
4370 || rs6000_tune == PROCESSOR_CELL)
4372 if (flag_align_functions && !str_align_functions)
4373 str_align_functions = "8";
4374 if (flag_align_jumps && !str_align_jumps)
4375 str_align_jumps = "8";
4376 if (flag_align_loops && !str_align_loops)
4377 str_align_loops = "8";
4379 if (rs6000_align_branch_targets)
4381 if (flag_align_functions && !str_align_functions)
4382 str_align_functions = "16";
4383 if (flag_align_jumps && !str_align_jumps)
4384 str_align_jumps = "16";
4385 if (flag_align_loops && !str_align_loops)
4387 can_override_loop_align = 1;
4388 str_align_loops = "16";
4393 /* Arrange to save and restore machine status around nested functions. */
4394 init_machine_status = rs6000_init_machine_status;
4396 /* We should always be splitting complex arguments, but we can't break
4397 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4398 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4399 targetm.calls.split_complex_arg = NULL;
4401 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4402 if (DEFAULT_ABI == ABI_AIX)
4403 targetm.calls.custom_function_descriptors = 0;
4406 /* Initialize rs6000_cost with the appropriate target costs. */
4407 if (optimize_size)
4408 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4409 else
4410 switch (rs6000_tune)
4412 case PROCESSOR_RS64A:
4413 rs6000_cost = &rs64a_cost;
4414 break;
4416 case PROCESSOR_MPCCORE:
4417 rs6000_cost = &mpccore_cost;
4418 break;
4420 case PROCESSOR_PPC403:
4421 rs6000_cost = &ppc403_cost;
4422 break;
4424 case PROCESSOR_PPC405:
4425 rs6000_cost = &ppc405_cost;
4426 break;
4428 case PROCESSOR_PPC440:
4429 rs6000_cost = &ppc440_cost;
4430 break;
4432 case PROCESSOR_PPC476:
4433 rs6000_cost = &ppc476_cost;
4434 break;
4436 case PROCESSOR_PPC601:
4437 rs6000_cost = &ppc601_cost;
4438 break;
4440 case PROCESSOR_PPC603:
4441 rs6000_cost = &ppc603_cost;
4442 break;
4444 case PROCESSOR_PPC604:
4445 rs6000_cost = &ppc604_cost;
4446 break;
4448 case PROCESSOR_PPC604e:
4449 rs6000_cost = &ppc604e_cost;
4450 break;
4452 case PROCESSOR_PPC620:
4453 rs6000_cost = &ppc620_cost;
4454 break;
4456 case PROCESSOR_PPC630:
4457 rs6000_cost = &ppc630_cost;
4458 break;
4460 case PROCESSOR_CELL:
4461 rs6000_cost = &ppccell_cost;
4462 break;
4464 case PROCESSOR_PPC750:
4465 case PROCESSOR_PPC7400:
4466 rs6000_cost = &ppc750_cost;
4467 break;
4469 case PROCESSOR_PPC7450:
4470 rs6000_cost = &ppc7450_cost;
4471 break;
4473 case PROCESSOR_PPC8540:
4474 case PROCESSOR_PPC8548:
4475 rs6000_cost = &ppc8540_cost;
4476 break;
4478 case PROCESSOR_PPCE300C2:
4479 case PROCESSOR_PPCE300C3:
4480 rs6000_cost = &ppce300c2c3_cost;
4481 break;
4483 case PROCESSOR_PPCE500MC:
4484 rs6000_cost = &ppce500mc_cost;
4485 break;
4487 case PROCESSOR_PPCE500MC64:
4488 rs6000_cost = &ppce500mc64_cost;
4489 break;
4491 case PROCESSOR_PPCE5500:
4492 rs6000_cost = &ppce5500_cost;
4493 break;
4495 case PROCESSOR_PPCE6500:
4496 rs6000_cost = &ppce6500_cost;
4497 break;
4499 case PROCESSOR_TITAN:
4500 rs6000_cost = &titan_cost;
4501 break;
4503 case PROCESSOR_POWER4:
4504 case PROCESSOR_POWER5:
4505 rs6000_cost = &power4_cost;
4506 break;
4508 case PROCESSOR_POWER6:
4509 rs6000_cost = &power6_cost;
4510 break;
4512 case PROCESSOR_POWER7:
4513 rs6000_cost = &power7_cost;
4514 break;
4516 case PROCESSOR_POWER8:
4517 rs6000_cost = &power8_cost;
4518 break;
4520 case PROCESSOR_POWER9:
4521 case PROCESSOR_FUTURE:
4522 rs6000_cost = &power9_cost;
4523 break;
4525 case PROCESSOR_PPCA2:
4526 rs6000_cost = &ppca2_cost;
4527 break;
4529 default:
4530 gcc_unreachable ();
4533 if (global_init_p)
4535 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4536 param_simultaneous_prefetches,
4537 rs6000_cost->simultaneous_prefetches);
4538 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4539 param_l1_cache_size,
4540 rs6000_cost->l1_cache_size);
4541 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4542 param_l1_cache_line_size,
4543 rs6000_cost->cache_line_size);
4544 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4545 param_l2_cache_size,
4546 rs6000_cost->l2_cache_size);
4548 /* Increase loop peeling limits based on performance analysis. */
4549 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4550 param_max_peeled_insns, 400);
4551 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4552 param_max_completely_peeled_insns, 400);
4554 /* Use the 'model' -fsched-pressure algorithm by default. */
4555 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4556 param_sched_pressure_algorithm,
4557 SCHED_PRESSURE_MODEL);
4559 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4560 turns -fweb and -frename-registers on. */
4561 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4562 || (global_options_set.x_flag_unroll_all_loops
4563 && flag_unroll_all_loops))
4565 if (!global_options_set.x_unroll_only_small_loops)
4566 unroll_only_small_loops = 0;
4567 if (!global_options_set.x_flag_rename_registers)
4568 flag_rename_registers = 1;
4569 if (!global_options_set.x_flag_web)
4570 flag_web = 1;
4573 /* If using typedef char *va_list, signal that
4574 __builtin_va_start (&ap, 0) can be optimized to
4575 ap = __builtin_next_arg (0). */
4576 if (DEFAULT_ABI != ABI_V4)
4577 targetm.expand_builtin_va_start = NULL;
4580 /* If not explicitly specified via option, decide whether to generate indexed
4581 load/store instructions. A value of -1 indicates that the
4582 initial value of this variable has not been overwritten. During
4583 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4584 if (TARGET_AVOID_XFORM == -1)
4585 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4586 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4587 need indexed accesses and the type used is the scalar type of the element
4588 being loaded or stored. */
4589 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4590 && !TARGET_ALTIVEC);
4592 /* Set the -mrecip options. */
4593 if (rs6000_recip_name)
4595 char *p = ASTRDUP (rs6000_recip_name);
4596 char *q;
4597 unsigned int mask, i;
4598 bool invert;
4600 while ((q = strtok (p, ",")) != NULL)
4602 p = NULL;
4603 if (*q == '!')
4605 invert = true;
4606 q++;
4608 else
4609 invert = false;
4611 if (!strcmp (q, "default"))
4612 mask = ((TARGET_RECIP_PRECISION)
4613 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4614 else
4616 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4617 if (!strcmp (q, recip_options[i].string))
4619 mask = recip_options[i].mask;
4620 break;
4623 if (i == ARRAY_SIZE (recip_options))
4625 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4626 invert = false;
4627 mask = 0;
4628 ret = false;
4632 if (invert)
4633 rs6000_recip_control &= ~mask;
4634 else
4635 rs6000_recip_control |= mask;
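/* Usage sketch: a command line such as -mrecip=all,!rsqrtd (names per
   the recip_options table) walks the loop above; "all" ORs its mask
   into rs6000_recip_control, and the "!" prefix on "rsqrtd" clears
   that entry's mask again.  */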
4639 /* Set the builtin mask of the various options used that could affect which
4640 builtins were used. In the past we used target_flags, but we've run out
4641 of bits, and some options are no longer in target_flags. */
4642 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4643 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4644 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4645 rs6000_builtin_mask);
4647 /* Initialize all of the registers. */
4648 rs6000_init_hard_regno_mode_ok (global_init_p);
4650 /* Save the initial options in case the user does function-specific options. */
4651 if (global_init_p)
4652 target_option_default_node = target_option_current_node
4653 = build_target_option_node (&global_options);
4655 /* If not explicitly specified via option, decide whether to generate the
4656 extra blrs required to preserve the link stack on some cpus (e.g., 476). */
4657 if (TARGET_LINK_STACK == -1)
4658 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4660 /* Deprecate use of -mno-speculate-indirect-jumps. */
4661 if (!rs6000_speculate_indirect_jumps)
4662 warning (0, "%qs is deprecated and not recommended in any circumstances",
4663 "-mno-speculate-indirect-jumps");
4665 return ret;
4668 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4669 define the target cpu type. */
4671 static void
4672 rs6000_option_override (void)
4674 (void) rs6000_option_override_internal (true);
4678 /* Implement targetm.vectorize.builtin_mask_for_load. */
4679 static tree
4680 rs6000_builtin_mask_for_load (void)
4682 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4683 if ((TARGET_ALTIVEC && !TARGET_VSX)
4684 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4685 return altivec_builtin_mask_for_load;
4686 else
4687 return 0;
4690 /* Implement LOOP_ALIGN. */
4691 align_flags
4692 rs6000_loop_align (rtx label)
4694 basic_block bb;
4695 int ninsns;
4697 /* Don't override loop alignment if -falign-loops was specified. */
4698 if (!can_override_loop_align)
4699 return align_loops;
4701 bb = BLOCK_FOR_INSN (label);
4702 ninsns = num_loop_insns (bb->loop_father);
4704 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4705 if (ninsns > 4 && ninsns <= 8
4706 && (rs6000_tune == PROCESSOR_POWER4
4707 || rs6000_tune == PROCESSOR_POWER5
4708 || rs6000_tune == PROCESSOR_POWER6
4709 || rs6000_tune == PROCESSOR_POWER7
4710 || rs6000_tune == PROCESSOR_POWER8))
4711 return align_flags (5);
4712 else
4713 return align_loops;
4716 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4717 after applying N iterations. This routine does not determine
4718 how many iterations are required to reach the desired alignment. */
4720 static bool
4721 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4723 if (is_packed)
4724 return false;
4726 if (TARGET_32BIT)
4728 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4729 return true;
4731 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4732 return true;
4734 return false;
4736 else
4738 if (TARGET_MACHO)
4739 return false;
4741 /* Assume that all other types are naturally aligned. CHECKME! */
4742 return true;
4746 /* Return true if the vector misalignment factor is supported by the
4747 target. */
4748 static bool
4749 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4750 const_tree type,
4751 int misalignment,
4752 bool is_packed)
4754 if (TARGET_VSX)
4756 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4757 return true;
4759 /* Return false if the movmisalign pattern is not supported for this mode. */
4760 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4761 return false;
4763 if (misalignment == -1)
4765 /* Misalignment factor is unknown at compile time but we know
4766 it's word aligned. */
4767 if (rs6000_vector_alignment_reachable (type, is_packed))
4769 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4771 if (element_size == 64 || element_size == 32)
4772 return true;
4775 return false;
4778 /* VSX supports word-aligned vectors. */
4779 if (misalignment % 4 == 0)
4780 return true;
4782 return false;
4785 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4786 static int
4787 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4788 tree vectype, int misalign)
4790 unsigned elements;
4791 tree elem_type;
4793 switch (type_of_cost)
4795 case scalar_stmt:
4796 case scalar_store:
4797 case vector_stmt:
4798 case vector_store:
4799 case vec_to_scalar:
4800 case scalar_to_vec:
4801 case cond_branch_not_taken:
4802 return 1;
4803 case scalar_load:
4804 case vector_load:
4805 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4806 return 2;
4808 case vec_perm:
4809 /* Power7 has only one permute unit, make it a bit expensive. */
4810 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4811 return 3;
4812 else
4813 return 1;
4815 case vec_promote_demote:
4816 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4817 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4818 return 4;
4819 else
4820 return 1;
4822 case cond_branch_taken:
4823 return 3;
4825 case unaligned_load:
4826 case vector_gather_load:
4827 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4828 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4829 return 2;
4831 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4833 elements = TYPE_VECTOR_SUBPARTS (vectype);
4834 if (elements == 2)
4835 /* Double word aligned. */
4836 return 4;
4838 if (elements == 4)
4840 switch (misalign)
4842 case 8:
4843 /* Double word aligned. */
4844 return 4;
4846 case -1:
4847 /* Unknown misalignment. */
4848 case 4:
4849 case 12:
4850 /* Word aligned. */
4851 return 33;
4853 default:
4854 gcc_unreachable ();
4859 if (TARGET_ALTIVEC)
4860 /* Misaligned loads are not supported. */
4861 gcc_unreachable ();
4863 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4864 return 4;
4866 case unaligned_store:
4867 case vector_scatter_store:
4868 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4869 return 1;
4871 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4873 elements = TYPE_VECTOR_SUBPARTS (vectype);
4874 if (elements == 2)
4875 /* Double word aligned. */
4876 return 2;
4878 if (elements == 4)
4880 switch (misalign)
4882 case 8:
4883 /* Double word aligned. */
4884 return 2;
4886 case -1:
4887 /* Unknown misalignment. */
4888 case 4:
4889 case 12:
4890 /* Word aligned. */
4891 return 23;
4893 default:
4894 gcc_unreachable ();
4899 if (TARGET_ALTIVEC)
4900 /* Misaligned stores are not supported. */
4901 gcc_unreachable ();
4903 return 2;
4905 case vec_construct:
4906 /* This is a rough approximation assuming non-constant elements
4907 constructed into a vector via element insertion. FIXME:
4908 vec_construct is not granular enough for uniformly good
4909 decisions. If the initialization is a splat, this is
4910 cheaper than we estimate. Improve this someday. */
4911 elem_type = TREE_TYPE (vectype);
4912 /* 32-bit vectors loaded into registers are stored as double
4913 precision, so we need 2 permutes, 2 converts, and 1 merge
4914 to construct a vector of short floats from them. */
4915 if (SCALAR_FLOAT_TYPE_P (elem_type)
4916 && TYPE_PRECISION (elem_type) == 32)
4917 return 5;
4918 /* On POWER9, integer vector types are built up in GPRs and then
4919 use a direct move (2 cycles). For POWER8 this is even worse,
4920 as we need two direct moves and a merge, and the direct moves
4921 are five cycles. */
4922 else if (INTEGRAL_TYPE_P (elem_type))
4924 if (TARGET_P9_VECTOR)
4925 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4926 else
4927 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4929 else
4930 /* V2DFmode doesn't need a direct move. */
4931 return 2;
4933 default:
4934 gcc_unreachable ();
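/* Worked example for the vec_construct case above: building a V4SI from
   four scalar ints costs TYPE_VECTOR_SUBPARTS - 1 + 2 = 4 - 1 + 2 = 5
   with -mcpu=power9, but 4 - 1 + 5 = 8 for power8, where each direct
   move takes five cycles.  */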
4938 /* Implement targetm.vectorize.preferred_simd_mode. */
4940 static machine_mode
4941 rs6000_preferred_simd_mode (scalar_mode mode)
4943 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4945 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4946 return vmode.require ();
4948 return word_mode;
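/* For example, with 16-byte vectors mode_for_vector maps
   SFmode -> V4SFmode (16/4 lanes) and DFmode -> V2DFmode (16/8 lanes);
   scalar modes with no supported vector mode fall back to word_mode.  */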
4951 typedef struct _rs6000_cost_data
4953 struct loop *loop_info;
4954 unsigned cost[3];
4955 } rs6000_cost_data;
4957 /* Test for likely overcommitment of vector hardware resources. If a
4958 loop iteration is relatively large, and too large a percentage of
4959 instructions in the loop are vectorized, the cost model may not
4960 adequately reflect delays from unavailable vector resources.
4961 Penalize the loop body cost for this case. */
4963 static void
4964 rs6000_density_test (rs6000_cost_data *data)
4966 const int DENSITY_PCT_THRESHOLD = 85;
4967 const int DENSITY_SIZE_THRESHOLD = 70;
4968 const int DENSITY_PENALTY = 10;
4969 struct loop *loop = data->loop_info;
4970 basic_block *bbs = get_loop_body (loop);
4971 int nbbs = loop->num_nodes;
4972 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4973 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4974 int i, density_pct;
4976 for (i = 0; i < nbbs; i++)
4978 basic_block bb = bbs[i];
4979 gimple_stmt_iterator gsi;
4981 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4983 gimple *stmt = gsi_stmt (gsi);
4984 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4986 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4987 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4988 not_vec_cost++;
4992 free (bbs);
4993 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4995 if (density_pct > DENSITY_PCT_THRESHOLD
4996 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4998 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_NOTE, vect_location,
5001 "density %d%%, cost %d exceeds threshold, penalizing "
5002 "loop body cost by %d%%", density_pct,
5003 vec_cost + not_vec_cost, DENSITY_PENALTY);
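/* Worked example: with vec_cost = 90 and not_vec_cost = 5, density_pct
   = (90 * 100) / 95 = 94 > 85 and the loop size 95 > 70, so the body
   cost is penalized to 90 * 110 / 100 = 99.  */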
5007 /* Implement targetm.vectorize.init_cost. */
5009 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5010 instruction is needed by the vectorization. */
5011 static bool rs6000_vect_nonmem;
5013 static void *
5014 rs6000_init_cost (struct loop *loop_info)
5016 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5017 data->loop_info = loop_info;
5018 data->cost[vect_prologue] = 0;
5019 data->cost[vect_body] = 0;
5020 data->cost[vect_epilogue] = 0;
5021 rs6000_vect_nonmem = false;
5022 return data;
5025 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5026 For some statements, we would like to further fine-tune the cost on top of
5027 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5028 information on statement operation codes etc. One typical case here is
5029 COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
5030 for scalar cost, but it should be priced higher since it is transformed into
5031 either compare + branch or compare + isel instructions. */
5033 static unsigned
5034 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5035 struct _stmt_vec_info *stmt_info)
5037 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5038 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5040 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5041 if (subcode == COND_EXPR)
5042 return 2;
5045 return 0;
5048 /* Implement targetm.vectorize.add_stmt_cost. */
5050 static unsigned
5051 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5052 struct _stmt_vec_info *stmt_info, int misalign,
5053 enum vect_cost_model_location where)
5055 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5056 unsigned retval = 0;
5058 if (flag_vect_cost_model)
5060 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5061 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5062 misalign);
5063 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5064 /* Statements in an inner loop relative to the loop being
5065 vectorized are weighted more heavily. The value here is
5066 arbitrary and could potentially be improved with analysis. */
5067 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5068 count *= 50; /* FIXME. */
5070 retval = (unsigned) (count * stmt_cost);
5071 cost_data->cost[where] += retval;
5073 /* Check whether we're doing something other than just a copy loop.
5074 Not all such loops may be profitably vectorized; see
5075 rs6000_finish_cost. */
5076 if ((kind == vec_to_scalar || kind == vec_perm
5077 || kind == vec_promote_demote || kind == vec_construct
5078 || kind == scalar_to_vec)
5079 || (where == vect_body && kind == vector_stmt))
5080 rs6000_vect_nonmem = true;
5083 return retval;
5086 /* Implement targetm.vectorize.finish_cost. */
5088 static void
5089 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5090 unsigned *body_cost, unsigned *epilogue_cost)
5092 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5094 if (cost_data->loop_info)
5095 rs6000_density_test (cost_data);
5097 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5098 that require versioning for any reason. The vectorization is at
5099 best a wash inside the loop, and the versioning checks make
5100 profitability highly unlikely and potentially quite harmful. */
5101 if (cost_data->loop_info)
5103 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5104 if (!rs6000_vect_nonmem
5105 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5106 && LOOP_REQUIRES_VERSIONING (vec_info))
5107 cost_data->cost[vect_body] += 10000;
5110 *prologue_cost = cost_data->cost[vect_prologue];
5111 *body_cost = cost_data->cost[vect_body];
5112 *epilogue_cost = cost_data->cost[vect_epilogue];
5115 /* Implement targetm.vectorize.destroy_cost_data. */
5117 static void
5118 rs6000_destroy_cost_data (void *data)
5120 free (data);
5123 /* Implement targetm.loop_unroll_adjust. */
5125 static unsigned
5126 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5128 if (unroll_only_small_loops)
5130 /* TODO: This is hardcoded to 10 right now. It can be refined; for
5131 example, we may want to unroll very small loops more times (4 perhaps).
5132 We should also use a PARAM for this. */
5133 if (loop->ninsns <= 10)
5134 return MIN (2, nunroll);
5135 else
5136 return 0;
5139 return nunroll;
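/* Example (added commentary): with unroll_only_small_loops set, a loop
   body of 8 insns and a requested unroll factor of 8 yields MIN (2, 8)
   == 2, so the loop is unrolled at most twice, while an 11-insn loop
   returns 0 and is not unrolled at all.  */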
5142 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5143 library with vectorized intrinsics. */
5145 static tree
5146 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5147 tree type_in)
5149 char name[32];
5150 const char *suffix = NULL;
5151 tree fntype, new_fndecl, bdecl = NULL_TREE;
5152 int n_args = 1;
5153 const char *bname;
5154 machine_mode el_mode, in_mode;
5155 int n, in_n;
5157 /* Libmass is suitable for unsafe math only as it does not correctly support
5158 parts of IEEE with the required precision such as denormals. Only support
5159 it if we have VSX to use the simd d2 or f4 functions.
5160 XXX: Add variable length support. */
5161 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5162 return NULL_TREE;
5164 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5165 n = TYPE_VECTOR_SUBPARTS (type_out);
5166 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5167 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5168 if (el_mode != in_mode
5169 || n != in_n)
5170 return NULL_TREE;
5172 switch (fn)
5174 CASE_CFN_ATAN2:
5175 CASE_CFN_HYPOT:
5176 CASE_CFN_POW:
5177 n_args = 2;
5178 gcc_fallthrough ();
5180 CASE_CFN_ACOS:
5181 CASE_CFN_ACOSH:
5182 CASE_CFN_ASIN:
5183 CASE_CFN_ASINH:
5184 CASE_CFN_ATAN:
5185 CASE_CFN_ATANH:
5186 CASE_CFN_CBRT:
5187 CASE_CFN_COS:
5188 CASE_CFN_COSH:
5189 CASE_CFN_ERF:
5190 CASE_CFN_ERFC:
5191 CASE_CFN_EXP2:
5192 CASE_CFN_EXP:
5193 CASE_CFN_EXPM1:
5194 CASE_CFN_LGAMMA:
5195 CASE_CFN_LOG10:
5196 CASE_CFN_LOG1P:
5197 CASE_CFN_LOG2:
5198 CASE_CFN_LOG:
5199 CASE_CFN_SIN:
5200 CASE_CFN_SINH:
5201 CASE_CFN_SQRT:
5202 CASE_CFN_TAN:
5203 CASE_CFN_TANH:
5204 if (el_mode == DFmode && n == 2)
5206 bdecl = mathfn_built_in (double_type_node, fn);
5207 suffix = "d2"; /* pow -> powd2 */
5209 else if (el_mode == SFmode && n == 4)
5211 bdecl = mathfn_built_in (float_type_node, fn);
5212 suffix = "4"; /* powf -> powf4 */
5214 else
5215 return NULL_TREE;
5216 if (!bdecl)
5217 return NULL_TREE;
5218 break;
5220 default:
5221 return NULL_TREE;
5224 gcc_assert (suffix != NULL);
5225 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5226 if (!bname)
5227 return NULL_TREE;
5229 strcpy (name, bname + strlen ("__builtin_"));
5230 strcat (name, suffix);
5232 if (n_args == 1)
5233 fntype = build_function_type_list (type_out, type_in, NULL);
5234 else if (n_args == 2)
5235 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5236 else
5237 gcc_unreachable ();
5239 /* Build a function declaration for the vectorized function. */
5240 new_fndecl = build_decl (BUILTINS_LOCATION,
5241 FUNCTION_DECL, get_identifier (name), fntype);
5242 TREE_PUBLIC (new_fndecl) = 1;
5243 DECL_EXTERNAL (new_fndecl) = 1;
5244 DECL_IS_NOVOPS (new_fndecl) = 1;
5245 TREE_READONLY (new_fndecl) = 1;
5247 return new_fndecl;
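/* Example of the name mangling above (added commentary): vectorizing
   __builtin_pow for V2DFmode strips the "__builtin_" prefix and appends
   the "d2" suffix, producing a call to the MASS routine "powd2", while
   __builtin_powf for V4SFmode becomes "powf4".  */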
5250 /* Returns a function decl for a vectorized version of the builtin function
5251 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5252 if it is not available. */
5254 static tree
5255 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5256 tree type_in)
5258 machine_mode in_mode, out_mode;
5259 int in_n, out_n;
5261 if (TARGET_DEBUG_BUILTIN)
5262 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5263 combined_fn_name (combined_fn (fn)),
5264 GET_MODE_NAME (TYPE_MODE (type_out)),
5265 GET_MODE_NAME (TYPE_MODE (type_in)));
5267 if (TREE_CODE (type_out) != VECTOR_TYPE
5268 || TREE_CODE (type_in) != VECTOR_TYPE)
5269 return NULL_TREE;
5271 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5272 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5273 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5274 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5276 switch (fn)
5278 CASE_CFN_COPYSIGN:
5279 if (VECTOR_UNIT_VSX_P (V2DFmode)
5280 && out_mode == DFmode && out_n == 2
5281 && in_mode == DFmode && in_n == 2)
5282 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5283 if (VECTOR_UNIT_VSX_P (V4SFmode)
5284 && out_mode == SFmode && out_n == 4
5285 && in_mode == SFmode && in_n == 4)
5286 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5287 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5288 && out_mode == SFmode && out_n == 4
5289 && in_mode == SFmode && in_n == 4)
5290 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5291 break;
5292 CASE_CFN_CEIL:
5293 if (VECTOR_UNIT_VSX_P (V2DFmode)
5294 && out_mode == DFmode && out_n == 2
5295 && in_mode == DFmode && in_n == 2)
5296 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5297 if (VECTOR_UNIT_VSX_P (V4SFmode)
5298 && out_mode == SFmode && out_n == 4
5299 && in_mode == SFmode && in_n == 4)
5300 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5301 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5302 && out_mode == SFmode && out_n == 4
5303 && in_mode == SFmode && in_n == 4)
5304 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5305 break;
5306 CASE_CFN_FLOOR:
5307 if (VECTOR_UNIT_VSX_P (V2DFmode)
5308 && out_mode == DFmode && out_n == 2
5309 && in_mode == DFmode && in_n == 2)
5310 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5311 if (VECTOR_UNIT_VSX_P (V4SFmode)
5312 && out_mode == SFmode && out_n == 4
5313 && in_mode == SFmode && in_n == 4)
5314 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5315 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5316 && out_mode == SFmode && out_n == 4
5317 && in_mode == SFmode && in_n == 4)
5318 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5319 break;
5320 CASE_CFN_FMA:
5321 if (VECTOR_UNIT_VSX_P (V2DFmode)
5322 && out_mode == DFmode && out_n == 2
5323 && in_mode == DFmode && in_n == 2)
5324 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5325 if (VECTOR_UNIT_VSX_P (V4SFmode)
5326 && out_mode == SFmode && out_n == 4
5327 && in_mode == SFmode && in_n == 4)
5328 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5329 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5330 && out_mode == SFmode && out_n == 4
5331 && in_mode == SFmode && in_n == 4)
5332 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5333 break;
5334 CASE_CFN_TRUNC:
5335 if (VECTOR_UNIT_VSX_P (V2DFmode)
5336 && out_mode == DFmode && out_n == 2
5337 && in_mode == DFmode && in_n == 2)
5338 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5339 if (VECTOR_UNIT_VSX_P (V4SFmode)
5340 && out_mode == SFmode && out_n == 4
5341 && in_mode == SFmode && in_n == 4)
5342 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5343 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5344 && out_mode == SFmode && out_n == 4
5345 && in_mode == SFmode && in_n == 4)
5346 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5347 break;
5348 CASE_CFN_NEARBYINT:
5349 if (VECTOR_UNIT_VSX_P (V2DFmode)
5350 && flag_unsafe_math_optimizations
5351 && out_mode == DFmode && out_n == 2
5352 && in_mode == DFmode && in_n == 2)
5353 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5354 if (VECTOR_UNIT_VSX_P (V4SFmode)
5355 && flag_unsafe_math_optimizations
5356 && out_mode == SFmode && out_n == 4
5357 && in_mode == SFmode && in_n == 4)
5358 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5359 break;
5360 CASE_CFN_RINT:
5361 if (VECTOR_UNIT_VSX_P (V2DFmode)
5362 && !flag_trapping_math
5363 && out_mode == DFmode && out_n == 2
5364 && in_mode == DFmode && in_n == 2)
5365 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5366 if (VECTOR_UNIT_VSX_P (V4SFmode)
5367 && !flag_trapping_math
5368 && out_mode == SFmode && out_n == 4
5369 && in_mode == SFmode && in_n == 4)
5370 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5371 break;
5372 default:
5373 break;
5376 /* Generate calls to libmass if appropriate. */
5377 if (rs6000_veclib_handler)
5378 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5380 return NULL_TREE;
5383 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5385 static tree
5386 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5387 tree type_in)
5389 machine_mode in_mode, out_mode;
5390 int in_n, out_n;
5392 if (TARGET_DEBUG_BUILTIN)
5393 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5394 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5395 GET_MODE_NAME (TYPE_MODE (type_out)),
5396 GET_MODE_NAME (TYPE_MODE (type_in)));
5398 if (TREE_CODE (type_out) != VECTOR_TYPE
5399 || TREE_CODE (type_in) != VECTOR_TYPE)
5400 return NULL_TREE;
5402 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5403 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5404 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5405 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5407 enum rs6000_builtins fn
5408 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5409 switch (fn)
5411 case RS6000_BUILTIN_RSQRTF:
5412 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5413 && out_mode == SFmode && out_n == 4
5414 && in_mode == SFmode && in_n == 4)
5415 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5416 break;
5417 case RS6000_BUILTIN_RSQRT:
5418 if (VECTOR_UNIT_VSX_P (V2DFmode)
5419 && out_mode == DFmode && out_n == 2
5420 && in_mode == DFmode && in_n == 2)
5421 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5422 break;
5423 case RS6000_BUILTIN_RECIPF:
5424 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5425 && out_mode == SFmode && out_n == 4
5426 && in_mode == SFmode && in_n == 4)
5427 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5428 break;
5429 case RS6000_BUILTIN_RECIP:
5430 if (VECTOR_UNIT_VSX_P (V2DFmode)
5431 && out_mode == DFmode && out_n == 2
5432 && in_mode == DFmode && in_n == 2)
5433 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5434 break;
5435 default:
5436 break;
5438 return NULL_TREE;
5441 /* Default CPU string for rs6000*_file_start functions. */
5442 static const char *rs6000_default_cpu;
5444 #ifdef USING_ELFOS_H
5445 const char *rs6000_machine;
5447 const char *
5448 rs6000_machine_from_flags (void)
5450 HOST_WIDE_INT flags = rs6000_isa_flags;
5452 /* Disable the flags that should never influence the .machine selection. */
5453 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5455 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5456 return "future";
5457 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5458 return "power9";
5459 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5460 return "power8";
5461 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5462 return "power7";
5463 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5464 return "power6";
5465 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5466 return "power5";
5467 if ((flags & ISA_2_1_MASKS) != 0)
5468 return "power4";
5469 if ((flags & OPTION_MASK_POWERPC64) != 0)
5470 return "ppc64";
5471 return "ppc";
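/* Example (added commentary): the tests above run from newest ISA to
   oldest, so -mcpu=power8 sets bits that are in ISA_2_7_MASKS_SERVER but
   not in ISA_2_6_MASKS_SERVER and selects ".machine power8", while a
   plain -mpowerpc64 with no server ISA bits falls through to "ppc64".  */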
5474 void
5475 emit_asm_machine (void)
5477 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5479 #endif
5481 /* Do anything needed at the start of the asm file. */
5483 static void
5484 rs6000_file_start (void)
5486 char buffer[80];
5487 const char *start = buffer;
5488 FILE *file = asm_out_file;
5490 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5492 default_file_start ();
5494 if (flag_verbose_asm)
5496 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5498 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5500 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5501 start = "";
5504 if (global_options_set.x_rs6000_cpu_index)
5506 fprintf (file, "%s -mcpu=%s", start,
5507 processor_target_table[rs6000_cpu_index].name);
5508 start = "";
5511 if (global_options_set.x_rs6000_tune_index)
5513 fprintf (file, "%s -mtune=%s", start,
5514 processor_target_table[rs6000_tune_index].name);
5515 start = "";
5518 if (PPC405_ERRATUM77)
5520 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5521 start = "";
5524 #ifdef USING_ELFOS_H
5525 switch (rs6000_sdata)
5527 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5528 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5529 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5530 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5533 if (rs6000_sdata && g_switch_value)
5535 fprintf (file, "%s -G %d", start,
5536 g_switch_value);
5537 start = "";
5539 #endif
5541 if (*start == '\0')
5542 putc ('\n', file);
5545 #ifdef USING_ELFOS_H
5546 rs6000_machine = rs6000_machine_from_flags ();
5547 emit_asm_machine ();
5548 #endif
5550 if (DEFAULT_ABI == ABI_ELFv2)
5551 fprintf (file, "\t.abiversion 2\n");
5555 /* Return nonzero if this function is known to have a null epilogue. */
5557 int
5558 direct_return (void)
5560 if (reload_completed)
5562 rs6000_stack_t *info = rs6000_stack_info ();
5564 if (info->first_gp_reg_save == 32
5565 && info->first_fp_reg_save == 64
5566 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5567 && ! info->lr_save_p
5568 && ! info->cr_save_p
5569 && info->vrsave_size == 0
5570 && ! info->push_p)
5571 return 1;
5574 return 0;
5577 /* Helper for num_insns_constant. Calculate number of instructions to
5578 load VALUE to a single gpr using combinations of addi, addis, ori,
5579 oris and sldi instructions. */
5581 static int
5582 num_insns_constant_gpr (HOST_WIDE_INT value)
5584 /* signed constant loadable with addi */
5585 if (SIGNED_INTEGER_16BIT_P (value))
5586 return 1;
5588 /* constant loadable with addis */
5589 else if ((value & 0xffff) == 0
5590 && (value >> 31 == -1 || value >> 31 == 0))
5591 return 1;
5593 /* PADDI can support up to 34 bit signed integers. */
5594 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5595 return 1;
5597 else if (TARGET_POWERPC64)
5599 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5600 HOST_WIDE_INT high = value >> 31;
5602 if (high == 0 || high == -1)
5603 return 2;
5605 high >>= 1;
5607 if (low == 0)
5608 return num_insns_constant_gpr (high) + 1;
5609 else if (high == 0)
5610 return num_insns_constant_gpr (low) + 1;
5611 else
5612 return (num_insns_constant_gpr (high)
5613 + num_insns_constant_gpr (low) + 1);
5616 else
5617 return 2;
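/* Worked example (added commentary): for the 64-bit value 0x100000001
   without prefixed instructions, neither the addi nor the addis case
   matches; the split gives low == 1 and (after the shifts) high == 1, so
   the result is 1 + 1 + 1 == 3, matching a sequence along the lines of
       li   rN,1
       sldi rN,rN,32
       ori  rN,rN,1
   When TARGET_PREFIXED is set, the same value fits in a 34-bit signed
   immediate and a single paddi/pli suffices.  */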
5620 /* Helper for num_insns_constant. Allow constants formed by the
5621 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5622 and handle modes that require multiple gprs. */
5624 static int
5625 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5627 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5628 int total = 0;
5629 while (nregs-- > 0)
5631 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5632 int insns = num_insns_constant_gpr (low);
5633 if (insns > 2
5634 /* We won't get more than 2 from num_insns_constant_gpr
5635 except when TARGET_POWERPC64 and mode is DImode or
5636 wider, so the register mode must be DImode. */
5637 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5638 insns = 2;
5639 total += insns;
5640 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5641 it all at once would be UB. */
5642 value >>= (BITS_PER_WORD - 1);
5643 value >>= 1;
5645 return total;
5648 /* Return the number of instructions it takes to form a constant in as
5649 many gprs as are needed for MODE. */
5651 int
5652 num_insns_constant (rtx op, machine_mode mode)
5654 HOST_WIDE_INT val;
5656 switch (GET_CODE (op))
5658 case CONST_INT:
5659 val = INTVAL (op);
5660 break;
5662 case CONST_WIDE_INT:
5664 int insns = 0;
5665 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5666 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5667 DImode);
5668 return insns;
5671 case CONST_DOUBLE:
5673 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5675 if (mode == SFmode || mode == SDmode)
5677 long l;
5679 if (mode == SDmode)
5680 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5681 else
5682 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5683 /* See the first define_split in rs6000.md handling a
5684 const_double_operand. */
5685 val = l;
5686 mode = SImode;
5688 else if (mode == DFmode || mode == DDmode)
5690 long l[2];
5692 if (mode == DDmode)
5693 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5694 else
5695 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5697 /* See the second (32-bit) and third (64-bit) define_split
5698 in rs6000.md handling a const_double_operand. */
5699 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5700 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5701 mode = DImode;
5703 else if (mode == TFmode || mode == TDmode
5704 || mode == KFmode || mode == IFmode)
5706 long l[4];
5707 int insns;
5709 if (mode == TDmode)
5710 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5711 else
5712 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5714 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5715 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5716 insns = num_insns_constant_multi (val, DImode);
5717 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5718 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5719 insns += num_insns_constant_multi (val, DImode);
5720 return insns;
5722 else
5723 gcc_unreachable ();
5725 break;
5727 default:
5728 gcc_unreachable ();
5731 return num_insns_constant_multi (val, mode);
5734 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5735 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5736 corresponding element of the vector, but for V4SFmode, the
5737 corresponding "float" is interpreted as an SImode integer. */
5739 HOST_WIDE_INT
5740 const_vector_elt_as_int (rtx op, unsigned int elt)
5742 rtx tmp;
5744 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5745 gcc_assert (GET_MODE (op) != V2DImode
5746 && GET_MODE (op) != V2DFmode);
5748 tmp = CONST_VECTOR_ELT (op, elt);
5749 if (GET_MODE (op) == V4SFmode)
5750 tmp = gen_lowpart (SImode, tmp);
5751 return INTVAL (tmp);
5754 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5755 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5756 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5757 all items are set to the same value and contain COPIES replicas of the
5758 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5759 operand and the others are set to the value of the operand's msb. */
5761 static bool
5762 vspltis_constant (rtx op, unsigned step, unsigned copies)
5764 machine_mode mode = GET_MODE (op);
5765 machine_mode inner = GET_MODE_INNER (mode);
5767 unsigned i;
5768 unsigned nunits;
5769 unsigned bitsize;
5770 unsigned mask;
5772 HOST_WIDE_INT val;
5773 HOST_WIDE_INT splat_val;
5774 HOST_WIDE_INT msb_val;
5776 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5777 return false;
5779 nunits = GET_MODE_NUNITS (mode);
5780 bitsize = GET_MODE_BITSIZE (inner);
5781 mask = GET_MODE_MASK (inner);
5783 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5784 splat_val = val;
5785 msb_val = val >= 0 ? 0 : -1;
5787 /* Construct the value to be splatted, if possible. If not, return false. */
5788 for (i = 2; i <= copies; i *= 2)
5790 HOST_WIDE_INT small_val;
5791 bitsize /= 2;
5792 small_val = splat_val >> bitsize;
5793 mask >>= bitsize;
5794 if (splat_val != ((HOST_WIDE_INT)
5795 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5796 | (small_val & mask)))
5797 return false;
5798 splat_val = small_val;
5801 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5802 if (EASY_VECTOR_15 (splat_val))
5803 ;
5805 /* Also check if we can splat, and then add the result to itself. Do so if
5806 the value is positive, or if the splat instruction is using OP's mode;
5807 for splat_val < 0, the splat and the add should use the same mode. */
5808 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5809 && (splat_val >= 0 || (step == 1 && copies == 1)))
5810 ;
5812 /* Also check if we are loading up the most significant bit, which can be
5813 done by loading up -1 and shifting the value left by -1. */
5814 else if (EASY_VECTOR_MSB (splat_val, inner))
5815 ;
5817 else
5818 return false;
5820 /* Check if VAL is present in every STEP-th element, and the
5821 other elements are filled with its most significant bit. */
5822 for (i = 1; i < nunits; ++i)
5824 HOST_WIDE_INT desired_val;
5825 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5826 if ((i & (step - 1)) == 0)
5827 desired_val = val;
5828 else
5829 desired_val = msb_val;
5831 if (desired_val != const_vector_elt_as_int (op, elt))
5832 return false;
5835 return true;
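/* Example (added commentary): a V8HImode constant with every element
   equal to 0x0101 fails the vspltisw and vspltish checks, but succeeds
   with step == 1, copies == 2: halving the element width gives
   small_val == 1, (1 << 8) | 1 reconstructs 0x0101, and
   EASY_VECTOR_15 (1) holds, so the vector is generated with a single
   "vspltisb v,1".  */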
5838 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5839 instruction, filling in the bottom elements with 0 or -1.
5841 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5842 for the number of zeroes to shift in, or negative for the number of 0xff
5843 bytes to shift in.
5845 OP is a CONST_VECTOR. */
5847 int
5848 vspltis_shifted (rtx op)
5850 machine_mode mode = GET_MODE (op);
5851 machine_mode inner = GET_MODE_INNER (mode);
5853 unsigned i, j;
5854 unsigned nunits;
5855 unsigned mask;
5857 HOST_WIDE_INT val;
5859 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5860 return false;
5862 /* We need to create pseudo registers to do the shift, so don't recognize
5863 shift vector constants after reload. */
5864 if (!can_create_pseudo_p ())
5865 return false;
5867 nunits = GET_MODE_NUNITS (mode);
5868 mask = GET_MODE_MASK (inner);
5870 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5872 /* Check if the value can really be the operand of a vspltis[bhw]. */
5873 if (EASY_VECTOR_15 (val))
5874 ;
5876 /* Also check if we are loading up the most significant bit which can be done
5877 by loading up -1 and shifting the value left by -1. */
5878 else if (EASY_VECTOR_MSB (val, inner))
5879 ;
5881 else
5882 return 0;
5884 /* Check if VAL is present in every STEP-th element until we find elements
5885 that are 0 or all 1 bits. */
5886 for (i = 1; i < nunits; ++i)
5888 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5889 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5891 /* If the value isn't the splat value, check for the remaining elements
5892 being 0/-1. */
5893 if (val != elt_val)
5895 if (elt_val == 0)
5897 for (j = i+1; j < nunits; ++j)
5899 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5900 if (const_vector_elt_as_int (op, elt2) != 0)
5901 return 0;
5904 return (nunits - i) * GET_MODE_SIZE (inner);
5907 else if ((elt_val & mask) == mask)
5909 for (j = i+1; j < nunits; ++j)
5911 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5912 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5913 return 0;
5916 return -((nunits - i) * GET_MODE_SIZE (inner));
5919 else
5920 return 0;
5924 /* If all elements are equal, we don't need to do VSLDOI. */
5925 return 0;
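/* Example (added commentary): the V4SImode constant { 5, 0, 0, 0 } (in
   big-endian element order) has val == 5 and hits the elt_val == 0 arm
   at i == 1, returning (4 - 1) * 4 == 12; the move is later split into
   "vspltisw v,5" followed by a VSLDOI that shifts in 12 zero bytes.  */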
5929 /* Return true if OP is of the given MODE and can be synthesized
5930 with a vspltisb, vspltish or vspltisw. */
5932 bool
5933 easy_altivec_constant (rtx op, machine_mode mode)
5935 unsigned step, copies;
5937 if (mode == VOIDmode)
5938 mode = GET_MODE (op);
5939 else if (mode != GET_MODE (op))
5940 return false;
5942 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5943 constants. */
5944 if (mode == V2DFmode)
5945 return zero_constant (op, mode);
5947 else if (mode == V2DImode)
5949 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5950 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5951 return false;
5953 if (zero_constant (op, mode))
5954 return true;
5956 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5957 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5958 return true;
5960 return false;
5963 /* V1TImode is a special container for TImode. Ignore for now. */
5964 else if (mode == V1TImode)
5965 return false;
5967 /* Start with a vspltisw. */
5968 step = GET_MODE_NUNITS (mode) / 4;
5969 copies = 1;
5971 if (vspltis_constant (op, step, copies))
5972 return true;
5974 /* Then try with a vspltish. */
5975 if (step == 1)
5976 copies <<= 1;
5977 else
5978 step >>= 1;
5980 if (vspltis_constant (op, step, copies))
5981 return true;
5983 /* And finally a vspltisb. */
5984 if (step == 1)
5985 copies <<= 1;
5986 else
5987 step >>= 1;
5989 if (vspltis_constant (op, step, copies))
5990 return true;
5992 if (vspltis_shifted (op) != 0)
5993 return true;
5995 return false;
5998 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5999 result is OP. Abort if it is not possible. */
6001 rtx
6002 gen_easy_altivec_constant (rtx op)
6004 machine_mode mode = GET_MODE (op);
6005 int nunits = GET_MODE_NUNITS (mode);
6006 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6007 unsigned step = nunits / 4;
6008 unsigned copies = 1;
6010 /* Start with a vspltisw. */
6011 if (vspltis_constant (op, step, copies))
6012 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6014 /* Then try with a vspltish. */
6015 if (step == 1)
6016 copies <<= 1;
6017 else
6018 step >>= 1;
6020 if (vspltis_constant (op, step, copies))
6021 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6023 /* And finally a vspltisb. */
6024 if (step == 1)
6025 copies <<= 1;
6026 else
6027 step >>= 1;
6029 if (vspltis_constant (op, step, copies))
6030 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6032 gcc_unreachable ();
6035 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6036 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6038 Return the number of instructions needed (1 or 2) into the address pointed
6039 via NUM_INSNS_PTR.
6041 Return the constant that is being split via CONSTANT_PTR. */
6043 bool
6044 xxspltib_constant_p (rtx op,
6045 machine_mode mode,
6046 int *num_insns_ptr,
6047 int *constant_ptr)
6049 size_t nunits = GET_MODE_NUNITS (mode);
6050 size_t i;
6051 HOST_WIDE_INT value;
6052 rtx element;
6054 /* Set the returned values to out-of-bound values. */
6055 *num_insns_ptr = -1;
6056 *constant_ptr = 256;
6058 if (!TARGET_P9_VECTOR)
6059 return false;
6061 if (mode == VOIDmode)
6062 mode = GET_MODE (op);
6064 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6065 return false;
6067 /* Handle (vec_duplicate <constant>). */
6068 if (GET_CODE (op) == VEC_DUPLICATE)
6070 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6071 && mode != V2DImode)
6072 return false;
6074 element = XEXP (op, 0);
6075 if (!CONST_INT_P (element))
6076 return false;
6078 value = INTVAL (element);
6079 if (!IN_RANGE (value, -128, 127))
6080 return false;
6083 /* Handle (const_vector [...]). */
6084 else if (GET_CODE (op) == CONST_VECTOR)
6086 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6087 && mode != V2DImode)
6088 return false;
6090 element = CONST_VECTOR_ELT (op, 0);
6091 if (!CONST_INT_P (element))
6092 return false;
6094 value = INTVAL (element);
6095 if (!IN_RANGE (value, -128, 127))
6096 return false;
6098 for (i = 1; i < nunits; i++)
6100 element = CONST_VECTOR_ELT (op, i);
6101 if (!CONST_INT_P (element))
6102 return false;
6104 if (value != INTVAL (element))
6105 return false;
6109 /* Handle integer constants being loaded into the upper part of the VSX
6110 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6111 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6112 else if (CONST_INT_P (op))
6114 if (!SCALAR_INT_MODE_P (mode))
6115 return false;
6117 value = INTVAL (op);
6118 if (!IN_RANGE (value, -128, 127))
6119 return false;
6121 if (!IN_RANGE (value, -1, 0))
6123 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6124 return false;
6126 if (EASY_VECTOR_15 (value))
6127 return false;
6131 else
6132 return false;
6134 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6135 sign extend. Special case 0/-1 to allow getting any VSX register instead
6136 of an Altivec register. */
6137 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6138 && EASY_VECTOR_15 (value))
6139 return false;
6141 /* Return # of instructions and the constant byte for XXSPLTIB. */
6142 if (mode == V16QImode)
6143 *num_insns_ptr = 1;
6145 else if (IN_RANGE (value, -1, 0))
6146 *num_insns_ptr = 1;
6148 else
6149 *num_insns_ptr = 2;
6151 *constant_ptr = (int) value;
6152 return true;
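/* Examples (added commentary): a V16QImode splat of 100 needs a single
   xxspltib; a V4SImode splat of 100 is accepted with *NUM_INSNS_PTR set
   to 2 (xxspltib followed by a vextsb2w sign extension); a V4SImode
   splat of 5 is rejected because EASY_VECTOR_15 holds and a single
   vspltisw is preferable.  */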
6155 const char *
6156 output_vec_const_move (rtx *operands)
6158 int shift;
6159 machine_mode mode;
6160 rtx dest, vec;
6162 dest = operands[0];
6163 vec = operands[1];
6164 mode = GET_MODE (dest);
6166 if (TARGET_VSX)
6168 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6169 int xxspltib_value = 256;
6170 int num_insns = -1;
6172 if (zero_constant (vec, mode))
6174 if (TARGET_P9_VECTOR)
6175 return "xxspltib %x0,0";
6177 else if (dest_vmx_p)
6178 return "vspltisw %0,0";
6180 else
6181 return "xxlxor %x0,%x0,%x0";
6184 if (all_ones_constant (vec, mode))
6186 if (TARGET_P9_VECTOR)
6187 return "xxspltib %x0,255";
6189 else if (dest_vmx_p)
6190 return "vspltisw %0,-1";
6192 else if (TARGET_P8_VECTOR)
6193 return "xxlorc %x0,%x0,%x0";
6195 else
6196 gcc_unreachable ();
6199 if (TARGET_P9_VECTOR
6200 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6202 if (num_insns == 1)
6204 operands[2] = GEN_INT (xxspltib_value & 0xff);
6205 return "xxspltib %x0,%2";
6208 return "#";
6212 if (TARGET_ALTIVEC)
6214 rtx splat_vec;
6216 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6217 if (zero_constant (vec, mode))
6218 return "vspltisw %0,0";
6220 if (all_ones_constant (vec, mode))
6221 return "vspltisw %0,-1";
6223 /* Do we need to construct a value using VSLDOI? */
6224 shift = vspltis_shifted (vec);
6225 if (shift != 0)
6226 return "#";
6228 splat_vec = gen_easy_altivec_constant (vec);
6229 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6230 operands[1] = XEXP (splat_vec, 0);
6231 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6232 return "#";
6234 switch (GET_MODE (splat_vec))
6236 case E_V4SImode:
6237 return "vspltisw %0,%1";
6239 case E_V8HImode:
6240 return "vspltish %0,%1";
6242 case E_V16QImode:
6243 return "vspltisb %0,%1";
6245 default:
6246 gcc_unreachable ();
6250 gcc_unreachable ();
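/* Example (added commentary): a zero V4SImode constant produces
   "xxspltib %x0,0" on ISA 3.0, "vspltisw %0,0" when the destination is
   an Altivec register, and "xxlxor %x0,%x0,%x0" otherwise; returning
   "#" above tells the output machinery that the insn must be split into
   a multi-instruction sequence later.  */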
6253 /* Initialize vector TARGET to VALS. */
6255 void
6256 rs6000_expand_vector_init (rtx target, rtx vals)
6258 machine_mode mode = GET_MODE (target);
6259 machine_mode inner_mode = GET_MODE_INNER (mode);
6260 int n_elts = GET_MODE_NUNITS (mode);
6261 int n_var = 0, one_var = -1;
6262 bool all_same = true, all_const_zero = true;
6263 rtx x, mem;
6264 int i;
6266 for (i = 0; i < n_elts; ++i)
6268 x = XVECEXP (vals, 0, i);
6269 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6270 ++n_var, one_var = i;
6271 else if (x != CONST0_RTX (inner_mode))
6272 all_const_zero = false;
6274 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6275 all_same = false;
6278 if (n_var == 0)
6280 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6281 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6282 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6284 /* Zero register. */
6285 emit_move_insn (target, CONST0_RTX (mode));
6286 return;
6288 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6290 /* Splat immediate. */
6291 emit_insn (gen_rtx_SET (target, const_vec));
6292 return;
6294 else
6296 /* Load from constant pool. */
6297 emit_move_insn (target, const_vec);
6298 return;
6302 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6303 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6305 rtx op[2];
6306 size_t i;
6307 size_t num_elements = all_same ? 1 : 2;
6308 for (i = 0; i < num_elements; i++)
6310 op[i] = XVECEXP (vals, 0, i);
6311 /* Just in case there is a SUBREG with a smaller mode, do a
6312 conversion. */
6313 if (GET_MODE (op[i]) != inner_mode)
6315 rtx tmp = gen_reg_rtx (inner_mode);
6316 convert_move (tmp, op[i], 0);
6317 op[i] = tmp;
6319 /* Allow load with splat double word. */
6320 else if (MEM_P (op[i]))
6322 if (!all_same)
6323 op[i] = force_reg (inner_mode, op[i]);
6325 else if (!REG_P (op[i]))
6326 op[i] = force_reg (inner_mode, op[i]);
6329 if (all_same)
6331 if (mode == V2DFmode)
6332 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6333 else
6334 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6336 else
6338 if (mode == V2DFmode)
6339 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6340 else
6341 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6343 return;
6346 /* Special case initializing vector int if we are on 64-bit systems with
6347 direct move or we have the ISA 3.0 instructions. */
6348 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6349 && TARGET_DIRECT_MOVE_64BIT)
6351 if (all_same)
6353 rtx element0 = XVECEXP (vals, 0, 0);
6354 if (MEM_P (element0))
6355 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6356 else
6357 element0 = force_reg (SImode, element0);
6359 if (TARGET_P9_VECTOR)
6360 emit_insn (gen_vsx_splat_v4si (target, element0));
6361 else
6363 rtx tmp = gen_reg_rtx (DImode);
6364 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6365 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6367 return;
6369 else
6371 rtx elements[4];
6372 size_t i;
6374 for (i = 0; i < 4; i++)
6375 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6377 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6378 elements[2], elements[3]));
6379 return;
6383 /* With single precision floating point on VSX, we know that internally
6384 single precision is actually represented as a double, and either make 2
6385 V2DF vectors, and convert these vectors to single precision, or do one
6386 conversion, and splat the result to the other elements. */
6387 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6389 if (all_same)
6391 rtx element0 = XVECEXP (vals, 0, 0);
6393 if (TARGET_P9_VECTOR)
6395 if (MEM_P (element0))
6396 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6398 emit_insn (gen_vsx_splat_v4sf (target, element0));
6401 else
6403 rtx freg = gen_reg_rtx (V4SFmode);
6404 rtx sreg = force_reg (SFmode, element0);
6405 rtx cvt = (TARGET_XSCVDPSPN
6406 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6407 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6409 emit_insn (cvt);
6410 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6411 const0_rtx));
6414 else
6416 rtx dbl_even = gen_reg_rtx (V2DFmode);
6417 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6418 rtx flt_even = gen_reg_rtx (V4SFmode);
6419 rtx flt_odd = gen_reg_rtx (V4SFmode);
6420 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6421 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6422 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6423 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6425 /* Use VMRGEW if we can instead of doing a permute. */
6426 if (TARGET_P8_VECTOR)
6428 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6429 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6430 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6431 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6432 if (BYTES_BIG_ENDIAN)
6433 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6434 else
6435 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6437 else
6439 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6440 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6441 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6442 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6443 rs6000_expand_extract_even (target, flt_even, flt_odd);
6446 return;
6449 /* Special case initializing vector short/char that are splats if we are on
6450 64-bit systems with direct move. */
6451 if (all_same && TARGET_DIRECT_MOVE_64BIT
6452 && (mode == V16QImode || mode == V8HImode))
6454 rtx op0 = XVECEXP (vals, 0, 0);
6455 rtx di_tmp = gen_reg_rtx (DImode);
6457 if (!REG_P (op0))
6458 op0 = force_reg (GET_MODE_INNER (mode), op0);
6460 if (mode == V16QImode)
6462 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6463 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6464 return;
6467 if (mode == V8HImode)
6469 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6470 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6471 return;
6475 /* Store value to stack temp. Load vector element. Splat. However, splat
6476 of 64-bit items is not supported on Altivec. */
6477 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6479 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6480 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6481 XVECEXP (vals, 0, 0));
6482 x = gen_rtx_UNSPEC (VOIDmode,
6483 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6484 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6485 gen_rtvec (2,
6486 gen_rtx_SET (target, mem),
6487 x)));
6488 x = gen_rtx_VEC_SELECT (inner_mode, target,
6489 gen_rtx_PARALLEL (VOIDmode,
6490 gen_rtvec (1, const0_rtx)));
6491 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6492 return;
6495 /* One field is non-constant. Load constant then overwrite
6496 varying field. */
6497 if (n_var == 1)
6499 rtx copy = copy_rtx (vals);
6501 /* Load constant part of vector, substitute neighboring value for
6502 varying element. */
6503 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6504 rs6000_expand_vector_init (target, copy);
6506 /* Insert variable. */
6507 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6508 return;
6511 /* Construct the vector in memory one field at a time
6512 and load the whole vector. */
6513 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6514 for (i = 0; i < n_elts; i++)
6515 emit_move_insn (adjust_address_nv (mem, inner_mode,
6516 i * GET_MODE_SIZE (inner_mode)),
6517 XVECEXP (vals, 0, i));
6518 emit_move_insn (target, mem);
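/* Example (added commentary): initializing a V2DFmode vector from two
   distinct double values goes through the vsx_concat path above (an
   xxpermdi), a splat of one value uses vsx_splat_v2df (lxvdsx when the
   operand is in memory), and only the remaining cases fall back to
   building the vector in a stack temporary one element at a time.  */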
6521 /* Set field ELT of TARGET to VAL. */
6523 void
6524 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6526 machine_mode mode = GET_MODE (target);
6527 machine_mode inner_mode = GET_MODE_INNER (mode);
6528 rtx reg = gen_reg_rtx (mode);
6529 rtx mask, mem, x;
6530 int width = GET_MODE_SIZE (inner_mode);
6531 int i;
6533 val = force_reg (GET_MODE (val), val);
6535 if (VECTOR_MEM_VSX_P (mode))
6537 rtx insn = NULL_RTX;
6538 rtx elt_rtx = GEN_INT (elt);
6540 if (mode == V2DFmode)
6541 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6543 else if (mode == V2DImode)
6544 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6546 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6548 if (mode == V4SImode)
6549 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6550 else if (mode == V8HImode)
6551 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6552 else if (mode == V16QImode)
6553 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6554 else if (mode == V4SFmode)
6555 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6558 if (insn)
6560 emit_insn (insn);
6561 return;
6565 /* Simplify setting single element vectors like V1TImode. */
6566 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6568 emit_move_insn (target, gen_lowpart (mode, val));
6569 return;
6572 /* Load single variable value. */
6573 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6574 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6575 x = gen_rtx_UNSPEC (VOIDmode,
6576 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6577 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6578 gen_rtvec (2,
6579 gen_rtx_SET (reg, mem),
6580 x)));
6582 /* Linear sequence. */
6583 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6584 for (i = 0; i < 16; ++i)
6585 XVECEXP (mask, 0, i) = GEN_INT (i);
6587 /* Set permute mask to insert element into target. */
6588 for (i = 0; i < width; ++i)
6589 XVECEXP (mask, 0, elt*width + i)
6590 = GEN_INT (i + 0x10);
6591 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6593 if (BYTES_BIG_ENDIAN)
6594 x = gen_rtx_UNSPEC (mode,
6595 gen_rtvec (3, target, reg,
6596 force_reg (V16QImode, x)),
6597 UNSPEC_VPERM);
6598 else
6600 if (TARGET_P9_VECTOR)
6601 x = gen_rtx_UNSPEC (mode,
6602 gen_rtvec (3, reg, target,
6603 force_reg (V16QImode, x)),
6604 UNSPEC_VPERMR);
6605 else
6607 /* Invert selector. We prefer to generate VNAND on P8 so
6608 that future fusion opportunities can kick in, but must
6609 generate VNOR elsewhere. */
6610 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6611 rtx iorx = (TARGET_P8_VECTOR
6612 ? gen_rtx_IOR (V16QImode, notx, notx)
6613 : gen_rtx_AND (V16QImode, notx, notx));
6614 rtx tmp = gen_reg_rtx (V16QImode);
6615 emit_insn (gen_rtx_SET (tmp, iorx));
6617 /* Permute with operands reversed and adjusted selector. */
6618 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6619 UNSPEC_VPERM);
6623 emit_insn (gen_rtx_SET (target, x));
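/* Example of the permute mask built above (added commentary): inserting
   element 1 of a V4SImode vector on a big-endian target produces the
   V16QImode selector { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 },
   i.e. bytes 4..7 are taken from the second vperm input (the new value)
   and the remaining bytes from the old target.  */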
6626 /* Extract field ELT from VEC into TARGET. */
6628 void
6629 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6631 machine_mode mode = GET_MODE (vec);
6632 machine_mode inner_mode = GET_MODE_INNER (mode);
6633 rtx mem;
6635 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6637 switch (mode)
6639 default:
6640 break;
6641 case E_V1TImode:
6642 emit_move_insn (target, gen_lowpart (TImode, vec));
6643 break;
6644 case E_V2DFmode:
6645 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6646 return;
6647 case E_V2DImode:
6648 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6649 return;
6650 case E_V4SFmode:
6651 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6652 return;
6653 case E_V16QImode:
6654 if (TARGET_DIRECT_MOVE_64BIT)
6656 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6657 return;
6659 else
6660 break;
6661 case E_V8HImode:
6662 if (TARGET_DIRECT_MOVE_64BIT)
6664 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6665 return;
6667 else
6668 break;
6669 case E_V4SImode:
6670 if (TARGET_DIRECT_MOVE_64BIT)
6672 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6673 return;
6675 break;
6678 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6679 && TARGET_DIRECT_MOVE_64BIT)
6681 if (GET_MODE (elt) != DImode)
6683 rtx tmp = gen_reg_rtx (DImode);
6684 convert_move (tmp, elt, 0);
6685 elt = tmp;
6687 else if (!REG_P (elt))
6688 elt = force_reg (DImode, elt);
6690 switch (mode)
6692 case E_V1TImode:
6693 emit_move_insn (target, gen_lowpart (TImode, vec));
6694 return;
6696 case E_V2DFmode:
6697 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6698 return;
6700 case E_V2DImode:
6701 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6702 return;
6704 case E_V4SFmode:
6705 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6706 return;
6708 case E_V4SImode:
6709 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6710 return;
6712 case E_V8HImode:
6713 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6714 return;
6716 case E_V16QImode:
6717 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6718 return;
6720 default:
6721 gcc_unreachable ();
6725 /* Allocate mode-sized buffer. */
6726 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6728 emit_move_insn (mem, vec);
6729 if (CONST_INT_P (elt))
6731 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6733 /* Add offset to field within buffer matching vector element. */
6734 mem = adjust_address_nv (mem, inner_mode,
6735 modulo_elt * GET_MODE_SIZE (inner_mode));
6736 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6738 else
6740 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6741 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6742 rtx new_addr = gen_reg_rtx (Pmode);
6744 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6745 if (ele_size > 1)
6746 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6747 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6748 new_addr = change_address (mem, inner_mode, new_addr);
6749 emit_move_insn (target, new_addr);
6753 /* Return the offset within a memory object (MEM) of a vector type to a given
6754 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6755 the element is constant, we return a constant integer.
6757 Otherwise, we use a base register temporary to calculate the offset after
6758 masking it to fit within the bounds of the vector and scaling it. The
6759 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6760 built-in function. */
6762 static rtx
6763 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6765 if (CONST_INT_P (element))
6766 return GEN_INT (INTVAL (element) * scalar_size);
6768 /* All insns should use the 'Q' constraint (address is a single register) if
6769 the element number is not a constant. */
6770 gcc_assert (satisfies_constraint_Q (mem));
6772 /* Mask the element to make sure the element number is between 0 and the
6773 maximum number of elements - 1 so that we don't generate an address
6774 outside the vector. */
6775 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6776 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6777 emit_insn (gen_rtx_SET (base_tmp, and_op));
6779 /* Shift the element to get the byte offset from the element number. */
6780 int shift = exact_log2 (scalar_size);
6781 gcc_assert (shift >= 0);
6783 if (shift > 0)
6785 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6786 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6789 return base_tmp;
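/* Examples (added commentary): a constant element 3 of a V4SImode
   vector yields GEN_INT (12); a variable element number is ANDed with
   GET_MODE_NUNITS - 1 == 3 and shifted left by exact_log2 (scalar_size)
   == 2, leaving the byte offset in BASE_TMP.  */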
6792 /* Helper function to update PC-relative addresses when we are adjusting a
6793 memory address (ADDR) to a vector to point to a scalar field within the
6794 vector with a constant offset (ELEMENT_OFFSET). If the address is not
6795 valid, we can use the base register temporary (BASE_TMP) to form the address. */
6797 static rtx
6798 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6800 rtx new_addr = NULL;
6802 gcc_assert (CONST_INT_P (element_offset));
6804 if (GET_CODE (addr) == CONST)
6805 addr = XEXP (addr, 0);
6807 if (GET_CODE (addr) == PLUS)
6809 rtx op0 = XEXP (addr, 0);
6810 rtx op1 = XEXP (addr, 1);
6812 if (CONST_INT_P (op1))
6814 HOST_WIDE_INT offset
6815 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6817 if (offset == 0)
6818 new_addr = op0;
6820 else
6822 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6823 new_addr = gen_rtx_CONST (Pmode, plus);
6827 else
6829 emit_move_insn (base_tmp, addr);
6830 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6834 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
6836 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
6837 new_addr = gen_rtx_CONST (Pmode, plus);
6840 else
6841 gcc_unreachable ();
6843 return new_addr;
6846 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6847 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6848 temporary (BASE_TMP) to fixup the address. Return the new memory address
6849 that is valid for reads or writes to a given register (SCALAR_REG).
6851 This function is expected to be called after reload is completed when we are
6852 splitting insns. The temporary BASE_TMP might be set multiple times with
6853 this code. */
6855 rtx
6856 rs6000_adjust_vec_address (rtx scalar_reg,
6857 rtx mem,
6858 rtx element,
6859 rtx base_tmp,
6860 machine_mode scalar_mode)
6862 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6863 rtx addr = XEXP (mem, 0);
6864 rtx new_addr;
6866 gcc_assert (!reg_mentioned_p (base_tmp, addr));
6867 gcc_assert (!reg_mentioned_p (base_tmp, element));
6869 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6870 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6872 /* Calculate what we need to add to the address to get the element
6873 address. */
6874 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
6876 /* Create the new address pointing to the element within the vector. If we
6877 are adding 0, we don't have to change the address. */
6878 if (element_offset == const0_rtx)
6879 new_addr = addr;
6881 /* A simple indirect address can be converted into a reg + offset
6882 address. */
6883 else if (REG_P (addr) || SUBREG_P (addr))
6884 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6886 /* For references to local static variables, fold a constant offset into the
6887 address. */
6888 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
6889 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
6891 /* Optimize D-FORM addresses with constant offset with a constant element, to
6892 include the element offset in the address directly. */
6893 else if (GET_CODE (addr) == PLUS)
6895 rtx op0 = XEXP (addr, 0);
6896 rtx op1 = XEXP (addr, 1);
6898 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6899 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6901 /* op0 should never be r0, because r0+offset is not valid. But it
6902 doesn't hurt to make sure it is not r0. */
6903 gcc_assert (reg_or_subregno (op0) != 0);
6905 /* D-FORM address with constant element number. */
6906 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6907 rtx offset_rtx = GEN_INT (offset);
6908 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6910 else
6912 /* If we don't have a D-FORM address with a constant element number,
6913 add the two elements in the current address. Then add the offset.
6915 Previously, we tried to add the offset to OP1 and change the
6916 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6917 complicated because we had to verify that op1 was not GPR0 and we
6918 had a constant element offset (due to the way ADDI is defined).
6919 By doing the add of OP0 and OP1 first, and then adding in the
6920 offset, it has the benefit that if D-FORM instructions are
6921 allowed, the offset is part of the memory access to the vector
6922 element. */
6923 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
6924 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6928 else
6930 emit_move_insn (base_tmp, addr);
6931 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6934 /* If the address isn't valid, move the address into the temporary base
6935 register. Some reasons it could not be valid include:
6937 The address offset overflowed the 16 or 34 bit offset size;
6938 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6939 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
6940 Only X_FORM loads can be done, and the address is D_FORM. */
6942 enum insn_form iform
6943 = address_to_insn_form (new_addr, scalar_mode,
6944 reg_to_non_prefixed (scalar_reg, scalar_mode));
6946 if (iform == INSN_FORM_BAD)
6948 emit_move_insn (base_tmp, new_addr);
6949 new_addr = base_tmp;
6952 return change_address (mem, scalar_mode, new_addr);
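/* Example (added commentary): extracting constant element 2 of a
   V4SImode vector in memory at (r9 + 16) folds the 8-byte element
   offset into the displacement, giving an SImode memory reference at
   (r9 + 24); a variable element instead goes through the paths above
   that leave the computed offset or address in BASE_TMP.  */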
6955 /* Split a variable vec_extract operation into the component instructions. */
6957 void
6958 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6959 rtx tmp_altivec)
6961 machine_mode mode = GET_MODE (src);
6962 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6963 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6964 int byte_shift = exact_log2 (scalar_size);
6966 gcc_assert (byte_shift >= 0);
6968 /* If we are given a memory address, optimize to load just the element. We
6969 don't have to adjust the vector element number on little endian
6970 systems. */
6971 if (MEM_P (src))
6973 emit_move_insn (dest,
6974 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
6975 scalar_mode));
6976 return;
6979 else if (REG_P (src) || SUBREG_P (src))
6981 int num_elements = GET_MODE_NUNITS (mode);
6982 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6983 int bit_shift = 7 - exact_log2 (num_elements);
6984 rtx element2;
6985 unsigned int dest_regno = reg_or_subregno (dest);
6986 unsigned int src_regno = reg_or_subregno (src);
6987 unsigned int element_regno = reg_or_subregno (element);
6989 gcc_assert (REG_P (tmp_gpr));
6991 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6992 a general purpose register. */
6993 if (TARGET_P9_VECTOR
6994 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6995 && INT_REGNO_P (dest_regno)
6996 && ALTIVEC_REGNO_P (src_regno)
6997 && INT_REGNO_P (element_regno))
6999 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7000 rtx element_si = gen_rtx_REG (SImode, element_regno);
7002 if (mode == V16QImode)
7003 emit_insn (BYTES_BIG_ENDIAN
7004 ? gen_vextublx (dest_si, element_si, src)
7005 : gen_vextubrx (dest_si, element_si, src));
7007 else if (mode == V8HImode)
7009 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7010 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7011 emit_insn (BYTES_BIG_ENDIAN
7012 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7013 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7017 else
7019 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7020 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7021 emit_insn (BYTES_BIG_ENDIAN
7022 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7023 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7026 return;
7030 gcc_assert (REG_P (tmp_altivec));
7032 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7033 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7034 will shift the element into the upper position (adding 3 to convert a
7035 byte shift into a bit shift). */
7036 if (scalar_size == 8)
7038 if (!BYTES_BIG_ENDIAN)
7040 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7041 element2 = tmp_gpr;
7043 else
7044 element2 = element;
7046 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7047 bit. */
7048 emit_insn (gen_rtx_SET (tmp_gpr,
7049 gen_rtx_AND (DImode,
7050 gen_rtx_ASHIFT (DImode,
7051 element2,
7052 GEN_INT (6)),
7053 GEN_INT (64))));
7055 else
7057 if (!BYTES_BIG_ENDIAN)
7059 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7061 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7062 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7063 element2 = tmp_gpr;
7065 else
7066 element2 = element;
7068 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7071 /* Get the value into the lower byte of the Altivec register where VSLO
7072 expects it. */
7073 if (TARGET_P9_VECTOR)
7074 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7075 else if (can_create_pseudo_p ())
7076 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7077 else
7079 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7080 emit_move_insn (tmp_di, tmp_gpr);
7081 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7084 /* Do the VSLO to get the value into the final location. */
7085 switch (mode)
7087 case E_V2DFmode:
7088 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7089 return;
7091 case E_V2DImode:
7092 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7093 return;
7095 case E_V4SFmode:
7097 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7098 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7099 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7100 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7101 tmp_altivec));
7103 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7104 return;
7107 case E_V4SImode:
7108 case E_V8HImode:
7109 case E_V16QImode:
7111 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7112 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7113 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7114 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7115 tmp_altivec));
7116 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7117 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7118 GEN_INT (64 - bits_in_element)));
7119 return;
7122 default:
7123 gcc_unreachable ();
7126 return;
7128 else
7129 gcc_unreachable ();
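/* Sketch of the register path above (added commentary): for a variable
   extract from a V4SImode register on little endian, the element number
   is masked with 3, reversed (3 - n), and shifted left by bit_shift ==
   5 so that it encodes a byte shift for VSLO; VSLO then moves the
   selected word to the top of the vector, and the final right shift of
   64 - 32 == 32 bits places it in the low bits of the GPR.  */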
7132 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7133 selects whether the alignment is abi mandated, optional, or
7134 both abi and optional alignment. */
7136 unsigned int
7137 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7139 if (how != align_opt)
7141 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7142 align = 128;
7145 if (how != align_abi)
7147 if (TREE_CODE (type) == ARRAY_TYPE
7148 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7150 if (align < BITS_PER_WORD)
7151 align = BITS_PER_WORD;
7155 return align;
7158 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7159 instructions simply ignore the low bits; VSX memory instructions
7160 are aligned to 4 or 8 bytes. */
7162 static bool
7163 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7165 return (STRICT_ALIGNMENT
7166 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7167 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7168 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7169 && (int) align < VECTOR_ALIGN (mode)))));
7172 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7174 bool
7175 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7177 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7179 if (computed != 128)
7181 static bool warned;
7182 if (!warned && warn_psabi)
7184 warned = true;
7185 inform (input_location,
7186 "the layout of aggregates containing vectors with"
7187 " %d-byte alignment has changed in GCC 5",
7188 computed / BITS_PER_UNIT);
7191 /* In current GCC there is no special case. */
7192 return false;
7195 return false;
7198 /* AIX increases the natural record alignment to doubleword if the first
7199 field is an FP double, while the FP fields themselves remain word aligned. */
7201 unsigned int
7202 rs6000_special_round_type_align (tree type, unsigned int computed,
7203 unsigned int specified)
7205 unsigned int align = MAX (computed, specified);
7206 tree field = TYPE_FIELDS (type);
7208 /* Skip all non-FIELD_DECL nodes. */
7209 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7210 field = DECL_CHAIN (field);
7212 if (field != NULL && field != type)
7214 type = TREE_TYPE (field);
7215 while (TREE_CODE (type) == ARRAY_TYPE)
7216 type = TREE_TYPE (type);
7218 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7219 align = MAX (align, 64);
7222 return align;
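/* Worked example for the AIX rule above (illustration only, not
   compiled): for a record whose first field has DFmode,

     struct s { double d; int i; };

   a call with computed = 32 and specified = 0 yields
   MAX (MAX (32, 0), 64) = 64 bits, so the record is rounded up to
   doubleword alignment even though the double member itself stays
   word aligned within the record.  */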
7225 /* Darwin increases record alignment to the natural alignment of
7226 the first field. */
7228 unsigned int
7229 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7230 unsigned int specified)
7232 unsigned int align = MAX (computed, specified);
7234 if (TYPE_PACKED (type))
7235 return align;
7237 /* Find the first field, looking down into aggregates. */
7238 do {
7239 tree field = TYPE_FIELDS (type);
7240 /* Skip all non-FIELD_DECL nodes. */
7241 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7242 field = DECL_CHAIN (field);
7243 if (! field)
7244 break;
7245 /* A packed field does not contribute any extra alignment. */
7246 if (DECL_PACKED (field))
7247 return align;
7248 type = TREE_TYPE (field);
7249 while (TREE_CODE (type) == ARRAY_TYPE)
7250 type = TREE_TYPE (type);
7251 } while (AGGREGATE_TYPE_P (type));
7253 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7254 align = MAX (align, TYPE_ALIGN (type));
7256 return align;
7259 /* Return 1 for an operand in small memory on V.4/eabi. */
7261 int
7262 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7263 machine_mode mode ATTRIBUTE_UNUSED)
7265 #if TARGET_ELF
7266 rtx sym_ref;
7268 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7269 return 0;
7271 if (DEFAULT_ABI != ABI_V4)
7272 return 0;
7274 if (SYMBOL_REF_P (op))
7275 sym_ref = op;
7277 else if (GET_CODE (op) != CONST
7278 || GET_CODE (XEXP (op, 0)) != PLUS
7279 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7280 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7281 return 0;
7283 else
7285 rtx sum = XEXP (op, 0);
7286 HOST_WIDE_INT summand;
7288 /* We have to be careful here, because it is the referenced address
7289 that must be 32k from _SDA_BASE_, not just the symbol. */
7290 summand = INTVAL (XEXP (sum, 1));
7291 if (summand < 0 || summand > g_switch_value)
7292 return 0;
7294 sym_ref = XEXP (sum, 0);
7297 return SYMBOL_REF_SMALL_P (sym_ref);
7298 #else
7299 return 0;
7300 #endif
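/* Illustration of what the predicate above accepts (sketch, assuming a
   symbol X placed in small data and the default -G limit):

     (symbol_ref "X")                    -> 1
     (const (plus (symbol_ref "X") 16))  -> 1 if 16 <= g_switch_value
     (const (plus (symbol_ref "X") -4))  -> 0: a negative summand could
                                            move the referenced address
                                            out of range of _SDA_BASE_  */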
7303 /* Return true if either operand is a general purpose register. */
7305 bool
7306 gpr_or_gpr_p (rtx op0, rtx op1)
7308 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7309 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7312 /* Return true if this is a move direct operation between GPR registers and
7313 floating point/VSX registers. */
7315 bool
7316 direct_move_p (rtx op0, rtx op1)
7318 if (!REG_P (op0) || !REG_P (op1))
7319 return false;
7321 if (!TARGET_DIRECT_MOVE)
7322 return false;
7324 int regno0 = REGNO (op0);
7325 int regno1 = REGNO (op1);
7326 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7327 return false;
7329 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7330 return true;
7332 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7333 return true;
7335 return false;
7338 /* Return true if ADDR is an acceptable address for a quad memory
7339 operation of mode MODE (either LQ/STQ for general purpose registers,
7340 or LXV/STXV for vector registers under ISA 3.0).  If STRICT, the
7341 base register must pass the strict register checks. */
7344 bool
7345 quad_address_p (rtx addr, machine_mode mode, bool strict)
7347 rtx op0, op1;
7349 if (GET_MODE_SIZE (mode) != 16)
7350 return false;
7352 if (legitimate_indirect_address_p (addr, strict))
7353 return true;
7355 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7356 return false;
7358 /* Is this a valid prefixed address? If the bottom four bits of the offset
7359 are non-zero, we could use a prefixed instruction (which does not have the
7360 DQ-form constraint that the traditional instruction had) instead of
7361 forcing the unaligned offset to a GPR. */
7362 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7363 return true;
7365 if (GET_CODE (addr) != PLUS)
7366 return false;
7368 op0 = XEXP (addr, 0);
7369 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7370 return false;
7372 op1 = XEXP (addr, 1);
7373 if (!CONST_INT_P (op1))
7374 return false;
7376 return quad_address_offset_p (INTVAL (op1));
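/* Illustration (assuming quad_address_offset_p enforces the DQ-form
   encoding, a signed 16-bit offset with the low four bits zero):

     quad_address_offset_p (16)      -> true
     quad_address_offset_p (-32768)  -> true
     quad_address_offset_p (8)       -> false: not 16-byte aligned
     quad_address_offset_p (32768)   -> false: out of range

   Unaligned offsets can still be handled by a prefixed instruction on
   targets that support one, as tested above via address_is_prefixed.  */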
7379 /* Return true if this is a load or store quad operation. This function does
7380 not handle the atomic quad memory instructions. */
7382 bool
7383 quad_load_store_p (rtx op0, rtx op1)
7385 bool ret;
7387 if (!TARGET_QUAD_MEMORY)
7388 ret = false;
7390 else if (REG_P (op0) && MEM_P (op1))
7391 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7392 && quad_memory_operand (op1, GET_MODE (op1))
7393 && !reg_overlap_mentioned_p (op0, op1));
7395 else if (MEM_P (op0) && REG_P (op1))
7396 ret = (quad_memory_operand (op0, GET_MODE (op0))
7397 && quad_int_reg_operand (op1, GET_MODE (op1)));
7399 else
7400 ret = false;
7402 if (TARGET_DEBUG_ADDR)
7404 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7405 ret ? "true" : "false");
7406 debug_rtx (gen_rtx_SET (op0, op1));
7409 return ret;
7412 /* Given an address, return a constant offset term if one exists. */
7414 static rtx
7415 address_offset (rtx op)
7417 if (GET_CODE (op) == PRE_INC
7418 || GET_CODE (op) == PRE_DEC)
7419 op = XEXP (op, 0);
7420 else if (GET_CODE (op) == PRE_MODIFY
7421 || GET_CODE (op) == LO_SUM)
7422 op = XEXP (op, 1);
7424 if (GET_CODE (op) == CONST)
7425 op = XEXP (op, 0);
7427 if (GET_CODE (op) == PLUS)
7428 op = XEXP (op, 1);
7430 if (CONST_INT_P (op))
7431 return op;
7433 return NULL_RTX;
7436 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7437 the mode. If we can't find (or don't know) the alignment of the symbol
7438 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7439 should be pessimistic]. Offsets are validated in the same way as for
7440 reg + offset. */
7441 static bool
7442 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7444 /* We should not get here with a DQ-form mode. */
7445 gcc_checking_assert (! mode_supports_dq_form (mode));
7447 if (GET_CODE (x) == CONST)
7448 x = XEXP (x, 0);
7450 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7451 x = XVECEXP (x, 0, 0);
7453 rtx sym = NULL_RTX;
7454 unsigned HOST_WIDE_INT offset = 0;
7456 if (GET_CODE (x) == PLUS)
7458 sym = XEXP (x, 0);
7459 if (! SYMBOL_REF_P (sym))
7460 return false;
7461 if (!CONST_INT_P (XEXP (x, 1)))
7462 return false;
7463 offset = INTVAL (XEXP (x, 1));
7465 else if (SYMBOL_REF_P (x))
7466 sym = x;
7467 else if (CONST_INT_P (x))
7468 offset = INTVAL (x);
7469 else if (GET_CODE (x) == LABEL_REF)
7470 offset = 0; // We assume code labels are Pmode aligned
7471 else
7472 return false; // not sure what we have here.
7474 /* If we don't know the alignment of the thing to which the symbol refers,
7475 we assume optimistically it is "enough".
7476 ??? maybe we should be pessimistic instead. */
7477 unsigned align = 0;
7479 if (sym)
7481 tree decl = SYMBOL_REF_DECL (sym);
7482 #if TARGET_MACHO
7483 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7484 /* The decl in an indirection symbol is the original one, which might
7485 be less aligned than the indirection. Our indirections are always
7486 pointer-aligned. */
7488 else
7489 #endif
7490 if (decl && DECL_ALIGN (decl))
7491 align = DECL_ALIGN_UNIT (decl);
7494 unsigned int extra = 0;
7495 switch (mode)
7497 case E_DFmode:
7498 case E_DDmode:
7499 case E_DImode:
7500 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7501 addressing. */
7502 if (VECTOR_MEM_VSX_P (mode))
7503 return false;
7505 if (!TARGET_POWERPC64)
7506 extra = 4;
7507 else if ((offset & 3) || (align & 3))
7508 return false;
7509 break;
7511 case E_TFmode:
7512 case E_IFmode:
7513 case E_KFmode:
7514 case E_TDmode:
7515 case E_TImode:
7516 case E_PTImode:
7517 extra = 8;
7518 if (!TARGET_POWERPC64)
7519 extra = 12;
7520 else if ((offset & 3) || (align & 3))
7521 return false;
7522 break;
7524 default:
7525 break;
7528 /* We only care if the access(es) would cause a change to the high part. */
7529 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7530 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
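/* The expression ((offset & 0xffff) ^ 0x8000) - 0x8000 used above
   sign-extends the low 16 bits of OFFSET, i.e. it recovers the
   displacement that the low half of a HIGH/LO_SUM pair will actually
   encode.  Worked examples (illustration only):

     offset = 0x12347ff8: low 16 bits 0x7ff8 -> result  32760
     offset = 0x12348008: low 16 bits 0x8008 -> result -32760

   In the second case the high part absorbs 0x12350000, so only the
   sign-extended low word needs to be validated.  */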
7533 /* Return true if the MEM operand is a memory operand suitable for use
7534 with a (full width, possibly multiple) gpr load/store. On
7535 powerpc64 this means the offset must be divisible by 4.
7536 Implements 'Y' constraint.
7538 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7539 a constraint function we know the operand has satisfied a suitable
7540 memory predicate.
7542 Offsetting a lo_sum should not be allowed, except where we know by
7543 alignment that a 32k boundary is not crossed. Note that by
7544 "offsetting" here we mean a further offset to access parts of the
7545 MEM. It's fine to have a lo_sum where the inner address is offset
7546 from a sym, since the same sym+offset will appear in the high part
7547 of the address calculation. */
7549 bool
7550 mem_operand_gpr (rtx op, machine_mode mode)
7552 unsigned HOST_WIDE_INT offset;
7553 int extra;
7554 rtx addr = XEXP (op, 0);
7556 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7557 if (TARGET_UPDATE
7558 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7559 && mode_supports_pre_incdec_p (mode)
7560 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7561 return true;
7563 /* Allow prefixed instructions if supported. If the bottom two bits of the
7564 offset are non-zero, we could use a prefixed instruction (which does not
7565 have the DS-form constraint that the traditional instruction had) instead
7566 of forcing the unaligned offset to a GPR. */
7567 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7568 return true;
7570 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7571 really OK. Doing this early avoids teaching all the other machinery
7572 about them. */
7573 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7574 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7576 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7577 if (!rs6000_offsettable_memref_p (op, mode, false))
7578 return false;
7580 op = address_offset (addr);
7581 if (op == NULL_RTX)
7582 return true;
7584 offset = INTVAL (op);
7585 if (TARGET_POWERPC64 && (offset & 3) != 0)
7586 return false;
7588 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7589 if (extra < 0)
7590 extra = 0;
7592 if (GET_CODE (addr) == LO_SUM)
7593 /* For lo_sum addresses, we must allow any offset except one that
7594 causes a wrap, so test only the low 16 bits. */
7595 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7597 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7600 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7601 enforce an offset divisible by 4 even for 32-bit. */
7603 bool
7604 mem_operand_ds_form (rtx op, machine_mode mode)
7606 unsigned HOST_WIDE_INT offset;
7607 int extra;
7608 rtx addr = XEXP (op, 0);
7610 /* Allow prefixed instructions if supported. If the bottom two bits of the
7611 offset are non-zero, we could use a prefixed instruction (which does not
7612 have the DS-form constraint that the traditional instruction had) instead
7613 of forcing the unaligned offset to a GPR. */
7614 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7615 return true;
7617 if (!offsettable_address_p (false, mode, addr))
7618 return false;
7620 op = address_offset (addr);
7621 if (op == NULL_RTX)
7622 return true;
7624 offset = INTVAL (op);
7625 if ((offset & 3) != 0)
7626 return false;
7628 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7629 if (extra < 0)
7630 extra = 0;
7632 if (GET_CODE (addr) == LO_SUM)
7633 /* For lo_sum addresses, we must allow any offset except one that
7634 causes a wrap, so test only the low 16 bits. */
7635 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7637 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7640 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7642 static bool
7643 reg_offset_addressing_ok_p (machine_mode mode)
7645 switch (mode)
7647 case E_V16QImode:
7648 case E_V8HImode:
7649 case E_V4SFmode:
7650 case E_V4SImode:
7651 case E_V2DFmode:
7652 case E_V2DImode:
7653 case E_V1TImode:
7654 case E_TImode:
7655 case E_TFmode:
7656 case E_KFmode:
7657 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7658 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7659 a vector mode, if we want to use the VSX registers to move it around,
7660 we need to restrict ourselves to reg+reg addressing. Similarly for
7661 IEEE 128-bit floating point that is passed in a single vector
7662 register. */
7663 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7664 return mode_supports_dq_form (mode);
7665 break;
7667 case E_SDmode:
7668 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7669 addressing for the LFIWZX and STFIWX instructions. */
7670 if (TARGET_NO_SDMODE_STACK)
7671 return false;
7672 break;
7674 default:
7675 break;
7678 return true;
7681 static bool
7682 virtual_stack_registers_memory_p (rtx op)
7684 int regnum;
7686 if (REG_P (op))
7687 regnum = REGNO (op);
7689 else if (GET_CODE (op) == PLUS
7690 && REG_P (XEXP (op, 0))
7691 && CONST_INT_P (XEXP (op, 1)))
7692 regnum = REGNO (XEXP (op, 0));
7694 else
7695 return false;
7697 return (regnum >= FIRST_VIRTUAL_REGISTER
7698 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7701 /* Return true if a MODE-sized memory access to OP plus OFFSET
7702 is known to not straddle a 32k boundary. This function is used
7703 to determine whether -mcmodel=medium code can use TOC pointer
7704 relative addressing for OP. This means the alignment of the TOC
7705 pointer must also be taken into account, and unfortunately that is
7706 only 8 bytes. */
7708 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7709 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7710 #endif
7712 static bool
7713 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7714 machine_mode mode)
7716 tree decl;
7717 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7719 if (!SYMBOL_REF_P (op))
7720 return false;
7722 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7723 SYMBOL_REF. */
7724 if (mode_supports_dq_form (mode))
7725 return false;
7727 dsize = GET_MODE_SIZE (mode);
7728 decl = SYMBOL_REF_DECL (op);
7729 if (!decl)
7731 if (dsize == 0)
7732 return false;
7734 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7735 replacing memory addresses with an anchor plus offset. We
7736 could find the decl by rummaging around in the block->objects
7737 VEC for the given offset but that seems like too much work. */
7738 dalign = BITS_PER_UNIT;
7739 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7740 && SYMBOL_REF_ANCHOR_P (op)
7741 && SYMBOL_REF_BLOCK (op) != NULL)
7743 struct object_block *block = SYMBOL_REF_BLOCK (op);
7745 dalign = block->alignment;
7746 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7748 else if (CONSTANT_POOL_ADDRESS_P (op))
7750 /* It would be nice to have get_pool_align()... */
7751 machine_mode cmode = get_pool_mode (op);
7753 dalign = GET_MODE_ALIGNMENT (cmode);
7756 else if (DECL_P (decl))
7758 dalign = DECL_ALIGN (decl);
7760 if (dsize == 0)
7762 /* Allow BLKmode when the entire object is known to not
7763 cross a 32k boundary. */
7764 if (!DECL_SIZE_UNIT (decl))
7765 return false;
7767 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7768 return false;
7770 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7771 if (dsize > 32768)
7772 return false;
7774 dalign /= BITS_PER_UNIT;
7775 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7776 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7777 return dalign >= dsize;
7780 else
7781 gcc_unreachable ();
7783 /* Find how many bits of the alignment we know for this access. */
7784 dalign /= BITS_PER_UNIT;
7785 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7786 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7787 mask = dalign - 1;
7788 lsb = offset & -offset;
7789 mask &= lsb - 1;
7790 dalign = mask + 1;
7792 return dalign >= dsize;
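/* Worked example of the lsb/mask computation above (illustration
   only): with dalign = 8 (the TOC pointer alignment cap) and
   offset = 20,

     mask = dalign - 1    = 7
     lsb  = 20 & -20      = 4
     mask &= lsb - 1      -> 7 & 3 = 3
     dalign = mask + 1    = 4

   so the access is only known to be 4-byte aligned, and the final
   dalign >= dsize test fails for an 8-byte access.  */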
7795 static bool
7796 constant_pool_expr_p (rtx op)
7798 rtx base, offset;
7800 split_const (op, &base, &offset);
7801 return (SYMBOL_REF_P (base)
7802 && CONSTANT_POOL_ADDRESS_P (base)
7803 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7806 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7807 use that as the register to put the HIGH value into if register allocation
7808 is already done. */
7810 rtx
7811 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7813 rtx tocrel, tocreg, hi;
7815 gcc_assert (TARGET_TOC);
7817 if (TARGET_DEBUG_ADDR)
7819 if (SYMBOL_REF_P (symbol))
7820 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7821 XSTR (symbol, 0));
7822 else
7824 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7825 GET_RTX_NAME (GET_CODE (symbol)));
7826 debug_rtx (symbol);
7830 if (!can_create_pseudo_p ())
7831 df_set_regs_ever_live (TOC_REGISTER, true);
7833 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7834 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7835 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7836 return tocrel;
7838 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7839 if (largetoc_reg != NULL)
7841 emit_move_insn (largetoc_reg, hi);
7842 hi = largetoc_reg;
7844 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7847 /* These are only used to pass through from print_operand/print_operand_address
7848 to rs6000_output_addr_const_extra over the intervening function
7849 output_addr_const, which is not target code. */
7850 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7852 /* Return true if OP is a toc pointer relative address (the output
7853 of create_TOC_reference). If STRICT, do not match non-split
7854 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7855 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7856 TOCREL_OFFSET_RET respectively. */
7858 bool
7859 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7860 const_rtx *tocrel_offset_ret)
7862 if (!TARGET_TOC)
7863 return false;
7865 if (TARGET_CMODEL != CMODEL_SMALL)
7867 /* When strict ensure we have everything tidy. */
7868 if (strict
7869 && !(GET_CODE (op) == LO_SUM
7870 && REG_P (XEXP (op, 0))
7871 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7872 return false;
7874 /* When not strict, allow non-split TOC addresses and also allow
7875 (lo_sum (high ..)) TOC addresses created during reload. */
7876 if (GET_CODE (op) == LO_SUM)
7877 op = XEXP (op, 1);
7880 const_rtx tocrel_base = op;
7881 const_rtx tocrel_offset = const0_rtx;
7883 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7885 tocrel_base = XEXP (op, 0);
7886 tocrel_offset = XEXP (op, 1);
7889 if (tocrel_base_ret)
7890 *tocrel_base_ret = tocrel_base;
7891 if (tocrel_offset_ret)
7892 *tocrel_offset_ret = tocrel_offset;
7894 return (GET_CODE (tocrel_base) == UNSPEC
7895 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7896 && REG_P (XVECEXP (tocrel_base, 0, 1))
7897 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7900 /* Return true if X is a constant pool address, and also for cmodel=medium
7901 if X is a toc-relative address known to be offsettable within MODE. */
7903 bool
7904 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7905 bool strict)
7907 const_rtx tocrel_base, tocrel_offset;
7908 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7909 && (TARGET_CMODEL != CMODEL_MEDIUM
7910 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7911 || mode == QImode
7912 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7913 INTVAL (tocrel_offset), mode)));
7916 static bool
7917 legitimate_small_data_p (machine_mode mode, rtx x)
7919 return (DEFAULT_ABI == ABI_V4
7920 && !flag_pic && !TARGET_TOC
7921 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7922 && small_data_operand (x, mode));
7925 bool
7926 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7927 bool strict, bool worst_case)
7929 unsigned HOST_WIDE_INT offset;
7930 unsigned int extra;
7932 if (GET_CODE (x) != PLUS)
7933 return false;
7934 if (!REG_P (XEXP (x, 0)))
7935 return false;
7936 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7937 return false;
7938 if (mode_supports_dq_form (mode))
7939 return quad_address_p (x, mode, strict);
7940 if (!reg_offset_addressing_ok_p (mode))
7941 return virtual_stack_registers_memory_p (x);
7942 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7943 return true;
7944 if (!CONST_INT_P (XEXP (x, 1)))
7945 return false;
7947 offset = INTVAL (XEXP (x, 1));
7948 extra = 0;
7949 switch (mode)
7951 case E_DFmode:
7952 case E_DDmode:
7953 case E_DImode:
7954 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7955 addressing. */
7956 if (VECTOR_MEM_VSX_P (mode))
7957 return false;
7959 if (!worst_case)
7960 break;
7961 if (!TARGET_POWERPC64)
7962 extra = 4;
7963 else if (offset & 3)
7964 return false;
7965 break;
7967 case E_TFmode:
7968 case E_IFmode:
7969 case E_KFmode:
7970 case E_TDmode:
7971 case E_TImode:
7972 case E_PTImode:
7973 extra = 8;
7974 if (!worst_case)
7975 break;
7976 if (!TARGET_POWERPC64)
7977 extra = 12;
7978 else if (offset & 3)
7979 return false;
7980 break;
7982 default:
7983 break;
7986 if (TARGET_PREFIXED)
7987 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7988 else
7989 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7992 bool
7993 legitimate_indexed_address_p (rtx x, int strict)
7995 rtx op0, op1;
7997 if (GET_CODE (x) != PLUS)
7998 return false;
8000 op0 = XEXP (x, 0);
8001 op1 = XEXP (x, 1);
8003 return (REG_P (op0) && REG_P (op1)
8004 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8005 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8006 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8007 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8010 bool
8011 avoiding_indexed_address_p (machine_mode mode)
8013 /* Avoid indexed addressing for modes that have non-indexed
8014 load/store instruction forms. */
8015 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8018 bool
8019 legitimate_indirect_address_p (rtx x, int strict)
8021 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8024 bool
8025 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8027 if (!TARGET_MACHO || !flag_pic
8028 || mode != SImode || !MEM_P (x))
8029 return false;
8030 x = XEXP (x, 0);
8032 if (GET_CODE (x) != LO_SUM)
8033 return false;
8034 if (!REG_P (XEXP (x, 0)))
8035 return false;
8036 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8037 return false;
8038 x = XEXP (x, 1);
8040 return CONSTANT_P (x);
8043 static bool
8044 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8046 if (GET_CODE (x) != LO_SUM)
8047 return false;
8048 if (!REG_P (XEXP (x, 0)))
8049 return false;
8050 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8051 return false;
8052 /* Quad word addresses are restricted; we can't use LO_SUM. */
8053 if (mode_supports_dq_form (mode))
8054 return false;
8055 x = XEXP (x, 1);
8057 if (TARGET_ELF || TARGET_MACHO)
8059 bool large_toc_ok;
8061 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8062 return false;
8063 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8064 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8065 recognizes some LO_SUM addresses as valid although this
8066 function says opposite. In most cases, LRA through different
8067 transformations can generate correct code for address reloads.
8068 It cannot manage only some LO_SUM cases. So we need to add
8069 code here saying that some addresses are still valid. */
8070 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8071 && small_toc_ref (x, VOIDmode));
8072 if (TARGET_TOC && ! large_toc_ok)
8073 return false;
8074 if (GET_MODE_NUNITS (mode) != 1)
8075 return false;
8076 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8077 && !(/* ??? Assume floating point reg based on mode? */
8078 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8079 return false;
8081 return CONSTANT_P (x) || large_toc_ok;
8084 return false;
8088 /* Try machine-dependent ways of modifying an illegitimate address
8089 to be legitimate. If we find one, return the new, valid address.
8090 This is used from only one place: `memory_address' in explow.c.
8092 OLDX is the address as it was before break_out_memory_refs was
8093 called. In some cases it is useful to look at this to decide what
8094 needs to be done.
8096 It is always safe for this function to do nothing. It exists to
8097 recognize opportunities to optimize the output.
8099 On RS/6000, first check for the sum of a register with a constant
8100 integer that is out of range. If so, generate code to add the
8101 constant with the low-order 16 bits masked to the register and force
8102 this result into another register (this can be done with `cau').
8103 Then generate an address of REG+(CONST&0xffff), allowing for the
8104 possibility of bit 16 being a one.
8106 Then check for the sum of a register and something not constant, try to
8107 load the other things into a register and return the sum. */
8109 static rtx
8110 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8111 machine_mode mode)
8113 unsigned int extra;
8115 if (!reg_offset_addressing_ok_p (mode)
8116 || mode_supports_dq_form (mode))
8118 if (virtual_stack_registers_memory_p (x))
8119 return x;
8121 /* In theory we should not be seeing addresses of the form reg+0,
8122 but just in case it is generated, optimize it away. */
8123 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8124 return force_reg (Pmode, XEXP (x, 0));
8126 /* For TImode with load/store quad, restrict addresses to just a single
8127 pointer, so it works with both GPRs and VSX registers. */
8128 /* Make sure both operands are registers. */
8129 else if (GET_CODE (x) == PLUS
8130 && (mode != TImode || !TARGET_VSX))
8131 return gen_rtx_PLUS (Pmode,
8132 force_reg (Pmode, XEXP (x, 0)),
8133 force_reg (Pmode, XEXP (x, 1)));
8134 else
8135 return force_reg (Pmode, x);
8137 if (SYMBOL_REF_P (x))
8139 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8140 if (model != 0)
8141 return rs6000_legitimize_tls_address (x, model);
8144 extra = 0;
8145 switch (mode)
8147 case E_TFmode:
8148 case E_TDmode:
8149 case E_TImode:
8150 case E_PTImode:
8151 case E_IFmode:
8152 case E_KFmode:
8153 /* As in legitimate_offset_address_p we do not assume
8154 worst-case. The mode here is just a hint as to the registers
8155 used. A TImode is usually in gprs, but may actually be in
8156 fprs. Leave worst-case scenario for reload to handle via
8157 insn constraints. PTImode is only GPRs. */
8158 extra = 8;
8159 break;
8160 default:
8161 break;
8164 if (GET_CODE (x) == PLUS
8165 && REG_P (XEXP (x, 0))
8166 && CONST_INT_P (XEXP (x, 1))
8167 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8168 >= 0x10000 - extra))
8170 HOST_WIDE_INT high_int, low_int;
8171 rtx sum;
8172 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8173 if (low_int >= 0x8000 - extra)
8174 low_int = 0;
8175 high_int = INTVAL (XEXP (x, 1)) - low_int;
8176 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8177 GEN_INT (high_int)), 0);
8178 return plus_constant (Pmode, sum, low_int);
8180 else if (GET_CODE (x) == PLUS
8181 && REG_P (XEXP (x, 0))
8182 && !CONST_INT_P (XEXP (x, 1))
8183 && GET_MODE_NUNITS (mode) == 1
8184 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8185 || (/* ??? Assume floating point reg based on mode? */
8186 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8187 && !avoiding_indexed_address_p (mode))
8189 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8190 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8192 else if ((TARGET_ELF
8193 #if TARGET_MACHO
8194 || !MACHO_DYNAMIC_NO_PIC_P
8195 #endif
8197 && TARGET_32BIT
8198 && TARGET_NO_TOC_OR_PCREL
8199 && !flag_pic
8200 && !CONST_INT_P (x)
8201 && !CONST_WIDE_INT_P (x)
8202 && !CONST_DOUBLE_P (x)
8203 && CONSTANT_P (x)
8204 && GET_MODE_NUNITS (mode) == 1
8205 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8206 || (/* ??? Assume floating point reg based on mode? */
8207 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8209 rtx reg = gen_reg_rtx (Pmode);
8210 if (TARGET_ELF)
8211 emit_insn (gen_elf_high (reg, x));
8212 else
8213 emit_insn (gen_macho_high (Pmode, reg, x));
8214 return gen_rtx_LO_SUM (Pmode, reg, x);
8216 else if (TARGET_TOC
8217 && SYMBOL_REF_P (x)
8218 && constant_pool_expr_p (x)
8219 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8220 return create_TOC_reference (x, NULL_RTX);
8221 else
8222 return x;
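/* Worked example of the high/low split above (illustration only):
   for x = (plus reg 0x12345),

     low_int  = ((0x12345 & 0xffff) ^ 0x8000) - 0x8000 = 0x2345
     high_int = 0x12345 - 0x2345                       = 0x10000

   0x10000 is added to the base register with addis (the historical
   "cau"), and the returned address is (plus new_reg 0x2345), whose
   offset now fits in the 16-bit displacement field.  */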
8225 /* Debug version of rs6000_legitimize_address. */
8226 static rtx
8227 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8229 rtx ret;
8230 rtx_insn *insns;
8232 start_sequence ();
8233 ret = rs6000_legitimize_address (x, oldx, mode);
8234 insns = get_insns ();
8235 end_sequence ();
8237 if (ret != x)
8239 fprintf (stderr,
8240 "\nrs6000_legitimize_address: mode %s, old code %s, "
8241 "new code %s, modified\n",
8242 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8243 GET_RTX_NAME (GET_CODE (ret)));
8245 fprintf (stderr, "Original address:\n");
8246 debug_rtx (x);
8248 fprintf (stderr, "oldx:\n");
8249 debug_rtx (oldx);
8251 fprintf (stderr, "New address:\n");
8252 debug_rtx (ret);
8254 if (insns)
8256 fprintf (stderr, "Insns added:\n");
8257 debug_rtx_list (insns, 20);
8260 else
8262 fprintf (stderr,
8263 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8264 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8266 debug_rtx (x);
8269 if (insns)
8270 emit_insn (insns);
8272 return ret;
8275 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8276 We need to emit DTP-relative relocations. */
8278 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8279 static void
8280 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8282 switch (size)
8284 case 4:
8285 fputs ("\t.long\t", file);
8286 break;
8287 case 8:
8288 fputs (DOUBLE_INT_ASM_OP, file);
8289 break;
8290 default:
8291 gcc_unreachable ();
8293 output_addr_const (file, x);
8294 if (TARGET_ELF)
8295 fputs ("@dtprel+0x8000", file);
8296 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8298 switch (SYMBOL_REF_TLS_MODEL (x))
8300 case 0:
8301 break;
8302 case TLS_MODEL_LOCAL_EXEC:
8303 fputs ("@le", file);
8304 break;
8305 case TLS_MODEL_INITIAL_EXEC:
8306 fputs ("@ie", file);
8307 break;
8308 case TLS_MODEL_GLOBAL_DYNAMIC:
8309 case TLS_MODEL_LOCAL_DYNAMIC:
8310 fputs ("@m", file);
8311 break;
8312 default:
8313 gcc_unreachable ();
8318 /* Return true if X is a symbol that refers to real (rather than emulated)
8319 TLS. */
8321 static bool
8322 rs6000_real_tls_symbol_ref_p (rtx x)
8324 return (SYMBOL_REF_P (x)
8325 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8328 /* In the name of slightly smaller debug output, and to cater to
8329 general assembler lossage, recognize various UNSPEC sequences
8330 and turn them back into a direct symbol reference. */
8332 static rtx
8333 rs6000_delegitimize_address (rtx orig_x)
8335 rtx x, y, offset;
8337 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8338 orig_x = XVECEXP (orig_x, 0, 0);
8340 orig_x = delegitimize_mem_from_attrs (orig_x);
8342 x = orig_x;
8343 if (MEM_P (x))
8344 x = XEXP (x, 0);
8346 y = x;
8347 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8348 y = XEXP (y, 1);
8350 offset = NULL_RTX;
8351 if (GET_CODE (y) == PLUS
8352 && GET_MODE (y) == Pmode
8353 && CONST_INT_P (XEXP (y, 1)))
8355 offset = XEXP (y, 1);
8356 y = XEXP (y, 0);
8359 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8361 y = XVECEXP (y, 0, 0);
8363 #ifdef HAVE_AS_TLS
8364 /* Do not associate thread-local symbols with the original
8365 constant pool symbol. */
8366 if (TARGET_XCOFF
8367 && SYMBOL_REF_P (y)
8368 && CONSTANT_POOL_ADDRESS_P (y)
8369 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8370 return orig_x;
8371 #endif
8373 if (offset != NULL_RTX)
8374 y = gen_rtx_PLUS (Pmode, y, offset);
8375 if (!MEM_P (orig_x))
8376 return y;
8377 else
8378 return replace_equiv_address_nv (orig_x, y);
8381 if (TARGET_MACHO
8382 && GET_CODE (orig_x) == LO_SUM
8383 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8385 y = XEXP (XEXP (orig_x, 1), 0);
8386 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8387 return XVECEXP (y, 0, 0);
8390 return orig_x;
8393 /* Return true if X shouldn't be emitted into the debug info.
8394 The linker doesn't like .toc section references from
8395 .debug_* sections, so reject .toc section symbols. */
8397 static bool
8398 rs6000_const_not_ok_for_debug_p (rtx x)
8400 if (GET_CODE (x) == UNSPEC)
8401 return true;
8402 if (SYMBOL_REF_P (x)
8403 && CONSTANT_POOL_ADDRESS_P (x))
8405 rtx c = get_pool_constant (x);
8406 machine_mode cmode = get_pool_mode (x);
8407 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8408 return true;
8411 return false;
8414 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8416 static bool
8417 rs6000_legitimate_combined_insn (rtx_insn *insn)
8419 int icode = INSN_CODE (insn);
8421 /* Reject creating doloop insns. Combine should not be allowed
8422 to create these for a number of reasons:
8423 1) In a nested loop, if combine creates one of these in an
8424 outer loop and the register allocator happens to allocate ctr
8425 to the outer loop insn, then the inner loop can't use ctr.
8426 Inner loops ought to be more highly optimized.
8427 2) Combine often wants to create one of these from what was
8428 originally a three insn sequence, first combining the three
8429 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8430 allocated ctr, the splitter takes us back to the three insn
8431 sequence. It's better to stop combine at the two insn
8432 sequence.
8433 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8434 insns, the register allocator sometimes uses floating point
8435 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8436 jump insn and output reloads are not implemented for jumps,
8437 the ctrsi/ctrdi splitters need to handle all possible cases.
8438 That's a pain, and it gets to be seriously difficult when a
8439 splitter that runs after reload needs memory to transfer from
8440 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8441 for the difficult case. It's better to not create problems
8442 in the first place. */
8443 if (icode != CODE_FOR_nothing
8444 && (icode == CODE_FOR_bdz_si
8445 || icode == CODE_FOR_bdz_di
8446 || icode == CODE_FOR_bdnz_si
8447 || icode == CODE_FOR_bdnz_di
8448 || icode == CODE_FOR_bdztf_si
8449 || icode == CODE_FOR_bdztf_di
8450 || icode == CODE_FOR_bdnztf_si
8451 || icode == CODE_FOR_bdnztf_di))
8452 return false;
8454 return true;
8457 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8459 static GTY(()) rtx rs6000_tls_symbol;
8460 static rtx
8461 rs6000_tls_get_addr (void)
8463 if (!rs6000_tls_symbol)
8464 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8466 return rs6000_tls_symbol;
8469 /* Construct the SYMBOL_REF for TLS GOT references. */
8471 static GTY(()) rtx rs6000_got_symbol;
8472 static rtx
8473 rs6000_got_sym (void)
8475 if (!rs6000_got_symbol)
8477 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8478 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8479 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8482 return rs6000_got_symbol;
8485 /* AIX Thread-Local Address support. */
8487 static rtx
8488 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8490 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8491 const char *name;
8492 char *tlsname;
8494 name = XSTR (addr, 0);
8495 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8496 or the symbol will be in the TLS private data section. */
8497 if (name[strlen (name) - 1] != ']'
8498 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8499 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8501 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8502 strcpy (tlsname, name);
8503 strcat (tlsname,
8504 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8505 tlsaddr = copy_rtx (addr);
8506 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8508 else
8509 tlsaddr = addr;
8511 /* Place addr into TOC constant pool. */
8512 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8514 /* Output the TOC entry and create the MEM referencing the value. */
8515 if (constant_pool_expr_p (XEXP (sym, 0))
8516 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8518 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8519 mem = gen_const_mem (Pmode, tocref);
8520 set_mem_alias_set (mem, get_TOC_alias_set ());
8522 else
8523 return sym;
8525 /* Use global-dynamic for local-dynamic. */
8526 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8527 || model == TLS_MODEL_LOCAL_DYNAMIC)
8529 /* Create new TOC reference for @m symbol. */
8530 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8531 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8532 strcpy (tlsname, "*LCM");
8533 strcat (tlsname, name + 3);
8534 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8535 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8536 tocref = create_TOC_reference (modaddr, NULL_RTX);
8537 rtx modmem = gen_const_mem (Pmode, tocref);
8538 set_mem_alias_set (modmem, get_TOC_alias_set ());
8540 rtx modreg = gen_reg_rtx (Pmode);
8541 emit_insn (gen_rtx_SET (modreg, modmem));
8543 tmpreg = gen_reg_rtx (Pmode);
8544 emit_insn (gen_rtx_SET (tmpreg, mem));
8546 dest = gen_reg_rtx (Pmode);
8547 if (TARGET_32BIT)
8548 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8549 else
8550 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8551 return dest;
8553 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8554 else if (TARGET_32BIT)
8556 tlsreg = gen_reg_rtx (SImode);
8557 emit_insn (gen_tls_get_tpointer (tlsreg));
8559 else
8560 tlsreg = gen_rtx_REG (DImode, 13);
8562 /* Load the TOC value into temporary register. */
8563 tmpreg = gen_reg_rtx (Pmode);
8564 emit_insn (gen_rtx_SET (tmpreg, mem));
8565 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8566 gen_rtx_MINUS (Pmode, addr, tlsreg));
8568 /* Add TOC symbol value to TLS pointer. */
8569 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8571 return dest;
8574 /* Passes the tls arg value for global dynamic and local dynamic
8575 emit_library_call_value in rs6000_legitimize_tls_address to
8576 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8577 marker relocs put on __tls_get_addr calls. */
8578 static rtx global_tlsarg;
8580 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8581 this (thread-local) address. */
8583 static rtx
8584 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8586 rtx dest, insn;
8588 if (TARGET_XCOFF)
8589 return rs6000_legitimize_tls_address_aix (addr, model);
8591 dest = gen_reg_rtx (Pmode);
8592 if (model == TLS_MODEL_LOCAL_EXEC
8593 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8595 rtx tlsreg;
8597 if (TARGET_64BIT)
8599 tlsreg = gen_rtx_REG (Pmode, 13);
8600 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8602 else
8604 tlsreg = gen_rtx_REG (Pmode, 2);
8605 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8607 emit_insn (insn);
8609 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8611 rtx tlsreg, tmp;
8613 tmp = gen_reg_rtx (Pmode);
8614 if (TARGET_64BIT)
8616 tlsreg = gen_rtx_REG (Pmode, 13);
8617 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8619 else
8621 tlsreg = gen_rtx_REG (Pmode, 2);
8622 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8624 emit_insn (insn);
8625 if (TARGET_64BIT)
8626 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8627 else
8628 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8629 emit_insn (insn);
8631 else
8633 rtx got, tga, tmp1, tmp2;
8635 /* We currently use relocations like @got@tlsgd for tls, which
8636 means the linker will handle allocation of tls entries, placing
8637 them in the .got section. So use a pointer to the .got section,
8638 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8639 or to secondary GOT sections used by 32-bit -fPIC. */
8640 if (rs6000_pcrel_p (cfun))
8641 got = const0_rtx;
8642 else if (TARGET_64BIT)
8643 got = gen_rtx_REG (Pmode, 2);
8644 else
8646 if (flag_pic == 1)
8647 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8648 else
8650 rtx gsym = rs6000_got_sym ();
8651 got = gen_reg_rtx (Pmode);
8652 if (flag_pic == 0)
8653 rs6000_emit_move (got, gsym, Pmode);
8654 else
8656 rtx mem, lab;
8658 tmp1 = gen_reg_rtx (Pmode);
8659 tmp2 = gen_reg_rtx (Pmode);
8660 mem = gen_const_mem (Pmode, tmp1);
8661 lab = gen_label_rtx ();
8662 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8663 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8664 if (TARGET_LINK_STACK)
8665 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8666 emit_move_insn (tmp2, mem);
8667 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8668 set_unique_reg_note (last, REG_EQUAL, gsym);
8673 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8675 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8676 UNSPEC_TLSGD);
8677 tga = rs6000_tls_get_addr ();
8678 rtx argreg = gen_rtx_REG (Pmode, 3);
8679 emit_insn (gen_rtx_SET (argreg, arg));
8680 global_tlsarg = arg;
8681 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8682 global_tlsarg = NULL_RTX;
8684 /* Make a note so that the result of this call can be CSEd. */
8685 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8686 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8687 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8689 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8691 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8692 tga = rs6000_tls_get_addr ();
8693 tmp1 = gen_reg_rtx (Pmode);
8694 rtx argreg = gen_rtx_REG (Pmode, 3);
8695 emit_insn (gen_rtx_SET (argreg, arg));
8696 global_tlsarg = arg;
8697 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8698 global_tlsarg = NULL_RTX;
8700 /* Make a note so that the result of this call can be CSEd. */
8701 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8702 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8703 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8705 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8707 if (TARGET_64BIT)
8708 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8709 else
8710 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8712 else if (rs6000_tls_size == 32)
8714 tmp2 = gen_reg_rtx (Pmode);
8715 if (TARGET_64BIT)
8716 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8717 else
8718 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8719 emit_insn (insn);
8720 if (TARGET_64BIT)
8721 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8722 else
8723 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8725 else
8727 tmp2 = gen_reg_rtx (Pmode);
8728 if (TARGET_64BIT)
8729 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8730 else
8731 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8732 emit_insn (insn);
8733 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8735 emit_insn (insn);
8737 else
8739 /* IE, or 64-bit offset LE. */
8740 tmp2 = gen_reg_rtx (Pmode);
8741 if (TARGET_64BIT)
8742 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8743 else
8744 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8745 emit_insn (insn);
8746 if (rs6000_pcrel_p (cfun))
8748 if (TARGET_64BIT)
8749 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8750 else
8751 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8753 else if (TARGET_64BIT)
8754 insn = gen_tls_tls_64 (dest, tmp2, addr);
8755 else
8756 insn = gen_tls_tls_32 (dest, tmp2, addr);
8757 emit_insn (insn);
8761 return dest;
8764 /* Only create the global variable for the stack protect guard if we are using
8765 the global flavor of that guard. */
8766 static tree
8767 rs6000_init_stack_protect_guard (void)
8769 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8770 return default_stack_protect_guard ();
8772 return NULL_TREE;
8775 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8777 static bool
8778 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8780 if (GET_CODE (x) == HIGH
8781 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8782 return true;
8784 /* A TLS symbol in the TOC cannot contain a sum. */
8785 if (GET_CODE (x) == CONST
8786 && GET_CODE (XEXP (x, 0)) == PLUS
8787 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8788 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8789 return true;
8791 /* Do not place an ELF TLS symbol in the constant pool. */
8792 return TARGET_ELF && tls_referenced_p (x);
8795 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8796 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8797 can be addressed relative to the toc pointer. */
8799 static bool
8800 use_toc_relative_ref (rtx sym, machine_mode mode)
8802 return ((constant_pool_expr_p (sym)
8803 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8804 get_pool_mode (sym)))
8805 || (TARGET_CMODEL == CMODEL_MEDIUM
8806 && SYMBOL_REF_LOCAL_P (sym)
8807 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8810 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8811 that is a valid memory address for an instruction.
8812 The MODE argument is the machine mode for the MEM expression
8813 that wants to use this address.
8815 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8816 refers to a constant pool entry of an address (or the sum of it
8817 plus a constant), a short (16-bit signed) constant plus a register,
8818 the sum of two registers, or a register indirect, possibly with an
8819 auto-increment. For DFmode, DDmode and DImode with a constant plus
8820 register, we must ensure that both words are addressable, or on
8821 PowerPC64 that the offset is word aligned.
8823 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8824 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8825 because adjacent memory cells are accessed by adding word-sized offsets
8826 during assembly output. */
8827 static bool
8828 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8830 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8831 bool quad_offset_p = mode_supports_dq_form (mode);
8833 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8834 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
8835 && GET_CODE (x) == AND
8836 && CONST_INT_P (XEXP (x, 1))
8837 && INTVAL (XEXP (x, 1)) == -16)
8838 x = XEXP (x, 0);
8840 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8841 return 0;
8842 if (legitimate_indirect_address_p (x, reg_ok_strict))
8843 return 1;
8844 if (TARGET_UPDATE
8845 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8846 && mode_supports_pre_incdec_p (mode)
8847 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8848 return 1;
8850 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8851 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8852 return 1;
8854 /* Handle restricted vector d-form offsets in ISA 3.0. */
8855 if (quad_offset_p)
8857 if (quad_address_p (x, mode, reg_ok_strict))
8858 return 1;
8860 else if (virtual_stack_registers_memory_p (x))
8861 return 1;
8863 else if (reg_offset_p)
8865 if (legitimate_small_data_p (mode, x))
8866 return 1;
8867 if (legitimate_constant_pool_address_p (x, mode,
8868 reg_ok_strict || lra_in_progress))
8869 return 1;
8872 /* For TImode, if we have TImode in VSX registers, only allow register
8873 indirect addresses. This will allow the values to go in either GPRs
8874 or VSX registers without reloading. The vector types would tend to
8875 go into VSX registers, so we allow REG+REG, while TImode seems
8876 somewhat split, in that some uses are GPR based, and some VSX based. */
8877 /* FIXME: We could loosen this by changing the following to
8878 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8879 but currently we cannot allow REG+REG addressing for TImode. See
8880 PR72827 for complete details on how this ends up hoodwinking DSE. */
8881 if (mode == TImode && TARGET_VSX)
8882 return 0;
8883 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8884 if (! reg_ok_strict
8885 && reg_offset_p
8886 && GET_CODE (x) == PLUS
8887 && REG_P (XEXP (x, 0))
8888 && (XEXP (x, 0) == virtual_stack_vars_rtx
8889 || XEXP (x, 0) == arg_pointer_rtx)
8890 && CONST_INT_P (XEXP (x, 1)))
8891 return 1;
8892 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8893 return 1;
8894 if (!FLOAT128_2REG_P (mode)
8895 && (TARGET_HARD_FLOAT
8896 || TARGET_POWERPC64
8897 || (mode != DFmode && mode != DDmode))
8898 && (TARGET_POWERPC64 || mode != DImode)
8899 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8900 && mode != PTImode
8901 && !avoiding_indexed_address_p (mode)
8902 && legitimate_indexed_address_p (x, reg_ok_strict))
8903 return 1;
8904 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8905 && mode_supports_pre_modify_p (mode)
8906 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8907 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8908 reg_ok_strict, false)
8909 || (!avoiding_indexed_address_p (mode)
8910 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8911 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8913 /* There is no prefixed version of the load/store with update. */
8914 rtx addr = XEXP (x, 1);
8915 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8917 if (reg_offset_p && !quad_offset_p
8918 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8919 return 1;
8920 return 0;
8923 /* Debug version of rs6000_legitimate_address_p. */
8924 static bool
8925 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8926 bool reg_ok_strict)
8928 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8929 fprintf (stderr,
8930 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8931 "strict = %d, reload = %s, code = %s\n",
8932 ret ? "true" : "false",
8933 GET_MODE_NAME (mode),
8934 reg_ok_strict,
8935 (reload_completed ? "after" : "before"),
8936 GET_RTX_NAME (GET_CODE (x)));
8937 debug_rtx (x);
8939 return ret;
8942 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8944 static bool
8945 rs6000_mode_dependent_address_p (const_rtx addr,
8946 addr_space_t as ATTRIBUTE_UNUSED)
8948 return rs6000_mode_dependent_address_ptr (addr);
8951 /* Go to LABEL if ADDR (a legitimate address expression)
8952 has an effect that depends on the machine mode it is used for.
8954 On the RS/6000 this is true of all integral offsets (since AltiVec
8955 and VSX modes don't allow them) and of any pre-increment or decrement.
8957 ??? Except that due to conceptual problems in offsettable_address_p
8958 we can't really report the problems of integral offsets. So leave
8959 this assuming that the adjustable offset must be valid for the
8960 sub-words of a TFmode operand, which is what we had before. */
8962 static bool
8963 rs6000_mode_dependent_address (const_rtx addr)
8965 switch (GET_CODE (addr))
8967 case PLUS:
8968 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8969 is considered a legitimate address before reload, so there
8970 are no offset restrictions in that case. Note that this
8971 condition is safe in strict mode because any address involving
8972 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8973 been rejected as illegitimate. */
8974 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8975 && XEXP (addr, 0) != arg_pointer_rtx
8976 && CONST_INT_P (XEXP (addr, 1)))
8978 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8979 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8980 if (TARGET_PREFIXED)
8981 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8982 else
8983 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8985 break;
8987 case LO_SUM:
8988 /* Anything in the constant pool is sufficiently aligned that
8989 all bytes have the same high part address. */
8990 return !legitimate_constant_pool_address_p (addr, QImode, false);
8992 /* Auto-increment cases are now treated generically in recog.c. */
8993 case PRE_MODIFY:
8994 return TARGET_UPDATE;
8996 /* AND is only allowed in Altivec loads. */
8997 case AND:
8998 return true;
9000 default:
9001 break;
9004 return false;
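/* Illustration (assuming SIGNED_16BIT_OFFSET_EXTRA_P (VAL, EXTRA)
   accepts the range [-32768, 32767 - EXTRA]): on a 64-bit target,
   val = 32760 with extra = 8 is rejected, so (plus reg 32760) is
   mode-dependent; the word at offset 32760 encodes, but a second
   word 8 bytes further, at 32768, would not.  */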
9007 /* Debug version of rs6000_mode_dependent_address. */
9008 static bool
9009 rs6000_debug_mode_dependent_address (const_rtx addr)
9011 bool ret = rs6000_mode_dependent_address (addr);
9013 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9014 ret ? "true" : "false");
9015 debug_rtx (addr);
9017 return ret;
9020 /* Implement FIND_BASE_TERM. */
9022 rtx
9023 rs6000_find_base_term (rtx op)
9025 rtx base;
9027 base = op;
9028 if (GET_CODE (base) == CONST)
9029 base = XEXP (base, 0);
9030 if (GET_CODE (base) == PLUS)
9031 base = XEXP (base, 0);
9032 if (GET_CODE (base) == UNSPEC)
9033 switch (XINT (base, 1))
9035 case UNSPEC_TOCREL:
9036 case UNSPEC_MACHOPIC_OFFSET:
9037 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9038 for aliasing purposes. */
9039 return XVECEXP (base, 0, 0);
9042 return op;
9045 /* More elaborate version of recog's offsettable_memref_p predicate
9046 that works around the ??? note of rs6000_mode_dependent_address.
9047 In particular it accepts
9049 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9051 in 32-bit mode, which the recog predicate rejects. */
9053 static bool
9054 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9056 bool worst_case;
9058 if (!MEM_P (op))
9059 return false;
9061 /* First mimic offsettable_memref_p. */
9062 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9063 return true;
9065 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9066 the latter predicate knows nothing about the mode of the memory
9067 reference and, therefore, assumes that it is the largest supported
9068 mode (TFmode). As a consequence, legitimate offsettable memory
9069 references are rejected. rs6000_legitimate_offset_address_p contains
9070 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9071 at least with a little bit of help here given that we know the
9072 actual registers used. */
9073 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9074 || GET_MODE_SIZE (reg_mode) == 4);
9075 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9076 strict, worst_case);
9079 /* Determine the reassociation width to be used in reassociate_bb.
9080 This takes into account how many parallel operations we
9081 can actually do of a given type, and also the latency.
9083 int add/sub 6/cycle
9084 mul 2/cycle
9085 vect add/sub/mul 2/cycle
9086 fp add/sub/mul 2/cycle
9087 dfp 1/cycle
9090 static int
9091 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9092 machine_mode mode)
9094 switch (rs6000_tune)
9096 case PROCESSOR_POWER8:
9097 case PROCESSOR_POWER9:
9098 case PROCESSOR_FUTURE:
9099 if (DECIMAL_FLOAT_MODE_P (mode))
9100 return 1;
9101 if (VECTOR_MODE_P (mode))
9102 return 4;
9103 if (INTEGRAL_MODE_P (mode))
9104 return 1;
9105 if (FLOAT_MODE_P (mode))
9106 return 4;
9107 break;
9108 default:
9109 break;
9111 return 1;
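/* For example, on POWER9 the function above returns 4 for V4SFmode
   and DFmode but 1 for DImode, so reassociate_bb may build up to four
   parallel chains for floating-point and vector reductions while
   integer chains stay serial.  */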
9114 /* Change register usage conditional on target flags. */
9115 static void
9116 rs6000_conditional_register_usage (void)
9118 int i;
9120 if (TARGET_DEBUG_TARGET)
9121 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9123 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9124 if (TARGET_64BIT)
9125 fixed_regs[13] = call_used_regs[13] = 1;
9127 /* Conditionally disable FPRs. */
9128 if (TARGET_SOFT_FLOAT)
9129 for (i = 32; i < 64; i++)
9130 fixed_regs[i] = call_used_regs[i] = 1;
9132 /* The TOC register is not killed across calls in a way that is
9133 visible to the compiler. */
9134 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9135 call_used_regs[2] = 0;
9137 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9138 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9140 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9141 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9142 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9144 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9145 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9146 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9148 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9149 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9151 if (!TARGET_ALTIVEC && !TARGET_VSX)
9153 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9154 fixed_regs[i] = call_used_regs[i] = 1;
9155 call_used_regs[VRSAVE_REGNO] = 1;
9158 if (TARGET_ALTIVEC || TARGET_VSX)
9159 global_regs[VSCR_REGNO] = 1;
9161 if (TARGET_ALTIVEC_ABI)
9163 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9164 call_used_regs[i] = 1;
9166 /* AIX reserves VR20:31 in non-extended ABI mode. */
9167 if (TARGET_XCOFF)
9168 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9169 fixed_regs[i] = call_used_regs[i] = 1;
9174 /* Output insns to set DEST equal to the constant SOURCE as a series of
9175 lis, ori and shl instructions and return TRUE. */
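/* For example (a sketch of the SImode case below; rD is a placeholder
   register): c = 0x12345678 is built as the equivalent of

       lis  rD,0x1234      # rD = 0x12340000
       ori  rD,rD,0x5678   # rD |= 0x5678  */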
9177 bool
9178 rs6000_emit_set_const (rtx dest, rtx source)
9180 machine_mode mode = GET_MODE (dest);
9181 rtx temp, set;
9182 rtx_insn *insn;
9183 HOST_WIDE_INT c;
9185 gcc_checking_assert (CONST_INT_P (source));
9186 c = INTVAL (source);
9187 switch (mode)
9189 case E_QImode:
9190 case E_HImode:
9191 emit_insn (gen_rtx_SET (dest, source));
9192 return true;
9194 case E_SImode:
9195 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9197 emit_insn (gen_rtx_SET (copy_rtx (temp),
9198 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9199 emit_insn (gen_rtx_SET (dest,
9200 gen_rtx_IOR (SImode, copy_rtx (temp),
9201 GEN_INT (c & 0xffff))));
9202 break;
9204 case E_DImode:
9205 if (!TARGET_POWERPC64)
9207 rtx hi, lo;
9209 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9210 DImode);
9211 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9212 DImode);
9213 emit_move_insn (hi, GEN_INT (c >> 32));
9214 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9215 emit_move_insn (lo, GEN_INT (c));
9217 else
9218 rs6000_emit_set_long_const (dest, c);
9219 break;
9221 default:
9222 gcc_unreachable ();
9225 insn = get_last_insn ();
9226 set = single_set (insn);
9227 if (! CONSTANT_P (SET_SRC (set)))
9228 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9230 return true;
9233 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9234 Output insns to set DEST equal to the constant C as a series of
9235 lis, ori and shl instructions. */
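/* Worked example (a sketch; the exact sequence depends on which 16-bit
   groups of C are zero or sign-extendable): the general case below builds
   c = 0x123456789abcdef0 as the equivalent of

       lis   rD,0x1234          # ud4
       ori   rD,rD,0x5678       # ud3
       sldi  rD,rD,32           # move to the high half
       oris  rD,rD,0x9abc       # ud2
       ori   rD,rD,0xdef0       # ud1  */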
9237 static void
9238 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9240 rtx temp;
9241 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9243 ud1 = c & 0xffff;
9244 c = c >> 16;
9245 ud2 = c & 0xffff;
9246 c = c >> 16;
9247 ud3 = c & 0xffff;
9248 c = c >> 16;
9249 ud4 = c & 0xffff;
9251 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9252 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9253 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9255 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9256 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9258 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9260 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9261 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9262 if (ud1 != 0)
9263 emit_move_insn (dest,
9264 gen_rtx_IOR (DImode, copy_rtx (temp),
9265 GEN_INT (ud1)));
9267 else if (ud3 == 0 && ud4 == 0)
9269 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9271 gcc_assert (ud2 & 0x8000);
9272 emit_move_insn (copy_rtx (temp),
9273 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9274 if (ud1 != 0)
9275 emit_move_insn (copy_rtx (temp),
9276 gen_rtx_IOR (DImode, copy_rtx (temp),
9277 GEN_INT (ud1)));
9278 emit_move_insn (dest,
9279 gen_rtx_ZERO_EXTEND (DImode,
9280 gen_lowpart (SImode,
9281 copy_rtx (temp))));
9283 else if (ud1 == ud3 && ud2 == ud4)
9285 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9286 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9287 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9288 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9289 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9290 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9292 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9293 || (ud4 == 0 && ! (ud3 & 0x8000)))
9295 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9297 emit_move_insn (copy_rtx (temp),
9298 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9299 if (ud2 != 0)
9300 emit_move_insn (copy_rtx (temp),
9301 gen_rtx_IOR (DImode, copy_rtx (temp),
9302 GEN_INT (ud2)));
9303 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9304 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9305 GEN_INT (16)));
9306 if (ud1 != 0)
9307 emit_move_insn (dest,
9308 gen_rtx_IOR (DImode, copy_rtx (temp),
9309 GEN_INT (ud1)));
9311 else
9313 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9315 emit_move_insn (copy_rtx (temp),
9316 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9317 if (ud3 != 0)
9318 emit_move_insn (copy_rtx (temp),
9319 gen_rtx_IOR (DImode, copy_rtx (temp),
9320 GEN_INT (ud3)));
9322 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9323 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9324 GEN_INT (32)));
9325 if (ud2 != 0)
9326 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9327 gen_rtx_IOR (DImode, copy_rtx (temp),
9328 GEN_INT (ud2 << 16)));
9329 if (ud1 != 0)
9330 emit_move_insn (dest,
9331 gen_rtx_IOR (DImode, copy_rtx (temp),
9332 GEN_INT (ud1)));
9336 /* Helper for the following function. Get rid of [r+r] memory refs
9337 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
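/* For instance, a TImode reference through an indexed address,

       (mem:TI (plus:DI (reg:DI rA) (reg:DI rB)))

   is rewritten so the sum goes into a temporary base register first,
   leaving a plain (mem:TI (reg:DI tmp)) that the multi-register move
   patterns can handle.  */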
9339 static void
9340 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9342 if (MEM_P (operands[0])
9343 && !REG_P (XEXP (operands[0], 0))
9344 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9345 GET_MODE (operands[0]), false))
9346 operands[0]
9347 = replace_equiv_address (operands[0],
9348 copy_addr_to_reg (XEXP (operands[0], 0)));
9350 if (MEM_P (operands[1])
9351 && !REG_P (XEXP (operands[1], 0))
9352 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9353 GET_MODE (operands[1]), false))
9354 operands[1]
9355 = replace_equiv_address (operands[1],
9356 copy_addr_to_reg (XEXP (operands[1], 0)));
9359 /* Generate a vector of constants to permute MODE for a little-endian
9360 storage operation by swapping the two halves of a vector. */
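/* For V4SImode, for example, the constant vector built below is
   {2, 3, 0, 1}: the elements of the second doubleword are selected
   first, swapping the two 64-bit halves.  */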
9361 static rtvec
9362 rs6000_const_vec (machine_mode mode)
9364 int i, subparts;
9365 rtvec v;
9367 switch (mode)
9369 case E_V1TImode:
9370 subparts = 1;
9371 break;
9372 case E_V2DFmode:
9373 case E_V2DImode:
9374 subparts = 2;
9375 break;
9376 case E_V4SFmode:
9377 case E_V4SImode:
9378 subparts = 4;
9379 break;
9380 case E_V8HImode:
9381 subparts = 8;
9382 break;
9383 case E_V16QImode:
9384 subparts = 16;
9385 break;
9386 default:
9387 gcc_unreachable ();
9390 v = rtvec_alloc (subparts);
9392 for (i = 0; i < subparts / 2; ++i)
9393 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9394 for (i = subparts / 2; i < subparts; ++i)
9395 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9397 return v;
9400 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9401 store operation. */
9402 void
9403 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9405 /* Scalar permutations are easier to express in integer modes than in
9406 floating-point modes, so cast them here. We use V1TImode instead
9407 of TImode to ensure that the values don't go through GPRs. */
9408 if (FLOAT128_VECTOR_P (mode))
9410 dest = gen_lowpart (V1TImode, dest);
9411 source = gen_lowpart (V1TImode, source);
9412 mode = V1TImode;
9415 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9416 scalar. */
9417 if (mode == TImode || mode == V1TImode)
9418 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9419 GEN_INT (64))));
9420 else
9422 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9423 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9427 /* Emit a little-endian load from vector memory location SOURCE to VSX
9428 register DEST in mode MODE. The load is done with two permuting
9429 insns that represent an lxvd2x and xxpermdi. */
9430 void
9431 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9433 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9434 V1TImode). */
9435 if (mode == TImode || mode == V1TImode)
9437 mode = V2DImode;
9438 dest = gen_lowpart (V2DImode, dest);
9439 source = adjust_address (source, V2DImode, 0);
9442 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9443 rs6000_emit_le_vsx_permute (tmp, source, mode);
9444 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9447 /* Emit a little-endian store to vector memory location DEST from VSX
9448 register SOURCE in mode MODE. The store is done with two permuting
9449 insns that represent an xxpermdi and an stxvd2x. */
9450 void
9451 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9453 /* This should never be called during or after LRA, because it does
9454 not re-permute the source register. It is intended only for use
9455 during expand. */
9456 gcc_assert (!lra_in_progress && !reload_completed);
9458 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9459 V1TImode). */
9460 if (mode == TImode || mode == V1TImode)
9462 mode = V2DImode;
9463 dest = adjust_address (dest, V2DImode, 0);
9464 source = gen_lowpart (V2DImode, source);
9467 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9468 rs6000_emit_le_vsx_permute (tmp, source, mode);
9469 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9472 /* Emit a sequence representing a little-endian VSX load or store,
9473 moving data from SOURCE to DEST in mode MODE. This is done
9474 separately from rs6000_emit_move to ensure it is called only
9475 during expand. LE VSX loads and stores introduced later are
9476 handled with a split. The expand-time RTL generation allows
9477 us to optimize away redundant pairs of register-permutes. */
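/* As an illustration (a sketch; vD/rA are placeholder registers): an LE
   V4SImode load is emitted as the equivalent of

       lxvd2x   vD,0,rA        # doublewords arrive swapped
       xxpermdi vD,vD,vD,2     # swap them back into element order

   and a store is the mirror image; when the permutes of adjacent pairs
   turn out to be redundant, they can be optimized away.  */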
9478 void
9479 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9481 gcc_assert (!BYTES_BIG_ENDIAN
9482 && VECTOR_MEM_VSX_P (mode)
9483 && !TARGET_P9_VECTOR
9484 && !gpr_or_gpr_p (dest, source)
9485 && (MEM_P (source) ^ MEM_P (dest)));
9487 if (MEM_P (source))
9489 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9490 rs6000_emit_le_vsx_load (dest, source, mode);
9492 else
9494 if (!REG_P (source))
9495 source = force_reg (mode, source);
9496 rs6000_emit_le_vsx_store (dest, source, mode);
9500 /* Return whether an SFmode or SImode move can be done without converting one
9501 mode to another. This arises when we have:
9503 (SUBREG:SF (REG:SI ...))
9504 (SUBREG:SI (REG:SF ...))
9506 and one of the values is in a floating point/vector register, where SFmode
9507 scalars are stored in DFmode format. */
9509 bool
9510 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9512 if (TARGET_ALLOW_SF_SUBREG)
9513 return true;
9515 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9516 return true;
9518 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9519 return true;
9521 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9522 if (SUBREG_P (dest))
9524 rtx dest_subreg = SUBREG_REG (dest);
9525 rtx src_subreg = SUBREG_REG (src);
9526 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9529 return false;
9533 /* Helper function to change moves with:
9535 (SUBREG:SF (REG:SI)) and
9536 (SUBREG:SI (REG:SF))
9538 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9539 values are stored as DFmode values in the VSX registers. We need to convert
9540 the bits before we can use a direct move or operate on the bits in the
9541 vector register as an integer type.
9543 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
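/* For example (a sketch): type-punning a float to an int,

       union { float f; int i; } u;
       u.f = x;
       ... u.i ...

   can reach here as (set (reg:SI) (subreg:SI (reg:SF))); since the SF
   value lives in DFmode format in a VSX register, gen_movsi_from_sf is
   used to convert the bits before the direct move.  */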
9545 static bool
9546 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9548 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9549 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9550 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9552 rtx inner_source = SUBREG_REG (source);
9553 machine_mode inner_mode = GET_MODE (inner_source);
9555 if (mode == SImode && inner_mode == SFmode)
9557 emit_insn (gen_movsi_from_sf (dest, inner_source));
9558 return true;
9561 if (mode == SFmode && inner_mode == SImode)
9563 emit_insn (gen_movsf_from_si (dest, inner_source));
9564 return true;
9568 return false;
9571 /* Emit a move from SOURCE to DEST in mode MODE. */
9572 void
9573 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9575 rtx operands[2];
9576 operands[0] = dest;
9577 operands[1] = source;
9579 if (TARGET_DEBUG_ADDR)
9581 fprintf (stderr,
9582 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9583 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9584 GET_MODE_NAME (mode),
9585 lra_in_progress,
9586 reload_completed,
9587 can_create_pseudo_p ());
9588 debug_rtx (dest);
9589 fprintf (stderr, "source:\n");
9590 debug_rtx (source);
9593 /* Check that we get CONST_WIDE_INT only when we should. */
9594 if (CONST_WIDE_INT_P (operands[1])
9595 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9596 gcc_unreachable ();
9598 #ifdef HAVE_AS_GNU_ATTRIBUTE
9599 /* If we use a long double type, set the flags in .gnu_attribute that say
9600 what the long double type is. This is to allow the linker's warning
9601 message for the wrong long double to be useful, even if the function does
9602 not do a call (for example, doing a 128-bit add on power9 if the long
9603 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9604 are used when they aren't the default long double type. */
9605 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9607 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9608 rs6000_passes_float = rs6000_passes_long_double = true;
9610 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9611 rs6000_passes_float = rs6000_passes_long_double = true;
9613 #endif
9615 /* See if we need to special case SImode/SFmode SUBREG moves. */
9616 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9617 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9618 return;
9620 /* Check if GCC is setting up a block move that will end up using FP
9621 registers as temporaries. We must make sure this is acceptable. */
9622 if (MEM_P (operands[0])
9623 && MEM_P (operands[1])
9624 && mode == DImode
9625 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9626 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9627 && ! (rs6000_slow_unaligned_access (SImode,
9628 (MEM_ALIGN (operands[0]) > 32
9629 ? 32 : MEM_ALIGN (operands[0])))
9630 || rs6000_slow_unaligned_access (SImode,
9631 (MEM_ALIGN (operands[1]) > 32
9632 ? 32 : MEM_ALIGN (operands[1]))))
9633 && ! MEM_VOLATILE_P (operands [0])
9634 && ! MEM_VOLATILE_P (operands [1]))
9636 emit_move_insn (adjust_address (operands[0], SImode, 0),
9637 adjust_address (operands[1], SImode, 0));
9638 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9639 adjust_address (copy_rtx (operands[1]), SImode, 4));
9640 return;
9643 if (can_create_pseudo_p () && MEM_P (operands[0])
9644 && !gpc_reg_operand (operands[1], mode))
9645 operands[1] = force_reg (mode, operands[1]);
9647 /* Recognize the case where operand[1] is a reference to thread-local
9648 data and load its address to a register. */
9649 if (tls_referenced_p (operands[1]))
9651 enum tls_model model;
9652 rtx tmp = operands[1];
9653 rtx addend = NULL;
9655 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9657 addend = XEXP (XEXP (tmp, 0), 1);
9658 tmp = XEXP (XEXP (tmp, 0), 0);
9661 gcc_assert (SYMBOL_REF_P (tmp));
9662 model = SYMBOL_REF_TLS_MODEL (tmp);
9663 gcc_assert (model != 0);
9665 tmp = rs6000_legitimize_tls_address (tmp, model);
9666 if (addend)
9668 tmp = gen_rtx_PLUS (mode, tmp, addend);
9669 tmp = force_operand (tmp, operands[0]);
9671 operands[1] = tmp;
9674 /* 128-bit constant floating-point values on Darwin should really be loaded
9675 as two parts. However, this premature splitting is a problem when DFmode
9676 values can go into Altivec registers. */
9677 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9678 && !reg_addr[DFmode].scalar_in_vmx_p)
9680 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9681 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9682 DFmode);
9683 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9684 GET_MODE_SIZE (DFmode)),
9685 simplify_gen_subreg (DFmode, operands[1], mode,
9686 GET_MODE_SIZE (DFmode)),
9687 DFmode);
9688 return;
9691 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9692 p1:SD) if p1 is not of floating point class and p0 is spilled, as
9693 we can have no analogous movsd_store for this. */
9694 if (lra_in_progress && mode == DDmode
9695 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9696 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9697 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9698 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9700 enum reg_class cl;
9701 int regno = REGNO (SUBREG_REG (operands[1]));
9703 if (!HARD_REGISTER_NUM_P (regno))
9705 cl = reg_preferred_class (regno);
9706 regno = reg_renumber[regno];
9707 if (regno < 0)
9708 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9710 if (regno >= 0 && ! FP_REGNO_P (regno))
9712 mode = SDmode;
9713 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9714 operands[1] = SUBREG_REG (operands[1]);
9717 if (lra_in_progress
9718 && mode == SDmode
9719 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9720 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9721 && (REG_P (operands[1])
9722 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9724 int regno = reg_or_subregno (operands[1]);
9725 enum reg_class cl;
9727 if (!HARD_REGISTER_NUM_P (regno))
9729 cl = reg_preferred_class (regno);
9730 gcc_assert (cl != NO_REGS);
9731 regno = reg_renumber[regno];
9732 if (regno < 0)
9733 regno = ira_class_hard_regs[cl][0];
9735 if (FP_REGNO_P (regno))
9737 if (GET_MODE (operands[0]) != DDmode)
9738 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9739 emit_insn (gen_movsd_store (operands[0], operands[1]));
9741 else if (INT_REGNO_P (regno))
9742 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9743 else
9744 gcc_unreachable ();
9745 return;
9747 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9748 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
9749 we can have no analogous movsd_load for this. */
9750 if (lra_in_progress && mode == DDmode
9751 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9752 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9753 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9754 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9756 enum reg_class cl;
9757 int regno = REGNO (SUBREG_REG (operands[0]));
9759 if (!HARD_REGISTER_NUM_P (regno))
9761 cl = reg_preferred_class (regno);
9762 regno = reg_renumber[regno];
9763 if (regno < 0)
9764 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9766 if (regno >= 0 && ! FP_REGNO_P (regno))
9768 mode = SDmode;
9769 operands[0] = SUBREG_REG (operands[0]);
9770 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9773 if (lra_in_progress
9774 && mode == SDmode
9775 && (REG_P (operands[0])
9776 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9777 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9778 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9780 int regno = reg_or_subregno (operands[0]);
9781 enum reg_class cl;
9783 if (!HARD_REGISTER_NUM_P (regno))
9785 cl = reg_preferred_class (regno);
9786 gcc_assert (cl != NO_REGS);
9787 regno = reg_renumber[regno];
9788 if (regno < 0)
9789 regno = ira_class_hard_regs[cl][0];
9791 if (FP_REGNO_P (regno))
9793 if (GET_MODE (operands[1]) != DDmode)
9794 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9795 emit_insn (gen_movsd_load (operands[0], operands[1]));
9797 else if (INT_REGNO_P (regno))
9798 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9799 else
9800 gcc_unreachable ();
9801 return;
9804 /* FIXME: In the long term, this switch statement should go away
9805 and be replaced by a sequence of tests based on things like
9806 mode == Pmode. */
9807 switch (mode)
9809 case E_HImode:
9810 case E_QImode:
9811 if (CONSTANT_P (operands[1])
9812 && !CONST_INT_P (operands[1]))
9813 operands[1] = force_const_mem (mode, operands[1]);
9814 break;
9816 case E_TFmode:
9817 case E_TDmode:
9818 case E_IFmode:
9819 case E_KFmode:
9820 if (FLOAT128_2REG_P (mode))
9821 rs6000_eliminate_indexed_memrefs (operands);
9822 /* fall through */
9824 case E_DFmode:
9825 case E_DDmode:
9826 case E_SFmode:
9827 case E_SDmode:
9828 if (CONSTANT_P (operands[1])
9829 && ! easy_fp_constant (operands[1], mode))
9830 operands[1] = force_const_mem (mode, operands[1]);
9831 break;
9833 case E_V16QImode:
9834 case E_V8HImode:
9835 case E_V4SFmode:
9836 case E_V4SImode:
9837 case E_V2DFmode:
9838 case E_V2DImode:
9839 case E_V1TImode:
9840 if (CONSTANT_P (operands[1])
9841 && !easy_vector_constant (operands[1], mode))
9842 operands[1] = force_const_mem (mode, operands[1]);
9843 break;
9845 case E_SImode:
9846 case E_DImode:
9847 /* Use the default pattern for the address of ELF small data. */
9848 if (TARGET_ELF
9849 && mode == Pmode
9850 && DEFAULT_ABI == ABI_V4
9851 && (SYMBOL_REF_P (operands[1])
9852 || GET_CODE (operands[1]) == CONST)
9853 && small_data_operand (operands[1], mode))
9855 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9856 return;
9859 /* Use the default pattern for loading up PC-relative addresses. */
9860 if (TARGET_PCREL && mode == Pmode
9861 && pcrel_local_or_external_address (operands[1], Pmode))
9863 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9864 return;
9867 if (DEFAULT_ABI == ABI_V4
9868 && mode == Pmode && mode == SImode
9869 && flag_pic == 1 && got_operand (operands[1], mode))
9871 emit_insn (gen_movsi_got (operands[0], operands[1]));
9872 return;
9875 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9876 && TARGET_NO_TOC_OR_PCREL
9877 && ! flag_pic
9878 && mode == Pmode
9879 && CONSTANT_P (operands[1])
9880 && GET_CODE (operands[1]) != HIGH
9881 && !CONST_INT_P (operands[1]))
9883 rtx target = (!can_create_pseudo_p ()
9884 ? operands[0]
9885 : gen_reg_rtx (mode));
9887 /* If this is a function address on -mcall-aixdesc,
9888 convert it to the address of the descriptor. */
9889 if (DEFAULT_ABI == ABI_AIX
9890 && SYMBOL_REF_P (operands[1])
9891 && XSTR (operands[1], 0)[0] == '.')
9893 const char *name = XSTR (operands[1], 0);
9894 rtx new_ref;
9895 while (*name == '.')
9896 name++;
9897 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9898 CONSTANT_POOL_ADDRESS_P (new_ref)
9899 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9900 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9901 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9902 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9903 operands[1] = new_ref;
9906 if (DEFAULT_ABI == ABI_DARWIN)
9908 #if TARGET_MACHO
9909 /* This is not PIC code, but could require the subset of
9910 indirections used by mdynamic-no-pic. */
9911 if (MACHO_DYNAMIC_NO_PIC_P)
9913 /* Take care of any required data indirection. */
9914 operands[1] = rs6000_machopic_legitimize_pic_address (
9915 operands[1], mode, operands[0]);
9916 if (operands[0] != operands[1])
9917 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9918 return;
9920 #endif
9921 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9922 emit_insn (gen_macho_low (Pmode, operands[0],
9923 target, operands[1]));
9924 return;
9927 emit_insn (gen_elf_high (target, operands[1]));
9928 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9929 return;
9932 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9933 and we have put it in the TOC, we just need to make a TOC-relative
9934 reference to it. */
9935 if (TARGET_TOC
9936 && SYMBOL_REF_P (operands[1])
9937 && use_toc_relative_ref (operands[1], mode))
9938 operands[1] = create_TOC_reference (operands[1], operands[0]);
9939 else if (mode == Pmode
9940 && CONSTANT_P (operands[1])
9941 && GET_CODE (operands[1]) != HIGH
9942 && ((REG_P (operands[0])
9943 && FP_REGNO_P (REGNO (operands[0])))
9944 || !CONST_INT_P (operands[1])
9945 || (num_insns_constant (operands[1], mode)
9946 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9947 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9948 && (TARGET_CMODEL == CMODEL_SMALL
9949 || can_create_pseudo_p ()
9950 || (REG_P (operands[0])
9951 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9954 #if TARGET_MACHO
9955 /* Darwin uses a special PIC legitimizer. */
9956 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9958 operands[1] =
9959 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9960 operands[0]);
9961 if (operands[0] != operands[1])
9962 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9963 return;
9965 #endif
9967 /* If we are to limit the number of things we put in the TOC and
9968 this is a symbol plus a constant we can add in one insn,
9969 just put the symbol in the TOC and add the constant. */
9970 if (GET_CODE (operands[1]) == CONST
9971 && TARGET_NO_SUM_IN_TOC
9972 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9973 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9974 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9975 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9976 && ! side_effects_p (operands[0]))
9978 rtx sym =
9979 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9980 rtx other = XEXP (XEXP (operands[1], 0), 1);
9982 sym = force_reg (mode, sym);
9983 emit_insn (gen_add3_insn (operands[0], sym, other));
9984 return;
9987 operands[1] = force_const_mem (mode, operands[1]);
9989 if (TARGET_TOC
9990 && SYMBOL_REF_P (XEXP (operands[1], 0))
9991 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9993 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9994 operands[0]);
9995 operands[1] = gen_const_mem (mode, tocref);
9996 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9999 break;
10001 case E_TImode:
10002 if (!VECTOR_MEM_VSX_P (TImode))
10003 rs6000_eliminate_indexed_memrefs (operands);
10004 break;
10006 case E_PTImode:
10007 rs6000_eliminate_indexed_memrefs (operands);
10008 break;
10010 default:
10011 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10014 /* Above, we may have called force_const_mem which may have returned
10015 an invalid address. If we can, fix this up; otherwise, reload will
10016 have to deal with it. */
10017 if (MEM_P (operands[1]))
10018 operands[1] = validize_mem (operands[1]);
10020 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10024 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10025 static void
10026 init_float128_ibm (machine_mode mode)
10028 if (!TARGET_XL_COMPAT)
10030 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10031 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10032 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10033 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10035 if (!TARGET_HARD_FLOAT)
10037 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10038 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10039 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10040 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10041 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10042 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10043 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10044 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10046 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10047 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10048 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10049 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10050 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10051 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10052 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10053 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10056 else
10058 set_optab_libfunc (add_optab, mode, "_xlqadd");
10059 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10060 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10061 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10064 /* Add various conversions for IFmode to use the traditional TFmode
10065 names. */
10066 if (mode == IFmode)
10068 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10069 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10070 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10071 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10072 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10073 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10075 if (TARGET_POWERPC64)
10077 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10078 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10079 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10080 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10085 /* Create a decl for either complex long double multiply or complex long double
10086 divide when long double is IEEE 128-bit floating point. We can't use
10087 __multc3 and __divtc3 because the original long double using IBM extended
10088 double used those names. The complex multiply/divide functions are encoded
10089 as builtin functions with a complex result and 4 scalar inputs. */
10091 static void
10092 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10094 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10095 name, NULL_TREE);
10097 set_builtin_decl (fncode, fndecl, true);
10099 if (TARGET_DEBUG_BUILTIN)
10100 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10102 return;
10105 /* Set up IEEE 128-bit floating point routines. Use different names if the
10106 arguments can be passed in a vector register. The historical PowerPC
10107 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10108 continue to use that if we aren't using vector registers to pass IEEE
10109 128-bit floating point. */
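/* For instance (a sketch, assuming no hardware IEEE 128-bit arithmetic
   instructions): with these mappings,

       _Float128 add (_Float128 a, _Float128 b) { return a + b; }

   compiles to a call to __addkf3, or to _q_add when IEEE 128-bit values
   are not passed in vector registers.  */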
10111 static void
10112 init_float128_ieee (machine_mode mode)
10114 if (FLOAT128_VECTOR_P (mode))
10116 static bool complex_muldiv_init_p = false;
10118 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10119 we have clone or target attributes, this will be called a second
10120 time. We want to create the built-in function only once. */
10121 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10123 complex_muldiv_init_p = true;
10124 built_in_function fncode_mul =
10125 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10126 - MIN_MODE_COMPLEX_FLOAT);
10127 built_in_function fncode_div =
10128 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10129 - MIN_MODE_COMPLEX_FLOAT);
10131 tree fntype = build_function_type_list (complex_long_double_type_node,
10132 long_double_type_node,
10133 long_double_type_node,
10134 long_double_type_node,
10135 long_double_type_node,
10136 NULL_TREE);
10138 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10139 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10142 set_optab_libfunc (add_optab, mode, "__addkf3");
10143 set_optab_libfunc (sub_optab, mode, "__subkf3");
10144 set_optab_libfunc (neg_optab, mode, "__negkf2");
10145 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10146 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10147 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10148 set_optab_libfunc (abs_optab, mode, "__abskf2");
10149 set_optab_libfunc (powi_optab, mode, "__powikf2");
10151 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10152 set_optab_libfunc (ne_optab, mode, "__nekf2");
10153 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10154 set_optab_libfunc (ge_optab, mode, "__gekf2");
10155 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10156 set_optab_libfunc (le_optab, mode, "__lekf2");
10157 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10159 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10160 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10161 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10162 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10164 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10165 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10166 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10168 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10169 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10170 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10172 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10173 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10174 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10175 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10176 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10177 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10179 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10180 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10181 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10182 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10184 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10185 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10186 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10187 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10189 if (TARGET_POWERPC64)
10191 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10192 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10193 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10194 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10198 else
10200 set_optab_libfunc (add_optab, mode, "_q_add");
10201 set_optab_libfunc (sub_optab, mode, "_q_sub");
10202 set_optab_libfunc (neg_optab, mode, "_q_neg");
10203 set_optab_libfunc (smul_optab, mode, "_q_mul");
10204 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10205 if (TARGET_PPC_GPOPT)
10206 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10208 set_optab_libfunc (eq_optab, mode, "_q_feq");
10209 set_optab_libfunc (ne_optab, mode, "_q_fne");
10210 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10211 set_optab_libfunc (ge_optab, mode, "_q_fge");
10212 set_optab_libfunc (lt_optab, mode, "_q_flt");
10213 set_optab_libfunc (le_optab, mode, "_q_fle");
10215 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10216 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10217 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10218 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10219 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10220 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10221 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10222 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10226 static void
10227 rs6000_init_libfuncs (void)
10229 /* __float128 support. */
10230 if (TARGET_FLOAT128_TYPE)
10232 init_float128_ibm (IFmode);
10233 init_float128_ieee (KFmode);
10236 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10237 if (TARGET_LONG_DOUBLE_128)
10239 if (!TARGET_IEEEQUAD)
10240 init_float128_ibm (TFmode);
10242 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10243 else
10244 init_float128_ieee (TFmode);
10248 /* Emit a potentially record-form instruction, setting DST from SRC.
10249 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10250 signed comparison of DST with zero. If DOT is 1, the generated RTL
10251 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10252 is CR0, do a single dot insn (as a PARALLEL); otherwise, do a SET and
10253 a separate COMPARE. */
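/* For example (a sketch; rD/rA/rB are placeholder registers): with DOT = 1
   and CCREG = CR0, an AND of two GPRs is emitted as the single record-form
   instruction

       and. rD,rA,rB       # also sets CR0 from a signed compare with 0

   represented as a PARALLEL of the CR0 compare and, for DOT = 1, a clobber
   of rD (a SET of rD for DOT = 2).  */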
10255 void
10256 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10258 if (dot == 0)
10260 emit_move_insn (dst, src);
10261 return;
10264 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10266 emit_move_insn (dst, src);
10267 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10268 return;
10271 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10272 if (dot == 1)
10274 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10275 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10277 else
10279 rtx set = gen_rtx_SET (dst, src);
10280 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
10285 /* A validation routine: say whether CODE, a condition code, and MODE
10286 match. The other alternatives either don't make sense or should
10287 never be generated. */
10289 void
10290 validate_condition_mode (enum rtx_code code, machine_mode mode)
10292 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10293 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10294 && GET_MODE_CLASS (mode) == MODE_CC);
10296 /* These don't make sense. */
10297 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10298 || mode != CCUNSmode);
10300 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10301 || mode == CCUNSmode);
10303 gcc_assert (mode == CCFPmode
10304 || (code != ORDERED && code != UNORDERED
10305 && code != UNEQ && code != LTGT
10306 && code != UNGT && code != UNLT
10307 && code != UNGE && code != UNLE));
10309 /* These are invalid; the information is not there. */
10310 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10314 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10315 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10316 not zero, store there the bit offset (counted from the right) where
10317 the single stretch of 1 bits begins; and similarly for B, the bit
10318 offset where it ends. */
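/* Worked example: MASK = 0x00ff0000 in SImode has its single run of ones
   in bits 16..23, so *E is set to 16 and *B to 23.  Wrap-around masks such
   as 0xff0000ff are also valid (then B < E), since rotate insns allow
   masks that wrap around.  */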
10320 bool
10321 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10323 unsigned HOST_WIDE_INT val = INTVAL (mask);
10324 unsigned HOST_WIDE_INT bit;
10325 int nb, ne;
10326 int n = GET_MODE_PRECISION (mode);
10328 if (mode != DImode && mode != SImode)
10329 return false;
10331 if (INTVAL (mask) >= 0)
10333 bit = val & -val;
10334 ne = exact_log2 (bit);
10335 nb = exact_log2 (val + bit);
10337 else if (val + 1 == 0)
10339 nb = n;
10340 ne = 0;
10342 else if (val & 1)
10344 val = ~val;
10345 bit = val & -val;
10346 nb = exact_log2 (bit);
10347 ne = exact_log2 (val + bit);
10349 else
10351 bit = val & -val;
10352 ne = exact_log2 (bit);
10353 if (val + bit == 0)
10354 nb = n;
10355 else
10356 nb = 0;
10359 nb--;
10361 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10362 return false;
10364 if (b)
10365 *b = nb;
10366 if (e)
10367 *e = ne;
10369 return true;
10372 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10373 or rldicr instruction, to implement an AND with it in mode MODE. */
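/* For example: 0x00000ff0 is valid in SImode (rlwinm rD,rS,0,20,27), and
   0xff in DImode has ne = 0, so it is done with rldicl rD,rS,0,56.  */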
10375 bool
10376 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10378 int nb, ne;
10380 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10381 return false;
10383 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
10384 does not wrap. */
10385 if (mode == DImode)
10386 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10388 /* For SImode, rlwinm can do everything. */
10389 if (mode == SImode)
10390 return (nb < 32 && ne < 32);
10392 return false;
10395 /* Return the instruction template for an AND with mask in mode MODE, with
10396 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10398 const char *
10399 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10401 int nb, ne;
10403 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10404 gcc_unreachable ();
10406 if (mode == DImode && ne == 0)
10408 operands[3] = GEN_INT (63 - nb);
10409 if (dot)
10410 return "rldicl. %0,%1,0,%3";
10411 return "rldicl %0,%1,0,%3";
10414 if (mode == DImode && nb == 63)
10416 operands[3] = GEN_INT (63 - ne);
10417 if (dot)
10418 return "rldicr. %0,%1,0,%3";
10419 return "rldicr %0,%1,0,%3";
10422 if (nb < 32 && ne < 32)
10424 operands[3] = GEN_INT (31 - nb);
10425 operands[4] = GEN_INT (31 - ne);
10426 if (dot)
10427 return "rlwinm. %0,%1,0,%3,%4";
10428 return "rlwinm %0,%1,0,%3,%4";
10431 gcc_unreachable ();
10434 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10435 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10436 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
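/* For example: an SImode (ashift:SI x 16) under the mask 0xffff0000
   (nb = 31, ne = 16, sh = 16) is valid and becomes a single
   rlwinm rD,rS,16,0,15.  */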
10438 bool
10439 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10441 int nb, ne;
10443 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10444 return false;
10446 int n = GET_MODE_PRECISION (mode);
10447 int sh = -1;
10449 if (CONST_INT_P (XEXP (shift, 1)))
10451 sh = INTVAL (XEXP (shift, 1));
10452 if (sh < 0 || sh >= n)
10453 return false;
10456 rtx_code code = GET_CODE (shift);
10458 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10459 if (sh == 0)
10460 code = ROTATE;
10462 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10463 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10464 code = ASHIFT;
10465 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10467 code = LSHIFTRT;
10468 sh = n - sh;
10471 /* DImode rotates need rld*. */
10472 if (mode == DImode && code == ROTATE)
10473 return (nb == 63 || ne == 0 || ne == sh);
10475 /* SImode rotates need rlw*. */
10476 if (mode == SImode && code == ROTATE)
10477 return (nb < 32 && ne < 32 && sh < 32);
10479 /* Wrap-around masks are only okay for rotates. */
10480 if (ne > nb)
10481 return false;
10483 /* Variable shifts are only okay for rotates. */
10484 if (sh < 0)
10485 return false;
10487 /* Don't allow ASHIFT if the mask is wrong for that. */
10488 if (code == ASHIFT && ne < sh)
10489 return false;
10491 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10492 if the mask is wrong for that. */
10493 if (nb < 32 && ne < 32 && sh < 32
10494 && !(code == LSHIFTRT && nb >= 32 - sh))
10495 return true;
10497 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10498 if the mask is wrong for that. */
10499 if (code == LSHIFTRT)
10500 sh = 64 - sh;
10501 if (nb == 63 || ne == 0 || ne == sh)
10502 return !(code == LSHIFTRT && nb >= sh);
10504 return false;
10507 /* Return the instruction template for a shift with mask in mode MODE, with
10508 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10510 const char *
10511 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10513 int nb, ne;
10515 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10516 gcc_unreachable ();
10518 if (mode == DImode && ne == 0)
10520 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10521 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10522 operands[3] = GEN_INT (63 - nb);
10523 if (dot)
10524 return "rld%I2cl. %0,%1,%2,%3";
10525 return "rld%I2cl %0,%1,%2,%3";
10528 if (mode == DImode && nb == 63)
10530 operands[3] = GEN_INT (63 - ne);
10531 if (dot)
10532 return "rld%I2cr. %0,%1,%2,%3";
10533 return "rld%I2cr %0,%1,%2,%3";
10536 if (mode == DImode
10537 && GET_CODE (operands[4]) != LSHIFTRT
10538 && CONST_INT_P (operands[2])
10539 && ne == INTVAL (operands[2]))
10541 operands[3] = GEN_INT (63 - nb);
10542 if (dot)
10543 return "rld%I2c. %0,%1,%2,%3";
10544 return "rld%I2c %0,%1,%2,%3";
10547 if (nb < 32 && ne < 32)
10549 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10550 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10551 operands[3] = GEN_INT (31 - nb);
10552 operands[4] = GEN_INT (31 - ne);
10553 /* This insn can also be a 64-bit rotate with mask that really makes
10554 it just a shift right (with mask); the %h below adjusts for that
10555 situation (the shift count is >= 32 in that case). */
10556 if (dot)
10557 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10558 return "rlw%I2nm %0,%1,%h2,%3,%4";
10561 gcc_unreachable ();
10564 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10565 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10566 ASHIFT, or LSHIFTRT) in mode MODE. */
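/* For example: inserting the low 32 bits of rS into the high half of rD,
   a ROTATE by 32 under MASK = 0xffffffff00000000, has ne = sh = 32 and
   maps to rldimi rD,rS,32,0.  */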
10568 bool
10569 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10571 int nb, ne;
10573 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10574 return false;
10576 int n = GET_MODE_PRECISION (mode);
10578 int sh = INTVAL (XEXP (shift, 1));
10579 if (sh < 0 || sh >= n)
10580 return false;
10582 rtx_code code = GET_CODE (shift);
10584 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10585 if (sh == 0)
10586 code = ROTATE;
10588 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10589 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10590 code = ASHIFT;
10591 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10593 code = LSHIFTRT;
10594 sh = n - sh;
10597 /* DImode rotates need rldimi. */
10598 if (mode == DImode && code == ROTATE)
10599 return (ne == sh);
10601 /* SImode rotates need rlwimi. */
10602 if (mode == SImode && code == ROTATE)
10603 return (nb < 32 && ne < 32 && sh < 32);
10605 /* Wrap-around masks are only okay for rotates. */
10606 if (ne > nb)
10607 return false;
10609 /* Don't allow ASHIFT if the mask is wrong for that. */
10610 if (code == ASHIFT && ne < sh)
10611 return false;
10613 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10614 if the mask is wrong for that. */
10615 if (nb < 32 && ne < 32 && sh < 32
10616 && !(code == LSHIFTRT && nb >= 32 - sh))
10617 return true;
10619 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10620 if the mask is wrong for that. */
10621 if (code == LSHIFTRT)
10622 sh = 64 - sh;
10623 if (ne == sh)
10624 return !(code == LSHIFTRT && nb >= sh);
10626 return false;
10629 /* Return the instruction template for an insert with mask in mode MODE, with
10630 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10632 const char *
10633 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10635 int nb, ne;
10637 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10638 gcc_unreachable ();
10640 /* Prefer rldimi because rlwimi is cracked. */
10641 if (TARGET_POWERPC64
10642 && (!dot || mode == DImode)
10643 && GET_CODE (operands[4]) != LSHIFTRT
10644 && ne == INTVAL (operands[2]))
10646 operands[3] = GEN_INT (63 - nb);
10647 if (dot)
10648 return "rldimi. %0,%1,%2,%3";
10649 return "rldimi %0,%1,%2,%3";
10652 if (nb < 32 && ne < 32)
10654 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10655 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10656 operands[3] = GEN_INT (31 - nb);
10657 operands[4] = GEN_INT (31 - ne);
10658 if (dot)
10659 return "rlwimi. %0,%1,%2,%3,%4";
10660 return "rlwimi %0,%1,%2,%3,%4";
10663 gcc_unreachable ();
10666 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10667 using two machine instructions. */
10669 bool
10670 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10672 /* There are two kinds of AND we can handle with two insns:
10673 1) those we can do with two rl* insns;
10674 2) ori[s];xori[s].
10676 We do not handle that last case yet. */
10678 /* If there is just one stretch of ones, we can do it. */
10679 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10680 return true;
10682 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10683 one insn, we can do the whole thing with two. */
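/* Worked example: VAL = 0xff0f has its lowest hole in bits 4..7; here
   bit1 = 0x1, bit2 = 0x10, val1 = 0xff00, bit3 = 0x100, and
   val + bit3 - bit2 = 0xffff is a single run of ones, so 0xff0f is a
   valid two-insn AND.  */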
10684 unsigned HOST_WIDE_INT val = INTVAL (c);
10685 unsigned HOST_WIDE_INT bit1 = val & -val;
10686 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10687 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10688 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10689 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10692 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10693 If EXPAND is true, split rotate-and-mask instructions we generate to
10694 their constituent parts as well (this is used during expand); if DOT
10695 is 1, make the last insn a record-form instruction clobbering the
10696 destination GPR and setting the CC reg (from operands[3]); if 2, set
10697 that GPR as well as the CC reg. */
10699 void
10700 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10702 gcc_assert (!(expand && dot));
10704 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10706 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10707 shift right. This generates better code than doing the masks without
10708 shifts, or shifting first right and then left. */
10709 int nb, ne;
10710 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10712 gcc_assert (mode == DImode);
10714 int shift = 63 - nb;
10715 if (expand)
10717 rtx tmp1 = gen_reg_rtx (DImode);
10718 rtx tmp2 = gen_reg_rtx (DImode);
10719 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10720 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10721 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10723 else
10725 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10726 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10727 emit_move_insn (operands[0], tmp);
10728 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10729 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10731 return;
10734 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10735 that does the rest. */
10736 unsigned HOST_WIDE_INT bit1 = val & -val;
10737 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10738 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10739 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10741 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10742 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10744 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10746 /* Two "no-rotate"-and-mask instructions, for SImode. */
10747 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10749 gcc_assert (mode == SImode);
10751 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10752 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10753 emit_move_insn (reg, tmp);
10754 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10755 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10756 return;
10759 gcc_assert (mode == DImode);
10761 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10762 insns; we have to do the first in SImode, because it wraps. */
10763 if (mask2 <= 0xffffffff
10764 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10766 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10767 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10768 GEN_INT (mask1));
10769 rtx reg_low = gen_lowpart (SImode, reg);
10770 emit_move_insn (reg_low, tmp);
10771 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10772 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10773 return;
10776 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10777 at the top end), rotate back and clear the other hole. */
10778 int right = exact_log2 (bit3);
10779 int left = 64 - right;
10781 /* Rotate the mask too. */
10782 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10784 if (expand)
10786 rtx tmp1 = gen_reg_rtx (DImode);
10787 rtx tmp2 = gen_reg_rtx (DImode);
10788 rtx tmp3 = gen_reg_rtx (DImode);
10789 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10790 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10791 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10792 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10794 else
10796 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10797 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10798 emit_move_insn (operands[0], tmp);
10799 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10800 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10801 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10805 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10806 for lfq and stfq insns, iff the registers are hard registers. */
10808 int
10809 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10811 /* We might have been passed a SUBREG. */
10812 if (!REG_P (reg1) || !REG_P (reg2))
10813 return 0;
10815 /* We might have been passed non-floating-point registers. */
10816 if (!FP_REGNO_P (REGNO (reg1))
10817 || !FP_REGNO_P (REGNO (reg2)))
10818 return 0;
10820 return (REGNO (reg1) == REGNO (reg2) - 1);
10823 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10824 addr1 and addr2 must be in consecutive memory locations
10825 (addr2 == addr1 + 8). */
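/* For example (a sketch of what the peephole looks for; fN/rB are
   placeholder registers):

       lfd fN,0(rB)
       lfd fN+1,8(rB)

   uses consecutive FP registers and addresses 8 bytes apart, so
   registers_ok_for_quad_peep and mems_ok_for_quad_peep both succeed and
   the pair can become a single lfq fN,0(rB).  */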
10827 int
10828 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10830 rtx addr1, addr2;
10831 unsigned int reg1, reg2;
10832 int offset1, offset2;
10834 /* The mems cannot be volatile. */
10835 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10836 return 0;
10838 addr1 = XEXP (mem1, 0);
10839 addr2 = XEXP (mem2, 0);
10841 /* Extract an offset (if used) from the first addr. */
10842 if (GET_CODE (addr1) == PLUS)
10844 /* If not a REG, return zero. */
10845 if (!REG_P (XEXP (addr1, 0)))
10846 return 0;
10847 else
10849 reg1 = REGNO (XEXP (addr1, 0));
10850 /* The offset must be constant! */
10851 if (!CONST_INT_P (XEXP (addr1, 1)))
10852 return 0;
10853 offset1 = INTVAL (XEXP (addr1, 1));
10856 else if (!REG_P (addr1))
10857 return 0;
10858 else
10860 reg1 = REGNO (addr1);
10861 /* This was a simple (mem (reg)) expression. Offset is 0. */
10862 offset1 = 0;
10865 /* And now for the second addr. */
10866 if (GET_CODE (addr2) == PLUS)
10868 /* If not a REG, return zero. */
10869 if (!REG_P (XEXP (addr2, 0)))
10870 return 0;
10871 else
10873 reg2 = REGNO (XEXP (addr2, 0));
10874 /* The offset must be constant. */
10875 if (!CONST_INT_P (XEXP (addr2, 1)))
10876 return 0;
10877 offset2 = INTVAL (XEXP (addr2, 1));
10880 else if (!REG_P (addr2))
10881 return 0;
10882 else
10884 reg2 = REGNO (addr2);
10885 /* This was a simple (mem (reg)) expression. Offset is 0. */
10886 offset2 = 0;
10889 /* Both of these must have the same base register. */
10890 if (reg1 != reg2)
10891 return 0;
10893 /* The offset for the second addr must be 8 more than the first addr. */
10894 if (offset2 != offset1 + 8)
10895 return 0;
10897 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10898 instructions. */
10899 return 1;
10902 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
10903 need to use DDmode, in all other cases we can use the same mode. */
10904 static machine_mode
10905 rs6000_secondary_memory_needed_mode (machine_mode mode)
10907 if (lra_in_progress && mode == SDmode)
10908 return DDmode;
10909 return mode;
10912 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10913 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10914 only work on the traditional altivec registers, note if an altivec register
10915 was chosen. */
10917 static enum rs6000_reg_type
10918 register_to_reg_type (rtx reg, bool *is_altivec)
10920 HOST_WIDE_INT regno;
10921 enum reg_class rclass;
10923 if (SUBREG_P (reg))
10924 reg = SUBREG_REG (reg);
10926 if (!REG_P (reg))
10927 return NO_REG_TYPE;
10929 regno = REGNO (reg);
10930 if (!HARD_REGISTER_NUM_P (regno))
10932 if (!lra_in_progress && !reload_completed)
10933 return PSEUDO_REG_TYPE;
10935 regno = true_regnum (reg);
10936 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10937 return PSEUDO_REG_TYPE;
10940 gcc_assert (regno >= 0);
10942 if (is_altivec && ALTIVEC_REGNO_P (regno))
10943 *is_altivec = true;
10945 rclass = rs6000_regno_regclass[regno];
10946 return reg_class_to_reg_type[(int)rclass];
10949 /* Helper function to return the cost of adding a TOC entry address. */
10951 static inline int
10952 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10954 int ret;
10956 if (TARGET_CMODEL != CMODEL_SMALL)
10957 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10959 else
10960 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10962 return ret;
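/* For example, outside the small code model a TOC address is synthesized as
   a high/low pair (roughly "addis rX,r2,sym@toc@ha" followed by a low-part
   add or D-form access), which is where the costs of 1 and 2 above come
   from; under the small code model a TOC reference is rated 3, or 6 for
   the longer -mminimal-toc sequence. */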
10965 /* Helper function for rs6000_secondary_reload to determine whether the memory
10966 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10967 needs reloading. Return negative if the memory is not handled by the memory
10968 helper functions (so a different reload method should be tried), 0 if no
10969 additional instructions are needed, and positive to give the extra cost for the
10970 memory. */
10972 static int
10973 rs6000_secondary_reload_memory (rtx addr,
10974 enum reg_class rclass,
10975 machine_mode mode)
10977 int extra_cost = 0;
10978 rtx reg, and_arg, plus_arg0, plus_arg1;
10979 addr_mask_type addr_mask;
10980 const char *type = NULL;
10981 const char *fail_msg = NULL;
10983 if (GPR_REG_CLASS_P (rclass))
10984 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10986 else if (rclass == FLOAT_REGS)
10987 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10989 else if (rclass == ALTIVEC_REGS)
10990 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10992 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10993 else if (rclass == VSX_REGS)
10994 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10995 & ~RELOAD_REG_AND_M16);
10997 /* If the register allocator hasn't made up its mind yet on the register
10998 class to use, settle on reasonable defaults. */
10999 else if (rclass == NO_REGS)
11001 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11002 & ~RELOAD_REG_AND_M16);
11004 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11005 addr_mask &= ~(RELOAD_REG_INDEXED
11006 | RELOAD_REG_PRE_INCDEC
11007 | RELOAD_REG_PRE_MODIFY);
11010 else
11011 addr_mask = 0;
11013 /* If the register isn't valid in this register class, just return now. */
11014 if ((addr_mask & RELOAD_REG_VALID) == 0)
11016 if (TARGET_DEBUG_ADDR)
11018 fprintf (stderr,
11019 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11020 "not valid in class\n",
11021 GET_MODE_NAME (mode), reg_class_names[rclass]);
11022 debug_rtx (addr);
11025 return -1;
11028 switch (GET_CODE (addr))
11030 /* Does the register class support auto update forms for this mode? We
11031 don't need a scratch register, since the powerpc only supports
11032 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11033 case PRE_INC:
11034 case PRE_DEC:
11035 reg = XEXP (addr, 0);
11036 if (!base_reg_operand (reg, GET_MODE (reg)))
11038 fail_msg = "no base register #1";
11039 extra_cost = -1;
11042 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11044 extra_cost = 1;
11045 type = "update";
11047 break;
11049 case PRE_MODIFY:
11050 reg = XEXP (addr, 0);
11051 plus_arg1 = XEXP (addr, 1);
11052 if (!base_reg_operand (reg, GET_MODE (reg))
11053 || GET_CODE (plus_arg1) != PLUS
11054 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11056 fail_msg = "bad PRE_MODIFY";
11057 extra_cost = -1;
11060 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11062 extra_cost = 1;
11063 type = "update";
11065 break;
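/* For example (register numbers are arbitrary), a PRE_MODIFY accepted by
   the case above looks like
     (pre_modify:DI (reg:DI 9) (plus:DI (reg:DI 9) (reg:DI 10)))
   with the base register repeated inside the PLUS; it corresponds to the
   update forms such as ldu/ldux and stdu/stdux. */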
11067 /* Do we need to simulate AND -16 to clear the bottom address bits used
11068 in VMX load/stores? Only allow the AND for vector sizes. */
11069 case AND:
11070 and_arg = XEXP (addr, 0);
11071 if (GET_MODE_SIZE (mode) != 16
11072 || !CONST_INT_P (XEXP (addr, 1))
11073 || INTVAL (XEXP (addr, 1)) != -16)
11075 fail_msg = "bad Altivec AND #1";
11076 extra_cost = -1;
11079 if (rclass != ALTIVEC_REGS)
11081 if (legitimate_indirect_address_p (and_arg, false))
11082 extra_cost = 1;
11084 else if (legitimate_indexed_address_p (and_arg, false))
11085 extra_cost = 2;
11087 else
11089 fail_msg = "bad Altivec AND #2";
11090 extra_cost = -1;
11093 type = "and";
11095 break;
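/* For example, Altivec loads and stores (lvx/stvx) ignore the low 4 bits
   of the address, so vector addresses frequently look like
     (and:DI (plus:DI (reg:DI 3) (reg:DI 4)) (const_int -16)).
   For ALTIVEC_REGS the masking comes for free; for other classes it must be
   simulated with a real AND into a scratch register (see the AND case in
   rs6000_secondary_reload_inner below), hence the extra cost above. */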
11097 /* If this is an indirect address, make sure it is a base register. */
11098 case REG:
11099 case SUBREG:
11100 if (!legitimate_indirect_address_p (addr, false))
11102 extra_cost = 1;
11103 type = "move";
11105 break;
11107 /* If this is an indexed address, make sure the register class can handle
11108 indexed addresses for this mode. */
11109 case PLUS:
11110 plus_arg0 = XEXP (addr, 0);
11111 plus_arg1 = XEXP (addr, 1);
11113 /* (plus (plus (reg) (constant)) (constant)) is generated during
11114 push_reload processing, so handle it now. */
11115 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11117 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11119 extra_cost = 1;
11120 type = "offset";
11124 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11125 push_reload processing, so handle it now. */
11126 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11128 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11130 extra_cost = 1;
11131 type = "indexed #2";
11135 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11137 fail_msg = "no base register #2";
11138 extra_cost = -1;
11141 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11143 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11144 || !legitimate_indexed_address_p (addr, false))
11146 extra_cost = 1;
11147 type = "indexed";
11151 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11152 && CONST_INT_P (plus_arg1))
11154 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11156 extra_cost = 1;
11157 type = "vector d-form offset";
11161 /* Make sure the register class can handle offset addresses. */
11162 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11164 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11166 extra_cost = 1;
11167 type = "offset #2";
11171 else
11173 fail_msg = "bad PLUS";
11174 extra_cost = -1;
11177 break;
11179 case LO_SUM:
11180 /* Quad offsets are restricted and can't handle normal addresses. */
11181 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11183 extra_cost = -1;
11184 type = "vector d-form lo_sum";
11187 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11189 fail_msg = "bad LO_SUM";
11190 extra_cost = -1;
11193 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11195 extra_cost = 1;
11196 type = "lo_sum";
11198 break;
11200 /* Static addresses need to create a TOC entry. */
11201 case CONST:
11202 case SYMBOL_REF:
11203 case LABEL_REF:
11204 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11206 extra_cost = -1;
11207 type = "vector d-form lo_sum #2";
11210 else
11212 type = "address";
11213 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11215 break;
11217 /* TOC references look like offsettable memory. */
11218 case UNSPEC:
11219 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11221 fail_msg = "bad UNSPEC";
11222 extra_cost = -1;
11225 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11227 extra_cost = -1;
11228 type = "vector d-form lo_sum #3";
11231 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11233 extra_cost = 1;
11234 type = "toc reference";
11236 break;
11238 default:
11240 fail_msg = "bad address";
11241 extra_cost = -1;
11245 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11247 if (extra_cost < 0)
11248 fprintf (stderr,
11249 "rs6000_secondary_reload_memory error: mode = %s, "
11250 "class = %s, addr_mask = '%s', %s\n",
11251 GET_MODE_NAME (mode),
11252 reg_class_names[rclass],
11253 rs6000_debug_addr_mask (addr_mask, false),
11254 (fail_msg != NULL) ? fail_msg : "<bad address>");
11256 else
11257 fprintf (stderr,
11258 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11259 "addr_mask = '%s', extra cost = %d, %s\n",
11260 GET_MODE_NAME (mode),
11261 reg_class_names[rclass],
11262 rs6000_debug_addr_mask (addr_mask, false),
11263 extra_cost,
11264 (type) ? type : "<none>");
11266 debug_rtx (addr);
11269 return extra_cost;
11272 /* Helper function for rs6000_secondary_reload to return true if a move to a
11273 different register class is really a simple move. */
11275 static bool
11276 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11277 enum rs6000_reg_type from_type,
11278 machine_mode mode)
11280 int size = GET_MODE_SIZE (mode);
11282 /* Add support for various direct moves available. In this function, we only
11283 look at cases where we don't need any extra registers, and one or more
11284 simple move insns are issued. Originally small integers are not allowed
11285 in FPR/VSX registers. Single precision binary floating point is not a simple
11286 move because we need to convert to the single precision memory layout.
11287 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11288 need special direct move handling, which we do not support yet. */
11289 if (TARGET_DIRECT_MOVE
11290 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11291 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11293 if (TARGET_POWERPC64)
11295 /* ISA 2.07: MTVSRD or MFVSRD. */
11296 if (size == 8)
11297 return true;
11299 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11300 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11301 return true;
11304 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11305 if (TARGET_P8_VECTOR)
11307 if (mode == SImode)
11308 return true;
11310 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11311 return true;
11314 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11315 if (mode == SDmode)
11316 return true;
11319 /* Move to/from SPR. */
11320 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11321 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11322 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11323 return true;
11325 return false;
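/* For example, on a 64-bit ISA 2.07 (power8) target a DImode copy between a
   GPR and a VSX register is "simple" in the sense above: it is a single
   mtvsrd or mfvsrd and needs no scratch register or memory. */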
11328 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
11329 special direct moves that involve allocating an extra register. Return true
11330 if there is such a move, recording the insn code of the helper function and
11331 its extra cost in SRI; return false if not. */
11333 static bool
11334 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11335 enum rs6000_reg_type from_type,
11336 machine_mode mode,
11337 secondary_reload_info *sri,
11338 bool altivec_p)
11340 bool ret = false;
11341 enum insn_code icode = CODE_FOR_nothing;
11342 int cost = 0;
11343 int size = GET_MODE_SIZE (mode);
11345 if (TARGET_POWERPC64 && size == 16)
11347 /* Handle moving 128-bit values from GPRs to VSX registers on
11348 ISA 2.07 (power8, power9) when running in 64-bit mode using
11349 XXPERMDI to glue the two 64-bit values back together. */
11350 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11352 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11353 icode = reg_addr[mode].reload_vsx_gpr;
11356 /* Handle moving 128-bit values from VSX registers to GPRs on
11357 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11358 bottom 64-bit value. */
11359 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11361 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11362 icode = reg_addr[mode].reload_gpr_vsx;
11366 else if (TARGET_POWERPC64 && mode == SFmode)
11368 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11370 cost = 3; /* xscvdpspn, mfvsrd, and. */
11371 icode = reg_addr[mode].reload_gpr_vsx;
11374 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11376 cost = 2; /* mtvsrz, xscvspdpn. */
11377 icode = reg_addr[mode].reload_vsx_gpr;
11381 else if (!TARGET_POWERPC64 && size == 8)
11383 /* Handle moving 64-bit values from GPRs to floating point registers on
11384 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11385 32-bit values back together. Altivec register classes must be handled
11386 specially since a different instruction is used, and the secondary
11387 reload support requires a single instruction class in the scratch
11388 register constraint. However, right now TFmode is not allowed in
11389 Altivec registers, so the pattern will never match. */
11390 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11392 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11393 icode = reg_addr[mode].reload_fpr_gpr;
11397 if (icode != CODE_FOR_nothing)
11399 ret = true;
11400 if (sri)
11402 sri->icode = icode;
11403 sri->extra_cost = cost;
11407 return ret;
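/* For example, moving a TImode value from a GPR pair into a VSX register on
   a 64-bit ISA 2.07 target is not a simple move: it takes two mtvsrd's plus
   an xxpermdi to glue the halves together, so this helper reports the
   reload_vsx_gpr pattern with an extra cost of 3. */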
11410 /* Return whether a move between two register classes can be done either
11411 directly (simple move) or via a pattern that uses a single extra temporary
11412 (using ISA 2.07's direct move in this case). */
11414 static bool
11415 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11416 enum rs6000_reg_type from_type,
11417 machine_mode mode,
11418 secondary_reload_info *sri,
11419 bool altivec_p)
11421 /* Fall back to load/store reloads if either type is not a register. */
11422 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11423 return false;
11425 /* If we haven't allocated registers yet, assume the move can be done for the
11426 standard register types. */
11427 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11428 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11429 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11430 return true;
11432 /* A move within the same set of registers is a simple move for non-specialized
11433 registers. */
11434 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11435 return true;
11437 /* Check whether a simple move can be done directly. */
11438 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11440 if (sri)
11442 sri->icode = CODE_FOR_nothing;
11443 sri->extra_cost = 0;
11445 return true;
11448 /* Now check if we can do it in a few steps. */
11449 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11450 altivec_p);
11453 /* Inform reload about cases where moving X with a mode MODE to a register in
11454 RCLASS requires an extra scratch or immediate register. Return the class
11455 needed for the immediate register.
11457 For VSX and Altivec, we may need a register to convert sp+offset into
11458 reg+reg.
11460 For misaligned 64-bit gpr loads and stores we need a register to
11461 convert an offset address to indirect. */
11463 static reg_class_t
11464 rs6000_secondary_reload (bool in_p,
11465 rtx x,
11466 reg_class_t rclass_i,
11467 machine_mode mode,
11468 secondary_reload_info *sri)
11470 enum reg_class rclass = (enum reg_class) rclass_i;
11471 reg_class_t ret = ALL_REGS;
11472 enum insn_code icode;
11473 bool default_p = false;
11474 bool done_p = false;
11476 /* Allow subreg of memory before/during reload. */
11477 bool memory_p = (MEM_P (x)
11478 || (!reload_completed && SUBREG_P (x)
11479 && MEM_P (SUBREG_REG (x))));
11481 sri->icode = CODE_FOR_nothing;
11482 sri->t_icode = CODE_FOR_nothing;
11483 sri->extra_cost = 0;
11484 icode = ((in_p)
11485 ? reg_addr[mode].reload_load
11486 : reg_addr[mode].reload_store);
11488 if (REG_P (x) || register_operand (x, mode))
11490 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11491 bool altivec_p = (rclass == ALTIVEC_REGS);
11492 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11494 if (!in_p)
11495 std::swap (to_type, from_type);
11497 /* Can we do a direct move of some sort? */
11498 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11499 altivec_p))
11501 icode = (enum insn_code)sri->icode;
11502 default_p = false;
11503 done_p = true;
11504 ret = NO_REGS;
11508 /* Make sure 0.0 is not reloaded or forced into memory. */
11509 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11511 ret = NO_REGS;
11512 default_p = false;
11513 done_p = true;
11516 /* If this is a scalar floating point value and we want to load it into the
11517 traditional Altivec registers, do it by moving through a traditional floating
11518 point register, unless we have D-form addressing. Also make sure that
11519 non-zero constants use a FPR. */
11520 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11521 && !mode_supports_vmx_dform (mode)
11522 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11523 && (memory_p || CONST_DOUBLE_P (x)))
11525 ret = FLOAT_REGS;
11526 default_p = false;
11527 done_p = true;
11530 /* Handle reload of load/stores if we have reload helper functions. */
11531 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11533 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11534 mode);
11536 if (extra_cost >= 0)
11538 done_p = true;
11539 ret = NO_REGS;
11540 if (extra_cost > 0)
11542 sri->extra_cost = extra_cost;
11543 sri->icode = icode;
11548 /* Handle unaligned loads and stores of integer registers. */
11549 if (!done_p && TARGET_POWERPC64
11550 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11551 && memory_p
11552 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11554 rtx addr = XEXP (x, 0);
11555 rtx off = address_offset (addr);
11557 if (off != NULL_RTX)
11559 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11560 unsigned HOST_WIDE_INT offset = INTVAL (off);
11562 /* We need a secondary reload when our legitimate_address_p
11563 says the address is good (as otherwise the entire address
11564 will be reloaded), and the offset is not a multiple of
11565 four or we have an address wrap. Address wrap will only
11566 occur for LO_SUMs since legitimate_offset_address_p
11567 rejects addresses for 16-byte mems that will wrap. */
11568 if (GET_CODE (addr) == LO_SUM
11569 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11570 && ((offset & 3) != 0
11571 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11572 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11573 && (offset & 3) != 0))
11575 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11576 if (in_p)
11577 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11578 : CODE_FOR_reload_di_load);
11579 else
11580 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11581 : CODE_FOR_reload_di_store);
11582 sri->extra_cost = 2;
11583 ret = NO_REGS;
11584 done_p = true;
11586 else
11587 default_p = true;
11589 else
11590 default_p = true;
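/* For example, in the 64-bit case just above: a DImode access at offset 3
   is otherwise a legitimate address, but ld/std are DS-form instructions
   whose displacement must be a multiple of 4, so the reload pattern moves
   the address into a scratch register first (roughly "addi scratch,base,3")
   and performs the access indirectly. */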
11593 if (!done_p && !TARGET_POWERPC64
11594 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11595 && memory_p
11596 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11598 rtx addr = XEXP (x, 0);
11599 rtx off = address_offset (addr);
11601 if (off != NULL_RTX)
11603 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11604 unsigned HOST_WIDE_INT offset = INTVAL (off);
11606 /* We need a secondary reload when our legitimate_address_p
11607 says the address is good (as otherwise the entire address
11608 will be reloaded), and we have a wrap.
11610 legitimate_lo_sum_address_p allows LO_SUM addresses to
11611 have any offset so test for wrap in the low 16 bits.
11613 legitimate_offset_address_p checks for the range
11614 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11615 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11616 [0x7ff4,0x7fff] respectively, so test for the
11617 intersection of these ranges, [0x7ffc,0x7fff] and
11618 [0x7ff4,0x7ff7] respectively.
11620 Note that the address we see here may have been
11621 manipulated by legitimize_reload_address. */
11622 if (GET_CODE (addr) == LO_SUM
11623 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11624 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11626 if (in_p)
11627 sri->icode = CODE_FOR_reload_si_load;
11628 else
11629 sri->icode = CODE_FOR_reload_si_store;
11630 sri->extra_cost = 2;
11631 ret = NO_REGS;
11632 done_p = true;
11634 else
11635 default_p = true;
11637 else
11638 default_p = true;
11641 if (!done_p)
11642 default_p = true;
11644 if (default_p)
11645 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11647 gcc_assert (ret != ALL_REGS);
11649 if (TARGET_DEBUG_ADDR)
11651 fprintf (stderr,
11652 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11653 "mode = %s",
11654 reg_class_names[ret],
11655 in_p ? "true" : "false",
11656 reg_class_names[rclass],
11657 GET_MODE_NAME (mode));
11659 if (reload_completed)
11660 fputs (", after reload", stderr);
11662 if (!done_p)
11663 fputs (", done_p not set", stderr);
11665 if (default_p)
11666 fputs (", default secondary reload", stderr);
11668 if (sri->icode != CODE_FOR_nothing)
11669 fprintf (stderr, ", reload func = %s, extra cost = %d",
11670 insn_data[sri->icode].name, sri->extra_cost);
11672 else if (sri->extra_cost > 0)
11673 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11675 fputs ("\n", stderr);
11676 debug_rtx (x);
11679 return ret;
11682 /* Better tracing for rs6000_secondary_reload_inner. */
11684 static void
11685 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11686 bool store_p)
11688 rtx set, clobber;
11690 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11692 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11693 store_p ? "store" : "load");
11695 if (store_p)
11696 set = gen_rtx_SET (mem, reg);
11697 else
11698 set = gen_rtx_SET (reg, mem);
11700 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11701 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11704 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11705 ATTRIBUTE_NORETURN;
11707 static void
11708 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11709 bool store_p)
11711 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11712 gcc_unreachable ();
11715 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11716 reload helper functions. These were identified in
11717 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11718 reload, it calls the insns:
11719 reload_<RELOAD:mode>_<P:mptrsize>_store
11720 reload_<RELOAD:mode>_<P:mptrsize>_load
11722 which in turn calls this function, to do whatever is necessary to create
11723 valid addresses. */
11725 void
11726 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11728 int regno = true_regnum (reg);
11729 machine_mode mode = GET_MODE (reg);
11730 addr_mask_type addr_mask;
11731 rtx addr;
11732 rtx new_addr;
11733 rtx op_reg, op0, op1;
11734 rtx and_op;
11735 rtx cc_clobber;
11736 rtvec rv;
11738 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11739 || !base_reg_operand (scratch, GET_MODE (scratch)))
11740 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11742 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11743 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11745 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11746 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11748 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11749 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11751 else
11752 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11754 /* Make sure the mode is valid in this register class. */
11755 if ((addr_mask & RELOAD_REG_VALID) == 0)
11756 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11758 if (TARGET_DEBUG_ADDR)
11759 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11761 new_addr = addr = XEXP (mem, 0);
11762 switch (GET_CODE (addr))
11764 /* Does the register class support auto update forms for this mode? If
11765 not, do the update now. We don't need a scratch register, since the
11766 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11767 case PRE_INC:
11768 case PRE_DEC:
11769 op_reg = XEXP (addr, 0);
11770 if (!base_reg_operand (op_reg, Pmode))
11771 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11773 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11775 int delta = GET_MODE_SIZE (mode);
11776 if (GET_CODE (addr) == PRE_DEC)
11777 delta = -delta;
11778 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11779 new_addr = op_reg;
11781 break;
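/* For example, if the register class has no update forms, a 16-byte access
   through (pre_inc:DI (reg:DI 9)) is rewritten here as an explicit
   "addi 9,9,16" followed by a plain (mem (reg 9)) access. */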
11783 case PRE_MODIFY:
11784 op0 = XEXP (addr, 0);
11785 op1 = XEXP (addr, 1);
11786 if (!base_reg_operand (op0, Pmode)
11787 || GET_CODE (op1) != PLUS
11788 || !rtx_equal_p (op0, XEXP (op1, 0)))
11789 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11791 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11793 emit_insn (gen_rtx_SET (op0, op1));
11794 new_addr = reg;
11796 break;
11798 /* Do we need to simulate AND -16 to clear the bottom address bits used
11799 in VMX load/stores? */
11800 case AND:
11801 op0 = XEXP (addr, 0);
11802 op1 = XEXP (addr, 1);
11803 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11805 if (REG_P (op0) || SUBREG_P (op0))
11806 op_reg = op0;
11808 else if (GET_CODE (op1) == PLUS)
11810 emit_insn (gen_rtx_SET (scratch, op1));
11811 op_reg = scratch;
11814 else
11815 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11817 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11818 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11819 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11820 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11821 new_addr = scratch;
11823 break;
11825 /* If this is an indirect address, make sure it is a base register. */
11826 case REG:
11827 case SUBREG:
11828 if (!base_reg_operand (addr, GET_MODE (addr)))
11830 emit_insn (gen_rtx_SET (scratch, addr));
11831 new_addr = scratch;
11833 break;
11835 /* If this is an indexed address, make sure the register class can handle
11836 indexed addresses for this mode. */
11837 case PLUS:
11838 op0 = XEXP (addr, 0);
11839 op1 = XEXP (addr, 1);
11840 if (!base_reg_operand (op0, Pmode))
11841 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11843 else if (int_reg_operand (op1, Pmode))
11845 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11847 emit_insn (gen_rtx_SET (scratch, addr));
11848 new_addr = scratch;
11852 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11854 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11855 || !quad_address_p (addr, mode, false))
11857 emit_insn (gen_rtx_SET (scratch, addr));
11858 new_addr = scratch;
11862 /* Make sure the register class can handle offset addresses. */
11863 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11865 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11867 emit_insn (gen_rtx_SET (scratch, addr));
11868 new_addr = scratch;
11872 else
11873 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11875 break;
11877 case LO_SUM:
11878 op0 = XEXP (addr, 0);
11879 op1 = XEXP (addr, 1);
11880 if (!base_reg_operand (op0, Pmode))
11881 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11883 else if (int_reg_operand (op1, Pmode))
11885 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11887 emit_insn (gen_rtx_SET (scratch, addr));
11888 new_addr = scratch;
11892 /* Quad offsets are restricted and can't handle normal addresses. */
11893 else if (mode_supports_dq_form (mode))
11895 emit_insn (gen_rtx_SET (scratch, addr));
11896 new_addr = scratch;
11899 /* Make sure the register class can handle offset addresses. */
11900 else if (legitimate_lo_sum_address_p (mode, addr, false))
11902 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11904 emit_insn (gen_rtx_SET (scratch, addr));
11905 new_addr = scratch;
11909 else
11910 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11912 break;
11914 case SYMBOL_REF:
11915 case CONST:
11916 case LABEL_REF:
11917 rs6000_emit_move (scratch, addr, Pmode);
11918 new_addr = scratch;
11919 break;
11921 default:
11922 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11925 /* Adjust the address if it changed. */
11926 if (addr != new_addr)
11928 mem = replace_equiv_address_nv (mem, new_addr);
11929 if (TARGET_DEBUG_ADDR)
11930 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11933 /* Now create the move. */
11934 if (store_p)
11935 emit_insn (gen_rtx_SET (mem, reg));
11936 else
11937 emit_insn (gen_rtx_SET (reg, mem));
11939 return;
11942 /* Convert reloads involving 64-bit gprs and misaligned offset
11943 addressing, or multiple 32-bit gprs and offsets that are too large,
11944 to use indirect addressing. */
11946 void
11947 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11949 int regno = true_regnum (reg);
11950 enum reg_class rclass;
11951 rtx addr;
11952 rtx scratch_or_premodify = scratch;
11954 if (TARGET_DEBUG_ADDR)
11956 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11957 store_p ? "store" : "load");
11958 fprintf (stderr, "reg:\n");
11959 debug_rtx (reg);
11960 fprintf (stderr, "mem:\n");
11961 debug_rtx (mem);
11962 fprintf (stderr, "scratch:\n");
11963 debug_rtx (scratch);
11966 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11967 gcc_assert (MEM_P (mem));
11968 rclass = REGNO_REG_CLASS (regno);
11969 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11970 addr = XEXP (mem, 0);
11972 if (GET_CODE (addr) == PRE_MODIFY)
11974 gcc_assert (REG_P (XEXP (addr, 0))
11975 && GET_CODE (XEXP (addr, 1)) == PLUS
11976 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11977 scratch_or_premodify = XEXP (addr, 0);
11978 addr = XEXP (addr, 1);
11980 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11982 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11984 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11986 /* Now create the move. */
11987 if (store_p)
11988 emit_insn (gen_rtx_SET (mem, reg));
11989 else
11990 emit_insn (gen_rtx_SET (reg, mem));
11992 return;
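/* For example (register numbers are arbitrary), reloading a misaligned
   DImode store to (mem:DI (plus:DI (reg:DI 9) (const_int 3))) emits
   "addi scratch,9,3" via rs6000_emit_move and then stores through
   (mem:DI (reg scratch)), turning the invalid DS-form displacement into an
   indirect access. */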
11995 /* Given an rtx X being reloaded into a reg required to be
11996 in class RCLASS, return the class of reg to actually use.
11997 In general this is just CLASS; but on some machines
11998 in some cases it is preferable to use a more restrictive class.
12000 On the RS/6000, we have to return NO_REGS when we want to reload a
12001 floating-point CONST_DOUBLE to force it to be copied to memory.
12003 We also don't want to reload integer values into floating-point
12004 registers if we can at all help it. In fact, this can
12005 cause reload to die, if it tries to generate a reload of CTR
12006 into a FP register and discovers it doesn't have the memory location
12007 required.
12009 ??? Would it be a good idea to have reload do the converse, that is
12010 try to reload floating modes into FP registers if possible?
12013 static enum reg_class
12014 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12016 machine_mode mode = GET_MODE (x);
12017 bool is_constant = CONSTANT_P (x);
12019 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12020 reload class for it. */
12021 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12022 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12023 return NO_REGS;
12025 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12026 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12027 return NO_REGS;
12029 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12030 the reloading of address expressions using PLUS into floating point
12031 registers. */
12032 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12034 if (is_constant)
12036 /* Zero is always allowed in all VSX registers. */
12037 if (x == CONST0_RTX (mode))
12038 return rclass;
12040 /* If this is a vector constant that can be formed with a few Altivec
12041 instructions, we want altivec registers. */
12042 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12043 return ALTIVEC_REGS;
12045 /* If this is an integer constant that can easily be loaded into
12046 vector registers, allow it. */
12047 if (CONST_INT_P (x))
12049 HOST_WIDE_INT value = INTVAL (x);
12051 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12052 2.06 can generate it in the Altivec registers with
12053 VSPLTI<x>. */
12054 if (value == -1)
12056 if (TARGET_P8_VECTOR)
12057 return rclass;
12058 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12059 return ALTIVEC_REGS;
12060 else
12061 return NO_REGS;
12064 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12065 a sign extend in the Altivec registers. */
12066 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12067 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12068 return ALTIVEC_REGS;
12071 /* Force constant to memory. */
12072 return NO_REGS;
12075 /* D-form addressing can easily reload the value. */
12076 if (mode_supports_vmx_dform (mode)
12077 || mode_supports_dq_form (mode))
12078 return rclass;
12080 /* If this is a scalar floating point value and we don't have D-form
12081 addressing, prefer the traditional floating point registers so that we
12082 can use D-form (register+offset) addressing. */
12083 if (rclass == VSX_REGS
12084 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12085 return FLOAT_REGS;
12087 /* Prefer the Altivec registers if Altivec is handling the vector
12088 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12089 loads. */
12090 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12091 || mode == V1TImode)
12092 return ALTIVEC_REGS;
12094 return rclass;
12097 if (is_constant || GET_CODE (x) == PLUS)
12099 if (reg_class_subset_p (GENERAL_REGS, rclass))
12100 return GENERAL_REGS;
12101 if (reg_class_subset_p (BASE_REGS, rclass))
12102 return BASE_REGS;
12103 return NO_REGS;
12106 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12107 return GENERAL_REGS;
12109 return rclass;
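/* For example, when reloading into a VSX class on an ISA 3.0
   (-mcpu=power9) target, the constant (const_int 5) prefers ALTIVEC_REGS,
   since xxspltib plus a sign extend can materialize any value in
   [-128,127]; a constant such as 123456 matches none of the cases above
   and is forced to memory by returning NO_REGS. */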
12112 /* Debug version of rs6000_preferred_reload_class. */
12113 static enum reg_class
12114 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12116 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12118 fprintf (stderr,
12119 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12120 "mode = %s, x:\n",
12121 reg_class_names[ret], reg_class_names[rclass],
12122 GET_MODE_NAME (GET_MODE (x)));
12123 debug_rtx (x);
12125 return ret;
12128 /* If we are copying between FP or AltiVec registers and anything else, we need
12129 a memory location. The exception is when we are targeting ppc64 and the
12130 direct FPR<->GPR move instructions are available. Also, under VSX, you
12131 can copy vector registers from the FP register set to the Altivec register
12132 set and vice versa. */
12134 static bool
12135 rs6000_secondary_memory_needed (machine_mode mode,
12136 reg_class_t from_class,
12137 reg_class_t to_class)
12139 enum rs6000_reg_type from_type, to_type;
12140 bool altivec_p = ((from_class == ALTIVEC_REGS)
12141 || (to_class == ALTIVEC_REGS));
12143 /* If a simple/direct move is available, we don't need secondary memory. */
12144 from_type = reg_class_to_reg_type[(int)from_class];
12145 to_type = reg_class_to_reg_type[(int)to_class];
12147 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12148 (secondary_reload_info *)0, altivec_p))
12149 return false;
12151 /* If we have a floating point or vector register class, we need to use
12152 memory to transfer the data. */
12153 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12154 return true;
12156 return false;
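/* For example (register numbers are arbitrary), on a 64-bit pre-power8
   target with no direct moves, a DImode copy from a GPR to an FPR must
   bounce through memory, roughly
     std 5,offset(1)
     lfd 0,offset(1)
   while on power8 the same copy is a single mtvsrd and needs no secondary
   memory. */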
12159 /* Debug version of rs6000_secondary_memory_needed. */
12160 static bool
12161 rs6000_debug_secondary_memory_needed (machine_mode mode,
12162 reg_class_t from_class,
12163 reg_class_t to_class)
12165 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12167 fprintf (stderr,
12168 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12169 "to_class = %s, mode = %s\n",
12170 ret ? "true" : "false",
12171 reg_class_names[from_class],
12172 reg_class_names[to_class],
12173 GET_MODE_NAME (mode));
12175 return ret;
12178 /* Return the register class of a scratch register needed to copy IN into
12179 or out of a register in RCLASS in MODE. If it can be done directly,
12180 NO_REGS is returned. */
12182 static enum reg_class
12183 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12184 rtx in)
12186 int regno;
12188 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12189 #if TARGET_MACHO
12190 && MACHOPIC_INDIRECT
12191 #endif
12194 /* We cannot copy a symbolic operand directly into anything
12195 other than BASE_REGS for TARGET_ELF. So indicate that a
12196 register from BASE_REGS is needed as an intermediate
12197 register.
12199 On Darwin, pic addresses require a load from memory, which
12200 needs a base register. */
12201 if (rclass != BASE_REGS
12202 && (SYMBOL_REF_P (in)
12203 || GET_CODE (in) == HIGH
12204 || GET_CODE (in) == LABEL_REF
12205 || GET_CODE (in) == CONST))
12206 return BASE_REGS;
12209 if (REG_P (in))
12211 regno = REGNO (in);
12212 if (!HARD_REGISTER_NUM_P (regno))
12214 regno = true_regnum (in);
12215 if (!HARD_REGISTER_NUM_P (regno))
12216 regno = -1;
12219 else if (SUBREG_P (in))
12221 regno = true_regnum (in);
12222 if (!HARD_REGISTER_NUM_P (regno))
12223 regno = -1;
12225 else
12226 regno = -1;
12228 /* If we have VSX register moves, prefer moving scalar values between
12229 Altivec registers and GPR by going via an FPR (and then via memory)
12230 instead of reloading the secondary memory address for Altivec moves. */
12231 if (TARGET_VSX
12232 && GET_MODE_SIZE (mode) < 16
12233 && !mode_supports_vmx_dform (mode)
12234 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12235 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12236 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12237 && (regno >= 0 && INT_REGNO_P (regno)))))
12238 return FLOAT_REGS;
12240 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12241 into anything. */
12242 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12243 || (regno >= 0 && INT_REGNO_P (regno)))
12244 return NO_REGS;
12246 /* Constants, memory, and VSX registers can go into VSX registers (both the
12247 traditional floating point and the altivec registers). */
12248 if (rclass == VSX_REGS
12249 && (regno == -1 || VSX_REGNO_P (regno)))
12250 return NO_REGS;
12252 /* Constants, memory, and FP registers can go into FP registers. */
12253 if ((regno == -1 || FP_REGNO_P (regno))
12254 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12255 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12257 /* Memory, and AltiVec registers can go into AltiVec registers. */
12258 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12259 && rclass == ALTIVEC_REGS)
12260 return NO_REGS;
12262 /* We can copy among the CR registers. */
12263 if ((rclass == CR_REGS || rclass == CR0_REGS)
12264 && regno >= 0 && CR_REGNO_P (regno))
12265 return NO_REGS;
12267 /* Otherwise, we need GENERAL_REGS. */
12268 return GENERAL_REGS;
12271 /* Debug version of rs6000_secondary_reload_class. */
12272 static enum reg_class
12273 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12274 machine_mode mode, rtx in)
12276 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12277 fprintf (stderr,
12278 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12279 "mode = %s, input rtx:\n",
12280 reg_class_names[ret], reg_class_names[rclass],
12281 GET_MODE_NAME (mode));
12282 debug_rtx (in);
12284 return ret;
12287 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12289 static bool
12290 rs6000_can_change_mode_class (machine_mode from,
12291 machine_mode to,
12292 reg_class_t rclass)
12294 unsigned from_size = GET_MODE_SIZE (from);
12295 unsigned to_size = GET_MODE_SIZE (to);
12297 if (from_size != to_size)
12299 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12301 if (reg_classes_intersect_p (xclass, rclass))
12303 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12304 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12305 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12306 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12308 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12309 single register under VSX because the scalar part of the register
12310 is in the upper 64-bits, and not the lower 64-bits. Types like
12311 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12312 IEEE floating point can't overlap, and neither can small
12313 values. */
12315 if (to_float128_vector_p && from_float128_vector_p)
12316 return true;
12318 else if (to_float128_vector_p || from_float128_vector_p)
12319 return false;
12321 /* TDmode in floating-mode registers must always go into a register
12322 pair with the most significant word in the even-numbered register
12323 to match ISA requirements. In little-endian mode, this does not
12324 match subreg numbering, so we cannot allow subregs. */
12325 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12326 return false;
12328 /* Allow SD<->DD changes, since SDmode values are stored in
12329 the low half of the DDmode, just like target-independent
12330 code expects. We need to allow at least SD->DD since
12331 rs6000_secondary_memory_needed_mode asks for that change
12332 to be made for SD reloads. */
12333 if ((to == DDmode && from == SDmode)
12334 || (to == SDmode && from == DDmode))
12335 return true;
12337 if (from_size < 8 || to_size < 8)
12338 return false;
12340 if (from_size == 8 && (8 * to_nregs) != to_size)
12341 return false;
12343 if (to_size == 8 && (8 * from_nregs) != from_size)
12344 return false;
12346 return true;
12348 else
12349 return true;
12352 /* Since the VSX register set includes traditional floating point registers
12353 and altivec registers, just check for the size being different instead of
12354 trying to check whether the modes are vector modes. Otherwise it won't
12355 allow, say, DF and DI to change classes. For types like TFmode and TDmode
12356 that take 2 64-bit registers, rather than a single 128-bit register, don't
12357 allow subregs of those types to other 128-bit types. */
12358 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12360 unsigned num_regs = (from_size + 15) / 16;
12361 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12362 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12363 return false;
12365 return (from_size == 8 || from_size == 16);
12368 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12369 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12370 return false;
12372 return true;
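/* For example, a DImode subreg of a KFmode (IEEE 128-bit) register is
   rejected for the VSX classes: KFmode lives in a single VSX register with
   its scalar part in the upper 64 bits, so the 64-bit halves cannot be
   accessed as subregs. TDmode, which occupies a pair of 64-bit registers,
   may overlap as described above, except on little-endian. */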
12375 /* Debug version of rs6000_can_change_mode_class. */
12376 static bool
12377 rs6000_debug_can_change_mode_class (machine_mode from,
12378 machine_mode to,
12379 reg_class_t rclass)
12381 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12383 fprintf (stderr,
12384 "rs6000_can_change_mode_class, return %s, from = %s, "
12385 "to = %s, rclass = %s\n",
12386 ret ? "true" : "false",
12387 GET_MODE_NAME (from), GET_MODE_NAME (to),
12388 reg_class_names[rclass]);
12390 return ret;
12393 /* Return a string to do a move operation of 128 bits of data. */
12395 const char *
12396 rs6000_output_move_128bit (rtx operands[])
12398 rtx dest = operands[0];
12399 rtx src = operands[1];
12400 machine_mode mode = GET_MODE (dest);
12401 int dest_regno;
12402 int src_regno;
12403 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12404 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12406 if (REG_P (dest))
12408 dest_regno = REGNO (dest);
12409 dest_gpr_p = INT_REGNO_P (dest_regno);
12410 dest_fp_p = FP_REGNO_P (dest_regno);
12411 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12412 dest_vsx_p = dest_fp_p | dest_vmx_p;
12414 else
12416 dest_regno = -1;
12417 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12420 if (REG_P (src))
12422 src_regno = REGNO (src);
12423 src_gpr_p = INT_REGNO_P (src_regno);
12424 src_fp_p = FP_REGNO_P (src_regno);
12425 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12426 src_vsx_p = src_fp_p | src_vmx_p;
12428 else
12430 src_regno = -1;
12431 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12434 /* Register moves. */
12435 if (dest_regno >= 0 && src_regno >= 0)
12437 if (dest_gpr_p)
12439 if (src_gpr_p)
12440 return "#";
12442 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12443 return (WORDS_BIG_ENDIAN
12444 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12445 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12447 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12448 return "#";
12451 else if (TARGET_VSX && dest_vsx_p)
12453 if (src_vsx_p)
12454 return "xxlor %x0,%x1,%x1";
12456 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12457 return (WORDS_BIG_ENDIAN
12458 ? "mtvsrdd %x0,%1,%L1"
12459 : "mtvsrdd %x0,%L1,%1");
12461 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12462 return "#";
12465 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12466 return "vor %0,%1,%1";
12468 else if (dest_fp_p && src_fp_p)
12469 return "#";
12472 /* Loads. */
12473 else if (dest_regno >= 0 && MEM_P (src))
12475 if (dest_gpr_p)
12477 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12478 return "lq %0,%1";
12479 else
12480 return "#";
12483 else if (TARGET_ALTIVEC && dest_vmx_p
12484 && altivec_indexed_or_indirect_operand (src, mode))
12485 return "lvx %0,%y1";
12487 else if (TARGET_VSX && dest_vsx_p)
12489 if (mode_supports_dq_form (mode)
12490 && quad_address_p (XEXP (src, 0), mode, true))
12491 return "lxv %x0,%1";
12493 else if (TARGET_P9_VECTOR)
12494 return "lxvx %x0,%y1";
12496 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12497 return "lxvw4x %x0,%y1";
12499 else
12500 return "lxvd2x %x0,%y1";
12503 else if (TARGET_ALTIVEC && dest_vmx_p)
12504 return "lvx %0,%y1";
12506 else if (dest_fp_p)
12507 return "#";
12510 /* Stores. */
12511 else if (src_regno >= 0 && MEM_P (dest))
12513 if (src_gpr_p)
12515 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12516 return "stq %1,%0";
12517 else
12518 return "#";
12521 else if (TARGET_ALTIVEC && src_vmx_p
12522 && altivec_indexed_or_indirect_operand (dest, mode))
12523 return "stvx %1,%y0";
12525 else if (TARGET_VSX && src_vsx_p)
12527 if (mode_supports_dq_form (mode)
12528 && quad_address_p (XEXP (dest, 0), mode, true))
12529 return "stxv %x1,%0";
12531 else if (TARGET_P9_VECTOR)
12532 return "stxvx %x1,%y0";
12534 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12535 return "stxvw4x %x1,%y0";
12537 else
12538 return "stxvd2x %x1,%y0";
12541 else if (TARGET_ALTIVEC && src_vmx_p)
12542 return "stvx %1,%y0";
12544 else if (src_fp_p)
12545 return "#";
12548 /* Constants. */
12549 else if (dest_regno >= 0
12550 && (CONST_INT_P (src)
12551 || CONST_WIDE_INT_P (src)
12552 || CONST_DOUBLE_P (src)
12553 || GET_CODE (src) == CONST_VECTOR))
12555 if (dest_gpr_p)
12556 return "#";
12558 else if ((dest_vmx_p && TARGET_ALTIVEC)
12559 || (dest_vsx_p && TARGET_VSX))
12560 return output_vec_const_move (operands);
12563 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12566 /* Validate a 128-bit move. */
12567 bool
12568 rs6000_move_128bit_ok_p (rtx operands[])
12570 machine_mode mode = GET_MODE (operands[0]);
12571 return (gpc_reg_operand (operands[0], mode)
12572 || gpc_reg_operand (operands[1], mode));
12575 /* Return true if a 128-bit move needs to be split. */
12576 bool
12577 rs6000_split_128bit_ok_p (rtx operands[])
12579 if (!reload_completed)
12580 return false;
12582 if (!gpr_or_gpr_p (operands[0], operands[1]))
12583 return false;
12585 if (quad_load_store_p (operands[0], operands[1]))
12586 return false;
12588 return true;
12592 /* Given a comparison operation, return the bit number in CCR to test. We
12593 know this is a valid comparison.
12595 SCC_P is 1 if this is for an scc. That means that %D will have been
12596 used instead of %C, so the bits will be in different places.
12598 Return -1 if OP isn't a valid comparison for some reason. */
12600 int
12601 ccr_bit (rtx op, int scc_p)
12603 enum rtx_code code = GET_CODE (op);
12604 machine_mode cc_mode;
12605 int cc_regnum;
12606 int base_bit;
12607 rtx reg;
12609 if (!COMPARISON_P (op))
12610 return -1;
12612 reg = XEXP (op, 0);
12614 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12615 return -1;
12617 cc_mode = GET_MODE (reg);
12618 cc_regnum = REGNO (reg);
12619 base_bit = 4 * (cc_regnum - CR0_REGNO);
12621 validate_condition_mode (code, cc_mode);
12623 /* When generating a sCOND operation, only positive conditions are
12624 allowed. */
12625 if (scc_p)
12626 switch (code)
12628 case EQ:
12629 case GT:
12630 case LT:
12631 case UNORDERED:
12632 case GTU:
12633 case LTU:
12634 break;
12635 default:
12636 return -1;
12639 switch (code)
12641 case NE:
12642 return scc_p ? base_bit + 3 : base_bit + 2;
12643 case EQ:
12644 return base_bit + 2;
12645 case GT: case GTU: case UNLE:
12646 return base_bit + 1;
12647 case LT: case LTU: case UNGE:
12648 return base_bit;
12649 case ORDERED: case UNORDERED:
12650 return base_bit + 3;
12652 case GE: case GEU:
12653 /* If scc, we will have done a cror to put the bit in the
12654 unordered position. So test that bit. For integer, this is ! LT
12655 unless this is an scc insn. */
12656 return scc_p ? base_bit + 3 : base_bit;
12658 case LE: case LEU:
12659 return scc_p ? base_bit + 3 : base_bit + 1;
12661 default:
12662 return -1;
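/* Worked example: for (gt (reg:CC cr2) (const_int 0)), base_bit is
   4 * (cc_regnum - CR0_REGNO) = 8 and ccr_bit returns base_bit + 1 = 9,
   the GT bit of field 2, which is what '%j' below prints for a branch. */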
12666 /* Return the GOT register. */
12668 rtx
12669 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12671 /* The second flow pass currently (June 1999) can't update
12672 regs_ever_live without disturbing other parts of the compiler, so
12673 update it here to make the prolog/epilogue code happy. */
12674 if (!can_create_pseudo_p ()
12675 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12676 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12678 crtl->uses_pic_offset_table = 1;
12680 return pic_offset_table_rtx;
12683 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12685 /* Write out a function code label. */
12687 void
12688 rs6000_output_function_entry (FILE *file, const char *fname)
12690 if (fname[0] != '.')
12692 switch (DEFAULT_ABI)
12694 default:
12695 gcc_unreachable ();
12697 case ABI_AIX:
12698 if (DOT_SYMBOLS)
12699 putc ('.', file);
12700 else
12701 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12702 break;
12704 case ABI_ELFv2:
12705 case ABI_V4:
12706 case ABI_DARWIN:
12707 break;
12711 RS6000_OUTPUT_BASENAME (file, fname);
12714 /* Print an operand. Recognize special options, documented below. */
12716 #if TARGET_ELF
12717 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12718 only introduced by the linker, when applying the sda21
12719 relocation. */
12720 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12721 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12722 #else
12723 #define SMALL_DATA_RELOC "sda21"
12724 #define SMALL_DATA_REG 0
12725 #endif
12727 void
12728 print_operand (FILE *file, rtx x, int code)
12730 int i;
12731 unsigned HOST_WIDE_INT uval;
12733 switch (code)
12735 /* %a is output_address. */
12737 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12738 output_operand. */
12740 case 'D':
12741 /* Like 'J' but get to the GT bit only. */
12742 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12744 output_operand_lossage ("invalid %%D value");
12745 return;
12748 /* Bit 1 is GT bit. */
12749 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12751 /* Add one for shift count in rlinm for scc. */
12752 fprintf (file, "%d", i + 1);
12753 return;
12755 case 'e':
12756 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12757 if (! INT_P (x))
12759 output_operand_lossage ("invalid %%e value");
12760 return;
12763 uval = INTVAL (x);
12764 if ((uval & 0xffff) == 0 && uval != 0)
12765 putc ('s', file);
12766 return;
12768 case 'E':
12769 /* X is a CR register. Print the number of the EQ bit of the CR. */
12770 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12771 output_operand_lossage ("invalid %%E value");
12772 else
12773 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12774 return;
12776 case 'f':
12777 /* X is a CR register. Print the shift count needed to move it
12778 to the high-order four bits. */
12779 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12780 output_operand_lossage ("invalid %%f value");
12781 else
12782 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12783 return;
12785 case 'F':
12786 /* Similar, but print the count for the rotate in the opposite
12787 direction. */
12788 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12789 output_operand_lossage ("invalid %%F value");
12790 else
12791 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12792 return;
12794 case 'G':
12795 /* X is a constant integer. If it is negative, print "m",
12796 otherwise print "z". This is to make an aze or ame insn. */
12797 if (!CONST_INT_P (x))
12798 output_operand_lossage ("invalid %%G value");
12799 else if (INTVAL (x) >= 0)
12800 putc ('z', file);
12801 else
12802 putc ('m', file);
12803 return;
12805 case 'h':
12806 /* If constant, output low-order five bits. Otherwise, write
12807 normally. */
12808 if (INT_P (x))
12809 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12810 else
12811 print_operand (file, x, 0);
12812 return;
12814 case 'H':
12815 /* If constant, output low-order six bits. Otherwise, write
12816 normally. */
12817 if (INT_P (x))
12818 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12819 else
12820 print_operand (file, x, 0);
12821 return;
12823 case 'I':
12824 /* Print `i' if this is a constant, else nothing. */
12825 if (INT_P (x))
12826 putc ('i', file);
12827 return;
12829 case 'j':
12830 /* Write the bit number in CCR for jump. */
12831 i = ccr_bit (x, 0);
12832 if (i == -1)
12833 output_operand_lossage ("invalid %%j code");
12834 else
12835 fprintf (file, "%d", i);
12836 return;
12838 case 'J':
12839 /* Similar, but add one for shift count in rlinm for scc and pass
12840 scc flag to `ccr_bit'. */
12841 i = ccr_bit (x, 1);
12842 if (i == -1)
12843 output_operand_lossage ("invalid %%J code");
12844 else
12845 /* If we want bit 31, write a shift count of zero, not 32. */
12846 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12847 return;
12849 case 'k':
12850 /* X must be a constant. Write the 1's complement of the
12851 constant. */
12852 if (! INT_P (x))
12853 output_operand_lossage ("invalid %%k value");
12854 else
12855 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12856 return;
12858 case 'K':
12859 /* X must be a symbolic constant on ELF. Write an
12860 expression suitable for an 'addi' that adds in the low 16
12861 bits of the MEM. */
12862 if (GET_CODE (x) == CONST)
12864 if (GET_CODE (XEXP (x, 0)) != PLUS
12865 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12866 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12867 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12868 output_operand_lossage ("invalid %%K value");
12870 print_operand_address (file, x);
12871 fputs ("@l", file);
12872 return;
12874 /* %l is output_asm_label. */
12876 case 'L':
12877 /* Write second word of DImode or DFmode reference. Works on register
12878 or non-indexed memory only. */
12879 if (REG_P (x))
12880 fputs (reg_names[REGNO (x) + 1], file);
12881 else if (MEM_P (x))
12883 machine_mode mode = GET_MODE (x);
12884 /* Handle possible auto-increment. Since it is pre-increment and
12885 we have already done it, we can just use an offset of word. */
12886 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12887 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12888 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12889 UNITS_PER_WORD));
12890 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12891 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12892 UNITS_PER_WORD));
12893 else
12894 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12895 UNITS_PER_WORD),
12896 0));
12898 if (small_data_operand (x, GET_MODE (x)))
12899 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12900 reg_names[SMALL_DATA_REG]);
12902 return;
12904 case 'N': /* Unused */
12905 /* Write the number of elements in the vector times 4. */
12906 if (GET_CODE (x) != PARALLEL)
12907 output_operand_lossage ("invalid %%N value");
12908 else
12909 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12910 return;
12912 case 'O': /* Unused */
12913 /* Similar, but subtract 1 first. */
12914 if (GET_CODE (x) != PARALLEL)
12915 output_operand_lossage ("invalid %%O value");
12916 else
12917 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12918 return;
12920 case 'p':
12921 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12922 if (! INT_P (x)
12923 || INTVAL (x) < 0
12924 || (i = exact_log2 (INTVAL (x))) < 0)
12925 output_operand_lossage ("invalid %%p value");
12926 else
12927 fprintf (file, "%d", i);
12928 return;
12930 case 'P':
12931 /* The operand must be an indirect memory reference. The result
12932 is the register name. */
12933 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12934 || REGNO (XEXP (x, 0)) >= 32)
12935 output_operand_lossage ("invalid %%P value");
12936 else
12937 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12938 return;
12940 case 'q':
12941 /* This outputs the logical code corresponding to a boolean
12942 expression. The expression may have one or both operands
12943 negated (if one, only the first one). For condition register
12944 logical operations, it will also treat the negated
12945 CR codes as NOTs, but not handle NOTs of them. */
12947 const char *const *t = 0;
12948 const char *s;
12949 enum rtx_code code = GET_CODE (x);
12950 static const char * const tbl[3][3] = {
12951 { "and", "andc", "nor" },
12952 { "or", "orc", "nand" },
12953 { "xor", "eqv", "xor" } };
12955 if (code == AND)
12956 t = tbl[0];
12957 else if (code == IOR)
12958 t = tbl[1];
12959 else if (code == XOR)
12960 t = tbl[2];
12961 else
12962 output_operand_lossage ("invalid %%q value");
12964 if (GET_CODE (XEXP (x, 0)) != NOT)
12965 s = t[0];
12966 else
12968 if (GET_CODE (XEXP (x, 1)) == NOT)
12969 s = t[2];
12970 else
12971 s = t[1];
12974 fputs (s, file);
12976 return;
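/* [Editor's note, not part of the original file.]  The third column of TBL
   above handles both operands negated: De Morgan gives ~a & ~b = ~(a | b)
   (nor) and ~a | ~b = ~(a & b) (nand), while for XOR the negations cancel,
   ~a ^ ~b = a ^ b, which is why "xor" appears twice in the last row.  A
   self-contained check of all three identities: */

static int
print_operand_q_identities_hold (unsigned int a, unsigned int b)
{
  return (~a & ~b) == ~(a | b)		/* "nor" row entry */
	 && (~a | ~b) == ~(a & b)	/* "nand" row entry */
	 && (~a ^ ~b) == (a ^ b);	/* "xor" appears twice */
}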
12978 case 'Q':
12979 if (! TARGET_MFCRF)
12980 return;
12981 fputc (',', file);
12982 /* FALLTHRU */
12984 case 'R':
12985 /* X is a CR register. Print the mask for `mtcrf'. */
12986 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12987 output_operand_lossage ("invalid %%R value");
12988 else
12989 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12990 return;
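/* [Editor's sketch, not part of the original file.]  mtcrf selects CR
   fields with an 8-bit mask whose most significant bit names CR0, so field
   N corresponds to mask 128 >> N -- exactly the expression printed above
   (CR0 -> 0x80, ..., CR7 -> 0x01).  Standalone model: */

static int
mtcrf_field_mask_model (int cr_field)
{
  /* CR_FIELD must be in 0..7.  */
  return 128 >> cr_field;
}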
12992 case 's':
12993 /* Low 5 bits of 32 - value */
12994 if (! INT_P (x))
12995 output_operand_lossage ("invalid %%s value");
12996 else
12997 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12998 return;
13000 case 't':
13001 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13002 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13004 output_operand_lossage ("invalid %%t value");
13005 return;
13008 /* Bit 3 is OV bit. */
13009 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13011 /* If we want bit 31, write a shift count of zero, not 32. */
13012 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13013 return;
13015 case 'T':
13016 /* Print the symbolic name of a branch target register. */
13017 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13018 x = XVECEXP (x, 0, 0);
13019 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13020 && REGNO (x) != CTR_REGNO))
13021 output_operand_lossage ("invalid %%T value");
13022 else if (REGNO (x) == LR_REGNO)
13023 fputs ("lr", file);
13024 else
13025 fputs ("ctr", file);
13026 return;
13028 case 'u':
13029 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13030 for use in unsigned operand. */
13031 if (! INT_P (x))
13033 output_operand_lossage ("invalid %%u value");
13034 return;
13037 uval = INTVAL (x);
13038 if ((uval & 0xffff) == 0)
13039 uval >>= 16;
13041 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13042 return;
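/* [Editor's sketch, not part of the original file.]  The %u case prints
   the low 16 bits of the constant unless they are all zero, in which case
   it prints the high 16 bits, leaving it to the insn template (e.g. andi.
   vs andis.) to use the matching halfword.  Standalone model: */

static unsigned int
nonzero_halfword_model (unsigned long long v)
{
  if ((v & 0xffff) == 0)
    v >>= 16;			/* low half zero: use the high half */
  return (unsigned int) (v & 0xffff);
}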
13044 case 'v':
13045 /* High-order 16 bits of constant for use in signed operand. */
13046 if (! INT_P (x))
13047 output_operand_lossage ("invalid %%v value");
13048 else
13049 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13050 (INTVAL (x) >> 16) & 0xffff);
13051 return;
13053 case 'U':
13054 /* Print `u' if this has an auto-increment or auto-decrement. */
13055 if (MEM_P (x)
13056 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13057 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13058 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13059 putc ('u', file);
13060 return;
13062 case 'V':
13063 /* Print the trap code for this operand. */
13064 switch (GET_CODE (x))
13066 case EQ:
13067 fputs ("eq", file); /* 4 */
13068 break;
13069 case NE:
13070 fputs ("ne", file); /* 24 */
13071 break;
13072 case LT:
13073 fputs ("lt", file); /* 16 */
13074 break;
13075 case LE:
13076 fputs ("le", file); /* 20 */
13077 break;
13078 case GT:
13079 fputs ("gt", file); /* 8 */
13080 break;
13081 case GE:
13082 fputs ("ge", file); /* 12 */
13083 break;
13084 case LTU:
13085 fputs ("llt", file); /* 2 */
13086 break;
13087 case LEU:
13088 fputs ("lle", file); /* 6 */
13089 break;
13090 case GTU:
13091 fputs ("lgt", file); /* 1 */
13092 break;
13093 case GEU:
13094 fputs ("lge", file); /* 5 */
13095 break;
13096 default:
13097 output_operand_lossage ("invalid %%V value");
13099 break;
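/* [Editor's note, not part of the original file.]  The numbers in the
   comments above are the TO-field encodings for the tw/td trap
   instructions: lt = 16, gt = 8, eq = 4, llt = 2 and lgt = 1, with
   compound conditions formed by OR-ing bits (ne = lt|gt = 24,
   le = lt|eq = 20, lle = llt|eq = 6, and so on).  Standalone model: */

enum trap_cond_model { TCM_EQ, TCM_NE, TCM_LT, TCM_LE, TCM_GT, TCM_GE,
		       TCM_LLT, TCM_LLE, TCM_LGT, TCM_LGE };

static int
to_field_bits_model (enum trap_cond_model c)
{
  switch (c)
    {
    case TCM_EQ:  return 4;
    case TCM_NE:  return 16 | 8;	/* 24 */
    case TCM_LT:  return 16;
    case TCM_LE:  return 16 | 4;	/* 20 */
    case TCM_GT:  return 8;
    case TCM_GE:  return 8 | 4;		/* 12 */
    case TCM_LLT: return 2;
    case TCM_LLE: return 2 | 4;		/* 6 */
    case TCM_LGT: return 1;
    case TCM_LGE: return 1 | 4;		/* 5 */
    }
  return 0;
}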
13101 case 'w':
13102 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13103 normally. */
13104 if (INT_P (x))
13105 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13106 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13107 else
13108 print_operand (file, x, 0);
13109 return;
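/* [Editor's note, not part of the original file.]  The expression in the
   %w case above is the branch-free idiom for sign-extending a 16-bit
   field: masking keeps the low halfword, XOR-ing 0x8000 flips the sign
   bit, and subtracting 0x8000 restores it with the correct sign, so 0xffff
   becomes -1 while 0x7fff stays 32767.  Standalone model: */

static long long
sign_extend_16_model (long long v)
{
  return ((v & 0xffff) ^ 0x8000) - 0x8000;
}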
13111 case 'x':
13112 /* X is a FPR or Altivec register used in a VSX context. */
13113 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13114 output_operand_lossage ("invalid %%x value");
13115 else
13117 int reg = REGNO (x);
13118 int vsx_reg = (FP_REGNO_P (reg)
13119 ? reg - 32
13120 : reg - FIRST_ALTIVEC_REGNO + 32);
13122 #ifdef TARGET_REGNAMES
13123 if (TARGET_REGNAMES)
13124 fprintf (file, "%%vs%d", vsx_reg);
13125 else
13126 #endif
13127 fprintf (file, "%d", vsx_reg);
13129 return;
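/* [Editor's sketch, not part of the original file; the register-bank
   boundaries below are illustrative assumptions, not the backend's actual
   macros.]  The %x case folds the two 32-register banks into the single
   64-register VSX space: FPRs map to VSX 0..31, Altivec registers to VSX
   32..63.  */

static int
vsx_register_number_model (int gcc_regno)
{
  const int first_fp = 32;	/* assumed first FPR regno */
  const int first_altivec = 64;	/* stand-in for FIRST_ALTIVEC_REGNO */
  if (gcc_regno >= first_fp && gcc_regno < first_fp + 32)
    return gcc_regno - first_fp;		/* FPR bank */
  return gcc_regno - first_altivec + 32;	/* Altivec bank */
}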
13131 case 'X':
13132 if (MEM_P (x)
13133 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13134 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13135 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13136 putc ('x', file);
13137 return;
13139 case 'Y':
13140 /* Like 'L', for third word of TImode/PTImode */
13141 if (REG_P (x))
13142 fputs (reg_names[REGNO (x) + 2], file);
13143 else if (MEM_P (x))
13145 machine_mode mode = GET_MODE (x);
13146 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13147 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13148 output_address (mode, plus_constant (Pmode,
13149 XEXP (XEXP (x, 0), 0), 8));
13150 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13151 output_address (mode, plus_constant (Pmode,
13152 XEXP (XEXP (x, 0), 0), 8));
13153 else
13154 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13155 if (small_data_operand (x, GET_MODE (x)))
13156 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13157 reg_names[SMALL_DATA_REG]);
13159 return;
13161 case 'z':
13162 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13163 x = XVECEXP (x, 0, 1);
13164 /* X is a SYMBOL_REF. Write out the name preceded by a
13165 period and without any trailing data in brackets. Used for function
13166 names. If we are configured for System V (or the embedded ABI) on
13167 the PowerPC, do not emit the period, since those systems do not use
13168 TOCs and the like. */
13169 if (!SYMBOL_REF_P (x))
13171 output_operand_lossage ("invalid %%z value");
13172 return;
13175 /* For macho, check to see if we need a stub. */
13176 if (TARGET_MACHO)
13178 const char *name = XSTR (x, 0);
13179 #if TARGET_MACHO
13180 if (darwin_symbol_stubs
13181 && MACHOPIC_INDIRECT
13182 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13183 name = machopic_indirection_name (x, /*stub_p=*/true);
13184 #endif
13185 assemble_name (file, name);
13187 else if (!DOT_SYMBOLS)
13188 assemble_name (file, XSTR (x, 0));
13189 else
13190 rs6000_output_function_entry (file, XSTR (x, 0));
13191 return;
13193 case 'Z':
13194 /* Like 'L', for last word of TImode/PTImode. */
13195 if (REG_P (x))
13196 fputs (reg_names[REGNO (x) + 3], file);
13197 else if (MEM_P (x))
13199 machine_mode mode = GET_MODE (x);
13200 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13201 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13202 output_address (mode, plus_constant (Pmode,
13203 XEXP (XEXP (x, 0), 0), 12));
13204 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13205 output_address (mode, plus_constant (Pmode,
13206 XEXP (XEXP (x, 0), 0), 12));
13207 else
13208 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13209 if (small_data_operand (x, GET_MODE (x)))
13210 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13211 reg_names[SMALL_DATA_REG]);
13213 return;
13215 /* Print AltiVec memory operand. */
13216 case 'y':
13218 rtx tmp;
13220 gcc_assert (MEM_P (x));
13222 tmp = XEXP (x, 0);
13224 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13225 && GET_CODE (tmp) == AND
13226 && CONST_INT_P (XEXP (tmp, 1))
13227 && INTVAL (XEXP (tmp, 1)) == -16)
13228 tmp = XEXP (tmp, 0);
13229 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13230 && GET_CODE (tmp) == PRE_MODIFY)
13231 tmp = XEXP (tmp, 1);
13232 if (REG_P (tmp))
13233 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13234 else
13236 if (GET_CODE (tmp) != PLUS
13237 || !REG_P (XEXP (tmp, 0))
13238 || !REG_P (XEXP (tmp, 1)))
13240 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13241 break;
13244 if (REGNO (XEXP (tmp, 0)) == 0)
13245 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13246 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13247 else
13248 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13249 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13251 break;
13254 case 0:
13255 if (REG_P (x))
13256 fprintf (file, "%s", reg_names[REGNO (x)]);
13257 else if (MEM_P (x))
13259 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13260 know the width from the mode. */
13261 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13262 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13263 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13264 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13265 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13266 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13267 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13268 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13269 else
13270 output_address (GET_MODE (x), XEXP (x, 0));
13272 else if (toc_relative_expr_p (x, false,
13273 &tocrel_base_oac, &tocrel_offset_oac))
13274 /* This hack along with a corresponding hack in
13275 rs6000_output_addr_const_extra arranges to output addends
13276 where the assembler expects to find them, e.g.
13277 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13278 without this hack would be output as "x@toc+4". We
13279 want "x+4@toc". */
13280 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13281 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13282 output_addr_const (file, XVECEXP (x, 0, 0));
13283 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13284 output_addr_const (file, XVECEXP (x, 0, 1));
13285 else
13286 output_addr_const (file, x);
13287 return;
13289 case '&':
13290 if (const char *name = get_some_local_dynamic_name ())
13291 assemble_name (file, name);
13292 else
13293 output_operand_lossage ("'%%&' used without any "
13294 "local dynamic TLS references");
13295 return;
13297 default:
13298 output_operand_lossage ("invalid %%xn code");
13302 /* Print the address of an operand. */
13304 void
13305 print_operand_address (FILE *file, rtx x)
13307 if (REG_P (x))
13308 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13310 /* Is it a PC-relative address? */
13311 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13313 HOST_WIDE_INT offset;
13315 if (GET_CODE (x) == CONST)
13316 x = XEXP (x, 0);
13318 if (GET_CODE (x) == PLUS)
13320 offset = INTVAL (XEXP (x, 1));
13321 x = XEXP (x, 0);
13323 else
13324 offset = 0;
13326 output_addr_const (file, x);
13328 if (offset)
13329 fprintf (file, "%+" PRId64, offset);
13331 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13332 fprintf (file, "@got");
13334 fprintf (file, "@pcrel");
13336 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13337 || GET_CODE (x) == LABEL_REF)
13339 output_addr_const (file, x);
13340 if (small_data_operand (x, GET_MODE (x)))
13341 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13342 reg_names[SMALL_DATA_REG]);
13343 else
13344 gcc_assert (!TARGET_TOC);
13346 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13347 && REG_P (XEXP (x, 1)))
13349 if (REGNO (XEXP (x, 0)) == 0)
13350 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13351 reg_names[ REGNO (XEXP (x, 0)) ]);
13352 else
13353 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13354 reg_names[ REGNO (XEXP (x, 1)) ]);
13356 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13357 && CONST_INT_P (XEXP (x, 1)))
13358 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13359 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13360 #if TARGET_MACHO
13361 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13362 && CONSTANT_P (XEXP (x, 1)))
13364 fprintf (file, "lo16(");
13365 output_addr_const (file, XEXP (x, 1));
13366 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13368 #endif
13369 #if TARGET_ELF
13370 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13371 && CONSTANT_P (XEXP (x, 1)))
13373 output_addr_const (file, XEXP (x, 1));
13374 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13376 #endif
13377 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13379 /* This hack along with a corresponding hack in
13380 rs6000_output_addr_const_extra arranges to output addends
13381 where the assembler expects to find them, e.g.
13382 (lo_sum (reg 9)
13383 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13384 without this hack would be output as "x@toc+8@l(9)". We
13385 want "x+8@toc@l(9)". */
13386 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13387 if (GET_CODE (x) == LO_SUM)
13388 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13389 else
13390 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13392 else
13393 output_addr_const (file, x);
13396 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13398 bool
13399 rs6000_output_addr_const_extra (FILE *file, rtx x)
13401 if (GET_CODE (x) == UNSPEC)
13402 switch (XINT (x, 1))
13404 case UNSPEC_TOCREL:
13405 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13406 && REG_P (XVECEXP (x, 0, 1))
13407 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13408 output_addr_const (file, XVECEXP (x, 0, 0));
13409 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13411 if (INTVAL (tocrel_offset_oac) >= 0)
13412 fprintf (file, "+");
13413 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13415 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13417 putc ('-', file);
13418 assemble_name (file, toc_label_name);
13419 need_toc_init = 1;
13421 else if (TARGET_ELF)
13422 fputs ("@toc", file);
13423 return true;
13425 #if TARGET_MACHO
13426 case UNSPEC_MACHOPIC_OFFSET:
13427 output_addr_const (file, XVECEXP (x, 0, 0));
13428 putc ('-', file);
13429 machopic_output_function_base_name (file);
13430 return true;
13431 #endif
13433 return false;
13436 /* Target hook for assembling integer objects. The PowerPC version has
13437 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13438 is defined. It also needs to handle DI-mode objects on 64-bit
13439 targets. */
13441 static bool
13442 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13444 #ifdef RELOCATABLE_NEEDS_FIXUP
13445 /* Special handling for SI values. */
13446 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13448 static int recurse = 0;
13450 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13451 the .fixup section. Since the TOC section is already relocated, we
13452 don't need to mark it here. We used to skip the text section, but it
13453 should never be valid for relocated addresses to be placed in the text
13454 section. */
13455 if (DEFAULT_ABI == ABI_V4
13456 && (TARGET_RELOCATABLE || flag_pic > 1)
13457 && in_section != toc_section
13458 && !recurse
13459 && !CONST_SCALAR_INT_P (x)
13460 && CONSTANT_P (x))
13462 char buf[256];
13464 recurse = 1;
13465 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13466 fixuplabelno++;
13467 ASM_OUTPUT_LABEL (asm_out_file, buf);
13468 fprintf (asm_out_file, "\t.long\t(");
13469 output_addr_const (asm_out_file, x);
13470 fprintf (asm_out_file, ")@fixup\n");
13471 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13472 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13473 fprintf (asm_out_file, "\t.long\t");
13474 assemble_name (asm_out_file, buf);
13475 fprintf (asm_out_file, "\n\t.previous\n");
13476 recurse = 0;
13477 return true;
13479 /* Remove initial .'s to turn a -mcall-aixdesc function
13480 address into the address of the descriptor, not the function
13481 itself. */
13482 else if (SYMBOL_REF_P (x)
13483 && XSTR (x, 0)[0] == '.'
13484 && DEFAULT_ABI == ABI_AIX)
13486 const char *name = XSTR (x, 0);
13487 while (*name == '.')
13488 name++;
13490 fprintf (asm_out_file, "\t.long\t%s\n", name);
13491 return true;
13494 #endif /* RELOCATABLE_NEEDS_FIXUP */
13495 return default_assemble_integer (x, size, aligned_p);
13498 /* Return a template string for assembly to emit when making an
13499 external call. FUNOP is the call mem argument operand number. */
13501 static const char *
13502 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13504 /* -Wformat-overflow workaround, without which gcc thinks that %u
13505 might produce 10 digits. */
13506 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13508 char arg[12];
13509 arg[0] = 0;
13510 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13512 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13513 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13514 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13515 sprintf (arg, "(%%&@tlsld)");
13518 /* The magic 32768 offset here corresponds to the offset of
13519 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13520 char z[11];
13521 sprintf (z, "%%z%u%s", funop,
13522 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13523 ? "+32768" : ""));
13525 static char str[32]; /* 1 spare */
13526 if (rs6000_pcrel_p (cfun))
13527 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13528 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13529 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13530 sibcall ? "" : "\n\tnop");
13531 else if (DEFAULT_ABI == ABI_V4)
13532 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13533 flag_pic ? "@plt" : "");
13534 #if TARGET_MACHO
13535 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13536 else if (DEFAULT_ABI == ABI_DARWIN)
13538 /* The cookie is in operand func+2. */
13539 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13540 int cookie = INTVAL (operands[funop + 2]);
13541 if (cookie & CALL_LONG)
13543 tree funname = get_identifier (XSTR (operands[funop], 0));
13544 tree labelname = get_prev_label (funname);
13545 gcc_checking_assert (labelname && !sibcall);
13547 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13548 instruction will reach 'foo', otherwise link as 'bl L42'".
13549 "L42" should be a 'branch island', that will do a far jump to
13550 'foo'. Branch islands are generated in
13551 macho_branch_islands(). */
13552 sprintf (str, "jbsr %%z%u,%.10s", funop,
13553 IDENTIFIER_POINTER (labelname));
13555 else
13556 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13557 after the call. */
13558 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13560 #endif
13561 else
13562 gcc_unreachable ();
13563 return str;
13566 const char *
13567 rs6000_call_template (rtx *operands, unsigned int funop)
13569 return rs6000_call_template_1 (operands, funop, false);
13572 const char *
13573 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13575 return rs6000_call_template_1 (operands, funop, true);
13578 /* As above, for indirect calls. */
13580 static const char *
13581 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13582 bool sibcall)
13584 /* -Wformat-overflow workaround, without which gcc thinks that %u
13585 might produce 10 digits. Note that -Wformat-overflow will not
13586 currently warn here for str[], so do not rely on a warning to
13587 ensure str[] is correctly sized. */
13588 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13590 /* Currently, funop is either 0 or 1. The maximum string is always
13591 a !speculate 64-bit __tls_get_addr call.
13593 ABI_ELFv2, pcrel:
13594 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13595 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13596 . 9 crset 2\n\t
13597 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13598 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13599 . 8 beq%T1l-
13600 .---
13601 .142
13603 ABI_AIX:
13604 . 9 ld 2,%3\n\t
13605 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13606 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13607 . 9 crset 2\n\t
13608 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13609 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13610 . 10 beq%T1l-\n\t
13611 . 10 ld 2,%4(1)
13612 .---
13613 .151
13615 ABI_ELFv2:
13616 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13617 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13618 . 9 crset 2\n\t
13619 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13620 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13621 . 10 beq%T1l-\n\t
13622 . 10 ld 2,%3(1)
13623 .---
13624 .142
13626 ABI_V4:
13627 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13628 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13629 . 9 crset 2\n\t
13630 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13631 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13632 . 8 beq%T1l-
13633 .---
13634 .141 */
13635 static char str[160]; /* 8 spare */
13636 char *s = str;
13637 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13639 if (DEFAULT_ABI == ABI_AIX)
13640 s += sprintf (s,
13641 "l%s 2,%%%u\n\t",
13642 ptrload, funop + 3);
13644 /* We don't need the extra code to stop indirect call speculation if
13645 calling via LR. */
13646 bool speculate = (TARGET_MACHO
13647 || rs6000_speculate_indirect_jumps
13648 || (REG_P (operands[funop])
13649 && REGNO (operands[funop]) == LR_REGNO));
13651 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13653 const char *rel64 = TARGET_64BIT ? "64" : "";
13654 char tls[29];
13655 tls[0] = 0;
13656 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13658 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13659 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13660 rel64, funop + 1);
13661 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13662 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13663 rel64);
13666 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13667 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13668 && flag_pic == 2 ? "+32768" : "");
13669 if (!speculate)
13671 s += sprintf (s,
13672 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13673 tls, rel64, notoc, funop, addend);
13674 s += sprintf (s, "crset 2\n\t");
13676 s += sprintf (s,
13677 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13678 tls, rel64, notoc, funop, addend);
13680 else if (!speculate)
13681 s += sprintf (s, "crset 2\n\t");
13683 if (rs6000_pcrel_p (cfun))
13685 if (speculate)
13686 sprintf (s, "b%%T%ul", funop);
13687 else
13688 sprintf (s, "beq%%T%ul-", funop);
13690 else if (DEFAULT_ABI == ABI_AIX)
13692 if (speculate)
13693 sprintf (s,
13694 "b%%T%ul\n\t"
13695 "l%s 2,%%%u(1)",
13696 funop, ptrload, funop + 4);
13697 else
13698 sprintf (s,
13699 "beq%%T%ul-\n\t"
13700 "l%s 2,%%%u(1)",
13701 funop, ptrload, funop + 4);
13703 else if (DEFAULT_ABI == ABI_ELFv2)
13705 if (speculate)
13706 sprintf (s,
13707 "b%%T%ul\n\t"
13708 "l%s 2,%%%u(1)",
13709 funop, ptrload, funop + 3);
13710 else
13711 sprintf (s,
13712 "beq%%T%ul-\n\t"
13713 "l%s 2,%%%u(1)",
13714 funop, ptrload, funop + 3);
13716 else
13718 if (speculate)
13719 sprintf (s,
13720 "b%%T%u%s",
13721 funop, sibcall ? "" : "l");
13722 else
13723 sprintf (s,
13724 "beq%%T%u%s-%s",
13725 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13727 return str;
13730 const char *
13731 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13733 return rs6000_indirect_call_template_1 (operands, funop, false);
13736 const char *
13737 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13739 return rs6000_indirect_call_template_1 (operands, funop, true);
13742 #if HAVE_AS_PLTSEQ
13743 /* Output indirect call insns. WHICH identifies the type of sequence. */
13744 const char *
13745 rs6000_pltseq_template (rtx *operands, int which)
13747 const char *rel64 = TARGET_64BIT ? "64" : "";
13748 char tls[30];
13749 tls[0] = 0;
13750 if (GET_CODE (operands[3]) == UNSPEC)
13752 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13753 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13754 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13755 off, rel64);
13756 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13757 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13758 off, rel64);
13761 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13762 static char str[96]; /* 10 spare */
13763 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13764 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13765 && flag_pic == 2 ? "+32768" : "");
13766 switch (which)
13768 case RS6000_PLTSEQ_TOCSAVE:
13769 sprintf (str,
13770 "st%s\n\t"
13771 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13772 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13773 tls, rel64);
13774 break;
13775 case RS6000_PLTSEQ_PLT16_HA:
13776 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13777 sprintf (str,
13778 "lis %%0,0\n\t"
13779 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13780 tls, off, rel64);
13781 else
13782 sprintf (str,
13783 "addis %%0,%%1,0\n\t"
13784 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13785 tls, off, rel64, addend);
13786 break;
13787 case RS6000_PLTSEQ_PLT16_LO:
13788 sprintf (str,
13789 "l%s %%0,0(%%1)\n\t"
13790 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13791 TARGET_64BIT ? "d" : "wz",
13792 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13793 break;
13794 case RS6000_PLTSEQ_MTCTR:
13795 sprintf (str,
13796 "mtctr %%1\n\t"
13797 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13798 tls, rel64, addend);
13799 break;
13800 case RS6000_PLTSEQ_PLT_PCREL34:
13801 sprintf (str,
13802 "pl%s %%0,0(0),1\n\t"
13803 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13804 TARGET_64BIT ? "d" : "wz",
13805 tls, rel64);
13806 break;
13807 default:
13808 gcc_unreachable ();
13810 return str;
13812 #endif
13814 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13815 /* Emit an assembler directive to set symbol visibility for DECL to
13816 VISIBILITY_TYPE. */
13818 static void
13819 rs6000_assemble_visibility (tree decl, int vis)
13821 if (TARGET_XCOFF)
13822 return;
13824 /* Functions need to have their entry point symbol visibility set as
13825 well as their descriptor symbol visibility. */
13826 if (DEFAULT_ABI == ABI_AIX
13827 && DOT_SYMBOLS
13828 && TREE_CODE (decl) == FUNCTION_DECL)
13830 static const char * const visibility_types[] = {
13831 NULL, "protected", "hidden", "internal"
13834 const char *name, *type;
13836 name = ((* targetm.strip_name_encoding)
13837 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13838 type = visibility_types[vis];
13840 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13841 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13843 else
13844 default_assemble_visibility (decl, vis);
13846 #endif
13848 enum rtx_code
13849 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13851 /* Reversal of FP compares takes care -- an ordered compare
13852 becomes an unordered compare and vice versa. */
13853 if (mode == CCFPmode
13854 && (!flag_finite_math_only
13855 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13856 || code == UNEQ || code == LTGT))
13857 return reverse_condition_maybe_unordered (code);
13858 else
13859 return reverse_condition (code);
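/* [Editor's note, not part of the original file.]  Under IEEE semantics
   the logical negation of an ordered compare must absorb the unordered
   case: !(a < b) is "a >= b or unordered", which is why CCFPmode
   reversals above go through reverse_condition_maybe_unordered (unless
   finite-math assumptions make the distinction moot).  A standalone check,
   using the fact that x != x is true exactly for NaN: */

static int
reversed_lt_matches_unge (double a, double b)
{
  int not_lt = !(a < b);
  int unge = (a >= b) || a != a || b != b;
  return not_lt == unge;	/* holds for all inputs, NaNs included */
}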
13862 /* Generate a compare for CODE. Return a brand-new rtx that
13863 represents the result of the compare. */
13865 static rtx
13866 rs6000_generate_compare (rtx cmp, machine_mode mode)
13868 machine_mode comp_mode;
13869 rtx compare_result;
13870 enum rtx_code code = GET_CODE (cmp);
13871 rtx op0 = XEXP (cmp, 0);
13872 rtx op1 = XEXP (cmp, 1);
13874 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13875 comp_mode = CCmode;
13876 else if (FLOAT_MODE_P (mode))
13877 comp_mode = CCFPmode;
13878 else if (code == GTU || code == LTU
13879 || code == GEU || code == LEU)
13880 comp_mode = CCUNSmode;
13881 else if ((code == EQ || code == NE)
13882 && unsigned_reg_p (op0)
13883 && (unsigned_reg_p (op1)
13884 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13885 /* These are unsigned values; perhaps there will be a later
13886 ordering compare that can be shared with this one. */
13887 comp_mode = CCUNSmode;
13888 else
13889 comp_mode = CCmode;
13891 /* If we have an unsigned compare, make sure we don't have a signed value as
13892 an immediate. */
13893 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13894 && INTVAL (op1) < 0)
13896 op0 = copy_rtx_if_shared (op0);
13897 op1 = force_reg (GET_MODE (op0), op1);
13898 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
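/* [Editor's note, not part of the original file.]  The fixup above exists
   because the unsigned compare-immediate forms (cmplwi/cmpldi) take an
   unsigned 16-bit field, so a negative CONST_INT has no encoding there and
   must be forced into a register.  A model of the encodability test: */

static int
fits_unsigned_cmp_immediate (long long v)
{
  return v >= 0 && v <= 0xffff;
}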
13901 /* First, the compare. */
13902 compare_result = gen_reg_rtx (comp_mode);
13904 /* IEEE 128-bit support in VSX registers when we do not have hardware
13905 support. */
13906 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13908 rtx libfunc = NULL_RTX;
13909 bool check_nan = false;
13910 rtx dest;
13912 switch (code)
13914 case EQ:
13915 case NE:
13916 libfunc = optab_libfunc (eq_optab, mode);
13917 break;
13919 case GT:
13920 case GE:
13921 libfunc = optab_libfunc (ge_optab, mode);
13922 break;
13924 case LT:
13925 case LE:
13926 libfunc = optab_libfunc (le_optab, mode);
13927 break;
13929 case UNORDERED:
13930 case ORDERED:
13931 libfunc = optab_libfunc (unord_optab, mode);
13932 code = (code == UNORDERED) ? NE : EQ;
13933 break;
13935 case UNGE:
13936 case UNGT:
13937 check_nan = true;
13938 libfunc = optab_libfunc (ge_optab, mode);
13939 code = (code == UNGE) ? GE : GT;
13940 break;
13942 case UNLE:
13943 case UNLT:
13944 check_nan = true;
13945 libfunc = optab_libfunc (le_optab, mode);
13946 code = (code == UNLE) ? LE : LT;
13947 break;
13949 case UNEQ:
13950 case LTGT:
13951 check_nan = true;
13952 libfunc = optab_libfunc (eq_optab, mode);
13953 code = (code == UNEQ) ? EQ : NE;
13954 break;
13956 default:
13957 gcc_unreachable ();
13960 gcc_assert (libfunc);
13962 if (!check_nan)
13963 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13964 SImode, op0, mode, op1, mode);
13966 /* The library signals an exception for signalling NaNs, so we need to
13967 handle isgreater, etc. by first checking isordered. */
13968 else
13970 rtx ne_rtx, normal_dest, unord_dest;
13971 rtx unord_func = optab_libfunc (unord_optab, mode);
13972 rtx join_label = gen_label_rtx ();
13973 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13974 rtx unord_cmp = gen_reg_rtx (comp_mode);
13977 /* Test for either value being a NaN. */
13978 gcc_assert (unord_func);
13979 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13980 SImode, op0, mode, op1, mode);
13982 /* Set value (1) if either value is a NaN, and jump to the join
13983 label. */
13984 dest = gen_reg_rtx (SImode);
13985 emit_move_insn (dest, const1_rtx);
13986 emit_insn (gen_rtx_SET (unord_cmp,
13987 gen_rtx_COMPARE (comp_mode, unord_dest,
13988 const0_rtx)));
13990 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13991 emit_jump_insn (gen_rtx_SET (pc_rtx,
13992 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13993 join_ref,
13994 pc_rtx)));
13996 /* Do the normal comparison, knowing that the values are not
13997 NaNs. */
13998 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13999 SImode, op0, mode, op1, mode);
14001 emit_insn (gen_cstoresi4 (dest,
14002 gen_rtx_fmt_ee (code, SImode, normal_dest,
14003 const0_rtx),
14004 normal_dest, const0_rtx));
14006 /* Join NaN and non-NaN paths. Compare dest against 0. */
14007 emit_label (join_label);
14008 code = NE;
14011 emit_insn (gen_rtx_SET (compare_result,
14012 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14015 else
14017 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14018 CLOBBERs to match cmptf_internal2 pattern. */
14019 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14020 && FLOAT128_IBM_P (GET_MODE (op0))
14021 && TARGET_HARD_FLOAT)
14022 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14023 gen_rtvec (10,
14024 gen_rtx_SET (compare_result,
14025 gen_rtx_COMPARE (comp_mode, op0, op1)),
14026 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14027 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14028 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14029 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14030 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14031 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14032 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14033 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14034 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14035 else if (GET_CODE (op1) == UNSPEC
14036 && XINT (op1, 1) == UNSPEC_SP_TEST)
14038 rtx op1b = XVECEXP (op1, 0, 0);
14039 comp_mode = CCEQmode;
14040 compare_result = gen_reg_rtx (CCEQmode);
14041 if (TARGET_64BIT)
14042 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14043 else
14044 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14046 else
14047 emit_insn (gen_rtx_SET (compare_result,
14048 gen_rtx_COMPARE (comp_mode, op0, op1)));
14051 validate_condition_mode (code, GET_MODE (compare_result));
14053 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14057 /* Return the diagnostic message string if the binary operation OP is
14058 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14060 static const char*
14061 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14062 const_tree type1,
14063 const_tree type2)
14065 machine_mode mode1 = TYPE_MODE (type1);
14066 machine_mode mode2 = TYPE_MODE (type2);
14068 /* For complex modes, use the inner type. */
14069 if (COMPLEX_MODE_P (mode1))
14070 mode1 = GET_MODE_INNER (mode1);
14072 if (COMPLEX_MODE_P (mode2))
14073 mode2 = GET_MODE_INNER (mode2);
14075 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14076 double to intermix unless -mfloat128-convert. */
14077 if (mode1 == mode2)
14078 return NULL;
14080 if (!TARGET_FLOAT128_CVT)
14082 if ((mode1 == KFmode && mode2 == IFmode)
14083 || (mode1 == IFmode && mode2 == KFmode))
14084 return N_("__float128 and __ibm128 cannot be used in the same "
14085 "expression");
14087 if (TARGET_IEEEQUAD
14088 && ((mode1 == IFmode && mode2 == TFmode)
14089 || (mode1 == TFmode && mode2 == IFmode)))
14090 return N_("__ibm128 and long double cannot be used in the same "
14091 "expression");
14093 if (!TARGET_IEEEQUAD
14094 && ((mode1 == KFmode && mode2 == TFmode)
14095 || (mode1 == TFmode && mode2 == KFmode)))
14096 return N_("__float128 and long double cannot be used in the same "
14097 "expression");
14100 return NULL;
14104 /* Expand floating point conversion to/from __float128 and __ibm128. */
14106 void
14107 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14109 machine_mode dest_mode = GET_MODE (dest);
14110 machine_mode src_mode = GET_MODE (src);
14111 convert_optab cvt = unknown_optab;
14112 bool do_move = false;
14113 rtx libfunc = NULL_RTX;
14114 rtx dest2;
14115 typedef rtx (*rtx_2func_t) (rtx, rtx);
14116 rtx_2func_t hw_convert = (rtx_2func_t)0;
14117 size_t kf_or_tf;
14119 struct hw_conv_t {
14120 rtx_2func_t from_df;
14121 rtx_2func_t from_sf;
14122 rtx_2func_t from_si_sign;
14123 rtx_2func_t from_si_uns;
14124 rtx_2func_t from_di_sign;
14125 rtx_2func_t from_di_uns;
14126 rtx_2func_t to_df;
14127 rtx_2func_t to_sf;
14128 rtx_2func_t to_si_sign;
14129 rtx_2func_t to_si_uns;
14130 rtx_2func_t to_di_sign;
14131 rtx_2func_t to_di_uns;
14132 } hw_conversions[2] = {
14133 /* conversions to/from KFmode */
14135 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14136 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14137 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14138 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14139 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14140 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14141 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14142 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14143 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14144 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14145 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14146 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14149 /* conversions to/from TFmode */
14151 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14152 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14153 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14154 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14155 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14156 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14157 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14158 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14159 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14160 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14161 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14162 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14166 if (dest_mode == src_mode)
14167 gcc_unreachable ();
14169 /* Eliminate memory operations. */
14170 if (MEM_P (src))
14171 src = force_reg (src_mode, src);
14173 if (MEM_P (dest))
14175 rtx tmp = gen_reg_rtx (dest_mode);
14176 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14177 rs6000_emit_move (dest, tmp, dest_mode);
14178 return;
14181 /* Convert to IEEE 128-bit floating point. */
14182 if (FLOAT128_IEEE_P (dest_mode))
14184 if (dest_mode == KFmode)
14185 kf_or_tf = 0;
14186 else if (dest_mode == TFmode)
14187 kf_or_tf = 1;
14188 else
14189 gcc_unreachable ();
14191 switch (src_mode)
14193 case E_DFmode:
14194 cvt = sext_optab;
14195 hw_convert = hw_conversions[kf_or_tf].from_df;
14196 break;
14198 case E_SFmode:
14199 cvt = sext_optab;
14200 hw_convert = hw_conversions[kf_or_tf].from_sf;
14201 break;
14203 case E_KFmode:
14204 case E_IFmode:
14205 case E_TFmode:
14206 if (FLOAT128_IBM_P (src_mode))
14207 cvt = sext_optab;
14208 else
14209 do_move = true;
14210 break;
14212 case E_SImode:
14213 if (unsigned_p)
14215 cvt = ufloat_optab;
14216 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14218 else
14220 cvt = sfloat_optab;
14221 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14223 break;
14225 case E_DImode:
14226 if (unsigned_p)
14228 cvt = ufloat_optab;
14229 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14231 else
14233 cvt = sfloat_optab;
14234 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14236 break;
14238 default:
14239 gcc_unreachable ();
14243 /* Convert from IEEE 128-bit floating point. */
14244 else if (FLOAT128_IEEE_P (src_mode))
14246 if (src_mode == KFmode)
14247 kf_or_tf = 0;
14248 else if (src_mode == TFmode)
14249 kf_or_tf = 1;
14250 else
14251 gcc_unreachable ();
14253 switch (dest_mode)
14255 case E_DFmode:
14256 cvt = trunc_optab;
14257 hw_convert = hw_conversions[kf_or_tf].to_df;
14258 break;
14260 case E_SFmode:
14261 cvt = trunc_optab;
14262 hw_convert = hw_conversions[kf_or_tf].to_sf;
14263 break;
14265 case E_KFmode:
14266 case E_IFmode:
14267 case E_TFmode:
14268 if (FLOAT128_IBM_P (dest_mode))
14269 cvt = trunc_optab;
14270 else
14271 do_move = true;
14272 break;
14274 case E_SImode:
14275 if (unsigned_p)
14277 cvt = ufix_optab;
14278 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14280 else
14282 cvt = sfix_optab;
14283 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14285 break;
14287 case E_DImode:
14288 if (unsigned_p)
14290 cvt = ufix_optab;
14291 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14293 else
14295 cvt = sfix_optab;
14296 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14298 break;
14300 default:
14301 gcc_unreachable ();
14305 /* Both IBM format. */
14306 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14307 do_move = true;
14309 else
14310 gcc_unreachable ();
14312 /* Handle conversion between TFmode/KFmode/IFmode. */
14313 if (do_move)
14314 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14316 /* Handle conversion if we have hardware support. */
14317 else if (TARGET_FLOAT128_HW && hw_convert)
14318 emit_insn ((hw_convert) (dest, src));
14320 /* Call an external function to do the conversion. */
14321 else if (cvt != unknown_optab)
14323 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14324 gcc_assert (libfunc != NULL_RTX);
14326 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14327 src, src_mode);
14329 gcc_assert (dest2 != NULL_RTX);
14330 if (!rtx_equal_p (dest, dest2))
14331 emit_move_insn (dest, dest2);
14334 else
14335 gcc_unreachable ();
14337 return;
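/* [Editor's sketch, not part of the original file; the converter names
   below are hypothetical.]  The function above is a three-way dispatch: a
   plain move between same-format modes, a table of hardware expanders
   indexed by target mode and operand kind, or an optab-provided libcall.
   The table idiom, reduced to standard C: */

typedef double (*conv_fn_model) (long long);

static double conv_signed_model (long long x)
{ return (double) x; }
static double conv_unsigned_model (long long x)
{ return (double) (unsigned long long) x; }

/* Indexed by unsigned_p, mirroring hw_conversions[kf_or_tf].from_*.  */
static const conv_fn_model conv_table_model[2]
  = { conv_signed_model, conv_unsigned_model };

static double
convert_int_model (long long x, int unsigned_p)
{
  return conv_table_model[unsigned_p] (x);
}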
14341 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14342 can be used as that dest register. Return the dest register. */
14345 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14347 if (op2 == const0_rtx)
14348 return op1;
14350 if (GET_CODE (scratch) == SCRATCH)
14351 scratch = gen_reg_rtx (mode);
14353 if (logical_operand (op2, mode))
14354 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14355 else
14356 emit_insn (gen_rtx_SET (scratch,
14357 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14359 return scratch;
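/* [Editor's note, not part of the original file.]  rs6000_emit_eqne relies
   on two scalar identities: op1 == op2 iff (op1 ^ op2) == 0, and also iff
   (op1 - op2) == 0.  XOR is picked when op2 fits the logical-immediate
   form, subtraction otherwise; either way the caller only tests the result
   against zero.  Standalone model: */

static long long
eqne_scratch_model (long long op1, long long op2, int use_xor)
{
  /* Zero exactly when op1 == op2, under both reductions.  */
  return use_xor ? (op1 ^ op2) : (op1 - op2);
}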
14362 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14363 requires this. The result is mode MODE. */
14365 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14367 rtx cond[2];
14368 int n = 0;
14369 if (code == LTGT || code == LE || code == UNLT)
14370 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14371 if (code == LTGT || code == GE || code == UNGT)
14372 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14373 if (code == LE || code == GE || code == UNEQ)
14374 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14375 if (code == UNLT || code == UNGT || code == UNEQ)
14376 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14378 gcc_assert (n == 2);
14380 rtx cc = gen_reg_rtx (CCEQmode);
14381 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14382 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14384 return cc;
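/* [Editor's sketch, not part of the original file.]  A PowerPC FP compare
   sets four bits in a CR field (lt, gt, eq, un); predicates that need two
   of them, e.g. LE = lt|eq or UNEQ = eq|un, are formed by cror-ing the
   pair, which is what the helper above emits.  Bitmask model: */

enum { FPCC_LT = 8, FPCC_GT = 4, FPCC_EQ = 2, FPCC_UN = 1 };

static int
fp_le_model (int crbits)
{ return (crbits & (FPCC_LT | FPCC_EQ)) != 0; }

static int
fp_uneq_model (int crbits)
{ return (crbits & (FPCC_EQ | FPCC_UN)) != 0; }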
14387 void
14388 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14390 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14391 rtx_code cond_code = GET_CODE (condition_rtx);
14393 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14394 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14395 condition_rtx = rs6000_emit_fp_cror (cond_code, mode, condition_rtx);
14396 else if (cond_code == NE
14397 || cond_code == GE || cond_code == LE
14398 || cond_code == GEU || cond_code == LEU
14399 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14401 rtx not_result = gen_reg_rtx (CCEQmode);
14402 rtx not_op, rev_cond_rtx;
14403 machine_mode cc_mode;
14405 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14407 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14408 SImode, XEXP (condition_rtx, 0), const0_rtx);
14409 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14410 emit_insn (gen_rtx_SET (not_result, not_op));
14411 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14414 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14415 if (op_mode == VOIDmode)
14416 op_mode = GET_MODE (XEXP (operands[1], 1));
14418 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14420 PUT_MODE (condition_rtx, DImode);
14421 convert_move (operands[0], condition_rtx, 0);
14423 else
14425 PUT_MODE (condition_rtx, SImode);
14426 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14430 /* Emit a conditional branch: test the comparison in OPERANDS[0] and branch to the label in OPERANDS[3]. */
14432 void
14433 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14435 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14436 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14437 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14438 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14441 /* Return the string to output a conditional branch to LABEL, which is
14442 the operand template of the label, or NULL if the branch is really a
14443 conditional return.
14445 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14446 condition code register and its mode specifies what kind of
14447 comparison we made.
14449 REVERSED is nonzero if we should reverse the sense of the comparison.
14451 INSN is the insn. */
14453 char *
14454 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14456 static char string[64];
14457 enum rtx_code code = GET_CODE (op);
14458 rtx cc_reg = XEXP (op, 0);
14459 machine_mode mode = GET_MODE (cc_reg);
14460 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14461 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14462 int really_reversed = reversed ^ need_longbranch;
14463 char *s = string;
14464 const char *ccode;
14465 const char *pred;
14466 rtx note;
14468 validate_condition_mode (code, mode);
14470 /* Work out which way this really branches. We could use
14471 reverse_condition_maybe_unordered here always but this
14472 makes the resulting assembler clearer. */
14473 if (really_reversed)
14475 /* Reversal of FP compares takes care -- an ordered compare
14476 becomes an unordered compare and vice versa. */
14477 if (mode == CCFPmode)
14478 code = reverse_condition_maybe_unordered (code);
14479 else
14480 code = reverse_condition (code);
14483 switch (code)
14485 /* Not all of these are actually distinct opcodes, but
14486 we distinguish them for clarity of the resulting assembler. */
14487 case NE: case LTGT:
14488 ccode = "ne"; break;
14489 case EQ: case UNEQ:
14490 ccode = "eq"; break;
14491 case GE: case GEU:
14492 ccode = "ge"; break;
14493 case GT: case GTU: case UNGT:
14494 ccode = "gt"; break;
14495 case LE: case LEU:
14496 ccode = "le"; break;
14497 case LT: case LTU: case UNLT:
14498 ccode = "lt"; break;
14499 case UNORDERED: ccode = "un"; break;
14500 case ORDERED: ccode = "nu"; break;
14501 case UNGE: ccode = "nl"; break;
14502 case UNLE: ccode = "ng"; break;
14503 default:
14504 gcc_unreachable ();
14507 /* Maybe we have a guess as to how likely the branch is. */
14508 pred = "";
14509 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14510 if (note != NULL_RTX)
14512 /* PROB is the difference from 50%. */
14513 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14514 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14516 /* Only hint for highly probable/improbable branches on newer cpus when
14517 we have real profile data, as static prediction overrides processor
14518 dynamic prediction. For older cpus we may as well always hint, but
14519 assume not taken for branches that are very close to 50% as a
14520 mispredicted taken branch is more expensive than a
14521 mispredicted not-taken branch. */
14522 if (rs6000_always_hint
14523 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14524 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14525 && br_prob_note_reliable_p (note)))
14527 if (abs (prob) > REG_BR_PROB_BASE / 20
14528 && ((prob > 0) ^ need_longbranch))
14529 pred = "+";
14530 else
14531 pred = "-";
14535 if (label == NULL)
14536 s += sprintf (s, "b%slr%s ", ccode, pred);
14537 else
14538 s += sprintf (s, "b%s%s ", ccode, pred);
14540 /* We need to escape any '%' characters in the reg_names string.
14541 Assume they'd only be the first character.... */
14542 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14543 *s++ = '%';
14544 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14546 if (label != NULL)
14548 /* If the branch distance was too far, we may have to use an
14549 unconditional branch to go the distance. */
14550 if (need_longbranch)
14551 s += sprintf (s, ",$+8\n\tb %s", label);
14552 else
14553 s += sprintf (s, ",%s", label);
14556 return string;
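/* [Editor's sketch, not part of the original file; it omits the profile
   quality checks (PROFILE_GUESSED, br_prob_note_reliable_p).]  The hint
   logic above recentres the probability note so 0 means 50/50, hints at
   all only beyond 48 points from even (or when rs6000_always_hint), and
   emits "+" only when the taken direction, XOR-ed with the long-branch
   inversion, clears a 5-point threshold.  With REG_BR_PROB_BASE assumed to
   be 10000: */

static const char *
branch_hint_model (int prob, int need_longbranch, int always_hint)
{
  const int base = 10000;
  int mag = prob > 0 ? prob : -prob;
  if (!always_hint && mag <= base / 100 * 48)
    return "";				/* too close to 50%: no hint */
  if (mag > base / 20 && ((prob > 0) ^ need_longbranch))
    return "+";				/* predict taken */
  return "-";				/* predict not taken */
}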
14559 /* Return insn for VSX or Altivec comparisons. */
14561 static rtx
14562 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14564 rtx mask;
14565 machine_mode mode = GET_MODE (op0);
14567 switch (code)
14569 default:
14570 break;
14572 case GE:
14573 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14574 return NULL_RTX;
14575 /* FALLTHRU */
14577 case EQ:
14578 case GT:
14579 case GTU:
14580 case ORDERED:
14581 case UNORDERED:
14582 case UNEQ:
14583 case LTGT:
14584 mask = gen_reg_rtx (mode);
14585 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14586 return mask;
14589 return NULL_RTX;
14592 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14593 DMODE is expected destination mode. This is a recursive function. */
14595 static rtx
14596 rs6000_emit_vector_compare (enum rtx_code rcode,
14597 rtx op0, rtx op1,
14598 machine_mode dmode)
14600 rtx mask;
14601 bool swap_operands = false;
14602 bool try_again = false;
14604 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14605 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14607 /* See if the comparison works as is. */
14608 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14609 if (mask)
14610 return mask;
14612 switch (rcode)
14614 case LT:
14615 rcode = GT;
14616 swap_operands = true;
14617 try_again = true;
14618 break;
14619 case LTU:
14620 rcode = GTU;
14621 swap_operands = true;
14622 try_again = true;
14623 break;
14624 case NE:
14625 case UNLE:
14626 case UNLT:
14627 case UNGE:
14628 case UNGT:
14629 /* Invert condition and try again.
14630 e.g., A != B becomes ~(A==B). */
14632 enum rtx_code rev_code;
14633 enum insn_code nor_code;
14634 rtx mask2;
14636 rev_code = reverse_condition_maybe_unordered (rcode);
14637 if (rev_code == UNKNOWN)
14638 return NULL_RTX;
14640 nor_code = optab_handler (one_cmpl_optab, dmode);
14641 if (nor_code == CODE_FOR_nothing)
14642 return NULL_RTX;
14644 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14645 if (!mask2)
14646 return NULL_RTX;
14648 mask = gen_reg_rtx (dmode);
14649 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14650 return mask;
14652 break;
14653 case GE:
14654 case GEU:
14655 case LE:
14656 case LEU:
14657 /* Try GT/GTU/LT/LTU OR EQ */
14659 rtx c_rtx, eq_rtx;
14660 enum insn_code ior_code;
14661 enum rtx_code new_code;
14663 switch (rcode)
14665 case GE:
14666 new_code = GT;
14667 break;
14669 case GEU:
14670 new_code = GTU;
14671 break;
14673 case LE:
14674 new_code = LT;
14675 break;
14677 case LEU:
14678 new_code = LTU;
14679 break;
14681 default:
14682 gcc_unreachable ();
14685 ior_code = optab_handler (ior_optab, dmode);
14686 if (ior_code == CODE_FOR_nothing)
14687 return NULL_RTX;
14689 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14690 if (!c_rtx)
14691 return NULL_RTX;
14693 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14694 if (!eq_rtx)
14695 return NULL_RTX;
14697 mask = gen_reg_rtx (dmode);
14698 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14699 return mask;
14701 break;
14702 default:
14703 return NULL_RTX;
14706 if (try_again)
14708 if (swap_operands)
14709 std::swap (op0, op1);
14711 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14712 if (mask)
14713 return mask;
14716 /* You only get two chances. */
14717 return NULL_RTX;
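/* [Editor's sketch, not part of the original file.]  The fallbacks above
   synthesise unsupported vector predicates from supported ones: LT/LTU
   swap operands and use GT/GTU, NE and the UN* codes complement a reversed
   compare, and GE/GEU/LE/LEU become (GT OR EQ)-style compounds.  The GE
   case on a lane-mask model: */

static void
vec_ge_from_gt_eq_model (const int *a, const int *b, int *out, int n)
{
  for (int i = 0; i < n; i++)
    {
      int gt = a[i] > b[i] ? -1 : 0;	/* all-ones lane mask if true */
      int eq = a[i] == b[i] ? -1 : 0;
      out[i] = gt | eq;			/* GE = GT OR EQ */
    }
}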
14720 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14721 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14722 operands for the relation operation COND. */
14725 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14726 rtx cond, rtx cc_op0, rtx cc_op1)
14728 machine_mode dest_mode = GET_MODE (dest);
14729 machine_mode mask_mode = GET_MODE (cc_op0);
14730 enum rtx_code rcode = GET_CODE (cond);
14731 machine_mode cc_mode = CCmode;
14732 rtx mask;
14733 rtx cond2;
14734 bool invert_move = false;
14736 if (VECTOR_UNIT_NONE_P (dest_mode))
14737 return 0;
14739 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14740 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14742 switch (rcode)
14744 /* Swap operands if we can, and fall back to doing the operation as
14745 specified, and doing a NOR to invert the test. */
14746 case NE:
14747 case UNLE:
14748 case UNLT:
14749 case UNGE:
14750 case UNGT:
14751 /* Invert condition and try again.
14752 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14753 invert_move = true;
14754 rcode = reverse_condition_maybe_unordered (rcode);
14755 if (rcode == UNKNOWN)
14756 return 0;
14757 break;
14759 case GE:
14760 case LE:
14761 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14763 /* Invert condition to avoid compound test. */
14764 invert_move = true;
14765 rcode = reverse_condition (rcode);
14767 break;
14769 case GTU:
14770 case GEU:
14771 case LTU:
14772 case LEU:
14773 /* Mark unsigned tests with CCUNSmode. */
14774 cc_mode = CCUNSmode;
14776 /* Invert condition to avoid compound test if necessary. */
14777 if (rcode == GEU || rcode == LEU)
14779 invert_move = true;
14780 rcode = reverse_condition (rcode);
14782 break;
14784 default:
14785 break;
14788 /* Get the vector mask for the given relational operations. */
14789 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14791 if (!mask)
14792 return 0;
14794 if (invert_move)
14795 std::swap (op_true, op_false);
14797 /* When the arms are constant vectors, exploit the fact that the comparison mask is -1/0 in each element. */
14798 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14799 && (GET_CODE (op_true) == CONST_VECTOR
14800 || GET_CODE (op_false) == CONST_VECTOR))
14802 rtx constant_0 = CONST0_RTX (dest_mode);
14803 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14805 if (op_true == constant_m1 && op_false == constant_0)
14807 emit_move_insn (dest, mask);
14808 return 1;
14811 else if (op_true == constant_0 && op_false == constant_m1)
14813 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14814 return 1;
14817 /* If we can't use the vector comparison directly, perhaps we can use
14818 the mask for the true or false fields, instead of loading up a
14819 constant. */
14820 if (op_true == constant_m1)
14821 op_true = mask;
14823 if (op_false == constant_0)
14824 op_false = mask;
14827 if (!REG_P (op_true) && !SUBREG_P (op_true))
14828 op_true = force_reg (dest_mode, op_true);
14830 if (!REG_P (op_false) && !SUBREG_P (op_false))
14831 op_false = force_reg (dest_mode, op_false);
14833 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14834 CONST0_RTX (dest_mode));
14835 emit_insn (gen_rtx_SET (dest,
14836 gen_rtx_IF_THEN_ELSE (dest_mode,
14837 cond2,
14838 op_true,
14839 op_false)));
14840 return 1;
14843 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
14844 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
14845 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14846 hardware has no such operation. */
14848 static int
14849 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14851 enum rtx_code code = GET_CODE (op);
14852 rtx op0 = XEXP (op, 0);
14853 rtx op1 = XEXP (op, 1);
14854 machine_mode compare_mode = GET_MODE (op0);
14855 machine_mode result_mode = GET_MODE (dest);
14856 bool max_p = false;
14858 if (result_mode != compare_mode)
14859 return 0;
14861 if (code == GE || code == GT)
14862 max_p = true;
14863 else if (code == LE || code == LT)
14864 max_p = false;
14865 else
14866 return 0;
14868 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14869 ;
14871 /* Only when NaNs and signed zeros are not in effect can smax be
14872 used for `op0 < op1 ? op1 : op0`, and smin for
14873 `op0 > op1 ? op1 : op0`. */
14874 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
14875 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
14876 max_p = !max_p;
14878 else
14879 return 0;
14881 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14882 return 1;
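/* [Editor's note, not part of the original file.]  The matching above
   recognises (op0 >= op1 ? op0 : op1) as a max directly, and the swapped
   form (op0 < op1 ? op1 : op0) as a max only when NaNs and signed zeros
   can be ignored, since a NaN operand makes the two forms disagree.
   Scalar model of the swapped form: */

static double
max_via_swapped_cmove_model (double a, double b)
{
  /* Equivalent to smax only under no-NaN, no-signed-zero assumptions.  */
  return a < b ? b : a;
}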
14885 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14886 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the
14887 operands of the last comparison is nonzero/true, FALSE_COND if it is
14888 zero/false. Return 0 if the hardware has no such operation. */
14890 static int
14891 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14893 enum rtx_code code = GET_CODE (op);
14894 rtx op0 = XEXP (op, 0);
14895 rtx op1 = XEXP (op, 1);
14896 machine_mode result_mode = GET_MODE (dest);
14897 rtx compare_rtx;
14898 rtx cmove_rtx;
14899 rtx clobber_rtx;
14901 if (!can_create_pseudo_p ())
14902 return 0;
14904 switch (code)
14906 case EQ:
14907 case GE:
14908 case GT:
14909 break;
14911 case NE:
14912 case LT:
14913 case LE:
14914 code = swap_condition (code);
14915 std::swap (op0, op1);
14916 break;
14918 default:
14919 return 0;
14922 /* Generate: [(parallel [(set (dest)
14923 (if_then_else (op (cmp1) (cmp2))
14924 (true)
14925 (false)))
14926 (clobber (scratch))])]. */
14928 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14929 cmove_rtx = gen_rtx_SET (dest,
14930 gen_rtx_IF_THEN_ELSE (result_mode,
14931 compare_rtx,
14932 true_cond,
14933 false_cond));
14935 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14936 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14937 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14939 return 1;
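/* For illustration (a sketch): "a > b ? c : d" in DFmode reaches
   rs6000_emit_p9_fp_cmove with code GT, so the expected assembly is an
   xscmpgtdp writing an all-ones/all-zeros mask into the scratch VSX
   register clobbered above, followed by an xxsel selecting c or d
   under that mask. */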
14942 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
14943 operands of the last comparison is nonzero/true, FALSE_COND if it
14944 is zero/false. Return 0 if the hardware has no such operation. */
14947 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14949 enum rtx_code code = GET_CODE (op);
14950 rtx op0 = XEXP (op, 0);
14951 rtx op1 = XEXP (op, 1);
14952 machine_mode compare_mode = GET_MODE (op0);
14953 machine_mode result_mode = GET_MODE (dest);
14954 rtx temp;
14955 bool is_against_zero;
14957 /* These modes should always match. */
14958 if (GET_MODE (op1) != compare_mode
14959 /* In the isel case however, we can use a compare immediate, so
14960 op1 may be a small constant. */
14961 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14962 return 0;
14963 if (GET_MODE (true_cond) != result_mode)
14964 return 0;
14965 if (GET_MODE (false_cond) != result_mode)
14966 return 0;
14968 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14969 if (TARGET_P9_MINMAX
14970 && (compare_mode == SFmode || compare_mode == DFmode)
14971 && (result_mode == SFmode || result_mode == DFmode))
14973 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14974 return 1;
14976 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14977 return 1;
14980 /* Don't allow using floating point comparisons for integer results for
14981 now. */
14982 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14983 return 0;
14985 /* First, work out if the hardware can do this at all, or
14986 if it's too slow.... */
14987 if (!FLOAT_MODE_P (compare_mode))
14989 if (TARGET_ISEL)
14990 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14991 return 0;
14994 is_against_zero = op1 == CONST0_RTX (compare_mode);
14996 /* A floating-point subtract might overflow, underflow, or produce
14997 an inexact result, thus changing the floating-point flags, so it
14998 can't be generated if we care about that. It's safe if one side
14999 of the construct is zero, since then no subtract will be
15000 generated. */
15001 if (SCALAR_FLOAT_MODE_P (compare_mode)
15002 && flag_trapping_math && ! is_against_zero)
15003 return 0;
15005 /* Eliminate half of the comparisons by switching operands; this
15006 makes the remaining code simpler. */
15007 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15008 || code == LTGT || code == LT || code == UNLE)
15010 code = reverse_condition_maybe_unordered (code);
15011 temp = true_cond;
15012 true_cond = false_cond;
15013 false_cond = temp;
15016 /* UNEQ and LTGT take four instructions for a comparison with zero;
15017 it'll probably be faster to use a branch here too. */
15018 if (code == UNEQ && HONOR_NANS (compare_mode))
15019 return 0;
15021 /* We're going to try to implement comparisons by performing
15022 a subtract, then comparing against zero. Unfortunately,
15023 Inf - Inf is NaN, which is not zero, so if we don't
15024 know that the operand is finite and the comparison
15025 would treat EQ differently from UNORDERED, we can't do it. */
15026 if (HONOR_INFINITIES (compare_mode)
15027 && code != GT && code != UNGE
15028 && (!CONST_DOUBLE_P (op1)
15029 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15030 /* Constructs of the form (a OP b ? a : b) are safe. */
15031 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15032 || (! rtx_equal_p (op0, true_cond)
15033 && ! rtx_equal_p (op1, true_cond))))
15034 return 0;
15036 /* At this point we know we can use fsel. */
15038 /* Don't allow a compare_mode other than SFmode or DFmode; for other
15039 modes there is no fsel instruction. */
15040 if (compare_mode != SFmode && compare_mode != DFmode)
15041 return 0;
15043 /* Reduce the comparison to a comparison against zero. */
15044 if (! is_against_zero)
15046 temp = gen_reg_rtx (compare_mode);
15047 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15048 op0 = temp;
15049 op1 = CONST0_RTX (compare_mode);
15052 /* If we don't care about NaNs we can reduce some of the comparisons
15053 down to faster ones. */
15054 if (! HONOR_NANS (compare_mode))
15055 switch (code)
15057 case GT:
15058 code = LE;
15059 temp = true_cond;
15060 true_cond = false_cond;
15061 false_cond = temp;
15062 break;
15063 case UNGE:
15064 code = GE;
15065 break;
15066 case UNEQ:
15067 code = EQ;
15068 break;
15069 default:
15070 break;
15073 /* Now, reduce everything down to a GE. */
15074 switch (code)
15076 case GE:
15077 break;
15079 case LE:
15080 temp = gen_reg_rtx (compare_mode);
15081 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15082 op0 = temp;
15083 break;
15085 case ORDERED:
15086 temp = gen_reg_rtx (compare_mode);
15087 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15088 op0 = temp;
15089 break;
15091 case EQ:
15092 temp = gen_reg_rtx (compare_mode);
15093 emit_insn (gen_rtx_SET (temp,
15094 gen_rtx_NEG (compare_mode,
15095 gen_rtx_ABS (compare_mode, op0))));
15096 op0 = temp;
15097 break;
15099 case UNGE:
15100 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15101 temp = gen_reg_rtx (result_mode);
15102 emit_insn (gen_rtx_SET (temp,
15103 gen_rtx_IF_THEN_ELSE (result_mode,
15104 gen_rtx_GE (VOIDmode,
15105 op0, op1),
15106 true_cond, false_cond)));
15107 false_cond = true_cond;
15108 true_cond = temp;
15110 temp = gen_reg_rtx (compare_mode);
15111 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15112 op0 = temp;
15113 break;
15115 case GT:
15116 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15117 temp = gen_reg_rtx (result_mode);
15118 emit_insn (gen_rtx_SET (temp,
15119 gen_rtx_IF_THEN_ELSE (result_mode,
15120 gen_rtx_GE (VOIDmode,
15121 op0, op1),
15122 true_cond, false_cond)));
15123 true_cond = false_cond;
15124 false_cond = temp;
15126 temp = gen_reg_rtx (compare_mode);
15127 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15128 op0 = temp;
15129 break;
15131 default:
15132 gcc_unreachable ();
15135 emit_insn (gen_rtx_SET (dest,
15136 gen_rtx_IF_THEN_ELSE (result_mode,
15137 gen_rtx_GE (VOIDmode,
15138 op0, op1),
15139 true_cond, false_cond)));
15140 return 1;
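/* For illustration, a worked example of the reductions above (assuming
   the usual fsel semantics, FRT = FRA >= 0.0 ? FRC : FRB): the EQ case
   rewrites "a == 0.0 ? x : y" in DFmode as -fabs (a) GE 0.0, so the
   expected sequence is roughly
	fnabs f0,fA
	fsel  fD,f0,fX,fY
   since -|a| >= 0.0 holds exactly when a is (positive or negative)
   zero, and fsel falls through to fY when f0 is a NaN. */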
15143 /* Same as above, but for ints (isel). */
15146 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15148 rtx condition_rtx, cr;
15149 machine_mode mode = GET_MODE (dest);
15150 enum rtx_code cond_code;
15151 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15152 bool signedp;
15154 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15155 return 0;
15157 /* We still have to do the compare, because isel doesn't do a
15158 compare; it just looks at the CRx bits set by a previous compare
15159 instruction. */
15160 condition_rtx = rs6000_generate_compare (op, mode);
15161 cond_code = GET_CODE (condition_rtx);
15162 cr = XEXP (condition_rtx, 0);
15163 signedp = GET_MODE (cr) == CCmode;
15165 isel_func = (mode == SImode
15166 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15167 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15169 switch (cond_code)
15171 case LT: case GT: case LTU: case GTU: case EQ:
15172 /* isel handles these directly. */
15173 break;
15175 default:
15176 /* We need to swap the sense of the comparison. */
15178 std::swap (false_cond, true_cond);
15179 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15181 break;
15184 false_cond = force_reg (mode, false_cond);
15185 if (true_cond != const0_rtx)
15186 true_cond = force_reg (mode, true_cond);
15188 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15190 return 1;
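/* For illustration (a sketch): a signed SImode "a < b ? a : b" is
   expected to become roughly
	cmpw  cr0,rA,rB
	isel  rD,rA,rB,lt
   i.e. one compare plus one isel instead of a branch. TRUE_COND is
   left as const0_rtx when possible because an RA field of 0 in isel
   is read as the literal value 0 rather than the contents of r0. */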
15193 void
15194 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15196 machine_mode mode = GET_MODE (op0);
15197 enum rtx_code c;
15198 rtx target;
15200 /* VSX/altivec have direct min/max insns. */
15201 if ((code == SMAX || code == SMIN)
15202 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15203 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15205 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15206 return;
15209 if (code == SMAX || code == SMIN)
15210 c = GE;
15211 else
15212 c = GEU;
15214 if (code == SMAX || code == UMAX)
15215 target = emit_conditional_move (dest, c, op0, op1, mode,
15216 op0, op1, mode, 0);
15217 else
15218 target = emit_conditional_move (dest, c, op0, op1, mode,
15219 op1, op0, mode, 0);
15220 gcc_assert (target);
15221 if (target != dest)
15222 emit_move_insn (dest, target);
15225 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15226 COND is true. Mark the jump as unlikely to be taken. */
15228 static void
15229 emit_unlikely_jump (rtx cond, rtx label)
15231 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15232 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15233 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15236 /* A subroutine of the atomic operation splitters. Emit a load-locked
15237 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15238 the zero_extend operation. */
15240 static void
15241 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15243 rtx (*fn) (rtx, rtx) = NULL;
15245 switch (mode)
15247 case E_QImode:
15248 fn = gen_load_lockedqi;
15249 break;
15250 case E_HImode:
15251 fn = gen_load_lockedhi;
15252 break;
15253 case E_SImode:
15254 if (GET_MODE (mem) == QImode)
15255 fn = gen_load_lockedqi_si;
15256 else if (GET_MODE (mem) == HImode)
15257 fn = gen_load_lockedhi_si;
15258 else
15259 fn = gen_load_lockedsi;
15260 break;
15261 case E_DImode:
15262 fn = gen_load_lockeddi;
15263 break;
15264 case E_TImode:
15265 fn = gen_load_lockedti;
15266 break;
15267 default:
15268 gcc_unreachable ();
15270 emit_insn (fn (reg, mem));
15273 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15274 instruction in MODE. */
15276 static void
15277 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15279 rtx (*fn) (rtx, rtx, rtx) = NULL;
15281 switch (mode)
15283 case E_QImode:
15284 fn = gen_store_conditionalqi;
15285 break;
15286 case E_HImode:
15287 fn = gen_store_conditionalhi;
15288 break;
15289 case E_SImode:
15290 fn = gen_store_conditionalsi;
15291 break;
15292 case E_DImode:
15293 fn = gen_store_conditionaldi;
15294 break;
15295 case E_TImode:
15296 fn = gen_store_conditionalti;
15297 break;
15298 default:
15299 gcc_unreachable ();
15302 /* Emit sync before stwcx. to address PPC405 erratum 77. */
15303 if (PPC405_ERRATUM77)
15304 emit_insn (gen_hwsync ());
15306 emit_insn (fn (res, mem, val));
15309 /* Expand barriers before and after a load_locked/store_cond sequence. */
15311 static rtx
15312 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15314 rtx addr = XEXP (mem, 0);
15316 if (!legitimate_indirect_address_p (addr, reload_completed)
15317 && !legitimate_indexed_address_p (addr, reload_completed))
15319 addr = force_reg (Pmode, addr);
15320 mem = replace_equiv_address_nv (mem, addr);
15323 switch (model)
15325 case MEMMODEL_RELAXED:
15326 case MEMMODEL_CONSUME:
15327 case MEMMODEL_ACQUIRE:
15328 break;
15329 case MEMMODEL_RELEASE:
15330 case MEMMODEL_ACQ_REL:
15331 emit_insn (gen_lwsync ());
15332 break;
15333 case MEMMODEL_SEQ_CST:
15334 emit_insn (gen_hwsync ());
15335 break;
15336 default:
15337 gcc_unreachable ();
15339 return mem;
15342 static void
15343 rs6000_post_atomic_barrier (enum memmodel model)
15345 switch (model)
15347 case MEMMODEL_RELAXED:
15348 case MEMMODEL_CONSUME:
15349 case MEMMODEL_RELEASE:
15350 break;
15351 case MEMMODEL_ACQUIRE:
15352 case MEMMODEL_ACQ_REL:
15353 case MEMMODEL_SEQ_CST:
15354 emit_insn (gen_isync ());
15355 break;
15356 default:
15357 gcc_unreachable ();
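/* Taken together, the two barrier helpers above give the usual mapping
   for the lock-free sequences in this file; an illustrative sketch:
	relaxed:  larx/stcx. loop, no fences
	acquire:  larx/stcx. loop; isync
	release:  lwsync; larx/stcx. loop
	acq_rel:  lwsync; larx/stcx. loop; isync
	seq_cst:  sync (hwsync); larx/stcx. loop; isync  */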
15361 /* A subroutine of the various atomic expanders. For sub-word operations,
15362 we must adjust things to operate on SImode. Given the original MEM,
15363 return a new aligned memory. Also build and return the quantities by
15364 which to shift and mask. */
15366 static rtx
15367 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15369 rtx addr, align, shift, mask, mem;
15370 HOST_WIDE_INT shift_mask;
15371 machine_mode mode = GET_MODE (orig_mem);
15373 /* For smaller modes, we have to implement this via SImode. */
15374 shift_mask = (mode == QImode ? 0x18 : 0x10);
15376 addr = XEXP (orig_mem, 0);
15377 addr = force_reg (GET_MODE (addr), addr);
15379 /* Aligned memory containing subword. Generate a new memory. We
15380 do not want any of the existing MEM_ATTR data, as we're now
15381 accessing memory outside the original object. */
15382 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15383 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15384 mem = gen_rtx_MEM (SImode, align);
15385 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15386 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15387 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15389 /* Shift amount for subword relative to aligned word. */
15390 shift = gen_reg_rtx (SImode);
15391 addr = gen_lowpart (SImode, addr);
15392 rtx tmp = gen_reg_rtx (SImode);
15393 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15394 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15395 if (BYTES_BIG_ENDIAN)
15396 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15397 shift, 1, OPTAB_LIB_WIDEN);
15398 *pshift = shift;
15400 /* Mask for insertion. */
15401 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15402 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15403 *pmask = mask;
15405 return mem;
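/* For illustration, a worked example of the computation above: for a
   HImode access at address 0x1006, ALIGN is 0x1006 & -4 = 0x1004, and
   (0x1006 << 3) & 0x10 yields a shift of 16, so on a little-endian
   target the halfword occupies bits 16..31 of the loaded word and the
   mask is 0xffff << 16 = 0xffff0000; a big-endian target XORs the
   shift with 0x10, giving 0. */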
15408 /* A subroutine of the various atomic expanders. For sub-word operands,
15409 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15411 static rtx
15412 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15414 rtx x;
15416 x = gen_reg_rtx (SImode);
15417 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15418 gen_rtx_NOT (SImode, mask),
15419 oldval)));
15421 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15423 return x;
15426 /* A subroutine of the various atomic expanders. For sub-word operands,
15427 extract WIDE to NARROW via SHIFT. */
15429 static void
15430 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15432 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15433 wide, 1, OPTAB_LIB_WIDEN);
15434 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15437 /* Expand an atomic compare and swap operation. */
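/* As an illustrative sketch (not the exact RTL emitted, and with the
   memory-model barriers omitted), a strong SImode compare-and-swap is
   expected to expand along these lines:

     loop: lwarx   rR,0,rMEM	# load-locked current value
	   cmpw    cr0,rR,rOLD	# matches the expected OLDVAL?
	   bne-    cr0,fail	# no: fail with CR0 holding NE
	   stwcx.  rNEW,0,rMEM	# yes: try to store NEWVAL
	   bne-    cr0,loop	# reservation lost: retry
     fail:			# CR0 holds EQ iff the swap succeeded  */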
15439 void
15440 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15442 rtx boolval, retval, mem, oldval, newval, cond;
15443 rtx label1, label2, x, mask, shift;
15444 machine_mode mode, orig_mode;
15445 enum memmodel mod_s, mod_f;
15446 bool is_weak;
15448 boolval = operands[0];
15449 retval = operands[1];
15450 mem = operands[2];
15451 oldval = operands[3];
15452 newval = operands[4];
15453 is_weak = (INTVAL (operands[5]) != 0);
15454 mod_s = memmodel_base (INTVAL (operands[6]));
15455 mod_f = memmodel_base (INTVAL (operands[7]));
15456 orig_mode = mode = GET_MODE (mem);
15458 mask = shift = NULL_RTX;
15459 if (mode == QImode || mode == HImode)
15461 /* Before power8, we didn't have access to lbarx/lharx, so we generate a
15462 lwarx plus shift/mask operations. With power8, we need to do the
15463 comparison in SImode, but the store is still done in QI/HImode. */
15464 oldval = convert_modes (SImode, mode, oldval, 1);
15466 if (!TARGET_SYNC_HI_QI)
15468 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15470 /* Shift and mask OLDVAL into position within the word. */
15471 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15472 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15474 /* Shift and mask NEWVAL into position within the word. */
15475 newval = convert_modes (SImode, mode, newval, 1);
15476 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15477 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15480 /* Prepare to adjust the return value. */
15481 retval = gen_reg_rtx (SImode);
15482 mode = SImode;
15484 else if (reg_overlap_mentioned_p (retval, oldval))
15485 oldval = copy_to_reg (oldval);
15487 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15488 oldval = copy_to_mode_reg (mode, oldval);
15490 if (reg_overlap_mentioned_p (retval, newval))
15491 newval = copy_to_reg (newval);
15493 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15495 label1 = NULL_RTX;
15496 if (!is_weak)
15498 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15499 emit_label (XEXP (label1, 0));
15501 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15503 emit_load_locked (mode, retval, mem);
15505 x = retval;
15506 if (mask)
15507 x = expand_simple_binop (SImode, AND, retval, mask,
15508 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15510 cond = gen_reg_rtx (CCmode);
15511 /* If we have TImode, synthesize a comparison. */
15512 if (mode != TImode)
15513 x = gen_rtx_COMPARE (CCmode, x, oldval);
15514 else
15516 rtx xor1_result = gen_reg_rtx (DImode);
15517 rtx xor2_result = gen_reg_rtx (DImode);
15518 rtx or_result = gen_reg_rtx (DImode);
15519 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15520 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15521 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15522 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15524 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15525 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15526 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15527 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15530 emit_insn (gen_rtx_SET (cond, x));
15532 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15533 emit_unlikely_jump (x, label2);
15535 x = newval;
15536 if (mask)
15537 x = rs6000_mask_atomic_subword (retval, newval, mask);
15539 emit_store_conditional (orig_mode, cond, mem, x);
15541 if (!is_weak)
15543 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15544 emit_unlikely_jump (x, label1);
15547 if (!is_mm_relaxed (mod_f))
15548 emit_label (XEXP (label2, 0));
15550 rs6000_post_atomic_barrier (mod_s);
15552 if (is_mm_relaxed (mod_f))
15553 emit_label (XEXP (label2, 0));
15555 if (shift)
15556 rs6000_finish_atomic_subword (operands[1], retval, shift);
15557 else if (mode != GET_MODE (operands[1]))
15558 convert_move (operands[1], retval, 1);
15560 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15561 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15562 emit_insn (gen_rtx_SET (boolval, x));
15565 /* Expand an atomic exchange operation. */
15567 void
15568 rs6000_expand_atomic_exchange (rtx operands[])
15570 rtx retval, mem, val, cond;
15571 machine_mode mode;
15572 enum memmodel model;
15573 rtx label, x, mask, shift;
15575 retval = operands[0];
15576 mem = operands[1];
15577 val = operands[2];
15578 model = memmodel_base (INTVAL (operands[3]));
15579 mode = GET_MODE (mem);
15581 mask = shift = NULL_RTX;
15582 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15584 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15586 /* Shift and mask VAL into position within the word. */
15587 val = convert_modes (SImode, mode, val, 1);
15588 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15589 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15591 /* Prepare to adjust the return value. */
15592 retval = gen_reg_rtx (SImode);
15593 mode = SImode;
15596 mem = rs6000_pre_atomic_barrier (mem, model);
15598 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15599 emit_label (XEXP (label, 0));
15601 emit_load_locked (mode, retval, mem);
15603 x = val;
15604 if (mask)
15605 x = rs6000_mask_atomic_subword (retval, val, mask);
15607 cond = gen_reg_rtx (CCmode);
15608 emit_store_conditional (mode, cond, mem, x);
15610 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15611 emit_unlikely_jump (x, label);
15613 rs6000_post_atomic_barrier (model);
15615 if (shift)
15616 rs6000_finish_atomic_subword (operands[0], retval, shift);
15619 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15620 to perform. MEM is the memory on which to operate. VAL is the second
15621 operand of the binary operator. BEFORE and AFTER are optional locations to
15622 return the value of MEM either before or after the operation. MODEL_RTX
15623 is a CONST_INT containing the memory model to use. */
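/* As an illustrative sketch (assuming SImode, a PLUS code and seq_cst
   ordering), the loop built below is expected to look roughly like:
	sync
     loop: lwarx   rB,0,rMEM	# BEFORE value
	   add     rA,rB,rVAL	# AFTER value
	   stwcx.  rA,0,rMEM
	   bne-    cr0,loop
	   isync  */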
15625 void
15626 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15627 rtx orig_before, rtx orig_after, rtx model_rtx)
15629 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15630 machine_mode mode = GET_MODE (mem);
15631 machine_mode store_mode = mode;
15632 rtx label, x, cond, mask, shift;
15633 rtx before = orig_before, after = orig_after;
15635 mask = shift = NULL_RTX;
15636 /* On power8, we want to use SImode for the operation. On previous systems,
15637 do the operation on the containing aligned word and shift/mask to get the
15638 proper byte or halfword. */
15639 if (mode == QImode || mode == HImode)
15641 if (TARGET_SYNC_HI_QI)
15643 val = convert_modes (SImode, mode, val, 1);
15645 /* Prepare to adjust the return value. */
15646 before = gen_reg_rtx (SImode);
15647 if (after)
15648 after = gen_reg_rtx (SImode);
15649 mode = SImode;
15651 else
15653 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15655 /* Shift and mask VAL into position within the word. */
15656 val = convert_modes (SImode, mode, val, 1);
15657 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15658 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15660 switch (code)
15662 case IOR:
15663 case XOR:
15664 /* We've already zero-extended VAL. That is sufficient to
15665 make certain that it does not affect other bits. */
15666 mask = NULL;
15667 break;
15669 case AND:
15670 /* If we make certain that all of the other bits in VAL are
15671 set, that will be sufficient to not affect other bits. */
15672 x = gen_rtx_NOT (SImode, mask);
15673 x = gen_rtx_IOR (SImode, x, val);
15674 emit_insn (gen_rtx_SET (val, x));
15675 mask = NULL;
15676 break;
15678 case NOT:
15679 case PLUS:
15680 case MINUS:
15681 /* These will all affect bits outside the field and need
15682 adjustment via MASK within the loop. */
15683 break;
15685 default:
15686 gcc_unreachable ();
15689 /* Prepare to adjust the return value. */
15690 before = gen_reg_rtx (SImode);
15691 if (after)
15692 after = gen_reg_rtx (SImode);
15693 store_mode = mode = SImode;
15697 mem = rs6000_pre_atomic_barrier (mem, model);
15699 label = gen_label_rtx ();
15700 emit_label (label);
15701 label = gen_rtx_LABEL_REF (VOIDmode, label);
15703 if (before == NULL_RTX)
15704 before = gen_reg_rtx (mode);
15706 emit_load_locked (mode, before, mem);
15708 if (code == NOT)
15710 x = expand_simple_binop (mode, AND, before, val,
15711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15712 after = expand_simple_unop (mode, NOT, x, after, 1);
15714 else
15716 after = expand_simple_binop (mode, code, before, val,
15717 after, 1, OPTAB_LIB_WIDEN);
15720 x = after;
15721 if (mask)
15723 x = expand_simple_binop (SImode, AND, after, mask,
15724 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15725 x = rs6000_mask_atomic_subword (before, x, mask);
15727 else if (store_mode != mode)
15728 x = convert_modes (store_mode, mode, x, 1);
15730 cond = gen_reg_rtx (CCmode);
15731 emit_store_conditional (store_mode, cond, mem, x);
15733 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15734 emit_unlikely_jump (x, label);
15736 rs6000_post_atomic_barrier (model);
15738 if (shift)
15740 /* QImode/HImode on machines without lbarx/lharx, where we do a lwarx and
15741 then do the calculations in an SImode register. */
15742 if (orig_before)
15743 rs6000_finish_atomic_subword (orig_before, before, shift);
15744 if (orig_after)
15745 rs6000_finish_atomic_subword (orig_after, after, shift);
15747 else if (store_mode != mode)
15749 /* QImode/HImode on machines with lbarx/lharx, where we do the native
15750 operation and then do the calculations in an SImode register. */
15751 if (orig_before)
15752 convert_move (orig_before, before, 1);
15753 if (orig_after)
15754 convert_move (orig_after, after, 1);
15756 else if (orig_after && after != orig_after)
15757 emit_move_insn (orig_after, after);
15760 /* Emit instructions to move SRC to DST. Called by splitters for
15761 multi-register moves. It will emit at most one instruction for
15762 each register that is accessed; that is, it won't emit li/lis pairs
15763 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15764 register. */
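/* For illustration: on a 64-bit target, a TImode copy from r8/r9 to
   r9/r10 is split into two DImode moves, and because REGNO (src) <
   REGNO (dst) the code below moves the high-numbered subword first
   (r10 = r9, then r9 = r8) so the overlapping r9 is read before it is
   overwritten (a sketch of one case; memory operands take the later,
   more involved paths). */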
15766 void
15767 rs6000_split_multireg_move (rtx dst, rtx src)
15769 /* The register number of the first register being moved. */
15770 int reg;
15771 /* The mode that is to be moved. */
15772 machine_mode mode;
15773 /* The mode that the move is being done in, and its size. */
15774 machine_mode reg_mode;
15775 int reg_mode_size;
15776 /* The number of registers that will be moved. */
15777 int nregs;
15779 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15780 mode = GET_MODE (dst);
15781 nregs = hard_regno_nregs (reg, mode);
15782 if (FP_REGNO_P (reg))
15783 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15784 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15785 else if (ALTIVEC_REGNO_P (reg))
15786 reg_mode = V16QImode;
15787 else
15788 reg_mode = word_mode;
15789 reg_mode_size = GET_MODE_SIZE (reg_mode);
15791 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15793 /* TDmode residing in FP registers is special, since the ISA requires that
15794 the lower-numbered word of a register pair is always the most significant
15795 word, even in little-endian mode. This does not match the usual subreg
15796 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15797 the appropriate constituent registers "by hand" in little-endian mode.
15799 Note we do not need to check for destructive overlap here since TDmode
15800 can only reside in even/odd register pairs. */
15801 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15803 rtx p_src, p_dst;
15804 int i;
15806 for (i = 0; i < nregs; i++)
15808 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15809 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15810 else
15811 p_src = simplify_gen_subreg (reg_mode, src, mode,
15812 i * reg_mode_size);
15814 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15815 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15816 else
15817 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15818 i * reg_mode_size);
15820 emit_insn (gen_rtx_SET (p_dst, p_src));
15823 return;
15826 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15828 /* Move register range backwards, if we might have destructive
15829 overlap. */
15830 int i;
15831 for (i = nregs - 1; i >= 0; i--)
15832 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15833 i * reg_mode_size),
15834 simplify_gen_subreg (reg_mode, src, mode,
15835 i * reg_mode_size)));
15837 else
15839 int i;
15840 int j = -1;
15841 bool used_update = false;
15842 rtx restore_basereg = NULL_RTX;
15844 if (MEM_P (src) && INT_REGNO_P (reg))
15846 rtx breg;
15848 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15849 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15851 rtx delta_rtx;
15852 breg = XEXP (XEXP (src, 0), 0);
15853 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15854 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15855 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15856 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15857 src = replace_equiv_address (src, breg);
15859 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15861 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15863 rtx basereg = XEXP (XEXP (src, 0), 0);
15864 if (TARGET_UPDATE)
15866 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15867 emit_insn (gen_rtx_SET (ndst,
15868 gen_rtx_MEM (reg_mode,
15869 XEXP (src, 0))));
15870 used_update = true;
15872 else
15873 emit_insn (gen_rtx_SET (basereg,
15874 XEXP (XEXP (src, 0), 1)));
15875 src = replace_equiv_address (src, basereg);
15877 else
15879 rtx basereg = gen_rtx_REG (Pmode, reg);
15880 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15881 src = replace_equiv_address (src, basereg);
15885 breg = XEXP (src, 0);
15886 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15887 breg = XEXP (breg, 0);
15889 /* If the base register we are using to address memory is
15890 also a destination reg, then change that register last. */
15891 if (REG_P (breg)
15892 && REGNO (breg) >= REGNO (dst)
15893 && REGNO (breg) < REGNO (dst) + nregs)
15894 j = REGNO (breg) - REGNO (dst);
15896 else if (MEM_P (dst) && INT_REGNO_P (reg))
15898 rtx breg;
15900 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15901 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15903 rtx delta_rtx;
15904 breg = XEXP (XEXP (dst, 0), 0);
15905 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15906 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15907 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15909 /* We have to update the breg before doing the store.
15910 Use store with update, if available. */
15912 if (TARGET_UPDATE)
15914 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15915 emit_insn (TARGET_32BIT
15916 ? (TARGET_POWERPC64
15917 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15918 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15919 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15920 used_update = true;
15922 else
15923 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15924 dst = replace_equiv_address (dst, breg);
15926 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15927 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15929 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15931 rtx basereg = XEXP (XEXP (dst, 0), 0);
15932 if (TARGET_UPDATE)
15934 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15935 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15936 XEXP (dst, 0)),
15937 nsrc));
15938 used_update = true;
15940 else
15941 emit_insn (gen_rtx_SET (basereg,
15942 XEXP (XEXP (dst, 0), 1)));
15943 dst = replace_equiv_address (dst, basereg);
15945 else
15947 rtx basereg = XEXP (XEXP (dst, 0), 0);
15948 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15949 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15950 && REG_P (basereg)
15951 && REG_P (offsetreg)
15952 && REGNO (basereg) != REGNO (offsetreg));
15953 if (REGNO (basereg) == 0)
15955 rtx tmp = offsetreg;
15956 offsetreg = basereg;
15957 basereg = tmp;
15959 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15960 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15961 dst = replace_equiv_address (dst, basereg);
15964 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15965 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15968 for (i = 0; i < nregs; i++)
15970 /* Calculate index to next subword. */
15971 ++j;
15972 if (j == nregs)
15973 j = 0;
15975 /* If the compiler already emitted the move of the first word by
15976 store with update, there is no need to do anything. */
15977 if (j == 0 && used_update)
15978 continue;
15980 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15981 j * reg_mode_size),
15982 simplify_gen_subreg (reg_mode, src, mode,
15983 j * reg_mode_size)));
15985 if (restore_basereg != NULL_RTX)
15986 emit_insn (restore_basereg);
15990 static GTY(()) alias_set_type TOC_alias_set = -1;
15992 alias_set_type
15993 get_TOC_alias_set (void)
15995 if (TOC_alias_set == -1)
15996 TOC_alias_set = new_alias_set ();
15997 return TOC_alias_set;
16000 /* The mode the ABI uses for a word. This is not the same as word_mode
16001 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16003 static scalar_int_mode
16004 rs6000_abi_word_mode (void)
16006 return TARGET_32BIT ? SImode : DImode;
16009 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16010 static char *
16011 rs6000_offload_options (void)
16013 if (TARGET_64BIT)
16014 return xstrdup ("-foffload-abi=lp64");
16015 else
16016 return xstrdup ("-foffload-abi=ilp32");
16020 /* A quick summary of the various types of 'constant-pool tables'
16021 under PowerPC:
16023 Target Flags Name One table per
16024 AIX (none) AIX TOC object file
16025 AIX -mfull-toc AIX TOC object file
16026 AIX -mminimal-toc AIX minimal TOC translation unit
16027 SVR4/EABI (none) SVR4 SDATA object file
16028 SVR4/EABI -fpic SVR4 pic object file
16029 SVR4/EABI -fPIC SVR4 PIC translation unit
16030 SVR4/EABI -mrelocatable EABI TOC function
16031 SVR4/EABI -maix AIX TOC object file
16032 SVR4/EABI -maix -mminimal-toc
16033 AIX minimal TOC translation unit
16035 Name Reg. Set by entries contains:
16036 made by addrs? fp? sum?
16038 AIX TOC 2 crt0 as Y option option
16039 AIX minimal TOC 30 prolog gcc Y Y option
16040 SVR4 SDATA 13 crt0 gcc N Y N
16041 SVR4 pic 30 prolog ld Y not yet N
16042 SVR4 PIC 30 prolog gcc Y option option
16043 EABI TOC 30 prolog gcc Y option option
16047 /* Hash functions for the hash table. */
16049 static unsigned
16050 rs6000_hash_constant (rtx k)
16052 enum rtx_code code = GET_CODE (k);
16053 machine_mode mode = GET_MODE (k);
16054 unsigned result = (code << 3) ^ mode;
16055 const char *format;
16056 int flen, fidx;
16058 format = GET_RTX_FORMAT (code);
16059 flen = strlen (format);
16060 fidx = 0;
16062 switch (code)
16064 case LABEL_REF:
16065 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16067 case CONST_WIDE_INT:
16069 int i;
16070 flen = CONST_WIDE_INT_NUNITS (k);
16071 for (i = 0; i < flen; i++)
16072 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16073 return result;
16076 case CONST_DOUBLE:
16077 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16079 case CODE_LABEL:
16080 fidx = 3;
16081 break;
16083 default:
16084 break;
16087 for (; fidx < flen; fidx++)
16088 switch (format[fidx])
16090 case 's':
16092 unsigned i, len;
16093 const char *str = XSTR (k, fidx);
16094 len = strlen (str);
16095 result = result * 613 + len;
16096 for (i = 0; i < len; i++)
16097 result = result * 613 + (unsigned) str[i];
16098 break;
16100 case 'u':
16101 case 'e':
16102 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16103 break;
16104 case 'i':
16105 case 'n':
16106 result = result * 613 + (unsigned) XINT (k, fidx);
16107 break;
16108 case 'w':
16109 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16110 result = result * 613 + (unsigned) XWINT (k, fidx);
16111 else
16113 size_t i;
16114 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16115 result = result * 613 + (unsigned) (XWINT (k, fidx)
16116 >> CHAR_BIT * i);
16118 break;
16119 case '0':
16120 break;
16121 default:
16122 gcc_unreachable ();
16125 return result;
16128 hashval_t
16129 toc_hasher::hash (toc_hash_struct *thc)
16131 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16134 /* Compare H1 and H2 for equivalence. */
16136 bool
16137 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16139 rtx r1 = h1->key;
16140 rtx r2 = h2->key;
16142 if (h1->key_mode != h2->key_mode)
16143 return 0;
16145 return rtx_equal_p (r1, r2);
16148 /* These are the names given by the C++ front-end to vtables and
16149 vtable-like objects. Ideally, this logic should not be here;
16150 instead, there should be some programmatic way of inquiring
16151 whether or not an object is a vtable. */
16153 #define VTABLE_NAME_P(NAME) \
16154 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16155 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16156 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16157 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16158 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
16160 #ifdef NO_DOLLAR_IN_LABEL
16161 /* Return a GGC-allocated character string translating dollar signs in
16162 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
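/* For example, "f$o$o" becomes "f_o_o"; a name whose first character
   is '$' (or that contains no '$' at all) is returned unchanged. */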
16164 const char *
16165 rs6000_xcoff_strip_dollar (const char *name)
16167 char *strip, *p;
16168 const char *q;
16169 size_t len;
16171 q = (const char *) strchr (name, '$');
16173 if (q == 0 || q == name)
16174 return name;
16176 len = strlen (name);
16177 strip = XALLOCAVEC (char, len + 1);
16178 strcpy (strip, name);
16179 p = strip + (q - name);
16180 while (p)
16182 *p = '_';
16183 p = strchr (p + 1, '$');
16186 return ggc_alloc_string (strip, len);
16188 #endif
16190 void
16191 rs6000_output_symbol_ref (FILE *file, rtx x)
16193 const char *name = XSTR (x, 0);
16195 /* Currently C++ toc references to vtables can be emitted before it
16196 is decided whether the vtable is public or private. If this is
16197 the case, then the linker will eventually complain that there is
16198 a reference to an unknown section. Thus, for vtables only,
16199 we emit the TOC reference to reference the identifier and not the
16200 symbol. */
16201 if (VTABLE_NAME_P (name))
16203 RS6000_OUTPUT_BASENAME (file, name);
16205 else
16206 assemble_name (file, name);
16209 /* Output a TOC entry. We derive the entry name from what is being
16210 written. */
16212 void
16213 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16215 char buf[256];
16216 const char *name = buf;
16217 rtx base = x;
16218 HOST_WIDE_INT offset = 0;
16220 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16222 /* When the linker won't eliminate them, don't output duplicate
16223 TOC entries (this happens on AIX if there is any kind of TOC,
16224 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16225 CODE_LABELs. */
16226 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16228 struct toc_hash_struct *h;
16230 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16231 time because GGC is not initialized at that point. */
16232 if (toc_hash_table == NULL)
16233 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16235 h = ggc_alloc<toc_hash_struct> ();
16236 h->key = x;
16237 h->key_mode = mode;
16238 h->labelno = labelno;
16240 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16241 if (*found == NULL)
16242 *found = h;
16243 else /* This is indeed a duplicate.
16244 Set this label equal to that label. */
16246 fputs ("\t.set ", file);
16247 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16248 fprintf (file, "%d,", labelno);
16249 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16250 fprintf (file, "%d\n", ((*found)->labelno));
16252 #ifdef HAVE_AS_TLS
16253 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16254 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16255 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16257 fputs ("\t.set ", file);
16258 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16259 fprintf (file, "%d,", labelno);
16260 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16261 fprintf (file, "%d\n", ((*found)->labelno));
16263 #endif
16264 return;
16268 /* If we're going to put a double constant in the TOC, make sure it's
16269 aligned properly when strict alignment is on. */
16270 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16271 && STRICT_ALIGNMENT
16272 && GET_MODE_BITSIZE (mode) >= 64
16273 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16274 ASM_OUTPUT_ALIGN (file, 3);
16277 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16279 /* Handle FP constants specially. Note that if we have a minimal
16280 TOC, things we put here aren't actually in the TOC, so we can allow
16281 FP constants. */
16282 if (CONST_DOUBLE_P (x)
16283 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16284 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16286 long k[4];
16288 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16289 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16290 else
16291 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16293 if (TARGET_64BIT)
16295 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16296 fputs (DOUBLE_INT_ASM_OP, file);
16297 else
16298 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16299 k[0] & 0xffffffff, k[1] & 0xffffffff,
16300 k[2] & 0xffffffff, k[3] & 0xffffffff);
16301 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16302 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16303 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16304 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16305 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16306 return;
16308 else
16310 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16311 fputs ("\t.long ", file);
16312 else
16313 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16314 k[0] & 0xffffffff, k[1] & 0xffffffff,
16315 k[2] & 0xffffffff, k[3] & 0xffffffff);
16316 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16317 k[0] & 0xffffffff, k[1] & 0xffffffff,
16318 k[2] & 0xffffffff, k[3] & 0xffffffff);
16319 return;
16322 else if (CONST_DOUBLE_P (x)
16323 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16325 long k[2];
16327 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16328 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16329 else
16330 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16332 if (TARGET_64BIT)
16334 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16335 fputs (DOUBLE_INT_ASM_OP, file);
16336 else
16337 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16338 k[0] & 0xffffffff, k[1] & 0xffffffff);
16339 fprintf (file, "0x%lx%08lx\n",
16340 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16341 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16342 return;
16344 else
16346 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16347 fputs ("\t.long ", file);
16348 else
16349 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16350 k[0] & 0xffffffff, k[1] & 0xffffffff);
16351 fprintf (file, "0x%lx,0x%lx\n",
16352 k[0] & 0xffffffff, k[1] & 0xffffffff);
16353 return;
16356 else if (CONST_DOUBLE_P (x)
16357 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16359 long l;
16361 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16362 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16363 else
16364 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16366 if (TARGET_64BIT)
16368 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16369 fputs (DOUBLE_INT_ASM_OP, file);
16370 else
16371 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16372 if (WORDS_BIG_ENDIAN)
16373 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16374 else
16375 fprintf (file, "0x%lx\n", l & 0xffffffff);
16376 return;
16378 else
16380 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16381 fputs ("\t.long ", file);
16382 else
16383 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16384 fprintf (file, "0x%lx\n", l & 0xffffffff);
16385 return;
16388 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16390 unsigned HOST_WIDE_INT low;
16391 HOST_WIDE_INT high;
16393 low = INTVAL (x) & 0xffffffff;
16394 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16396 /* TOC entries are always Pmode-sized, so when big-endian
16397 smaller integer constants in the TOC need to be padded.
16398 (This is still a win over putting the constants in
16399 a separate constant pool, because then we'd have
16400 to have both a TOC entry _and_ the actual constant.)
16402 For a 32-bit target, CONST_INT values are loaded and shifted
16403 entirely within `low' and can be stored in one TOC entry. */
16405 /* It would be easy to make this work, but it isn't implemented now. */
16406 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16408 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16410 low |= high << 32;
16411 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16412 high = (HOST_WIDE_INT) low >> 32;
16413 low &= 0xffffffff;
16416 if (TARGET_64BIT)
16418 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16419 fputs (DOUBLE_INT_ASM_OP, file);
16420 else
16421 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16422 (long) high & 0xffffffff, (long) low & 0xffffffff);
16423 fprintf (file, "0x%lx%08lx\n",
16424 (long) high & 0xffffffff, (long) low & 0xffffffff);
16425 return;
16427 else
16429 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16431 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16432 fputs ("\t.long ", file);
16433 else
16434 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16435 (long) high & 0xffffffff, (long) low & 0xffffffff);
16436 fprintf (file, "0x%lx,0x%lx\n",
16437 (long) high & 0xffffffff, (long) low & 0xffffffff);
16439 else
16441 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16442 fputs ("\t.long ", file);
16443 else
16444 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16445 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16447 return;
16451 if (GET_CODE (x) == CONST)
16453 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16454 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16456 base = XEXP (XEXP (x, 0), 0);
16457 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16460 switch (GET_CODE (base))
16462 case SYMBOL_REF:
16463 name = XSTR (base, 0);
16464 break;
16466 case LABEL_REF:
16467 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16468 CODE_LABEL_NUMBER (XEXP (base, 0)));
16469 break;
16471 case CODE_LABEL:
16472 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16473 break;
16475 default:
16476 gcc_unreachable ();
16479 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16480 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16481 else
16483 fputs ("\t.tc ", file);
16484 RS6000_OUTPUT_BASENAME (file, name);
16486 if (offset < 0)
16487 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16488 else if (offset)
16489 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16491 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16492 after other TOC symbols, reducing overflow of small TOC access
16493 to [TC] symbols. */
16494 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16495 ? "[TE]," : "[TC],", file);
16498 /* Currently C++ toc references to vtables can be emitted before it
16499 is decided whether the vtable is public or private. If this is
16500 the case, then the linker will eventually complain that there is
16501 a TOC reference to an unknown section. Thus, for vtables only,
16502 we emit the TOC reference to reference the symbol and not the
16503 section. */
16504 if (VTABLE_NAME_P (name))
16506 RS6000_OUTPUT_BASENAME (file, name);
16507 if (offset < 0)
16508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16509 else if (offset > 0)
16510 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16512 else
16513 output_addr_const (file, x);
16515 #if HAVE_AS_TLS
16516 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16518 switch (SYMBOL_REF_TLS_MODEL (base))
16520 case 0:
16521 break;
16522 case TLS_MODEL_LOCAL_EXEC:
16523 fputs ("@le", file);
16524 break;
16525 case TLS_MODEL_INITIAL_EXEC:
16526 fputs ("@ie", file);
16527 break;
16528 /* Use global-dynamic for local-dynamic. */
16529 case TLS_MODEL_GLOBAL_DYNAMIC:
16530 case TLS_MODEL_LOCAL_DYNAMIC:
16531 putc ('\n', file);
16532 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16533 fputs ("\t.tc .", file);
16534 RS6000_OUTPUT_BASENAME (file, name);
16535 fputs ("[TC],", file);
16536 output_addr_const (file, x);
16537 fputs ("@m", file);
16538 break;
16539 default:
16540 gcc_unreachable ();
16543 #endif
16545 putc ('\n', file);
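/* For illustration (a sketch of the 64-bit AIX case above, with
   TARGET_ELF and TARGET_MINIMAL_TOC both false): a DFmode 1.0 has
   k[0] = 0x3ff00000 and k[1] = 0, so the entry emitted is
	.tc FD_3ff00000_0[TC],0x3ff0000000000000
   while 64-bit ELF would instead emit the bare value after
   DOUBLE_INT_ASM_OP. */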
16548 /* Output an assembler pseudo-op to write an ASCII string of N characters
16549 starting at P to FILE.
16551 On the RS/6000, we have to do this using the .byte operation and
16552 write out special characters outside the quoted string.
16553 Also, the assembler is broken; very long strings are truncated,
16554 so we must artificially break them up early. */
16556 void
16557 output_ascii (FILE *file, const char *p, int n)
16559 char c;
16560 int i, count_string;
16561 const char *for_string = "\t.byte \"";
16562 const char *for_decimal = "\t.byte ";
16563 const char *to_close = NULL;
16565 count_string = 0;
16566 for (i = 0; i < n; i++)
16568 c = *p++;
16569 if (c >= ' ' && c < 0177)
16571 if (for_string)
16572 fputs (for_string, file);
16573 putc (c, file);
16575 /* Write two quotes to get one. */
16576 if (c == '"')
16578 putc (c, file);
16579 ++count_string;
16582 for_string = NULL;
16583 for_decimal = "\"\n\t.byte ";
16584 to_close = "\"\n";
16585 ++count_string;
16587 if (count_string >= 512)
16589 fputs (to_close, file);
16591 for_string = "\t.byte \"";
16592 for_decimal = "\t.byte ";
16593 to_close = NULL;
16594 count_string = 0;
16597 else
16599 if (for_decimal)
16600 fputs (for_decimal, file);
16601 fprintf (file, "%d", c);
16603 for_string = "\n\t.byte \"";
16604 for_decimal = ", ";
16605 to_close = "\n";
16606 count_string = 0;
16610 /* Now close the string if we have written one. Then end the line. */
16611 if (to_close)
16612 fputs (to_close, file);
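/* For illustration, a worked example of the loop above: for the four
   input bytes a " b \n the function writes
	.byte "a""b"
	.byte 10
   Printable characters accumulate inside one quoted string (a quote
   is doubled to escape it), and the newline forces a switch to the
   decimal .byte form. */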
16615 /* Generate a unique section name for FILENAME for a section type
16616 represented by SECTION_DESC. Output goes into BUF.
16618 SECTION_DESC can be any string, as long as it is different for each
16619 possible section type.
16621 We name the section in the same manner as xlc. The name begins with an
16622 underscore followed by the filename (after stripping any leading directory
16623 names) with the last period replaced by the string SECTION_DESC. If
16624 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16625 the name. */
16627 void
16628 rs6000_gen_section_name (char **buf, const char *filename,
16629 const char *section_desc)
16631 const char *q, *after_last_slash, *last_period = 0;
16632 char *p;
16633 int len;
16635 after_last_slash = filename;
16636 for (q = filename; *q; q++)
16638 if (*q == '/')
16639 after_last_slash = q + 1;
16640 else if (*q == '.')
16641 last_period = q;
16644 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16645 *buf = (char *) xmalloc (len);
16647 p = *buf;
16648 *p++ = '_';
16650 for (q = after_last_slash; *q; q++)
16652 if (q == last_period)
16654 strcpy (p, section_desc);
16655 p += strlen (section_desc);
16656 break;
16659 else if (ISALNUM (*q))
16660 *p++ = *q;
16663 if (last_period == 0)
16664 strcpy (p, section_desc);
16665 else
16666 *p = '\0';
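/* For example, given FILENAME "dir/foo.c" and a SECTION_DESC of "ro_",
   the generated name is "_fooro_": the directory part is stripped, and
   the last period together with everything after it is replaced by the
   descriptor. */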
16669 /* Emit profile function. */
16671 void
16672 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16674 /* Non-standard profiling for kernels, which just saves LR then calls
16675 _mcount without worrying about arg saves. The idea is to change
16676 the function prologue as little as possible as it isn't easy to
16677 account for arg save/restore code added just for _mcount. */
16678 if (TARGET_PROFILE_KERNEL)
16679 return;
16681 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16683 #ifndef NO_PROFILE_COUNTERS
16684 # define NO_PROFILE_COUNTERS 0
16685 #endif
16686 if (NO_PROFILE_COUNTERS)
16687 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16688 LCT_NORMAL, VOIDmode);
16689 else
16691 char buf[30];
16692 const char *label_name;
16693 rtx fun;
16695 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16696 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16697 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16699 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16700 LCT_NORMAL, VOIDmode, fun, Pmode);
16703 else if (DEFAULT_ABI == ABI_DARWIN)
16705 const char *mcount_name = RS6000_MCOUNT;
16706 int caller_addr_regno = LR_REGNO;
16708 /* Be conservative and always set this, at least for now. */
16709 crtl->uses_pic_offset_table = 1;
16711 #if TARGET_MACHO
16712 /* For PIC code, set up a stub and collect the caller's address
16713 from r0, which is where the prologue puts it. */
16714 if (MACHOPIC_INDIRECT
16715 && crtl->uses_pic_offset_table)
16716 caller_addr_regno = 0;
16717 #endif
16718 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16719 LCT_NORMAL, VOIDmode,
16720 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16724 /* Write function profiler code. */
16726 void
16727 output_function_profiler (FILE *file, int labelno)
16729 char buf[100];
16731 switch (DEFAULT_ABI)
16733 default:
16734 gcc_unreachable ();
16736 case ABI_V4:
16737 if (!TARGET_32BIT)
16739 warning (0, "no profiling of 64-bit code for this ABI");
16740 return;
16742 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16743 fprintf (file, "\tmflr %s\n", reg_names[0]);
16744 if (NO_PROFILE_COUNTERS)
16746 asm_fprintf (file, "\tstw %s,4(%s)\n",
16747 reg_names[0], reg_names[1]);
16749 else if (TARGET_SECURE_PLT && flag_pic)
16751 if (TARGET_LINK_STACK)
16753 char name[32];
16754 get_ppc476_thunk_name (name);
16755 asm_fprintf (file, "\tbl %s\n", name);
16757 else
16758 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16759 asm_fprintf (file, "\tstw %s,4(%s)\n",
16760 reg_names[0], reg_names[1]);
16761 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16762 asm_fprintf (file, "\taddis %s,%s,",
16763 reg_names[12], reg_names[12]);
16764 assemble_name (file, buf);
16765 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16766 assemble_name (file, buf);
16767 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16769 else if (flag_pic == 1)
16771 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16772 asm_fprintf (file, "\tstw %s,4(%s)\n",
16773 reg_names[0], reg_names[1]);
16774 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16775 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16776 assemble_name (file, buf);
16777 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16779 else if (flag_pic > 1)
16781 asm_fprintf (file, "\tstw %s,4(%s)\n",
16782 reg_names[0], reg_names[1]);
16783 /* Now, we need to get the address of the label. */
16784 if (TARGET_LINK_STACK)
16786 char name[32];
16787 get_ppc476_thunk_name (name);
16788 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16789 assemble_name (file, buf);
16790 fputs ("-.\n1:", file);
16791 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16792 asm_fprintf (file, "\taddi %s,%s,4\n",
16793 reg_names[11], reg_names[11]);
16795 else
16797 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16798 assemble_name (file, buf);
16799 fputs ("-.\n1:", file);
16800 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16802 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16803 reg_names[0], reg_names[11]);
16804 asm_fprintf (file, "\tadd %s,%s,%s\n",
16805 reg_names[0], reg_names[0], reg_names[11]);
16807 else
16809 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16810 assemble_name (file, buf);
16811 fputs ("@ha\n", file);
16812 asm_fprintf (file, "\tstw %s,4(%s)\n",
16813 reg_names[0], reg_names[1]);
16814 asm_fprintf (file, "\tla %s,", reg_names[0]);
16815 assemble_name (file, buf);
16816 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16819 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16820 fprintf (file, "\tbl %s%s\n",
16821 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16822 break;
16824 case ABI_AIX:
16825 case ABI_ELFv2:
16826 case ABI_DARWIN:
16827 /* Don't do anything, done in output_profile_hook (). */
16828 break;
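/* For illustration (a sketch): the ABI_V4 32-bit non-PIC path above is
   expected to emit, for a profile label LPn, roughly
	mflr 0
	lis 12,LPn@ha
	stw 0,4(1)
	la 0,LPn@l(12)
	bl _mcount
   i.e. store the saved link register in its stack slot, load the
   address of the per-call-site counter label into r0, and call
   _mcount. */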
16834 /* The following variable holds the last issued insn. */
16836 static rtx_insn *last_scheduled_insn;
16838 /* The following variable helps to balance issuing of load and
16839 store instructions. */
16841 static int load_store_pendulum;
16843 /* The following variable helps pair divide insns during scheduling. */
16844 static int divide_cnt;
16845 /* The following variable helps pair and alternate vector and vector load
16846 insns during scheduling. */
16847 static int vec_pairing;
16850 /* Power4 load update and store update instructions are cracked into a
16851 load or store and an integer insn which are executed in the same cycle.
16852 Branches have their own dispatch slot which does not count against the
16853 GCC issue rate, but it changes the program flow so there are no other
16854 instructions to issue in this cycle. */
16856 static int
16857 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16859 last_scheduled_insn = insn;
16860 if (GET_CODE (PATTERN (insn)) == USE
16861 || GET_CODE (PATTERN (insn)) == CLOBBER)
16863 cached_can_issue_more = more;
16864 return cached_can_issue_more;
16867 if (insn_terminates_group_p (insn, current_group))
16869 cached_can_issue_more = 0;
16870 return cached_can_issue_more;
16873 /* If the insn has no reservation but we reach here anyway, leave the issue count unchanged. */
16874 if (recog_memoized (insn) < 0)
16875 return more;
16877 if (rs6000_sched_groups)
16879 if (is_microcoded_insn (insn))
16880 cached_can_issue_more = 0;
16881 else if (is_cracked_insn (insn))
16882 cached_can_issue_more = more > 2 ? more - 2 : 0;
16883 else
16884 cached_can_issue_more = more - 1;
16886 return cached_can_issue_more;
16889 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16890 return 0;
16892 cached_can_issue_more = more - 1;
16893 return cached_can_issue_more;
16896 static int
16897 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16899 int r = rs6000_variable_issue_1 (insn, more);
16900 if (verbose)
16901 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16902 return r;
16905 /* Adjust the cost of a scheduling dependency. Return the new cost of
16906 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16908 static int
16909 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16910 unsigned int)
16912 enum attr_type attr_type;
16914 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16915 return cost;
16917 switch (dep_type)
16919 case REG_DEP_TRUE:
16921 /* Data dependency; DEP_INSN writes a register that INSN reads
16922 some cycles later. */
16924 /* Separate a load from a narrower, dependent store. */
16925 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16926 || rs6000_tune == PROCESSOR_FUTURE)
16927 && GET_CODE (PATTERN (insn)) == SET
16928 && GET_CODE (PATTERN (dep_insn)) == SET
16929 && MEM_P (XEXP (PATTERN (insn), 1))
16930 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16931 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16932 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16933 return cost + 14;
16935 attr_type = get_attr_type (insn);
16937 switch (attr_type)
16939 case TYPE_JMPREG:
16940 /* Tell the first scheduling pass about the latency between
16941 a mtctr and bctr (and mtlr and br/blr). The first
16942 scheduling pass will not know about this latency since
16943 the mtctr instruction, which has the latency associated
16944 to it, will be generated by reload. */
16945 return 4;
16946 case TYPE_BRANCH:
16947 /* Leave some extra cycles between a compare and its
16948 dependent branch, to inhibit expensive mispredicts. */
16949 if ((rs6000_tune == PROCESSOR_PPC603
16950 || rs6000_tune == PROCESSOR_PPC604
16951 || rs6000_tune == PROCESSOR_PPC604e
16952 || rs6000_tune == PROCESSOR_PPC620
16953 || rs6000_tune == PROCESSOR_PPC630
16954 || rs6000_tune == PROCESSOR_PPC750
16955 || rs6000_tune == PROCESSOR_PPC7400
16956 || rs6000_tune == PROCESSOR_PPC7450
16957 || rs6000_tune == PROCESSOR_PPCE5500
16958 || rs6000_tune == PROCESSOR_PPCE6500
16959 || rs6000_tune == PROCESSOR_POWER4
16960 || rs6000_tune == PROCESSOR_POWER5
16961 || rs6000_tune == PROCESSOR_POWER7
16962 || rs6000_tune == PROCESSOR_POWER8
16963 || rs6000_tune == PROCESSOR_POWER9
16964 || rs6000_tune == PROCESSOR_FUTURE
16965 || rs6000_tune == PROCESSOR_CELL)
16966 && recog_memoized (dep_insn)
16967 && (INSN_CODE (dep_insn) >= 0))
16969 switch (get_attr_type (dep_insn))
16971 case TYPE_CMP:
16972 case TYPE_FPCOMPARE:
16973 case TYPE_CR_LOGICAL:
16974 return cost + 2;
16975 case TYPE_EXTS:
16976 case TYPE_MUL:
16977 if (get_attr_dot (dep_insn) == DOT_YES)
16978 return cost + 2;
16979 else
16980 break;
16981 case TYPE_SHIFT:
16982 if (get_attr_dot (dep_insn) == DOT_YES
16983 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16984 return cost + 2;
16985 else
16986 break;
16987 default:
16988 break;
16990 break;
16992 case TYPE_STORE:
16993 case TYPE_FPSTORE:
16994 if ((rs6000_tune == PROCESSOR_POWER6)
16995 && recog_memoized (dep_insn)
16996 && (INSN_CODE (dep_insn) >= 0))
16999 if (GET_CODE (PATTERN (insn)) != SET)
17000 /* If this happens, we have to extend this to schedule
17001 optimally. Return default for now. */
17002 return cost;
17004 /* Adjust the cost for the case where the value written
17005 by a fixed point operation is used as the address
17006 gen value on a store. */
17007 switch (get_attr_type (dep_insn))
17009 case TYPE_LOAD:
17010 case TYPE_CNTLZ:
17012 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17013 return get_attr_sign_extend (dep_insn)
17014 == SIGN_EXTEND_YES ? 6 : 4;
17015 break;
17017 case TYPE_SHIFT:
17019 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17020 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17021 6 : 3;
17022 break;
17024 case TYPE_INTEGER:
17025 case TYPE_ADD:
17026 case TYPE_LOGICAL:
17027 case TYPE_EXTS:
17028 case TYPE_INSERT:
17030 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17031 return 3;
17032 break;
17034 case TYPE_STORE:
17035 case TYPE_FPLOAD:
17036 case TYPE_FPSTORE:
17038 if (get_attr_update (dep_insn) == UPDATE_YES
17039 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17040 return 3;
17041 break;
17043 case TYPE_MUL:
17045 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17046 return 17;
17047 break;
17049 case TYPE_DIV:
17051 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17052 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17053 break;
17055 default:
17056 break;
17059 break;
17061 case TYPE_LOAD:
17062 if ((rs6000_tune == PROCESSOR_POWER6)
17063 && recog_memoized (dep_insn)
17064 && (INSN_CODE (dep_insn) >= 0))
17067 /* Adjust the cost for the case where the value written
17068 by a fixed point instruction is used within the address
17069 gen portion of a subsequent load(u)(x). */
17070 switch (get_attr_type (dep_insn))
17072 case TYPE_LOAD:
17073 case TYPE_CNTLZ:
17075 if (set_to_load_agen (dep_insn, insn))
17076 return get_attr_sign_extend (dep_insn)
17077 == SIGN_EXTEND_YES ? 6 : 4;
17078 break;
17080 case TYPE_SHIFT:
17082 if (set_to_load_agen (dep_insn, insn))
17083 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17084 6 : 3;
17085 break;
17087 case TYPE_INTEGER:
17088 case TYPE_ADD:
17089 case TYPE_LOGICAL:
17090 case TYPE_EXTS:
17091 case TYPE_INSERT:
17093 if (set_to_load_agen (dep_insn, insn))
17094 return 3;
17095 break;
17097 case TYPE_STORE:
17098 case TYPE_FPLOAD:
17099 case TYPE_FPSTORE:
17101 if (get_attr_update (dep_insn) == UPDATE_YES
17102 && set_to_load_agen (dep_insn, insn))
17103 return 3;
17104 break;
17106 case TYPE_MUL:
17108 if (set_to_load_agen (dep_insn, insn))
17109 return 17;
17110 break;
17112 case TYPE_DIV:
17114 if (set_to_load_agen (dep_insn, insn))
17115 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17116 break;
17118 default:
17119 break;
17122 break;
17124 case TYPE_FPLOAD:
17125 if ((rs6000_tune == PROCESSOR_POWER6)
17126 && get_attr_update (insn) == UPDATE_NO
17127 && recog_memoized (dep_insn)
17128 && (INSN_CODE (dep_insn) >= 0)
17129 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17130 return 2;
17132 default:
17133 break;
17136 /* Fall out to return default cost. */
17138 break;
17140 case REG_DEP_OUTPUT:
17141 /* Output dependency; DEP_INSN writes a register that INSN writes some
17142 cycles later. */
17143 if ((rs6000_tune == PROCESSOR_POWER6)
17144 && recog_memoized (dep_insn)
17145 && (INSN_CODE (dep_insn) >= 0))
17147 attr_type = get_attr_type (insn);
17149 switch (attr_type)
17151 case TYPE_FP:
17152 case TYPE_FPSIMPLE:
17153 if (get_attr_type (dep_insn) == TYPE_FP
17154 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17155 return 1;
17156 break;
17157 case TYPE_FPLOAD:
17158 if (get_attr_update (insn) == UPDATE_NO
17159 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17160 return 2;
17161 break;
17162 default:
17163 break;
17166 /* Fall through, no cost for output dependency. */
17167 /* FALLTHRU */
17169 case REG_DEP_ANTI:
17170 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17171 cycles later. */
17172 return 0;
17174 default:
17175 gcc_unreachable ();
17178 return cost;
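/* Hedged example of the REG_DEP_TRUE adjustment above (illustration
   only): a load that truly depends on a narrower store -- e.g. an
   8-byte load fed by a 4-byte store -- gets its cost bumped by 14 so
   the scheduler separates the two; the presumed motivation, not spelled
   out above, is that reading back more bytes than were just stored
   defeats store forwarding on these processors.  */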
17181 /* Debug version of rs6000_adjust_cost. */
17183 static int
17184 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17185 int cost, unsigned int dw)
17187 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17189 if (ret != cost)
17191 const char *dep;
17193 switch (dep_type)
17195 default: dep = "unknown dependency"; break;
17196 case REG_DEP_TRUE: dep = "data dependency"; break;
17197 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17198 case REG_DEP_ANTI: dep = "anti dependency"; break;
17201 fprintf (stderr,
17202 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17203 "%s, insn:\n", ret, cost, dep);
17205 debug_rtx (insn);
17208 return ret;
17211 /* Return true if INSN is microcoded, false otherwise. */
17214 static bool
17215 is_microcoded_insn (rtx_insn *insn)
17217 if (!insn || !NONDEBUG_INSN_P (insn)
17218 || GET_CODE (PATTERN (insn)) == USE
17219 || GET_CODE (PATTERN (insn)) == CLOBBER)
17220 return false;
17222 if (rs6000_tune == PROCESSOR_CELL)
17223 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17225 if (rs6000_sched_groups
17226 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17228 enum attr_type type = get_attr_type (insn);
17229 if ((type == TYPE_LOAD
17230 && get_attr_update (insn) == UPDATE_YES
17231 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17232 || ((type == TYPE_LOAD || type == TYPE_STORE)
17233 && get_attr_update (insn) == UPDATE_YES
17234 && get_attr_indexed (insn) == INDEXED_YES)
17235 || type == TYPE_MFCR)
17236 return true;
17239 return false;
17242 /* Return true if INSN is cracked into 2 instructions by the
17243 processor (and therefore occupies 2 issue slots). */
17245 static bool
17246 is_cracked_insn (rtx_insn *insn)
17248 if (!insn || !NONDEBUG_INSN_P (insn)
17249 || GET_CODE (PATTERN (insn)) == USE
17250 || GET_CODE (PATTERN (insn)) == CLOBBER)
17251 return false;
17253 if (rs6000_sched_groups
17254 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17256 enum attr_type type = get_attr_type (insn);
17257 if ((type == TYPE_LOAD
17258 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17259 && get_attr_update (insn) == UPDATE_NO)
17260 || (type == TYPE_LOAD
17261 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17262 && get_attr_update (insn) == UPDATE_YES
17263 && get_attr_indexed (insn) == INDEXED_NO)
17264 || (type == TYPE_STORE
17265 && get_attr_update (insn) == UPDATE_YES
17266 && get_attr_indexed (insn) == INDEXED_NO)
17267 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17268 && get_attr_update (insn) == UPDATE_YES)
17269 || (type == TYPE_CR_LOGICAL
17270 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17271 || (type == TYPE_EXTS
17272 && get_attr_dot (insn) == DOT_YES)
17273 || (type == TYPE_SHIFT
17274 && get_attr_dot (insn) == DOT_YES
17275 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17276 || (type == TYPE_MUL
17277 && get_attr_dot (insn) == DOT_YES)
17278 || type == TYPE_DIV
17279 || (type == TYPE_INSERT
17280 && get_attr_size (insn) == SIZE_32))
17281 return true;
17284 return false;
17287 /* Return true if INSN can be issued only from the branch
17288 slot. */
17290 static bool
17291 is_branch_slot_insn (rtx_insn *insn)
17293 if (!insn || !NONDEBUG_INSN_P (insn)
17294 || GET_CODE (PATTERN (insn)) == USE
17295 || GET_CODE (PATTERN (insn)) == CLOBBER)
17296 return false;
17298 if (rs6000_sched_groups)
17300 enum attr_type type = get_attr_type (insn);
17301 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17302 return true;
17303 return false;
17306 return false;
17309 /* Return true if OUT_INSN sets a value that is used in the
17310 address generation computation of IN_INSN. */
17311 static bool
17312 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17314 rtx out_set, in_set;
17316 /* For performance reasons, only handle the simple case where
17317 both loads are a single_set. */
17318 out_set = single_set (out_insn);
17319 if (out_set)
17321 in_set = single_set (in_insn);
17322 if (in_set)
17323 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17326 return false;
17329 /* Try to determine the base/offset/size parts of the given MEM.
17330 Return true if successful, false if any of the values could not
17331 be determined.
17333 This function only looks for REG or REG+CONST address forms.
17334 The REG+REG address form will return false. */
17336 static bool
17337 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17338 HOST_WIDE_INT *size)
17340 rtx addr_rtx;
17341 if (MEM_SIZE_KNOWN_P (mem))
17342 *size = MEM_SIZE (mem);
17343 else
17344 return false;
17346 addr_rtx = (XEXP (mem, 0));
17347 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17348 addr_rtx = XEXP (addr_rtx, 1);
17350 *offset = 0;
17351 while (GET_CODE (addr_rtx) == PLUS
17352 && CONST_INT_P (XEXP (addr_rtx, 1)))
17354 *offset += INTVAL (XEXP (addr_rtx, 1));
17355 addr_rtx = XEXP (addr_rtx, 0);
17357 if (!REG_P (addr_rtx))
17358 return false;
17360 *base = addr_rtx;
17361 return true;
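/* A compiled-out usage sketch of get_memref_parts (added for
   illustration; the helper name below is hypothetical).  For a MEM such
   as (mem:DI (plus (reg 9) (const_int 16))) with a known 8-byte size it
   would report base r9, offset 16, size 8, while a REG+REG address
   would take the "not a REG or REG+CONST" path.  */
#if 0
static void
example_dump_memref_parts (rtx mem)
{
  rtx base;
  HOST_WIDE_INT offset, size;
  if (get_memref_parts (mem, &base, &offset, &size))
    fprintf (stderr, "base=r%u offset=" HOST_WIDE_INT_PRINT_DEC
	     " size=" HOST_WIDE_INT_PRINT_DEC "\n",
	     REGNO (base), offset, size);
  else
    fprintf (stderr, "not a REG or REG+CONST address\n");
}
#endif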
17364 /* Return true if the target storage location of MEM1 is adjacent
17365 to the target storage location of MEM2. */
17368 static bool
17369 adjacent_mem_locations (rtx mem1, rtx mem2)
17371 rtx reg1, reg2;
17372 HOST_WIDE_INT off1, size1, off2, size2;
17374 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17375 && get_memref_parts (mem2, &reg2, &off2, &size2))
17376 return ((REGNO (reg1) == REGNO (reg2))
17377 && ((off1 + size1 == off2)
17378 || (off2 + size2 == off1)));
17380 return false;
17383 /* This function returns true if it can be determined that the two MEM
17384 locations overlap by at least 1 byte based on base reg/offset/size. */
17386 static bool
17387 mem_locations_overlap (rtx mem1, rtx mem2)
17389 rtx reg1, reg2;
17390 HOST_WIDE_INT off1, size1, off2, size2;
17392 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17393 && get_memref_parts (mem2, &reg2, &off2, &size2))
17394 return ((REGNO (reg1) == REGNO (reg2))
17395 && (((off1 <= off2) && (off1 + size1 > off2))
17396 || ((off2 <= off1) && (off2 + size2 > off1))));
17398 return false;
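/* Worked examples for the two predicates above (illustrative only):
   with the same base register, {offset 0, size 8} and {offset 8,
   size 8} are adjacent (0 + 8 == 8); {offset 0, size 8} and {offset 4,
   size 4} overlap (0 <= 4 && 0 + 8 > 4); {offset 0, size 4} and
   {offset 8, size 4} are neither adjacent nor overlapping.  */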
17401 /* A C statement (sans semicolon) to update the integer scheduling
17402 priority INSN_PRIORITY (INSN). Increase the priority to execute the
17403 INSN earlier, reduce the priority to execute INSN later. Do not
17404 define this macro if you do not need to adjust the scheduling
17405 priorities of insns. */
17407 static int
17408 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17410 rtx load_mem, str_mem;
17411 /* On machines (like the 750) which have asymmetric integer units,
17412 where one integer unit can do multiply and divides and the other
17413 can't, reduce the priority of multiply/divide so it is scheduled
17414 before other integer operations. */
17416 #if 0
17417 if (! INSN_P (insn))
17418 return priority;
17420 if (GET_CODE (PATTERN (insn)) == USE)
17421 return priority;
17423 switch (rs6000_tune) {
17424 case PROCESSOR_PPC750:
17425 switch (get_attr_type (insn))
17427 default:
17428 break;
17430 case TYPE_MUL:
17431 case TYPE_DIV:
17432 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17433 priority, priority);
17434 if (priority >= 0 && priority < 0x01000000)
17435 priority >>= 3;
17436 break;
17439 #endif
17441 if (insn_must_be_first_in_group (insn)
17442 && reload_completed
17443 && current_sched_info->sched_max_insns_priority
17444 && rs6000_sched_restricted_insns_priority)
17447 /* Prioritize insns that can be dispatched only in the first
17448 dispatch slot. */
17449 if (rs6000_sched_restricted_insns_priority == 1)
17450 /* Attach highest priority to insn. This means that in
17451 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17452 precede 'priority' (critical path) considerations. */
17453 return current_sched_info->sched_max_insns_priority;
17454 else if (rs6000_sched_restricted_insns_priority == 2)
17455 /* Increase priority of insn by a minimal amount. This means that in
17456 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17457 considerations precede dispatch-slot restriction considerations. */
17458 return (priority + 1);
17461 if (rs6000_tune == PROCESSOR_POWER6
17462 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17463 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17464 /* Attach highest priority to insn if the scheduler has just issued two
17465 stores and this instruction is a load, or two loads and this instruction
17466 is a store. Power6 wants loads and stores scheduled alternately
17467 when possible. */
17468 return current_sched_info->sched_max_insns_priority;
17470 return priority;
17473 /* Return true if the instruction is non-pipelined on the Cell. */
17474 static bool
17475 is_nonpipeline_insn (rtx_insn *insn)
17477 enum attr_type type;
17478 if (!insn || !NONDEBUG_INSN_P (insn)
17479 || GET_CODE (PATTERN (insn)) == USE
17480 || GET_CODE (PATTERN (insn)) == CLOBBER)
17481 return false;
17483 type = get_attr_type (insn);
17484 if (type == TYPE_MUL
17485 || type == TYPE_DIV
17486 || type == TYPE_SDIV
17487 || type == TYPE_DDIV
17488 || type == TYPE_SSQRT
17489 || type == TYPE_DSQRT
17490 || type == TYPE_MFCR
17491 || type == TYPE_MFCRF
17492 || type == TYPE_MFJMPR)
17494 return true;
17496 return false;
17500 /* Return how many instructions the machine can issue per cycle. */
17502 static int
17503 rs6000_issue_rate (void)
17505 /* Unless scheduling for register pressure, use issue rate of 1 for
17506 first scheduling pass to decrease degradation. */
17507 if (!reload_completed && !flag_sched_pressure)
17508 return 1;
17510 switch (rs6000_tune) {
17511 case PROCESSOR_RS64A:
17512 case PROCESSOR_PPC601: /* ? */
17513 case PROCESSOR_PPC7450:
17514 return 3;
17515 case PROCESSOR_PPC440:
17516 case PROCESSOR_PPC603:
17517 case PROCESSOR_PPC750:
17518 case PROCESSOR_PPC7400:
17519 case PROCESSOR_PPC8540:
17520 case PROCESSOR_PPC8548:
17521 case PROCESSOR_CELL:
17522 case PROCESSOR_PPCE300C2:
17523 case PROCESSOR_PPCE300C3:
17524 case PROCESSOR_PPCE500MC:
17525 case PROCESSOR_PPCE500MC64:
17526 case PROCESSOR_PPCE5500:
17527 case PROCESSOR_PPCE6500:
17528 case PROCESSOR_TITAN:
17529 return 2;
17530 case PROCESSOR_PPC476:
17531 case PROCESSOR_PPC604:
17532 case PROCESSOR_PPC604e:
17533 case PROCESSOR_PPC620:
17534 case PROCESSOR_PPC630:
17535 return 4;
17536 case PROCESSOR_POWER4:
17537 case PROCESSOR_POWER5:
17538 case PROCESSOR_POWER6:
17539 case PROCESSOR_POWER7:
17540 return 5;
17541 case PROCESSOR_POWER8:
17542 return 7;
17543 case PROCESSOR_POWER9:
17544 case PROCESSOR_FUTURE:
17545 return 6;
17546 default:
17547 return 1;
17551 /* Return how many instructions to look ahead for better insn
17552 scheduling. */
17554 static int
17555 rs6000_use_sched_lookahead (void)
17557 switch (rs6000_tune)
17559 case PROCESSOR_PPC8540:
17560 case PROCESSOR_PPC8548:
17561 return 4;
17563 case PROCESSOR_CELL:
17564 return (reload_completed ? 8 : 0);
17566 default:
17567 return 0;
17571 /* We are choosing an insn from the ready queue. Return zero if INSN
17572 can be chosen. */
17573 static int
17574 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17576 if (ready_index == 0)
17577 return 0;
17579 if (rs6000_tune != PROCESSOR_CELL)
17580 return 0;
17582 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17584 if (!reload_completed
17585 || is_nonpipeline_insn (insn)
17586 || is_microcoded_insn (insn))
17587 return 1;
17589 return 0;
17592 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17593 and return true. */
17595 static bool
17596 find_mem_ref (rtx pat, rtx *mem_ref)
17598 const char * fmt;
17599 int i, j;
17601 /* stack_tie does not produce any real memory traffic. */
17602 if (tie_operand (pat, VOIDmode))
17603 return false;
17605 if (MEM_P (pat))
17607 *mem_ref = pat;
17608 return true;
17611 /* Recursively process the pattern. */
17612 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17614 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17616 if (fmt[i] == 'e')
17618 if (find_mem_ref (XEXP (pat, i), mem_ref))
17619 return true;
17621 else if (fmt[i] == 'E')
17622 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17624 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17625 return true;
17629 return false;
17632 /* Determine if PAT is a PATTERN of a load insn. */
17634 static bool
17635 is_load_insn1 (rtx pat, rtx *load_mem)
17637 if (!pat || pat == NULL_RTX)
17638 return false;
17640 if (GET_CODE (pat) == SET)
17641 return find_mem_ref (SET_SRC (pat), load_mem);
17643 if (GET_CODE (pat) == PARALLEL)
17645 int i;
17647 for (i = 0; i < XVECLEN (pat, 0); i++)
17648 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17649 return true;
17652 return false;
17655 /* Determine if INSN loads from memory. */
17657 static bool
17658 is_load_insn (rtx insn, rtx *load_mem)
17660 if (!insn || !INSN_P (insn))
17661 return false;
17663 if (CALL_P (insn))
17664 return false;
17666 return is_load_insn1 (PATTERN (insn), load_mem);
17669 /* Determine if PAT is a PATTERN of a store insn. */
17671 static bool
17672 is_store_insn1 (rtx pat, rtx *str_mem)
17674 if (!pat || pat == NULL_RTX)
17675 return false;
17677 if (GET_CODE (pat) == SET)
17678 return find_mem_ref (SET_DEST (pat), str_mem);
17680 if (GET_CODE (pat) == PARALLEL)
17682 int i;
17684 for (i = 0; i < XVECLEN (pat, 0); i++)
17685 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17686 return true;
17689 return false;
17692 /* Determine if INSN stores to memory. */
17694 static bool
17695 is_store_insn (rtx insn, rtx *str_mem)
17697 if (!insn || !INSN_P (insn))
17698 return false;
17700 return is_store_insn1 (PATTERN (insn), str_mem);
17703 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17705 static bool
17706 is_power9_pairable_vec_type (enum attr_type type)
17708 switch (type)
17710 case TYPE_VECSIMPLE:
17711 case TYPE_VECCOMPLEX:
17712 case TYPE_VECDIV:
17713 case TYPE_VECCMP:
17714 case TYPE_VECPERM:
17715 case TYPE_VECFLOAT:
17716 case TYPE_VECFDIV:
17717 case TYPE_VECDOUBLE:
17718 return true;
17719 default:
17720 break;
17722 return false;
17725 /* Returns whether the dependence between INSN and NEXT is considered
17726 costly by the given target. */
17728 static bool
17729 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17731 rtx insn;
17732 rtx next;
17733 rtx load_mem, str_mem;
17735 /* If the flag is not enabled - no dependence is considered costly;
17736 allow all dependent insns in the same group.
17737 This is the most aggressive option. */
17738 if (rs6000_sched_costly_dep == no_dep_costly)
17739 return false;
17741 /* If the flag is set to 1 - a dependence is always considered costly;
17742 do not allow dependent instructions in the same group.
17743 This is the most conservative option. */
17744 if (rs6000_sched_costly_dep == all_deps_costly)
17745 return true;
17747 insn = DEP_PRO (dep);
17748 next = DEP_CON (dep);
17750 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17751 && is_load_insn (next, &load_mem)
17752 && is_store_insn (insn, &str_mem))
17753 /* Prevent load after store in the same group. */
17754 return true;
17756 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17757 && is_load_insn (next, &load_mem)
17758 && is_store_insn (insn, &str_mem)
17759 && DEP_TYPE (dep) == REG_DEP_TRUE
17760 && mem_locations_overlap(str_mem, load_mem))
17761 /* Prevent load after store in the same group if it is a true
17762 dependence. */
17763 return true;
17765 /* The flag is set to X; dependences with latency >= X are considered costly,
17766 and will not be scheduled in the same group. */
17767 if (rs6000_sched_costly_dep <= max_dep_latency
17768 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17769 return true;
17771 return false;
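/* Hedged numeric example of the final test above: with the option set
   to a latency bound of 3, a dependence of cost 5 between insns already
   1 cycle apart satisfies 5 - 1 >= 3 and is therefore considered
   costly, keeping the two insns in separate dispatch groups.  */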
17774 /* Return the next insn after INSN that is found before TAIL is reached,
17775 skipping any "non-active" insns - insns that will not actually occupy
17776 an issue slot. Return NULL_RTX if such an insn is not found. */
17778 static rtx_insn *
17779 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17781 if (insn == NULL_RTX || insn == tail)
17782 return NULL;
17784 while (1)
17786 insn = NEXT_INSN (insn);
17787 if (insn == NULL_RTX || insn == tail)
17788 return NULL;
17790 if (CALL_P (insn)
17791 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17792 || (NONJUMP_INSN_P (insn)
17793 && GET_CODE (PATTERN (insn)) != USE
17794 && GET_CODE (PATTERN (insn)) != CLOBBER
17795 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17796 break;
17798 return insn;
17801 /* Move instruction at POS to the end of the READY list. */
17803 static void
17804 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17806 rtx_insn *tmp;
17807 int i;
17809 tmp = ready[pos];
17810 for (i = pos; i < lastpos; i++)
17811 ready[i] = ready[i + 1];
17812 ready[lastpos] = tmp;
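/* Illustrative example (not from the original source): the ready list
   issues from its highest index, so with ready == {A, B, C, D} and
   pos == 1, the rotation above yields {A, C, D, B} and B becomes the
   next candidate to be chosen.  */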
17815 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17817 static int
17818 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17820 /* For Power6, we need to handle some special cases to try to keep the
17821 store queue from overflowing and triggering expensive flushes.
17823 This code monitors how load and store instructions are being issued
17824 and skews the ready list one way or the other to increase the likelihood
17825 that a desired instruction is issued at the proper time.
17827 A couple of things are done. First, we maintain a "load_store_pendulum"
17828 to track the current state of load/store issue.
17830 - If the pendulum is at zero, then no loads or stores have been
17831 issued in the current cycle so we do nothing.
17833 - If the pendulum is 1, then a single load has been issued in this
17834 cycle and we attempt to locate another load in the ready list to
17835 issue with it.
17837 - If the pendulum is -2, then two stores have already been
17838 issued in this cycle, so we increase the priority of the first load
17839 in the ready list to increase its likelihood of being chosen first
17840 in the next cycle.
17842 - If the pendulum is -1, then a single store has been issued in this
17843 cycle and we attempt to locate another store in the ready list to
17844 issue with it, preferring a store to an adjacent memory location to
17845 facilitate store pairing in the store queue.
17847 - If the pendulum is 2, then two loads have already been
17848 issued in this cycle, so we increase the priority of the first store
17849 in the ready list to increase its likelihood of being chosen first
17850 in the next cycle.
17852 - If the pendulum < -2 or > 2, then do nothing.
17854 Note: This code covers the most common scenarios. There exist
17855 non-load/store instructions which make use of the LSU and which
17856 would need to be accounted for to strictly model the behavior
17857 of the machine. Those instructions are currently unaccounted
17858 for, to help minimize the compile-time overhead of this code. */
17860 int pos;
17861 rtx load_mem, str_mem;
17863 if (is_store_insn (last_scheduled_insn, &str_mem))
17864 /* Issuing a store, swing the load_store_pendulum to the left */
17865 load_store_pendulum--;
17866 else if (is_load_insn (last_scheduled_insn, &load_mem))
17867 /* Issuing a load, swing the load_store_pendulum to the right */
17868 load_store_pendulum++;
17869 else
17870 return cached_can_issue_more;
17872 /* If the pendulum is balanced, or there is only one instruction on
17873 the ready list, then all is well, so return. */
17874 if ((load_store_pendulum == 0) || (lastpos <= 0))
17875 return cached_can_issue_more;
17877 if (load_store_pendulum == 1)
17879 /* A load has been issued in this cycle. Scan the ready list
17880 for another load to issue with it. */
17881 pos = lastpos;
17883 while (pos >= 0)
17885 if (is_load_insn (ready[pos], &load_mem))
17887 /* Found a load. Move it to the head of the ready list,
17888 and adjust its priority so that it is more likely to
17889 stay there. */
17890 move_to_end_of_ready (ready, pos, lastpos);
17892 if (!sel_sched_p ()
17893 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17894 INSN_PRIORITY (ready[lastpos])++;
17895 break;
17897 pos--;
17900 else if (load_store_pendulum == -2)
17902 /* Two stores have been issued in this cycle. Increase the
17903 priority of the first load in the ready list to favor it for
17904 issuing in the next cycle. */
17905 pos = lastpos;
17907 while (pos >= 0)
17909 if (is_load_insn (ready[pos], &load_mem)
17910 && !sel_sched_p ()
17911 && INSN_PRIORITY_KNOWN (ready[pos]))
17913 INSN_PRIORITY (ready[pos])++;
17915 /* Adjust the pendulum to account for the fact that a load
17916 was found and increased in priority. This is to prevent
17917 increasing the priority of multiple loads. */
17918 load_store_pendulum--;
17920 break;
17922 pos--;
17925 else if (load_store_pendulum == -1)
17927 /* A store has been issued in this cycle. Scan the ready list for
17928 another store to issue with it, preferring a store to an adjacent
17929 memory location. */
17930 int first_store_pos = -1;
17932 pos = lastpos;
17934 while (pos >= 0)
17936 if (is_store_insn (ready[pos], &str_mem))
17938 rtx str_mem2;
17939 /* Maintain the index of the first store found on the
17940 list. */
17941 if (first_store_pos == -1)
17942 first_store_pos = pos;
17944 if (is_store_insn (last_scheduled_insn, &str_mem2)
17945 && adjacent_mem_locations (str_mem, str_mem2))
17947 /* Found an adjacent store. Move it to the head of the
17948 ready list, and adjust its priority so that it is
17949 more likely to stay there. */
17950 move_to_end_of_ready (ready, pos, lastpos);
17952 if (!sel_sched_p ()
17953 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17954 INSN_PRIORITY (ready[lastpos])++;
17956 first_store_pos = -1;
17958 break;
17961 pos--;
17964 if (first_store_pos >= 0)
17966 /* An adjacent store wasn't found, but a non-adjacent store was,
17967 so move the non-adjacent store to the front of the ready
17968 list, and adjust its priority so that it is more likely to
17969 stay there. */
17970 move_to_end_of_ready (ready, first_store_pos, lastpos);
17971 if (!sel_sched_p ()
17972 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17973 INSN_PRIORITY (ready[lastpos])++;
17976 else if (load_store_pendulum == 2)
17978 /* Two loads have been issued in this cycle. Increase the priority
17979 of the first store in the ready list to favor it for issuing in
17980 the next cycle. */
17981 pos = lastpos;
17983 while (pos >= 0)
17985 if (is_store_insn (ready[pos], &str_mem)
17986 && !sel_sched_p ()
17987 && INSN_PRIORITY_KNOWN (ready[pos]))
17989 INSN_PRIORITY (ready[pos])++;
17991 /* Adjust the pendulum to account for the fact that a store
17992 was found and increased in priority. This is to prevent
17993 increasing the priority of multiple stores. */
17994 load_store_pendulum++;
17996 break;
17998 pos--;
18002 return cached_can_issue_more;
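/* Hedged trace of the pendulum over one cycle (hypothetical): issuing
   a store moves it from 0 to -1, so the ready list is scanned for a
   second, ideally adjacent, store; a further store moves it to -2, so
   the first load found in the ready list gets a priority bump and the
   pendulum steps to -3, ensuring no further loads are bumped in this
   cycle.  */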
18005 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18007 static int
18008 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18010 int pos;
18011 enum attr_type type, type2;
18013 type = get_attr_type (last_scheduled_insn);
18015 /* Try to issue fixed point divides back-to-back in pairs so they will be
18016 routed to separate execution units and execute in parallel. */
18017 if (type == TYPE_DIV && divide_cnt == 0)
18019 /* First divide has been scheduled. */
18020 divide_cnt = 1;
18022 /* Scan the ready list looking for another divide, if found move it
18023 to the end of the list so it is chosen next. */
18024 pos = lastpos;
18025 while (pos >= 0)
18027 if (recog_memoized (ready[pos]) >= 0
18028 && get_attr_type (ready[pos]) == TYPE_DIV)
18030 move_to_end_of_ready (ready, pos, lastpos);
18031 break;
18033 pos--;
18036 else
18038 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18039 divide_cnt = 0;
18041 /* The best dispatch throughput for vector and vector load insns can be
18042 achieved by interleaving a vector and vector load such that they'll
18043 dispatch to the same superslice. If this pairing cannot be achieved
18044 then it is best to pair vector insns together and vector load insns
18045 together.
18047 To aid in this pairing, vec_pairing maintains the current state with
18048 the following values:
18050 0 : Initial state, no vecload/vector pairing has been started.
18052 1 : A vecload or vector insn has been issued and a candidate for
18053 pairing has been found and moved to the end of the ready
18054 list. */
18055 if (type == TYPE_VECLOAD)
18057 /* Issued a vecload. */
18058 if (vec_pairing == 0)
18060 int vecload_pos = -1;
18061 /* We issued a single vecload, look for a vector insn to pair it
18062 with. If one isn't found, try to pair another vecload. */
18063 pos = lastpos;
18064 while (pos >= 0)
18066 if (recog_memoized (ready[pos]) >= 0)
18068 type2 = get_attr_type (ready[pos]);
18069 if (is_power9_pairable_vec_type (type2))
18071 /* Found a vector insn to pair with, move it to the
18072 end of the ready list so it is scheduled next. */
18073 move_to_end_of_ready (ready, pos, lastpos);
18074 vec_pairing = 1;
18075 return cached_can_issue_more;
18077 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18078 /* Remember position of first vecload seen. */
18079 vecload_pos = pos;
18081 pos--;
18083 if (vecload_pos >= 0)
18085 /* Didn't find a vector to pair with but did find a vecload,
18086 move it to the end of the ready list. */
18087 move_to_end_of_ready (ready, vecload_pos, lastpos);
18088 vec_pairing = 1;
18089 return cached_can_issue_more;
18093 else if (is_power9_pairable_vec_type (type))
18095 /* Issued a vector operation. */
18096 if (vec_pairing == 0)
18098 int vec_pos = -1;
18099 /* We issued a single vector insn, look for a vecload to pair it
18100 with. If one isn't found, try to pair another vector. */
18101 pos = lastpos;
18102 while (pos >= 0)
18104 if (recog_memoized (ready[pos]) >= 0)
18106 type2 = get_attr_type (ready[pos]);
18107 if (type2 == TYPE_VECLOAD)
18109 /* Found a vecload insn to pair with, move it to the
18110 end of the ready list so it is scheduled next. */
18111 move_to_end_of_ready (ready, pos, lastpos);
18112 vec_pairing = 1;
18113 return cached_can_issue_more;
18115 else if (is_power9_pairable_vec_type (type2)
18116 && vec_pos == -1)
18117 /* Remember position of first vector insn seen. */
18118 vec_pos = pos;
18120 pos--;
18122 if (vec_pos >= 0)
18124 /* Didn't find a vecload to pair with but did find a vector
18125 insn, move it to the end of the ready list. */
18126 move_to_end_of_ready (ready, vec_pos, lastpos);
18127 vec_pairing = 1;
18128 return cached_can_issue_more;
18133 /* We've either finished a vec/vecload pair, couldn't find an insn to
18134 continue the current pair, or the last insn had nothing to do
18135 with pairing. In any case, reset the state. */
18136 vec_pairing = 0;
18139 return cached_can_issue_more;
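/* Illustrative pairing trace (hypothetical insns): the last scheduled
   insn was a TYPE_VECLOAD and vec_pairing == 0; a TYPE_VECPERM insn
   found in the ready list is moved to the end so it issues next, and
   vec_pairing becomes 1.  On the following call the pair is complete,
   so the state resets to 0 via the fallthrough above.  */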
18142 /* We are about to begin issuing insns for this clock cycle. */
18144 static int
18145 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18146 rtx_insn **ready ATTRIBUTE_UNUSED,
18147 int *pn_ready ATTRIBUTE_UNUSED,
18148 int clock_var ATTRIBUTE_UNUSED)
18150 int n_ready = *pn_ready;
18152 if (sched_verbose)
18153 fprintf (dump, "// rs6000_sched_reorder :\n");
18155 /* Reorder the ready list if the next insn to be issued
18156 is a non-pipelined insn. */
18157 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18159 if (is_nonpipeline_insn (ready[n_ready - 1])
18160 && (recog_memoized (ready[n_ready - 2]) > 0))
18161 /* Simply swap first two insns. */
18162 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18165 if (rs6000_tune == PROCESSOR_POWER6)
18166 load_store_pendulum = 0;
18168 return rs6000_issue_rate ();
18171 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18173 static int
18174 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18175 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18177 if (sched_verbose)
18178 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18180 /* Do Power6 dependent reordering if necessary. */
18181 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18182 return power6_sched_reorder2 (ready, *pn_ready - 1);
18184 /* Do Power9 dependent reordering if necessary. */
18185 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18186 && recog_memoized (last_scheduled_insn) >= 0)
18187 return power9_sched_reorder2 (ready, *pn_ready - 1);
18189 return cached_can_issue_more;
18192 /* Return whether the presence of INSN causes a dispatch group termination
18193 of group WHICH_GROUP.
18195 If WHICH_GROUP == current_group, this function will return true if INSN
18196 causes the termination of the current group (i.e., the dispatch group to
18197 which INSN belongs). This means that INSN will be the last insn in the
18198 group it belongs to.
18200 If WHICH_GROUP == previous_group, this function will return true if INSN
18201 causes the termination of the previous group (i.e., the dispatch group that
18202 precedes the group to which INSN belongs). This means that INSN will be
18203 the first insn in the group it belongs to. */
18205 static bool
18206 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18208 bool first, last;
18210 if (! insn)
18211 return false;
18213 first = insn_must_be_first_in_group (insn);
18214 last = insn_must_be_last_in_group (insn);
18216 if (first && last)
18217 return true;
18219 if (which_group == current_group)
18220 return last;
18221 else if (which_group == previous_group)
18222 return first;
18224 return false;
18228 static bool
18229 insn_must_be_first_in_group (rtx_insn *insn)
18231 enum attr_type type;
18233 if (!insn
18234 || NOTE_P (insn)
18235 || DEBUG_INSN_P (insn)
18236 || GET_CODE (PATTERN (insn)) == USE
18237 || GET_CODE (PATTERN (insn)) == CLOBBER)
18238 return false;
18240 switch (rs6000_tune)
18242 case PROCESSOR_POWER5:
18243 if (is_cracked_insn (insn))
18244 return true;
18245 /* FALLTHRU */
18246 case PROCESSOR_POWER4:
18247 if (is_microcoded_insn (insn))
18248 return true;
18250 if (!rs6000_sched_groups)
18251 return false;
18253 type = get_attr_type (insn);
18255 switch (type)
18257 case TYPE_MFCR:
18258 case TYPE_MFCRF:
18259 case TYPE_MTCR:
18260 case TYPE_CR_LOGICAL:
18261 case TYPE_MTJMPR:
18262 case TYPE_MFJMPR:
18263 case TYPE_DIV:
18264 case TYPE_LOAD_L:
18265 case TYPE_STORE_C:
18266 case TYPE_ISYNC:
18267 case TYPE_SYNC:
18268 return true;
18269 default:
18270 break;
18272 break;
18273 case PROCESSOR_POWER6:
18274 type = get_attr_type (insn);
18276 switch (type)
18278 case TYPE_EXTS:
18279 case TYPE_CNTLZ:
18280 case TYPE_TRAP:
18281 case TYPE_MUL:
18282 case TYPE_INSERT:
18283 case TYPE_FPCOMPARE:
18284 case TYPE_MFCR:
18285 case TYPE_MTCR:
18286 case TYPE_MFJMPR:
18287 case TYPE_MTJMPR:
18288 case TYPE_ISYNC:
18289 case TYPE_SYNC:
18290 case TYPE_LOAD_L:
18291 case TYPE_STORE_C:
18292 return true;
18293 case TYPE_SHIFT:
18294 if (get_attr_dot (insn) == DOT_NO
18295 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18296 return true;
18297 else
18298 break;
18299 case TYPE_DIV:
18300 if (get_attr_size (insn) == SIZE_32)
18301 return true;
18302 else
18303 break;
18304 case TYPE_LOAD:
18305 case TYPE_STORE:
18306 case TYPE_FPLOAD:
18307 case TYPE_FPSTORE:
18308 if (get_attr_update (insn) == UPDATE_YES)
18309 return true;
18310 else
18311 break;
18312 default:
18313 break;
18315 break;
18316 case PROCESSOR_POWER7:
18317 type = get_attr_type (insn);
18319 switch (type)
18321 case TYPE_CR_LOGICAL:
18322 case TYPE_MFCR:
18323 case TYPE_MFCRF:
18324 case TYPE_MTCR:
18325 case TYPE_DIV:
18326 case TYPE_ISYNC:
18327 case TYPE_LOAD_L:
18328 case TYPE_STORE_C:
18329 case TYPE_MFJMPR:
18330 case TYPE_MTJMPR:
18331 return true;
18332 case TYPE_MUL:
18333 case TYPE_SHIFT:
18334 case TYPE_EXTS:
18335 if (get_attr_dot (insn) == DOT_YES)
18336 return true;
18337 else
18338 break;
18339 case TYPE_LOAD:
18340 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18341 || get_attr_update (insn) == UPDATE_YES)
18342 return true;
18343 else
18344 break;
18345 case TYPE_STORE:
18346 case TYPE_FPLOAD:
18347 case TYPE_FPSTORE:
18348 if (get_attr_update (insn) == UPDATE_YES)
18349 return true;
18350 else
18351 break;
18352 default:
18353 break;
18355 break;
18356 case PROCESSOR_POWER8:
18357 type = get_attr_type (insn);
18359 switch (type)
18361 case TYPE_CR_LOGICAL:
18362 case TYPE_MFCR:
18363 case TYPE_MFCRF:
18364 case TYPE_MTCR:
18365 case TYPE_SYNC:
18366 case TYPE_ISYNC:
18367 case TYPE_LOAD_L:
18368 case TYPE_STORE_C:
18369 case TYPE_VECSTORE:
18370 case TYPE_MFJMPR:
18371 case TYPE_MTJMPR:
18372 return true;
18373 case TYPE_SHIFT:
18374 case TYPE_EXTS:
18375 case TYPE_MUL:
18376 if (get_attr_dot (insn) == DOT_YES)
18377 return true;
18378 else
18379 break;
18380 case TYPE_LOAD:
18381 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18382 || get_attr_update (insn) == UPDATE_YES)
18383 return true;
18384 else
18385 break;
18386 case TYPE_STORE:
18387 if (get_attr_update (insn) == UPDATE_YES
18388 && get_attr_indexed (insn) == INDEXED_YES)
18389 return true;
18390 else
18391 break;
18392 default:
18393 break;
18395 break;
18396 default:
18397 break;
18400 return false;
18403 static bool
18404 insn_must_be_last_in_group (rtx_insn *insn)
18406 enum attr_type type;
18408 if (!insn
18409 || NOTE_P (insn)
18410 || DEBUG_INSN_P (insn)
18411 || GET_CODE (PATTERN (insn)) == USE
18412 || GET_CODE (PATTERN (insn)) == CLOBBER)
18413 return false;
18415 switch (rs6000_tune) {
18416 case PROCESSOR_POWER4:
18417 case PROCESSOR_POWER5:
18418 if (is_microcoded_insn (insn))
18419 return true;
18421 if (is_branch_slot_insn (insn))
18422 return true;
18424 break;
18425 case PROCESSOR_POWER6:
18426 type = get_attr_type (insn);
18428 switch (type)
18430 case TYPE_EXTS:
18431 case TYPE_CNTLZ:
18432 case TYPE_TRAP:
18433 case TYPE_MUL:
18434 case TYPE_FPCOMPARE:
18435 case TYPE_MFCR:
18436 case TYPE_MTCR:
18437 case TYPE_MFJMPR:
18438 case TYPE_MTJMPR:
18439 case TYPE_ISYNC:
18440 case TYPE_SYNC:
18441 case TYPE_LOAD_L:
18442 case TYPE_STORE_C:
18443 return true;
18444 case TYPE_SHIFT:
18445 if (get_attr_dot (insn) == DOT_NO
18446 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18447 return true;
18448 else
18449 break;
18450 case TYPE_DIV:
18451 if (get_attr_size (insn) == SIZE_32)
18452 return true;
18453 else
18454 break;
18455 default:
18456 break;
18458 break;
18459 case PROCESSOR_POWER7:
18460 type = get_attr_type (insn);
18462 switch (type)
18464 case TYPE_ISYNC:
18465 case TYPE_SYNC:
18466 case TYPE_LOAD_L:
18467 case TYPE_STORE_C:
18468 return true;
18469 case TYPE_LOAD:
18470 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18471 && get_attr_update (insn) == UPDATE_YES)
18472 return true;
18473 else
18474 break;
18475 case TYPE_STORE:
18476 if (get_attr_update (insn) == UPDATE_YES
18477 && get_attr_indexed (insn) == INDEXED_YES)
18478 return true;
18479 else
18480 break;
18481 default:
18482 break;
18484 break;
18485 case PROCESSOR_POWER8:
18486 type = get_attr_type (insn);
18488 switch (type)
18490 case TYPE_MFCR:
18491 case TYPE_MTCR:
18492 case TYPE_ISYNC:
18493 case TYPE_SYNC:
18494 case TYPE_LOAD_L:
18495 case TYPE_STORE_C:
18496 return true;
18497 case TYPE_LOAD:
18498 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18499 && get_attr_update (insn) == UPDATE_YES)
18500 return true;
18501 else
18502 break;
18503 case TYPE_STORE:
18504 if (get_attr_update (insn) == UPDATE_YES
18505 && get_attr_indexed (insn) == INDEXED_YES)
18506 return true;
18507 else
18508 break;
18509 default:
18510 break;
18512 break;
18513 default:
18514 break;
18517 return false;
18520 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18521 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18523 static bool
18524 is_costly_group (rtx *group_insns, rtx next_insn)
18526 int i;
18527 int issue_rate = rs6000_issue_rate ();
18529 for (i = 0; i < issue_rate; i++)
18531 sd_iterator_def sd_it;
18532 dep_t dep;
18533 rtx insn = group_insns[i];
18535 if (!insn)
18536 continue;
18538 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18540 rtx next = DEP_CON (dep);
18542 if (next == next_insn
18543 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18544 return true;
18548 return false;
18551 /* Utility function for redefine_groups.
18552 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18553 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18554 to keep it "far" (in a separate group) from GROUP_INSNS, following
18555 one of the following schemes, depending on the value of the flag
18556 -minsert-sched-nops = X:
18557 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18558 in order to force NEXT_INSN into a separate group.
18559 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18560 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18561 insertion (has a group just ended, how many vacant issue slots remain in the
18562 last group, and how many dispatch groups were encountered so far). */
18564 static int
18565 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18566 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18567 int *group_count)
18569 rtx nop;
18570 bool force;
18571 int issue_rate = rs6000_issue_rate ();
18572 bool end = *group_end;
18573 int i;
18575 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18576 return can_issue_more;
18578 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18579 return can_issue_more;
18581 force = is_costly_group (group_insns, next_insn);
18582 if (!force)
18583 return can_issue_more;
18585 if (sched_verbose > 6)
18586 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
18587 *group_count, can_issue_more);
18589 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18591 if (*group_end)
18592 can_issue_more = 0;
18594 /* Since only a branch can be issued in the last issue_slot, it is
18595 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18596 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18597 in this case the last nop will start a new group and the branch
18598 will be forced to the new group. */
18599 if (can_issue_more && !is_branch_slot_insn (next_insn))
18600 can_issue_more--;
18602 /* Do we have a special group ending nop? */
18603 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18604 || rs6000_tune == PROCESSOR_POWER8)
18606 nop = gen_group_ending_nop ();
18607 emit_insn_before (nop, next_insn);
18608 can_issue_more = 0;
18610 else
18611 while (can_issue_more > 0)
18613 nop = gen_nop ();
18614 emit_insn_before (nop, next_insn);
18615 can_issue_more--;
18618 *group_end = true;
18619 return 0;
18622 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18624 int n_nops = rs6000_sched_insert_nops;
18626 /* Nops can't be issued from the branch slot, so the effective
18627 issue_rate for nops is 'issue_rate - 1'. */
18628 if (can_issue_more == 0)
18629 can_issue_more = issue_rate;
18630 can_issue_more--;
18631 if (can_issue_more == 0)
18633 can_issue_more = issue_rate - 1;
18634 (*group_count)++;
18635 end = true;
18636 for (i = 0; i < issue_rate; i++)
18638 group_insns[i] = 0;
18642 while (n_nops > 0)
18644 nop = gen_nop ();
18645 emit_insn_before (nop, next_insn);
18646 if (can_issue_more == issue_rate - 1) /* new group begins */
18647 end = false;
18648 can_issue_more--;
18649 if (can_issue_more == 0)
18651 can_issue_more = issue_rate - 1;
18652 (*group_count)++;
18653 end = true;
18654 for (i = 0; i < issue_rate; i++)
18656 group_insns[i] = 0;
18659 n_nops--;
18662 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18663 can_issue_more++;
18665 /* Is next_insn going to start a new group? */
18666 *group_end
18667 = (end
18668 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18669 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18670 || (can_issue_more < issue_rate &&
18671 insn_terminates_group_p (next_insn, previous_group)));
18672 if (*group_end && end)
18673 (*group_count)--;
18675 if (sched_verbose > 6)
18676 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18677 *group_count, can_issue_more);
18678 return can_issue_more;
18681 return can_issue_more;
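/* Worked example (hypothetical numbers): with issue_rate == 5, three
   vacant slots and -minsert-sched-nops=regroup_exact, two nops suffice
   to push a non-branch NEXT_INSN into a fresh group, because the last
   slot can only hold a branch; on Power6/7/8 a single group-ending nop
   is emitted instead.  */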
18684 /* This function tries to sync the dispatch groups that the compiler "sees"
18685 with the dispatch groups that the processor dispatcher is expected to
18686 form in practice. It tries to achieve this synchronization by forcing the
18687 estimated processor grouping on the compiler (as opposed to the function
18688 'pad_groups', which tries to force the scheduler's grouping on the processor).
18690 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18691 examines the (estimated) dispatch groups that will be formed by the processor
18692 dispatcher. It marks these group boundaries to reflect the estimated
18693 processor grouping, overriding the grouping that the scheduler had marked.
18694 Depending on the value of the flag '-minsert-sched-nops' this function can
18695 force certain insns into separate groups or force a certain distance between
18696 them by inserting nops, for example, if there exists a "costly dependence"
18697 between the insns.
18699 The function estimates the group boundaries that the processor will form as
18700 follows: It keeps track of how many vacant issue slots are available after
18701 each insn. A subsequent insn will start a new group if one of the following
18702 4 cases applies:
18703 - no more vacant issue slots remain in the current dispatch group.
18704 - only the last issue slot, which is the branch slot, is vacant, but the next
18705 insn is not a branch.
18706 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18707 which means that a cracked insn (which occupies two issue slots) can't be
18708 issued in this group.
18709 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18710 start a new group. */
18712 static int
18713 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18714 rtx_insn *tail)
18716 rtx_insn *insn, *next_insn;
18717 int issue_rate;
18718 int can_issue_more;
18719 int slot, i;
18720 bool group_end;
18721 int group_count = 0;
18722 rtx *group_insns;
18724 /* Initialize. */
18725 issue_rate = rs6000_issue_rate ();
18726 group_insns = XALLOCAVEC (rtx, issue_rate);
18727 for (i = 0; i < issue_rate; i++)
18729 group_insns[i] = 0;
18731 can_issue_more = issue_rate;
18732 slot = 0;
18733 insn = get_next_active_insn (prev_head_insn, tail);
18734 group_end = false;
18736 while (insn != NULL_RTX)
18738 slot = (issue_rate - can_issue_more);
18739 group_insns[slot] = insn;
18740 can_issue_more =
18741 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18742 if (insn_terminates_group_p (insn, current_group))
18743 can_issue_more = 0;
18745 next_insn = get_next_active_insn (insn, tail);
18746 if (next_insn == NULL_RTX)
18747 return group_count + 1;
18749 /* Is next_insn going to start a new group? */
18750 group_end
18751 = (can_issue_more == 0
18752 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18753 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18754 || (can_issue_more < issue_rate &&
18755 insn_terminates_group_p (next_insn, previous_group)));
18757 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18758 next_insn, &group_end, can_issue_more,
18759 &group_count);
18761 if (group_end)
18763 group_count++;
18764 can_issue_more = 0;
18765 for (i = 0; i < issue_rate; i++)
18767 group_insns[i] = 0;
18771 if (GET_MODE (next_insn) == TImode && can_issue_more)
18772 PUT_MODE (next_insn, VOIDmode);
18773 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18774 PUT_MODE (next_insn, TImode);
18776 insn = next_insn;
18777 if (can_issue_more == 0)
18778 can_issue_more = issue_rate;
18779 } /* while */
18781 return group_count;
18784 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18785 dispatch group boundaries that the scheduler had marked. Pad with nops
18786 any dispatch groups which have vacant issue slots, in order to force the
18787 scheduler's grouping on the processor dispatcher. The function
18788 returns the number of dispatch groups found. */
18790 static int
18791 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18792 rtx_insn *tail)
18794 rtx_insn *insn, *next_insn;
18795 rtx nop;
18796 int issue_rate;
18797 int can_issue_more;
18798 int group_end;
18799 int group_count = 0;
18801 /* Initialize issue_rate. */
18802 issue_rate = rs6000_issue_rate ();
18803 can_issue_more = issue_rate;
18805 insn = get_next_active_insn (prev_head_insn, tail);
18806 next_insn = get_next_active_insn (insn, tail);
18808 while (insn != NULL_RTX)
18810 can_issue_more =
18811 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18813 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18815 if (next_insn == NULL_RTX)
18816 break;
18818 if (group_end)
18820 /* If the scheduler had marked group termination at this location
18821 (between insn and next_insn), and neither insn nor next_insn will
18822 force group termination, pad the group with nops to force group
18823 termination. */
18824 if (can_issue_more
18825 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18826 && !insn_terminates_group_p (insn, current_group)
18827 && !insn_terminates_group_p (next_insn, previous_group))
18829 if (!is_branch_slot_insn (next_insn))
18830 can_issue_more--;
18832 while (can_issue_more)
18834 nop = gen_nop ();
18835 emit_insn_before (nop, next_insn);
18836 can_issue_more--;
18840 can_issue_more = issue_rate;
18841 group_count++;
18844 insn = next_insn;
18845 next_insn = get_next_active_insn (insn, tail);
18848 return group_count;
18851 /* We're beginning a new block. Initialize data structures as necessary. */
18853 static void
18854 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18855 int sched_verbose ATTRIBUTE_UNUSED,
18856 int max_ready ATTRIBUTE_UNUSED)
18858 last_scheduled_insn = NULL;
18859 load_store_pendulum = 0;
18860 divide_cnt = 0;
18861 vec_pairing = 0;
18864 /* The following function is called at the end of scheduling BB.
18865 After reload, it inserts nops at insn group boundaries. */
18867 static void
18868 rs6000_sched_finish (FILE *dump, int sched_verbose)
18870 int n_groups;
18872 if (sched_verbose)
18873 fprintf (dump, "=== Finishing schedule.\n");
18875 if (reload_completed && rs6000_sched_groups)
18877 /* Do not run the sched_finish hook when selective scheduling is enabled. */
18878 if (sel_sched_p ())
18879 return;
18881 if (rs6000_sched_insert_nops == sched_finish_none)
18882 return;
18884 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18885 n_groups = pad_groups (dump, sched_verbose,
18886 current_sched_info->prev_head,
18887 current_sched_info->next_tail);
18888 else
18889 n_groups = redefine_groups (dump, sched_verbose,
18890 current_sched_info->prev_head,
18891 current_sched_info->next_tail);
18893 if (sched_verbose >= 6)
18895 fprintf (dump, "ngroups = %d\n", n_groups);
18896 print_rtl (dump, current_sched_info->prev_head);
18897 fprintf (dump, "Done finish_sched\n");
18902 struct rs6000_sched_context
18904 short cached_can_issue_more;
18905 rtx_insn *last_scheduled_insn;
18906 int load_store_pendulum;
18907 int divide_cnt;
18908 int vec_pairing;
18911 typedef struct rs6000_sched_context rs6000_sched_context_def;
18912 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18914 /* Allocate storage for a new scheduling context. */
18915 static void *
18916 rs6000_alloc_sched_context (void)
18918 return xmalloc (sizeof (rs6000_sched_context_def));
18921 /* If CLEAN_P is true, initialize _SC with clean data;
18922 otherwise, initialize it from the global context. */
18923 static void
18924 rs6000_init_sched_context (void *_sc, bool clean_p)
18926 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18928 if (clean_p)
18930 sc->cached_can_issue_more = 0;
18931 sc->last_scheduled_insn = NULL;
18932 sc->load_store_pendulum = 0;
18933 sc->divide_cnt = 0;
18934 sc->vec_pairing = 0;
18936 else
18938 sc->cached_can_issue_more = cached_can_issue_more;
18939 sc->last_scheduled_insn = last_scheduled_insn;
18940 sc->load_store_pendulum = load_store_pendulum;
18941 sc->divide_cnt = divide_cnt;
18942 sc->vec_pairing = vec_pairing;
18946 /* Sets the global scheduling context to the one pointed to by _SC. */
18947 static void
18948 rs6000_set_sched_context (void *_sc)
18950 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18952 gcc_assert (sc != NULL);
18954 cached_can_issue_more = sc->cached_can_issue_more;
18955 last_scheduled_insn = sc->last_scheduled_insn;
18956 load_store_pendulum = sc->load_store_pendulum;
18957 divide_cnt = sc->divide_cnt;
18958 vec_pairing = sc->vec_pairing;
18961 /* Free _SC. */
18962 static void
18963 rs6000_free_sched_context (void *_sc)
18965 gcc_assert (_sc != NULL);
18967 free (_sc);
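/* Compiled-out life-cycle sketch of the four context hooks above (the
   caller shown here is hypothetical; in reality the selective scheduler
   drives these through the target hooks):  */
#if 0
  {
    void *sc = rs6000_alloc_sched_context ();
    rs6000_init_sched_context (sc, false);  /* snapshot the globals */
    /* ... schedule some other region ... */
    rs6000_set_sched_context (sc);          /* restore the snapshot */
    rs6000_free_sched_context (sc);
  }
#endif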
18970 static bool
18971 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18973 switch (get_attr_type (insn))
18975 case TYPE_DIV:
18976 case TYPE_SDIV:
18977 case TYPE_DDIV:
18978 case TYPE_VECDIV:
18979 case TYPE_SSQRT:
18980 case TYPE_DSQRT:
18981 return false;
18983 default:
18984 return true;
18988 /* Length in units of the trampoline for entering a nested function. */
18990 int
18991 rs6000_trampoline_size (void)
18993 int ret = 0;
18995 switch (DEFAULT_ABI)
18997 default:
18998 gcc_unreachable ();
19000 case ABI_AIX:
19001 ret = (TARGET_32BIT) ? 12 : 24;
19002 break;
19004 case ABI_ELFv2:
19005 gcc_assert (!TARGET_32BIT);
19006 ret = 32;
19007 break;
19009 case ABI_DARWIN:
19010 case ABI_V4:
19011 ret = (TARGET_32BIT) ? 40 : 48;
19012 break;
19015 return ret;
19018 /* Emit RTL insns to initialize the variable parts of a trampoline.
19019 FNADDR is an RTX for the address of the function's pure code.
19020 CXT is an RTX for the static chain value for the function. */
19022 static void
19023 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19025 int regsize = (TARGET_32BIT) ? 4 : 8;
19026 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19027 rtx ctx_reg = force_reg (Pmode, cxt);
19028 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19030 switch (DEFAULT_ABI)
19032 default:
19033 gcc_unreachable ();
19035 /* Under AIX, just build the 3-word function descriptor. */
19036 case ABI_AIX:
19038 rtx fnmem, fn_reg, toc_reg;
19040 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19041 error ("you cannot take the address of a nested function if you use "
19042 "the %qs option", "-mno-pointers-to-nested-functions");
19044 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19045 fn_reg = gen_reg_rtx (Pmode);
19046 toc_reg = gen_reg_rtx (Pmode);
19048 /* Macro to shorten the code expansions below. */
19049 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19051 m_tramp = replace_equiv_address (m_tramp, addr);
19053 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19054 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19055 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19056 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19057 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19059 # undef MEM_PLUS
19061 break;
19063 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19064 case ABI_ELFv2:
19065 case ABI_DARWIN:
19066 case ABI_V4:
19067 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19068 LCT_NORMAL, VOIDmode,
19069 addr, Pmode,
19070 GEN_INT (rs6000_trampoline_size ()), SImode,
19071 fnaddr, Pmode,
19072 ctx_reg, Pmode);
19073 break;
19078 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19079 identifier as an argument, so the front end shouldn't look it up. */
19081 static bool
19082 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19084 return is_attribute_p ("altivec", attr_id);
19087 /* Handle the "altivec" attribute. The attribute may have
19088 arguments as follows:
19090 __attribute__((altivec(vector__)))
19091 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19092 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19094 and may appear more than once (e.g., 'vector bool char') in a
19095 given declaration. */
19097 static tree
19098 rs6000_handle_altivec_attribute (tree *node,
19099 tree name ATTRIBUTE_UNUSED,
19100 tree args,
19101 int flags ATTRIBUTE_UNUSED,
19102 bool *no_add_attrs)
19104 tree type = *node, result = NULL_TREE;
19105 machine_mode mode;
19106 int unsigned_p;
19107 char altivec_type
19108 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19109 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19110 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19111 : '?');
19113 while (POINTER_TYPE_P (type)
19114 || TREE_CODE (type) == FUNCTION_TYPE
19115 || TREE_CODE (type) == METHOD_TYPE
19116 || TREE_CODE (type) == ARRAY_TYPE)
19117 type = TREE_TYPE (type);
19119 mode = TYPE_MODE (type);
19121 /* Check for invalid AltiVec type qualifiers. */
19122 if (type == long_double_type_node)
19123 error ("use of %<long double%> in AltiVec types is invalid");
19124 else if (type == boolean_type_node)
19125 error ("use of boolean types in AltiVec types is invalid");
19126 else if (TREE_CODE (type) == COMPLEX_TYPE)
19127 error ("use of %<complex%> in AltiVec types is invalid");
19128 else if (DECIMAL_FLOAT_MODE_P (mode))
19129 error ("use of decimal floating point types in AltiVec types is invalid");
19130 else if (!TARGET_VSX)
19132 if (type == long_unsigned_type_node || type == long_integer_type_node)
19134 if (TARGET_64BIT)
19135 error ("use of %<long%> in AltiVec types is invalid for "
19136 "64-bit code without %qs", "-mvsx");
19137 else if (rs6000_warn_altivec_long)
19138 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19139 "use %<int%>");
19141 else if (type == long_long_unsigned_type_node
19142 || type == long_long_integer_type_node)
19143 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19144 "-mvsx");
19145 else if (type == double_type_node)
19146 error ("use of %<double%> in AltiVec types is invalid without %qs",
19147 "-mvsx");
19150 switch (altivec_type)
19152 case 'v':
19153 unsigned_p = TYPE_UNSIGNED (type);
19154 switch (mode)
19156 case E_TImode:
19157 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19158 break;
19159 case E_DImode:
19160 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19161 break;
19162 case E_SImode:
19163 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19164 break;
19165 case E_HImode:
19166 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19167 break;
19168 case E_QImode:
19169 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19170 break;
19171 case E_SFmode: result = V4SF_type_node; break;
19172 case E_DFmode: result = V2DF_type_node; break;
19173 /* If the user says 'vector int bool', we may be handed the 'bool'
19174 attribute _before_ the 'vector' attribute, and so select the
19175 proper type in the 'b' case below. */
19176 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19177 case E_V2DImode: case E_V2DFmode:
19178 result = type;
19179 default: break;
19181 break;
19182 case 'b':
19183 switch (mode)
19185 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19186 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19187 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19188 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19189 default: break;
19191 break;
19192 case 'p':
19193 switch (mode)
19195 case E_V8HImode: result = pixel_V8HI_type_node;
19196 default: break;
19198 default: break;
19201 /* Propagate qualifiers attached to the element type
19202 onto the vector type. */
19203 if (result && result != type && TYPE_QUALS (type))
19204 result = build_qualified_type (result, TYPE_QUALS (type));
19206 *no_add_attrs = true; /* No need to hang on to the attribute. */
19208 if (result)
19209 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19211 return NULL_TREE;
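/* Illustration: the front end lowers the AltiVec 'vector' keyword to this
   attribute, so declarations such as

     vector unsigned int v;   -- altivec(vector__) on unsigned int (E_SImode)
     vector bool short b;     -- altivec(bool__) on unsigned short (E_HImode)

   resolve above to unsigned_V4SI_type_node and bool_V8HI_type_node
   respectively.  */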
19214 /* AltiVec defines five built-in scalar types that serve as vector
19215 elements; we must teach the compiler how to mangle them. The 128-bit
19216 floating point mangling is target-specific as well. */
19218 static const char *
19219 rs6000_mangle_type (const_tree type)
19221 type = TYPE_MAIN_VARIANT (type);
19223 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19224 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19225 return NULL;
19227 if (type == bool_char_type_node) return "U6__boolc";
19228 if (type == bool_short_type_node) return "U6__bools";
19229 if (type == pixel_type_node) return "u7__pixel";
19230 if (type == bool_int_type_node) return "U6__booli";
19231 if (type == bool_long_long_type_node) return "U6__boolx";
19233 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19234 return "g";
19235 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19236 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19238 /* For all other types, use the default mangling. */
19239 return NULL;
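/* As an illustration of the table above, the element type of
   'vector bool int' mangles as "U6__booli", IBM-format 128-bit floats
   mangle as "g", and the IEEE format becomes "u9__ieee128" (or
   "U10__float128" when reproducing the GCC 8.1 mangling).  */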
19242 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19243 struct attribute_spec.handler. */
19245 static tree
19246 rs6000_handle_longcall_attribute (tree *node, tree name,
19247 tree args ATTRIBUTE_UNUSED,
19248 int flags ATTRIBUTE_UNUSED,
19249 bool *no_add_attrs)
19251 if (TREE_CODE (*node) != FUNCTION_TYPE
19252 && TREE_CODE (*node) != FIELD_DECL
19253 && TREE_CODE (*node) != TYPE_DECL)
19255 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19256 name);
19257 *no_add_attrs = true;
19260 return NULL_TREE;
19263 /* Set longcall attributes on all functions declared when
19264 rs6000_default_long_calls is true. */
19265 static void
19266 rs6000_set_default_type_attributes (tree type)
19268 if (rs6000_default_long_calls
19269 && (TREE_CODE (type) == FUNCTION_TYPE
19270 || TREE_CODE (type) == METHOD_TYPE))
19271 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19272 NULL_TREE,
19273 TYPE_ATTRIBUTES (type));
19275 #if TARGET_MACHO
19276 darwin_set_default_type_attributes (type);
19277 #endif
19280 /* Return a reference suitable for calling a function with the
19281 longcall attribute. */
19283 static rtx
19284 rs6000_longcall_ref (rtx call_ref, rtx arg)
19286 /* System V adds '.' to the internal name, so skip any leading dots. */
19287 const char *call_name = XSTR (call_ref, 0);
19288 if (*call_name == '.')
19290 while (*call_name == '.')
19291 call_name++;
19293 tree node = get_identifier (call_name);
19294 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19297 if (TARGET_PLTSEQ)
19299 rtx base = const0_rtx;
19300 int regno = 12;
19301 if (rs6000_pcrel_p (cfun))
19303 rtx reg = gen_rtx_REG (Pmode, regno);
19304 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19305 gen_rtvec (3, base, call_ref, arg),
19306 UNSPECV_PLT_PCREL);
19307 emit_insn (gen_rtx_SET (reg, u));
19308 return reg;
19311 if (DEFAULT_ABI == ABI_ELFv2)
19312 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19313 else
19315 if (flag_pic)
19316 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19317 regno = 11;
19319 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19320 may be used by a function global entry point. For SysV4, r11
19321 is used by __glink_PLTresolve lazy resolver entry. */
19322 rtx reg = gen_rtx_REG (Pmode, regno);
19323 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19324 UNSPEC_PLT16_HA);
19325 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
19326 gen_rtvec (3, reg, call_ref, arg),
19327 UNSPECV_PLT16_LO);
19328 emit_insn (gen_rtx_SET (reg, hi));
19329 emit_insn (gen_rtx_SET (reg, lo));
19330 return reg;
19333 return force_reg (Pmode, call_ref);
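/* Typical use (illustrative): for a call declared as

     extern void far_away (void) __attribute__ ((longcall));

   the callee address is materialized in a register (r12 for ELFv2, r11
   for SysV4 PLT stubs, per the comment above) rather than relying on
   the limited displacement of a direct branch.  */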
19336 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19337 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19338 #endif
19340 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19341 struct attribute_spec.handler. */
19342 static tree
19343 rs6000_handle_struct_attribute (tree *node, tree name,
19344 tree args ATTRIBUTE_UNUSED,
19345 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19347 tree *type = NULL;
19348 if (DECL_P (*node))
19350 if (TREE_CODE (*node) == TYPE_DECL)
19351 type = &TREE_TYPE (*node);
19353 else
19354 type = node;
19356 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19357 || TREE_CODE (*type) == UNION_TYPE)))
19359 warning (OPT_Wattributes, "%qE attribute ignored", name);
19360 *no_add_attrs = true;
19363 else if ((is_attribute_p ("ms_struct", name)
19364 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19365 || ((is_attribute_p ("gcc_struct", name)
19366 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19368 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19369 name);
19370 *no_add_attrs = true;
19373 return NULL_TREE;
19376 static bool
19377 rs6000_ms_bitfield_layout_p (const_tree record_type)
19379 return ((TARGET_USE_MS_BITFIELD_LAYOUT
19380 	 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19381 	|| lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
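/* Example usage (illustrative):

     struct __attribute__ ((ms_struct)) S { char c; int i : 9; };

   selects the Microsoft bit-field layout for S, while gcc_struct (or a
   target without TARGET_USE_MS_BITFIELD_LAYOUT) keeps the GCC layout.  */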
19384 #ifdef USING_ELFOS_H
19386 /* A get_unnamed_section callback, used for switching to toc_section. */
19388 static void
19389 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19391 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19392 && TARGET_MINIMAL_TOC)
19394 if (!toc_initialized)
19396 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19397 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19398 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19399 fprintf (asm_out_file, "\t.tc ");
19400 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19401 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19402 fprintf (asm_out_file, "\n");
19404 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19405 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19406 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19407 fprintf (asm_out_file, " = .+32768\n");
19408 toc_initialized = 1;
19410 else
19411 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19413 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19415 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19416 if (!toc_initialized)
19418 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19419 toc_initialized = 1;
19422 else
19424 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19425 if (!toc_initialized)
19427 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19428 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19429 fprintf (asm_out_file, " = .+32768\n");
19430 toc_initialized = 1;
19435 /* Implement TARGET_ASM_INIT_SECTIONS. */
19437 static void
19438 rs6000_elf_asm_init_sections (void)
19440 toc_section
19441 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19443 sdata2_section
19444 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19445 SDATA2_SECTION_ASM_OP);
19448 /* Implement TARGET_SELECT_RTX_SECTION. */
19450 static section *
19451 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19452 unsigned HOST_WIDE_INT align)
19454 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19455 return toc_section;
19456 else
19457 return default_elf_select_rtx_section (mode, x, align);
19460 /* For a SYMBOL_REF, set generic flags and then perform some
19461 target-specific processing.
19463 When the AIX ABI is requested on a non-AIX system, replace the
19464 function name with the real name (with a leading .) rather than the
19465 function descriptor name. This saves a lot of overriding code to
19466 read the prefixes. */
19468 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19469 static void
19470 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19472 default_encode_section_info (decl, rtl, first);
19474 if (first
19475 && TREE_CODE (decl) == FUNCTION_DECL
19476 && !TARGET_AIX
19477 && DEFAULT_ABI == ABI_AIX)
19479 rtx sym_ref = XEXP (rtl, 0);
19480 size_t len = strlen (XSTR (sym_ref, 0));
19481 char *str = XALLOCAVEC (char, len + 2);
19482 str[0] = '.';
19483 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19484 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19488 static inline bool
19489 compare_section_name (const char *section, const char *templ)
19491 int len;
19493 len = strlen (templ);
19494 return (strncmp (section, templ, len) == 0
19495 && (section[len] == 0 || section[len] == '.'));
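/* Thus compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") both match, while
   compare_section_name (".sdata2", ".sdata") does not, since '2' is
   neither a NUL nor a '.' after the prefix.  */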
19498 bool
19499 rs6000_elf_in_small_data_p (const_tree decl)
19501 if (rs6000_sdata == SDATA_NONE)
19502 return false;
19504 /* We want to merge strings, so we never consider them small data. */
19505 if (TREE_CODE (decl) == STRING_CST)
19506 return false;
19508 /* Functions are never in the small data area. */
19509 if (TREE_CODE (decl) == FUNCTION_DECL)
19510 return false;
19512 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19514 const char *section = DECL_SECTION_NAME (decl);
19515 if (compare_section_name (section, ".sdata")
19516 || compare_section_name (section, ".sdata2")
19517 || compare_section_name (section, ".gnu.linkonce.s")
19518 || compare_section_name (section, ".sbss")
19519 || compare_section_name (section, ".sbss2")
19520 || compare_section_name (section, ".gnu.linkonce.sb")
19521 || strcmp (section, ".PPC.EMB.sdata0") == 0
19522 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19523 return true;
19525 else
19527 /* If we are told not to put readonly data in sdata, then don't. */
19528 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19529 && !rs6000_readonly_in_sdata)
19530 return false;
19532 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19534 if (size > 0
19535 && size <= g_switch_value
19536 /* If it's not public, and we're not going to reference it there,
19537 there's no need to put it in the small data section. */
19538 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19539 return true;
19542 return false;
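/* For example (illustrative), with small data enabled and -G 8, a
   file-scope 'int counter;' with no explicit section attribute is
   small data (4 bytes <= g_switch_value), whereas a 64-byte array
   is not.  */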
19545 #endif /* USING_ELFOS_H */
19547 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19549 static bool
19550 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19552 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19555 /* Do not place thread-local symbols refs in the object blocks. */
19557 static bool
19558 rs6000_use_blocks_for_decl_p (const_tree decl)
19560 return !DECL_THREAD_LOCAL_P (decl);
19563 /* Return a REG that occurs in ADDR with coefficient 1.
19564 ADDR can be effectively incremented by incrementing REG.
19566 r0 is special and we must not select it as an address
19567 register by this routine since our caller will try to
19568 increment the returned register via an "la" instruction. */
19570 rtx
19571 find_addr_reg (rtx addr)
19573 while (GET_CODE (addr) == PLUS)
19575 if (REG_P (XEXP (addr, 0))
19576 && REGNO (XEXP (addr, 0)) != 0)
19577 addr = XEXP (addr, 0);
19578 else if (REG_P (XEXP (addr, 1))
19579 && REGNO (XEXP (addr, 1)) != 0)
19580 addr = XEXP (addr, 1);
19581 else if (CONSTANT_P (XEXP (addr, 0)))
19582 addr = XEXP (addr, 1);
19583 else if (CONSTANT_P (XEXP (addr, 1)))
19584 addr = XEXP (addr, 0);
19585 else
19586 gcc_unreachable ();
19588 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19589 return addr;
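/* E.g. for ADDR = (plus (reg 9) (const_int 16)) this returns (reg 9);
   r0 is rejected because "la" with a base of r0 would use the literal
   value zero rather than the register contents.  */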
19592 void
19593 rs6000_fatal_bad_address (rtx op)
19595 fatal_insn ("bad address", op);
19598 #if TARGET_MACHO
19600 vec<branch_island, va_gc> *branch_islands;
19602 /* Remember to generate a branch island for far calls to the given
19603 function. */
19605 static void
19606 add_compiler_branch_island (tree label_name, tree function_name,
19607 int line_number)
19609 branch_island bi = {function_name, label_name, line_number};
19610 vec_safe_push (branch_islands, bi);
19613 /* NO_PREVIOUS_DEF checks whether the function name is already in
19614    the branch island list. */
19616 static int
19617 no_previous_def (tree function_name)
19619 branch_island *bi;
19620 unsigned ix;
19622 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19623 if (function_name == bi->function_name)
19624 return 0;
19625 return 1;
19628 /* GET_PREV_LABEL gets the label name from the previous definition of
19629 the function. */
19631 static tree
19632 get_prev_label (tree function_name)
19634 branch_island *bi;
19635 unsigned ix;
19637 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19638 if (function_name == bi->function_name)
19639 return bi->label_name;
19640 return NULL_TREE;
19643 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19645 void
19646 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19648 unsigned int length;
19649 char *symbol_name, *lazy_ptr_name;
19650 char *local_label_0;
19651 static unsigned label = 0;
19653 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19654 symb = (*targetm.strip_name_encoding) (symb);
19656 length = strlen (symb);
19657 symbol_name = XALLOCAVEC (char, length + 32);
19658 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19660 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19661 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19663 if (MACHOPIC_PURE)
19665 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19666 fprintf (file, "\t.align 5\n");
19668 fprintf (file, "%s:\n", stub);
19669 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19671 label++;
19672 local_label_0 = XALLOCAVEC (char, 16);
19673 sprintf (local_label_0, "L%u$spb", label);
19675 fprintf (file, "\tmflr r0\n");
19676 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19677 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19678 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19679 lazy_ptr_name, local_label_0);
19680 fprintf (file, "\tmtlr r0\n");
19681 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19682 (TARGET_64BIT ? "ldu" : "lwzu"),
19683 lazy_ptr_name, local_label_0);
19684 fprintf (file, "\tmtctr r12\n");
19685 fprintf (file, "\tbctr\n");
19687 else /* mdynamic-no-pic or mkernel. */
19689 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19690 fprintf (file, "\t.align 4\n");
19692 fprintf (file, "%s:\n", stub);
19693 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19695 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19696 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19697 (TARGET_64BIT ? "ldu" : "lwzu"),
19698 lazy_ptr_name);
19699 fprintf (file, "\tmtctr r12\n");
19700 fprintf (file, "\tbctr\n");
19703 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19704 fprintf (file, "%s:\n", lazy_ptr_name);
19705 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19706 fprintf (file, "%sdyld_stub_binding_helper\n",
19707 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19710 /* Legitimize PIC addresses. If the address is already
19711 position-independent, we return ORIG. Newly generated
19712 position-independent addresses go into a reg. This is REG if
19713 nonzero; otherwise we allocate register(s) as necessary. */
19715 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
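/* SMALL_INT accepts exactly the signed 16-bit immediates: the biased test
   (UINTVAL (X) + 0x8000) < 0x10000 holds for -0x8000 through 0x7fff,
   i.e. the offsets that fit a D-form displacement.  */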
19717 rtx
19718 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19719 rtx reg)
19721 rtx base, offset;
19723 if (reg == NULL && !reload_completed)
19724 reg = gen_reg_rtx (Pmode);
19726 if (GET_CODE (orig) == CONST)
19728 rtx reg_temp;
19730 if (GET_CODE (XEXP (orig, 0)) == PLUS
19731 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19732 return orig;
19734 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19736 /* Use a different reg for the intermediate value, as
19737 it will be marked UNCHANGING. */
19738 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19739 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19740 Pmode, reg_temp);
19741 offset =
19742 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19743 Pmode, reg);
19745 if (CONST_INT_P (offset))
19747 if (SMALL_INT (offset))
19748 return plus_constant (Pmode, base, INTVAL (offset));
19749 else if (!reload_completed)
19750 offset = force_reg (Pmode, offset);
19751 else
19753 rtx mem = force_const_mem (Pmode, orig);
19754 return machopic_legitimize_pic_address (mem, Pmode, reg);
19757 return gen_rtx_PLUS (Pmode, base, offset);
19760 /* Fall back on generic machopic code. */
19761 return machopic_legitimize_pic_address (orig, mode, reg);
19764 /* Output a .machine directive for the Darwin assembler, and call
19765 the generic start_file routine. */
19767 static void
19768 rs6000_darwin_file_start (void)
19770 static const struct
19772 const char *arg;
19773 const char *name;
19774 HOST_WIDE_INT if_set;
19775 } mapping[] = {
19776 { "ppc64", "ppc64", MASK_64BIT },
19777 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19778 { "power4", "ppc970", 0 },
19779 { "G5", "ppc970", 0 },
19780 { "7450", "ppc7450", 0 },
19781 { "7400", "ppc7400", MASK_ALTIVEC },
19782 { "G4", "ppc7400", 0 },
19783 { "750", "ppc750", 0 },
19784 { "740", "ppc750", 0 },
19785 { "G3", "ppc750", 0 },
19786 { "604e", "ppc604e", 0 },
19787 { "604", "ppc604", 0 },
19788 { "603e", "ppc603", 0 },
19789 { "603", "ppc603", 0 },
19790 { "601", "ppc601", 0 },
19791 { NULL, "ppc", 0 } };
19792 const char *cpu_id = "";
19793 size_t i;
19795 rs6000_file_start ();
19796 darwin_file_start ();
19798 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19800 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19801 cpu_id = rs6000_default_cpu;
19803 if (global_options_set.x_rs6000_cpu_index)
19804 cpu_id = processor_target_table[rs6000_cpu_index].name;
19806 /* Look through the mapping array. Pick the first name that either
19807 matches the argument, has a bit set in IF_SET that is also set
19808 in the target flags, or has a NULL name. */
19810 i = 0;
19811 while (mapping[i].arg != NULL
19812 && strcmp (mapping[i].arg, cpu_id) != 0
19813 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19814 i++;
19816 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
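/* E.g. (illustrative) -mcpu=G4 matches the "G4" row above and emits
   ".machine ppc7400", while an unmatched CPU compiled with MASK_64BIT
   set still selects "ppc64" through the IF_SET test.  */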
19819 #endif /* TARGET_MACHO */
19821 #if TARGET_ELF
19822 static int
19823 rs6000_elf_reloc_rw_mask (void)
19825 if (flag_pic)
19826 return 3;
19827 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19828 return 2;
19829 else
19830 return 0;
19833 /* Record an element in the table of global constructors. SYMBOL is
19834 a SYMBOL_REF of the function to be called; PRIORITY is a number
19835 between 0 and MAX_INIT_PRIORITY.
19837 This differs from default_named_section_asm_out_constructor in
19838 that we have special handling for -mrelocatable. */
19840 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19841 static void
19842 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19844 const char *section = ".ctors";
19845 char buf[18];
19847 if (priority != DEFAULT_INIT_PRIORITY)
19849 sprintf (buf, ".ctors.%.5u",
19850 /* Invert the numbering so the linker puts us in the proper
19851 order; constructors are run from right to left, and the
19852 linker sorts in increasing order. */
19853 MAX_INIT_PRIORITY - priority);
19854 section = buf;
19857 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19858 assemble_align (POINTER_SIZE);
19860 if (DEFAULT_ABI == ABI_V4
19861 && (TARGET_RELOCATABLE || flag_pic > 1))
19863 fputs ("\t.long (", asm_out_file);
19864 output_addr_const (asm_out_file, symbol);
19865 fputs (")@fixup\n", asm_out_file);
19867 else
19868 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
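/* With MAX_INIT_PRIORITY of 65535 (illustrative), a constructor of
   priority 1000 therefore lands in section ".ctors.64535", and the
   linker's increasing sort reproduces the required right-to-left run
   order.  */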
19871 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19872 static void
19873 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19875 const char *section = ".dtors";
19876 char buf[18];
19878 if (priority != DEFAULT_INIT_PRIORITY)
19880 sprintf (buf, ".dtors.%.5u",
19881 /* Invert the numbering so the linker puts us in the proper
19882 order; constructors are run from right to left, and the
19883 linker sorts in increasing order. */
19884 MAX_INIT_PRIORITY - priority);
19885 section = buf;
19888 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19889 assemble_align (POINTER_SIZE);
19891 if (DEFAULT_ABI == ABI_V4
19892 && (TARGET_RELOCATABLE || flag_pic > 1))
19894 fputs ("\t.long (", asm_out_file);
19895 output_addr_const (asm_out_file, symbol);
19896 fputs (")@fixup\n", asm_out_file);
19898 else
19899 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19902 void
19903 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19905 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19907 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19908 ASM_OUTPUT_LABEL (file, name);
19909 fputs (DOUBLE_INT_ASM_OP, file);
19910 rs6000_output_function_entry (file, name);
19911 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19912 if (DOT_SYMBOLS)
19914 fputs ("\t.size\t", file);
19915 assemble_name (file, name);
19916 fputs (",24\n\t.type\t.", file);
19917 assemble_name (file, name);
19918 fputs (",@function\n", file);
19919 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19921 fputs ("\t.globl\t.", file);
19922 assemble_name (file, name);
19923 putc ('\n', file);
19926 else
19927 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19928 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19929 rs6000_output_function_entry (file, name);
19930 fputs (":\n", file);
19931 return;
19934 int uses_toc;
19935 if (DEFAULT_ABI == ABI_V4
19936 && (TARGET_RELOCATABLE || flag_pic > 1)
19937 && !TARGET_SECURE_PLT
19938 && (!constant_pool_empty_p () || crtl->profile)
19939 && (uses_toc = uses_TOC ()))
19941 char buf[256];
19943 if (uses_toc == 2)
19944 switch_to_other_text_partition ();
19945 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19947 fprintf (file, "\t.long ");
19948 assemble_name (file, toc_label_name);
19949 need_toc_init = 1;
19950 putc ('-', file);
19951 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19952 assemble_name (file, buf);
19953 putc ('\n', file);
19954 if (uses_toc == 2)
19955 switch_to_other_text_partition ();
19958 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19959 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19961 if (TARGET_CMODEL == CMODEL_LARGE
19962 && rs6000_global_entry_point_prologue_needed_p ())
19964 char buf[256];
19966 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19968 fprintf (file, "\t.quad .TOC.-");
19969 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19970 assemble_name (file, buf);
19971 putc ('\n', file);
19974 if (DEFAULT_ABI == ABI_AIX)
19976 const char *desc_name, *orig_name;
19978 orig_name = (*targetm.strip_name_encoding) (name);
19979 desc_name = orig_name;
19980 while (*desc_name == '.')
19981 desc_name++;
19983 if (TREE_PUBLIC (decl))
19984 fprintf (file, "\t.globl %s\n", desc_name);
19986 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19987 fprintf (file, "%s:\n", desc_name);
19988 fprintf (file, "\t.long %s\n", orig_name);
19989 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19990 fputs ("\t.long 0\n", file);
19991 fprintf (file, "\t.previous\n");
19993 ASM_OUTPUT_LABEL (file, name);
19996 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19997 static void
19998 rs6000_elf_file_end (void)
20000 #ifdef HAVE_AS_GNU_ATTRIBUTE
20001 /* ??? The value emitted depends on options active at file end.
20002 Assume anyone using #pragma or attributes that might change
20003 options knows what they are doing. */
20004 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20005 && rs6000_passes_float)
20007 int fp;
20009 if (TARGET_HARD_FLOAT)
20010 fp = 1;
20011 else
20012 fp = 2;
20013 if (rs6000_passes_long_double)
20015 if (!TARGET_LONG_DOUBLE_128)
20016 fp |= 2 * 4;
20017 else if (TARGET_IEEEQUAD)
20018 fp |= 3 * 4;
20019 else
20020 fp |= 1 * 4;
20022 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20024 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20026 if (rs6000_passes_vector)
20027 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20028 (TARGET_ALTIVEC_ABI ? 2 : 1));
20029 if (rs6000_returns_struct)
20030 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20031 aix_struct_return ? 2 : 1);
20033 #endif
20034 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20035 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20036 file_end_indicate_exec_stack ();
20037 #endif
20039 if (flag_split_stack)
20040 file_end_indicate_split_stack ();
20042 if (cpu_builtin_p)
20044 /* We have expanded a CPU builtin, so we need to emit a reference to
20045    the special symbol that LIBC uses to declare that it supports the
20046    AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
20047 switch_to_section (data_section);
20048 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20049 fprintf (asm_out_file, "\t%s %s\n",
20050 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20053 #endif
20055 #if TARGET_XCOFF
20057 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20058 #define HAVE_XCOFF_DWARF_EXTRAS 0
20059 #endif
20061 static enum unwind_info_type
20062 rs6000_xcoff_debug_unwind_info (void)
20064 return UI_NONE;
20067 static void
20068 rs6000_xcoff_asm_output_anchor (rtx symbol)
20070 char buffer[100];
20072 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20073 SYMBOL_REF_BLOCK_OFFSET (symbol));
20074 fprintf (asm_out_file, "%s", SET_ASM_OP);
20075 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20076 fprintf (asm_out_file, ",");
20077 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20078 fprintf (asm_out_file, "\n");
20081 static void
20082 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20084 fputs (GLOBAL_ASM_OP, stream);
20085 RS6000_OUTPUT_BASENAME (stream, name);
20086 putc ('\n', stream);
20089 /* A get_unnamed_section callback, used for read-only sections.
20090    DIRECTIVE points to the section string variable. */
20092 static void
20093 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20095 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20096 *(const char *const *) directive,
20097 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20100 /* Likewise for read-write sections. */
20102 static void
20103 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20105 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20106 *(const char *const *) directive,
20107 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20110 static void
20111 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20113 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20114 *(const char *const *) directive,
20115 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20118 /* A get_unnamed_section callback, used for switching to toc_section. */
20120 static void
20121 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20123 if (TARGET_MINIMAL_TOC)
20125 /* toc_section is always selected at least once from
20126    rs6000_xcoff_file_start, so this is guaranteed to be defined
20127    exactly once in each file. */
20128 if (!toc_initialized)
20130 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20131 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20132 toc_initialized = 1;
20134 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20135 (TARGET_32BIT ? "" : ",3"));
20137 else
20138 fputs ("\t.toc\n", asm_out_file);
20141 /* Implement TARGET_ASM_INIT_SECTIONS. */
20143 static void
20144 rs6000_xcoff_asm_init_sections (void)
20146 read_only_data_section
20147 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20148 &xcoff_read_only_section_name);
20150 private_data_section
20151 = get_unnamed_section (SECTION_WRITE,
20152 rs6000_xcoff_output_readwrite_section_asm_op,
20153 &xcoff_private_data_section_name);
20155 read_only_private_data_section
20156 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20157 &xcoff_private_rodata_section_name);
20159 tls_data_section
20160 = get_unnamed_section (SECTION_TLS,
20161 rs6000_xcoff_output_tls_section_asm_op,
20162 &xcoff_tls_data_section_name);
20164 tls_private_data_section
20165 = get_unnamed_section (SECTION_TLS,
20166 rs6000_xcoff_output_tls_section_asm_op,
20167 &xcoff_private_data_section_name);
20169 toc_section
20170 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20172 readonly_data_section = read_only_data_section;
20175 static int
20176 rs6000_xcoff_reloc_rw_mask (void)
20178 return 3;
20181 static void
20182 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20183 tree decl ATTRIBUTE_UNUSED)
20185 int smclass;
20186 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20188 if (flags & SECTION_EXCLUDE)
20189 smclass = 4;
20190 else if (flags & SECTION_DEBUG)
20192 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20193 return;
20195 else if (flags & SECTION_CODE)
20196 smclass = 0;
20197 else if (flags & SECTION_TLS)
20198 smclass = 3;
20199 else if (flags & SECTION_WRITE)
20200 smclass = 2;
20201 else
20202 smclass = 1;
20204 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20205 (flags & SECTION_CODE) ? "." : "",
20206 name, suffix[smclass], flags & SECTION_ENTSIZE);
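/* Sample output (illustrative): a writable section "mydata" whose flags
   encode an alignment of 3 produces ".csect mydata[RW],3", and a code
   section gains a leading dot, e.g. ".csect .hot[PR],2".  */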
20209 #define IN_NAMED_SECTION(DECL) \
20210 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20211 && DECL_SECTION_NAME (DECL) != NULL)
20213 static section *
20214 rs6000_xcoff_select_section (tree decl, int reloc,
20215 unsigned HOST_WIDE_INT align)
20217 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20218    a named section. */
20219 if (align > BIGGEST_ALIGNMENT)
20221 resolve_unique_section (decl, reloc, true);
20222 if (IN_NAMED_SECTION (decl))
20223 return get_named_section (decl, NULL, reloc);
20226 if (decl_readonly_section (decl, reloc))
20228 if (TREE_PUBLIC (decl))
20229 return read_only_data_section;
20230 else
20231 return read_only_private_data_section;
20233 else
20235 #if HAVE_AS_TLS
20236 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20238 if (TREE_PUBLIC (decl))
20239 return tls_data_section;
20240 else if (bss_initializer_p (decl))
20242 /* Convert to COMMON to emit in BSS. */
20243 DECL_COMMON (decl) = 1;
20244 return tls_comm_section;
20246 else
20247 return tls_private_data_section;
20249 else
20250 #endif
20251 if (TREE_PUBLIC (decl))
20252 return data_section;
20253 else
20254 return private_data_section;
20258 static void
20259 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20261 const char *name;
20263 /* Use select_section for private data and uninitialized data with
20264 alignment <= BIGGEST_ALIGNMENT. */
20265 if (!TREE_PUBLIC (decl)
20266 || DECL_COMMON (decl)
20267 || (DECL_INITIAL (decl) == NULL_TREE
20268 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20269 || DECL_INITIAL (decl) == error_mark_node
20270 || (flag_zero_initialized_in_bss
20271 && initializer_zerop (DECL_INITIAL (decl))))
20272 return;
20274 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20275 name = (*targetm.strip_name_encoding) (name);
20276 set_decl_section_name (decl, name);
20279 /* Select the section for a constant in the constant pool.
20281    On RS/6000, all constants are in the private read-only data area.
20282    However, if the constant is being placed in the TOC, it must be
20283    output as a TOC entry. */
20285 static section *
20286 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20287 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20289 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20290 return toc_section;
20291 else
20292 return read_only_private_data_section;
20295 /* Remove any trailing [DS] or the like from the symbol name. */
20297 static const char *
20298 rs6000_xcoff_strip_name_encoding (const char *name)
20300 size_t len;
20301 if (*name == '*')
20302 name++;
20303 len = strlen (name);
20304 if (name[len - 1] == ']')
20305 return ggc_alloc_string (name, len - 4);
20306 else
20307 return name;
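/* E.g. "*foo[DS]" and "foo[DS]" both strip to "foo"; a name with no
   trailing mapping class is returned unchanged.  */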
20310 /* Section attributes. AIX is always PIC. */
20312 static unsigned int
20313 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20315 unsigned int align;
20316 unsigned int flags = default_section_type_flags (decl, name, reloc);
20318 /* Align to at least UNIT size. */
20319 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20320 align = MIN_UNITS_PER_WORD;
20321 else
20322 /* Increase alignment of large objects if not already stricter. */
20323 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20324 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20325 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20327 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20330 /* Output at beginning of assembler file.
20332 Initialize the section names for the RS/6000 at this point.
20334 Specify filename, including full path, to assembler.
20336 We want to go into the TOC section so at least one .toc will be emitted.
20337 Also, in order to output proper .bs/.es pairs, we need at least one static
20338 [RW] section emitted.
20340 Finally, declare mcount when profiling to make the assembler happy. */
20342 static void
20343 rs6000_xcoff_file_start (void)
20345 rs6000_gen_section_name (&xcoff_bss_section_name,
20346 main_input_filename, ".bss_");
20347 rs6000_gen_section_name (&xcoff_private_data_section_name,
20348 main_input_filename, ".rw_");
20349 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20350 main_input_filename, ".rop_");
20351 rs6000_gen_section_name (&xcoff_read_only_section_name,
20352 main_input_filename, ".ro_");
20353 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20354 main_input_filename, ".tls_");
20355 rs6000_gen_section_name (&xcoff_tbss_section_name,
20356 main_input_filename, ".tbss_[UL]");
20358 fputs ("\t.file\t", asm_out_file);
20359 output_quoted_string (asm_out_file, main_input_filename);
20360 fputc ('\n', asm_out_file);
20361 if (write_symbols != NO_DEBUG)
20362 switch_to_section (private_data_section);
20363 switch_to_section (toc_section);
20364 switch_to_section (text_section);
20365 if (profile_flag)
20366 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20367 rs6000_file_start ();
20370 /* Output at end of assembler file.
20371 On the RS/6000, referencing data should automatically pull in text. */
20373 static void
20374 rs6000_xcoff_file_end (void)
20376 switch_to_section (text_section);
20377 fputs ("_section_.text:\n", asm_out_file);
20378 switch_to_section (data_section);
20379 fputs (TARGET_32BIT
20380 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20381 asm_out_file);
20384 struct declare_alias_data
20386 FILE *file;
20387 bool function_descriptor;
20390 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
20392 static bool
20393 rs6000_declare_alias (struct symtab_node *n, void *d)
20395 struct declare_alias_data *data = (struct declare_alias_data *)d;
20396 /* The main symbol is output specially, because the varasm machinery does
20397    part of the job for us; we do not need to declare .globl/.lglobl and such. */
20398 if (!n->alias || n->weakref)
20399 return false;
20401 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20402 return false;
20404 /* Prevent assemble_alias from trying to use the .set pseudo-op,
20405    which does not behave as the middle-end expects. */
20406 TREE_ASM_WRITTEN (n->decl) = true;
20408 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20409 char *buffer = (char *) alloca (strlen (name) + 2);
20410 char *p;
20411 int dollar_inside = 0;
20413 strcpy (buffer, name);
20414 p = strchr (buffer, '$');
20415 while (p) {
20416 *p = '_';
20417 dollar_inside++;
20418 p = strchr (p + 1, '$');
20420 if (TREE_PUBLIC (n->decl))
20422 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20424 if (dollar_inside) {
20425 if (data->function_descriptor)
20426 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20427 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20429 if (data->function_descriptor)
20431 fputs ("\t.globl .", data->file);
20432 RS6000_OUTPUT_BASENAME (data->file, buffer);
20433 putc ('\n', data->file);
20435 fputs ("\t.globl ", data->file);
20436 RS6000_OUTPUT_BASENAME (data->file, buffer);
20437 putc ('\n', data->file);
20439 #ifdef ASM_WEAKEN_DECL
20440 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20441 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20442 #endif
20444 else
20446 if (dollar_inside)
20448 if (data->function_descriptor)
20449 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20450 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20452 if (data->function_descriptor)
20454 fputs ("\t.lglobl .", data->file);
20455 RS6000_OUTPUT_BASENAME (data->file, buffer);
20456 putc ('\n', data->file);
20458 fputs ("\t.lglobl ", data->file);
20459 RS6000_OUTPUT_BASENAME (data->file, buffer);
20460 putc ('\n', data->file);
20462 if (data->function_descriptor)
20463 fputs (".", data->file);
20464 RS6000_OUTPUT_BASENAME (data->file, buffer);
20465 fputs (":\n", data->file);
20466 return false;
20470 #ifdef HAVE_GAS_HIDDEN
20471 /* Helper function to calculate visibility of a DECL
20472 and return the value as a const string. */
20474 static const char *
20475 rs6000_xcoff_visibility (tree decl)
20477 static const char * const visibility_types[] = {
20478 "", ",protected", ",hidden", ",internal"
20481 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20482 return visibility_types[vis];
20484 #endif
20487 /* This macro produces the initial definition of a function name.
20488 On the RS/6000, we need to place an extra '.' in the function name and
20489 output the function descriptor.
20490 Dollar signs are converted to underscores.
20492 The csect for the function will have already been created when
20493 text_section was selected. We do have to go back to that csect, however.
20495 The third and fourth parameters to the .function pseudo-op (16 and 044)
20496 are placeholders which no longer have any use.
20498 Because the AIX assembler's .set command has unexpected semantics, we output
20499 all aliases as alternative labels in front of the definition. */
20501 void
20502 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20504 char *buffer = (char *) alloca (strlen (name) + 1);
20505 char *p;
20506 int dollar_inside = 0;
20507 struct declare_alias_data data = {file, false};
20509 strcpy (buffer, name);
20510 p = strchr (buffer, '$');
20511 while (p) {
20512 *p = '_';
20513 dollar_inside++;
20514 p = strchr (p + 1, '$');
20516 if (TREE_PUBLIC (decl))
20518 if (!RS6000_WEAK || !DECL_WEAK (decl))
20520 if (dollar_inside) {
20521 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20522 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20524 fputs ("\t.globl .", file);
20525 RS6000_OUTPUT_BASENAME (file, buffer);
20526 #ifdef HAVE_GAS_HIDDEN
20527 fputs (rs6000_xcoff_visibility (decl), file);
20528 #endif
20529 putc ('\n', file);
20532 else
20534 if (dollar_inside) {
20535 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20536 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20538 fputs ("\t.lglobl .", file);
20539 RS6000_OUTPUT_BASENAME (file, buffer);
20540 putc ('\n', file);
20542 fputs ("\t.csect ", file);
20543 RS6000_OUTPUT_BASENAME (file, buffer);
20544 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20545 RS6000_OUTPUT_BASENAME (file, buffer);
20546 fputs (":\n", file);
20547 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20548 &data, true);
20549 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20550 RS6000_OUTPUT_BASENAME (file, buffer);
20551 fputs (", TOC[tc0], 0\n", file);
20552 in_section = NULL;
20553 switch_to_section (function_section (decl));
20554 putc ('.', file);
20555 RS6000_OUTPUT_BASENAME (file, buffer);
20556 fputs (":\n", file);
20557 data.function_descriptor = true;
20558 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20559 &data, true);
20560 if (!DECL_IGNORED_P (decl))
20562 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20563 xcoffout_declare_function (file, decl, buffer);
20564 else if (write_symbols == DWARF2_DEBUG)
20566 name = (*targetm.strip_name_encoding) (name);
20567 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20570 return;
20574 /* Output assembly language to globalize a symbol from a DECL,
20575 possibly with visibility. */
20577 void
20578 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20580 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20581 fputs (GLOBAL_ASM_OP, stream);
20582 RS6000_OUTPUT_BASENAME (stream, name);
20583 #ifdef HAVE_GAS_HIDDEN
20584 fputs (rs6000_xcoff_visibility (decl), stream);
20585 #endif
20586 putc ('\n', stream);
20589 /* Output assembly language to define a symbol as COMMON from a DECL,
20590 possibly with visibility. */
20592 void
20593 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20594 tree decl ATTRIBUTE_UNUSED,
20595 const char *name,
20596 unsigned HOST_WIDE_INT size,
20597 unsigned HOST_WIDE_INT align)
20599 unsigned HOST_WIDE_INT align2 = 2;
20601 if (align > 32)
20602 align2 = floor_log2 (align / BITS_PER_UNIT);
20603 else if (size > 4)
20604 align2 = 3;
20606 fputs (COMMON_ASM_OP, stream);
20607 RS6000_OUTPUT_BASENAME (stream, name);
20609 fprintf (stream,
20610 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20611 size, align2);
20613 #ifdef HAVE_GAS_HIDDEN
20614 if (decl != NULL)
20615 fputs (rs6000_xcoff_visibility (decl), stream);
20616 #endif
20617 putc ('\n', stream);
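/* For instance (illustrative), an 8-byte object aligned to 64 bits is
   emitted as "<name>,8,3": ALIGN is in bits, so 64 / BITS_PER_UNIT is
   8 bytes, and floor_log2 of that gives the log2 alignment 3.  */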
20620 /* This macro produces the initial definition of an object (variable) name.
20621    Because the AIX assembler's .set command has unexpected semantics, we output
20622 all aliases as alternative labels in front of the definition. */
20624 void
20625 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20627 struct declare_alias_data data = {file, false};
20628 RS6000_OUTPUT_BASENAME (file, name);
20629 fputs (":\n", file);
20630 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20631 &data, true);
20634 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
20636 void
20637 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20639 fputs (integer_asm_op (size, FALSE), file);
20640 assemble_name (file, label);
20641 fputs ("-$", file);
20644 /* Output a symbol offset relative to the dbase for the current object.
20645 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20646 signed offsets.
20648 __gcc_unwind_dbase is embedded in all executables/libraries through
20649 libgcc/config/rs6000/crtdbase.S. */
20651 void
20652 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20654 fputs (integer_asm_op (size, FALSE), file);
20655 assemble_name (file, label);
20656 fputs("-__gcc_unwind_dbase", file);
20659 #ifdef HAVE_AS_TLS
20660 static void
20661 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20663 rtx symbol;
20664 int flags;
20665 const char *symname;
20667 default_encode_section_info (decl, rtl, first);
20669 /* Careful not to prod global register variables. */
20670 if (!MEM_P (rtl))
20671 return;
20672 symbol = XEXP (rtl, 0);
20673 if (!SYMBOL_REF_P (symbol))
20674 return;
20676 flags = SYMBOL_REF_FLAGS (symbol);
20678 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20679 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20681 SYMBOL_REF_FLAGS (symbol) = flags;
20683 /* Append mapping class to extern decls. */
20684 symname = XSTR (symbol, 0);
20685 if (decl /* sync condition with assemble_external () */
20686 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20687 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20688 || TREE_CODE (decl) == FUNCTION_DECL)
20689 && symname[strlen (symname) - 1] != ']')
20691 char *newname = (char *) alloca (strlen (symname) + 5);
20692 strcpy (newname, symname);
20693 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20694 ? "[DS]" : "[UA]"));
20695 XSTR (symbol, 0) = ggc_strdup (newname);
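/* So (illustrative) an extern function "bar" is referenced as "bar[DS]"
   and an extern variable "v" as "v[UA]", encodings which
   rs6000_xcoff_strip_name_encoding removes again.  */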
20698 #endif /* HAVE_AS_TLS */
20699 #endif /* TARGET_XCOFF */
20701 void
20702 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20703 const char *name, const char *val)
20705 fputs ("\t.weak\t", stream);
20706 RS6000_OUTPUT_BASENAME (stream, name);
20707 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20708 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20710 if (TARGET_XCOFF)
20711 fputs ("[DS]", stream);
20712 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20713 if (TARGET_XCOFF)
20714 fputs (rs6000_xcoff_visibility (decl), stream);
20715 #endif
20716 fputs ("\n\t.weak\t.", stream);
20717 RS6000_OUTPUT_BASENAME (stream, name);
20719 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20720 if (TARGET_XCOFF)
20721 fputs (rs6000_xcoff_visibility (decl), stream);
20722 #endif
20723 fputc ('\n', stream);
20724 if (val)
20726 #ifdef ASM_OUTPUT_DEF
20727 ASM_OUTPUT_DEF (stream, name, val);
20728 #endif
20729 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20730 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20732 fputs ("\t.set\t.", stream);
20733 RS6000_OUTPUT_BASENAME (stream, name);
20734 fputs (",.", stream);
20735 RS6000_OUTPUT_BASENAME (stream, val);
20736 fputc ('\n', stream);
20742 /* Return true if INSN should not be copied. */
20744 static bool
20745 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20747 return recog_memoized (insn) >= 0
20748 && get_attr_cannot_copy (insn);
20751 /* Compute a (partial) cost for rtx X. Return true if the complete
20752 cost has been computed, and false if subexpressions should be
20753 scanned. In either case, *TOTAL contains the cost result. */
20755 static bool
20756 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20757 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20759 int code = GET_CODE (x);
20761 switch (code)
20763 /* On the RS/6000, if it is valid in the insn, it is free. */
20764 case CONST_INT:
20765 if (((outer_code == SET
20766 || outer_code == PLUS
20767 || outer_code == MINUS)
20768 && (satisfies_constraint_I (x)
20769 || satisfies_constraint_L (x)))
20770 || (outer_code == AND
20771 && (satisfies_constraint_K (x)
20772 || (mode == SImode
20773 ? satisfies_constraint_L (x)
20774 : satisfies_constraint_J (x))))
20775 || ((outer_code == IOR || outer_code == XOR)
20776 && (satisfies_constraint_K (x)
20777 || (mode == SImode
20778 ? satisfies_constraint_L (x)
20779 : satisfies_constraint_J (x))))
20780 || outer_code == ASHIFT
20781 || outer_code == ASHIFTRT
20782 || outer_code == LSHIFTRT
20783 || outer_code == ROTATE
20784 || outer_code == ROTATERT
20785 || outer_code == ZERO_EXTRACT
20786 || (outer_code == MULT
20787 && satisfies_constraint_I (x))
20788 || ((outer_code == DIV || outer_code == UDIV
20789 || outer_code == MOD || outer_code == UMOD)
20790 && exact_log2 (INTVAL (x)) >= 0)
20791 || (outer_code == COMPARE
20792 && (satisfies_constraint_I (x)
20793 || satisfies_constraint_K (x)))
20794 || ((outer_code == EQ || outer_code == NE)
20795 && (satisfies_constraint_I (x)
20796 || satisfies_constraint_K (x)
20797 || (mode == SImode
20798 ? satisfies_constraint_L (x)
20799 : satisfies_constraint_J (x))))
20800 || (outer_code == GTU
20801 && satisfies_constraint_I (x))
20802 || (outer_code == LTU
20803 && satisfies_constraint_P (x)))
20805 *total = 0;
20806 return true;
20808 else if ((outer_code == PLUS
20809 && reg_or_add_cint_operand (x, VOIDmode))
20810 || (outer_code == MINUS
20811 && reg_or_sub_cint_operand (x, VOIDmode))
20812 || ((outer_code == SET
20813 || outer_code == IOR
20814 || outer_code == XOR)
20815 && (INTVAL (x)
20816 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20818 *total = COSTS_N_INSNS (1);
20819 return true;
20821 /* FALLTHRU */
20823 case CONST_DOUBLE:
20824 case CONST_WIDE_INT:
20825 case CONST:
20826 case HIGH:
20827 case SYMBOL_REF:
20828 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20829 return true;
20831 case MEM:
20832 /* When optimizing for size, MEM should be slightly more expensive
20833    than generating an address, e.g., (plus (reg) (const)).
20834    L1 cache latency is about two instructions. */
20835 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20836 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20837 *total += COSTS_N_INSNS (100);
20838 return true;
20840 case LABEL_REF:
20841 *total = 0;
20842 return true;
20844 case PLUS:
20845 case MINUS:
20846 if (FLOAT_MODE_P (mode))
20847 *total = rs6000_cost->fp;
20848 else
20849 *total = COSTS_N_INSNS (1);
20850 return false;
20852 case MULT:
20853 if (CONST_INT_P (XEXP (x, 1))
20854 && satisfies_constraint_I (XEXP (x, 1)))
20856 if (INTVAL (XEXP (x, 1)) >= -256
20857 && INTVAL (XEXP (x, 1)) <= 255)
20858 *total = rs6000_cost->mulsi_const9;
20859 else
20860 *total = rs6000_cost->mulsi_const;
20862 else if (mode == SFmode)
20863 *total = rs6000_cost->fp;
20864 else if (FLOAT_MODE_P (mode))
20865 *total = rs6000_cost->dmul;
20866 else if (mode == DImode)
20867 *total = rs6000_cost->muldi;
20868 else
20869 *total = rs6000_cost->mulsi;
20870 return false;
20872 case FMA:
20873 if (mode == SFmode)
20874 *total = rs6000_cost->fp;
20875 else
20876 *total = rs6000_cost->dmul;
20877 break;
20879 case DIV:
20880 case MOD:
20881 if (FLOAT_MODE_P (mode))
20883 *total = mode == DFmode ? rs6000_cost->ddiv
20884 : rs6000_cost->sdiv;
20885 return false;
20887 /* FALLTHRU */
20889 case UDIV:
20890 case UMOD:
20891 if (CONST_INT_P (XEXP (x, 1))
20892 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20894 if (code == DIV || code == MOD)
20895 /* Shift, addze */
20896 *total = COSTS_N_INSNS (2);
20897 else
20898 /* Shift */
20899 *total = COSTS_N_INSNS (1);
20901 else
20903 if (GET_MODE (XEXP (x, 1)) == DImode)
20904 *total = rs6000_cost->divdi;
20905 else
20906 *total = rs6000_cost->divsi;
20908 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20909 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20910 *total += COSTS_N_INSNS (2);
20911 return false;
20913 case CTZ:
20914 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20915 return false;
20917 case FFS:
20918 *total = COSTS_N_INSNS (4);
20919 return false;
20921 case POPCOUNT:
20922 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20923 return false;
20925 case PARITY:
20926 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20927 return false;
20929 case NOT:
20930 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20931 *total = 0;
20932 else
20933 *total = COSTS_N_INSNS (1);
20934 return false;
20936 case AND:
20937 if (CONST_INT_P (XEXP (x, 1)))
20939 rtx left = XEXP (x, 0);
20940 rtx_code left_code = GET_CODE (left);
20942 /* rotate-and-mask: 1 insn. */
20943 if ((left_code == ROTATE
20944 || left_code == ASHIFT
20945 || left_code == LSHIFTRT)
20946 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20948 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20949 if (!CONST_INT_P (XEXP (left, 1)))
20950 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20951 *total += COSTS_N_INSNS (1);
20952 return true;
20955 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20956 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20957 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20958 || (val & 0xffff) == val
20959 || (val & 0xffff0000) == val
20960 || ((val & 0xffff) == 0 && mode == SImode))
20962 *total = rtx_cost (left, mode, AND, 0, speed);
20963 *total += COSTS_N_INSNS (1);
20964 return true;
20967 /* 2 insns. */
20968 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20970 *total = rtx_cost (left, mode, AND, 0, speed);
20971 *total += COSTS_N_INSNS (2);
20972 return true;
20976 *total = COSTS_N_INSNS (1);
20977 return false;
20979 case IOR:
20980 /* FIXME */
20981 *total = COSTS_N_INSNS (1);
20982 return true;
20984 case CLZ:
20985 case XOR:
20986 case ZERO_EXTRACT:
20987 *total = COSTS_N_INSNS (1);
20988 return false;
20990 case ASHIFT:
20991 /* The EXTSWSLI instruction is a combined instruction. Don't count both
20992 the sign extend and shift separately within the insn. */
20993 if (TARGET_EXTSWSLI && mode == DImode
20994 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20995 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20997 *total = 0;
20998 return false;
21000 /* fall through */
21002 case ASHIFTRT:
21003 case LSHIFTRT:
21004 case ROTATE:
21005 case ROTATERT:
21006 /* Handle mul_highpart. */
21007 if (outer_code == TRUNCATE
21008 && GET_CODE (XEXP (x, 0)) == MULT)
21010 if (mode == DImode)
21011 *total = rs6000_cost->muldi;
21012 else
21013 *total = rs6000_cost->mulsi;
21014 return true;
21016 else if (outer_code == AND)
21017 *total = 0;
21018 else
21019 *total = COSTS_N_INSNS (1);
21020 return false;
21022 case SIGN_EXTEND:
21023 case ZERO_EXTEND:
21024 if (MEM_P (XEXP (x, 0)))
21025 *total = 0;
21026 else
21027 *total = COSTS_N_INSNS (1);
21028 return false;
21030 case COMPARE:
21031 case NEG:
21032 case ABS:
21033 if (!FLOAT_MODE_P (mode))
21035 *total = COSTS_N_INSNS (1);
21036 return false;
21038 /* FALLTHRU */
21040 case FLOAT:
21041 case UNSIGNED_FLOAT:
21042 case FIX:
21043 case UNSIGNED_FIX:
21044 case FLOAT_TRUNCATE:
21045 *total = rs6000_cost->fp;
21046 return false;
21048 case FLOAT_EXTEND:
21049 if (mode == DFmode)
21050 *total = rs6000_cost->sfdf_convert;
21051 else
21052 *total = rs6000_cost->fp;
21053 return false;
21055 case CALL:
21056 case IF_THEN_ELSE:
21057 if (!speed)
21059 *total = COSTS_N_INSNS (1);
21060 return true;
21062 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21064 *total = rs6000_cost->fp;
21065 return false;
21067 break;
21069 case NE:
21070 case EQ:
21071 case GTU:
21072 case LTU:
21073 /* Carry bit requires mode == Pmode.
21074 NEG or PLUS already counted so only add one. */
21075 if (mode == Pmode
21076 && (outer_code == NEG || outer_code == PLUS))
21078 *total = COSTS_N_INSNS (1);
21079 return true;
21081 /* FALLTHRU */
21083 case GT:
21084 case LT:
21085 case UNORDERED:
21086 if (outer_code == SET)
21088 if (XEXP (x, 1) == const0_rtx)
21090 *total = COSTS_N_INSNS (2);
21091 return true;
21093 else
21095 *total = COSTS_N_INSNS (3);
21096 return false;
21099 /* CC COMPARE. */
21100 if (outer_code == COMPARE)
21102 *total = 0;
21103 return true;
21105 break;
21107 default:
21108 break;
21111 return false;
21114 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21116 static bool
21117 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21118 int opno, int *total, bool speed)
21120 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21122 fprintf (stderr,
21123 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21124 "opno = %d, total = %d, speed = %s, x:\n",
21125 ret ? "complete" : "scan inner",
21126 GET_MODE_NAME (mode),
21127 GET_RTX_NAME (outer_code),
21128 opno,
21129 *total,
21130 speed ? "true" : "false");
21132 debug_rtx (x);
21134 return ret;
21137 static int
21138 rs6000_insn_cost (rtx_insn *insn, bool speed)
21140 if (recog_memoized (insn) < 0)
21141 return 0;
21143 /* If we are optimizing for size, just use the length. */
21144 if (!speed)
21145 return get_attr_length (insn);
21147 /* Use the cost if provided. */
21148 int cost = get_attr_cost (insn);
21149 if (cost > 0)
21150 return cost;
21152 /* If the insn tells us how many insns there are, use that. Otherwise use
21153 the length/4. Adjust the insn length to remove the extra size that
21154 prefixed instructions take. */
21155 int n = get_attr_num_insns (insn);
21156 if (n == 0)
21158 int length = get_attr_length (insn);
21159 if (get_attr_prefixed (insn) == PREFIXED_YES)
21161 int adjust = 0;
21162 ADJUST_INSN_LENGTH (insn, adjust);
21163 length -= adjust;
21166 n = length / 4;
21169 enum attr_type type = get_attr_type (insn);
21171 switch (type)
21173 case TYPE_LOAD:
21174 case TYPE_FPLOAD:
21175 case TYPE_VECLOAD:
21176 cost = COSTS_N_INSNS (n + 1);
21177 break;
21179 case TYPE_MUL:
21180 switch (get_attr_size (insn))
21182 case SIZE_8:
21183 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21184 break;
21185 case SIZE_16:
21186 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21187 break;
21188 case SIZE_32:
21189 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21190 break;
21191 case SIZE_64:
21192 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21193 break;
21194 default:
21195 gcc_unreachable ();
21197 break;
21198 case TYPE_DIV:
21199 switch (get_attr_size (insn))
21201 case SIZE_32:
21202 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21203 break;
21204 case SIZE_64:
21205 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21206 break;
21207 default:
21208 gcc_unreachable ();
21210 break;
21212 case TYPE_FP:
21213 cost = n * rs6000_cost->fp;
21214 break;
21215 case TYPE_DMUL:
21216 cost = n * rs6000_cost->dmul;
21217 break;
21218 case TYPE_SDIV:
21219 cost = n * rs6000_cost->sdiv;
21220 break;
21221 case TYPE_DDIV:
21222 cost = n * rs6000_cost->ddiv;
21223 break;
21225 case TYPE_SYNC:
21226 case TYPE_LOAD_L:
21227 case TYPE_MFCR:
21228 case TYPE_MFCRF:
21229 cost = COSTS_N_INSNS (n + 2);
21230 break;
21232 default:
21233 cost = COSTS_N_INSNS (n);
21236 return cost;
21239 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21241 static int
21242 rs6000_debug_address_cost (rtx x, machine_mode mode,
21243 addr_space_t as, bool speed)
21245 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21247 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21248 ret, speed ? "true" : "false");
21249 debug_rtx (x);
21251 return ret;
21255 /* A C expression returning the cost of moving data from a register of class
21256 CLASS1 to one of CLASS2. */
21258 static int
21259 rs6000_register_move_cost (machine_mode mode,
21260 reg_class_t from, reg_class_t to)
21262 int ret;
21263 reg_class_t rclass;
21265 if (TARGET_DEBUG_COST)
21266 dbg_cost_ctrl++;
21268 /* If we have VSX, we can easily move between FPR or Altivec registers,
21269 otherwise we can only easily move within classes.
21270 Do this first so we give best-case answers for union classes
21271 containing both gprs and vsx regs. */
21272 HARD_REG_SET to_vsx, from_vsx;
21273 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21274 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21275 if (!hard_reg_set_empty_p (to_vsx)
21276 && !hard_reg_set_empty_p (from_vsx)
21277 && (TARGET_VSX
21278 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21280 int reg = FIRST_FPR_REGNO;
21281 if (TARGET_VSX
21282 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21283 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21284 reg = FIRST_ALTIVEC_REGNO;
21285 ret = 2 * hard_regno_nregs (reg, mode);
21288 /* Moves from/to GENERAL_REGS. */
21289 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21290 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21292 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21294 if (TARGET_DIRECT_MOVE)
21296 /* Keep the cost for direct moves above that for within
21297 a register class even if the actual processor cost is
21298 comparable. We do this because a direct move insn
21299 can't be a nop, whereas with ideal register
21300 allocation a move within the same class might turn
21301 out to be a nop. */
21302 if (rs6000_tune == PROCESSOR_POWER9
21303 || rs6000_tune == PROCESSOR_FUTURE)
21304 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21305 else
21306 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21307 /* SFmode requires a conversion when moving between gprs
21308 and vsx. */
21309 if (mode == SFmode)
21310 ret += 2;
21312 else
21313 ret = (rs6000_memory_move_cost (mode, rclass, false)
21314 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21317 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21318 shift. */
21319 else if (rclass == CR_REGS)
21320 ret = 4;
21322 /* For those processors that have slow LR/CTR moves, make them more
21323 expensive than memory in order to bias spills to memory. */
21324 else if ((rs6000_tune == PROCESSOR_POWER6
21325 || rs6000_tune == PROCESSOR_POWER7
21326 || rs6000_tune == PROCESSOR_POWER8
21327 || rs6000_tune == PROCESSOR_POWER9)
21328 && reg_class_subset_p (rclass, SPECIAL_REGS))
21329 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21331 else
21332 /* A move will cost one instruction per GPR moved. */
21333 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21336 /* Everything else has to go through GENERAL_REGS. */
21337 else
21338 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21339 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21341 if (TARGET_DEBUG_COST)
21343 if (dbg_cost_ctrl == 1)
21344 fprintf (stderr,
21345 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21346 ret, GET_MODE_NAME (mode), reg_class_names[from],
21347 reg_class_names[to]);
21348 dbg_cost_ctrl--;
21351 return ret;
21354 /* A C expression returning the cost of moving data of MODE from a register to
21355 or from memory. */
21357 static int
21358 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21359 bool in ATTRIBUTE_UNUSED)
21361 int ret;
21363 if (TARGET_DEBUG_COST)
21364 dbg_cost_ctrl++;
21366 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21367 ret = 4 * hard_regno_nregs (0, mode);
21368 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21369 || reg_classes_intersect_p (rclass, VSX_REGS)))
21370 ret = 4 * hard_regno_nregs (32, mode);
21371 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21372 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21373 else
21374 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21376 if (TARGET_DEBUG_COST)
21378 if (dbg_cost_ctrl == 1)
21379 fprintf (stderr,
21380 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21381 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21382 dbg_cost_ctrl--;
21385 return ret;
21388 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21390 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21391 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21392 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21393 move cost between GENERAL_REGS and VSX_REGS low.
21395 It might seem reasonable to use a union class. After all, if usage
21396 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21397 rather than memory. However, in cases where register pressure of
21398 both is high, like the cactus_adm spec test, allowing
21399 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21400 the first scheduling pass. This is partly due to an allocno of
21401 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21402 class, which gives too high a pressure for GENERAL_REGS and too low
21403 for VSX_REGS. So, force a choice of the subclass here.
21405 The best class is also the union if GENERAL_REGS and VSX_REGS have
21406 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21407 allocno class, since trying to narrow down the class by regno mode
21408 is prone to error. For example, SImode is allowed in VSX regs and
21409 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21410 it would be wrong to choose an allocno of GENERAL_REGS based on
21411 SImode. */
21413 static reg_class_t
21414 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21415 reg_class_t allocno_class,
21416 reg_class_t best_class)
21418 switch (allocno_class)
21420 case GEN_OR_VSX_REGS:
21421 /* best_class must be a subset of allocno_class. */
21422 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21423 || best_class == GEN_OR_FLOAT_REGS
21424 || best_class == VSX_REGS
21425 || best_class == ALTIVEC_REGS
21426 || best_class == FLOAT_REGS
21427 || best_class == GENERAL_REGS
21428 || best_class == BASE_REGS);
21429 /* Use best_class but choose wider classes when copying from the
21430 wider class to best_class is cheap. This mimics IRA choice
21431 of allocno class. */
21432 if (best_class == BASE_REGS)
21433 return GENERAL_REGS;
21434 if (TARGET_VSX
21435 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21436 return VSX_REGS;
21437 return best_class;
21439 default:
21440 break;
21443 return allocno_class;
21446 /* Returns a code for a target-specific builtin that implements
21447 reciprocal of the function, or NULL_TREE if not available. */
21449 static tree
21450 rs6000_builtin_reciprocal (tree fndecl)
21452 switch (DECL_MD_FUNCTION_CODE (fndecl))
21454 case VSX_BUILTIN_XVSQRTDP:
21455 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21456 return NULL_TREE;
21458 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21460 case VSX_BUILTIN_XVSQRTSP:
21461 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21462 return NULL_TREE;
21464 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21466 default:
21467 return NULL_TREE;
21471 /* Load up a constant. If the mode is a vector mode, splat the value across
21472 all of the vector elements. */
21474 static rtx
21475 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21477 rtx reg;
21479 if (mode == SFmode || mode == DFmode)
21481 rtx d = const_double_from_real_value (dconst, mode);
21482 reg = force_reg (mode, d);
21484 else if (mode == V4SFmode)
21486 rtx d = const_double_from_real_value (dconst, SFmode);
21487 rtvec v = gen_rtvec (4, d, d, d, d);
21488 reg = gen_reg_rtx (mode);
21489 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21491 else if (mode == V2DFmode)
21493 rtx d = const_double_from_real_value (dconst, DFmode);
21494 rtvec v = gen_rtvec (2, d, d);
21495 reg = gen_reg_rtx (mode);
21496 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21498 else
21499 gcc_unreachable ();
21501 return reg;
21504 /* Generate an FMA instruction. */
21506 static void
21507 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21509 machine_mode mode = GET_MODE (target);
21510 rtx dst;
21512 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21513 gcc_assert (dst != NULL);
21515 if (dst != target)
21516 emit_move_insn (target, dst);
21519 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21521 static void
21522 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21524 machine_mode mode = GET_MODE (dst);
21525 rtx r;
21527 /* This is a tad more complicated, since the fnma_optab is for
21528 a different expression: fma(-m1, m2, a), which is the same
21529 thing except in the case of signed zeros.
21531 Fortunately we know that if FMA is supported that FNMSUB is
21532 also supported in the ISA. Just expand it directly. */
21534 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21536 r = gen_rtx_NEG (mode, a);
21537 r = gen_rtx_FMA (mode, m1, m2, r);
21538 r = gen_rtx_NEG (mode, r);
21539 emit_insn (gen_rtx_SET (dst, r));
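/* Illustrative note (not part of the original source): the two forms
   differ only in the sign of an exact zero result. Assuming
   round-to-nearest:

     fma (-1.0, 1.0, 1.0)   ==>  -1 + 1   = +0.0
     -fma (1.0, 1.0, -1.0)  ==>  -(1 - 1) = -0.0

   FNMSUB computes the second form, which is why the expander builds
   the NEG/FMA/NEG rtl directly rather than going through fnma_optab. */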
21542 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21543 add a reg_note saying that this was a division. Support both scalar and
21544 vector divide. Assumes no trapping math and finite arguments. */
21546 void
21547 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21549 machine_mode mode = GET_MODE (dst);
21550 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21551 int i;
21553 /* Low precision estimates guarantee 5 bits of accuracy. High
21554 precision estimates guarantee 14 bits of accuracy. SFmode
21555 requires 23 bits of accuracy. DFmode requires 52 bits of
21556 accuracy. Each pass at least doubles the accuracy, leading
21557 to the following. */
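/* Worked numbers (illustration): with 5-bit estimates, three passes
   give 5 -> 10 -> 20 -> 40 bits, covering SFmode's 23; the extra
   DFmode/V2DFmode pass reaches 80 bits, covering 52. With 14-bit
   estimates (TARGET_RECIP_PRECISION), one pass reaches 28 bits and
   the two DFmode passes reach 56. */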
21558 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21559 if (mode == DFmode || mode == V2DFmode)
21560 passes++;
21562 enum insn_code code = optab_handler (smul_optab, mode);
21563 insn_gen_fn gen_mul = GEN_FCN (code);
21565 gcc_assert (code != CODE_FOR_nothing);
21567 one = rs6000_load_constant_and_splat (mode, dconst1);
21569 /* x0 = 1./d estimate */
21570 x0 = gen_reg_rtx (mode);
21571 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21572 UNSPEC_FRES)));
21574 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21575 if (passes > 1) {
21577 /* e0 = 1. - d * x0 */
21578 e0 = gen_reg_rtx (mode);
21579 rs6000_emit_nmsub (e0, d, x0, one);
21581 /* x1 = x0 + e0 * x0 */
21582 x1 = gen_reg_rtx (mode);
21583 rs6000_emit_madd (x1, e0, x0, x0);
21585 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21586 ++i, xprev = xnext, eprev = enext) {
21588 /* enext = eprev * eprev */
21589 enext = gen_reg_rtx (mode);
21590 emit_insn (gen_mul (enext, eprev, eprev));
21592 /* xnext = xprev + enext * xprev */
21593 xnext = gen_reg_rtx (mode);
21594 rs6000_emit_madd (xnext, enext, xprev, xprev);
21597 } else
21598 xprev = x0;
21600 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21602 /* u = n * xprev */
21603 u = gen_reg_rtx (mode);
21604 emit_insn (gen_mul (u, n, xprev));
21606 /* v = n - (d * u) */
21607 v = gen_reg_rtx (mode);
21608 rs6000_emit_nmsub (v, d, u, n);
21610 /* dst = (v * xprev) + u */
21611 rs6000_emit_madd (dst, v, xprev, u);
21613 if (note_p)
21614 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
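/* A minimal scalar model of the sequence emitted above (illustration
   only; swdiv_model is a hypothetical helper, not a GCC function).
   It uses the algebraically equivalent form e_i = 1 - d * x_i in
   place of the emitted error squaring, since e_(i+1) = e_i * e_i
   = 1 - d * x_(i+1). Assumes a finite, nonzero divisor and an
   initial estimate X0 ~= 1/D such as fres would provide. */

static double
swdiv_model (double n, double d, double x0, int passes)
{
  double x = x0;

  /* Every pass but the last refines the reciprocal estimate:
     x_(i+1) = x_i + x_i * (1 - d * x_i) = x_i * (2 - d * x_i). */
  for (int i = 0; i < passes - 1; i++)
    {
      double e = 1.0 - d * x;
      x = x + e * x;
    }

  /* The last pass folds in the numerator. */
  double u = n * x;		/* u = n * xprev */
  double v = n - d * u;		/* v = n - d * u */
  return v * x + u;		/* dst = v * xprev + u */
}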
21617 /* Goldschmidt's Algorithm for single/double-precision floating point
21618 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21620 void
21621 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21623 machine_mode mode = GET_MODE (src);
21624 rtx e = gen_reg_rtx (mode);
21625 rtx g = gen_reg_rtx (mode);
21626 rtx h = gen_reg_rtx (mode);
21628 /* Low precision estimates guarantee 5 bits of accuracy. High
21629 precision estimates guarantee 14 bits of accuracy. SFmode
21630 requires 23 bits of accuracy. DFmode requires 52 bits of
21631 accuracy. Each pass at least doubles the accuracy, leading
21632 to the following. */
21633 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21634 if (mode == DFmode || mode == V2DFmode)
21635 passes++;
21637 int i;
21638 rtx mhalf;
21639 enum insn_code code = optab_handler (smul_optab, mode);
21640 insn_gen_fn gen_mul = GEN_FCN (code);
21642 gcc_assert (code != CODE_FOR_nothing);
21644 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21646 /* e = rsqrt estimate */
21647 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21648 UNSPEC_RSQRT)));
21650 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21651 if (!recip)
21653 rtx zero = force_reg (mode, CONST0_RTX (mode));
21655 if (mode == SFmode)
21657 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21658 e, zero, mode, 0);
21659 if (target != e)
21660 emit_move_insn (e, target);
21662 else
21664 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21665 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21669 /* g = sqrt estimate. */
21670 emit_insn (gen_mul (g, e, src));
21671 /* h = 1/(2*sqrt) estimate. */
21672 emit_insn (gen_mul (h, e, mhalf));
21674 if (recip)
21676 if (passes == 1)
21678 rtx t = gen_reg_rtx (mode);
21679 rs6000_emit_nmsub (t, g, h, mhalf);
21680 /* Apply correction directly to 1/rsqrt estimate. */
21681 rs6000_emit_madd (dst, e, t, e);
21683 else
21685 for (i = 0; i < passes; i++)
21687 rtx t1 = gen_reg_rtx (mode);
21688 rtx g1 = gen_reg_rtx (mode);
21689 rtx h1 = gen_reg_rtx (mode);
21691 rs6000_emit_nmsub (t1, g, h, mhalf);
21692 rs6000_emit_madd (g1, g, t1, g);
21693 rs6000_emit_madd (h1, h, t1, h);
21695 g = g1;
21696 h = h1;
21698 /* Multiply by 2 for 1/rsqrt. */
21699 emit_insn (gen_add3_insn (dst, h, h));
21702 else
21704 rtx t = gen_reg_rtx (mode);
21705 rs6000_emit_nmsub (t, g, h, mhalf);
21706 rs6000_emit_madd (dst, g, t, g);
21709 return;
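/* A scalar model of the refinement step above (illustration only;
   goldschmidt_step is a hypothetical helper, not a GCC function).
   With g ~= sqrt(src) and h ~= 1/(2*sqrt(src)), each Goldschmidt
   step computes t = 1/2 - g*h and applies it to both estimates,
   roughly squaring their relative error. */

static void
goldschmidt_step (double *g, double *h)
{
  double t = 0.5 - *g * *h;	/* rs6000_emit_nmsub (t, g, h, mhalf) */
  *g += *g * t;			/* refined sqrt estimate */
  *h += *h * t;			/* refined 1/(2*sqrt) estimate */
}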
21712 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21713 (Power7) targets. DST is the target, and SRC is the argument operand. */
21715 void
21716 rs6000_emit_popcount (rtx dst, rtx src)
21718 machine_mode mode = GET_MODE (dst);
21719 rtx tmp1, tmp2;
21721 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21722 if (TARGET_POPCNTD)
21724 if (mode == SImode)
21725 emit_insn (gen_popcntdsi2 (dst, src));
21726 else
21727 emit_insn (gen_popcntddi2 (dst, src));
21728 return;
21731 tmp1 = gen_reg_rtx (mode);
21733 if (mode == SImode)
21735 emit_insn (gen_popcntbsi2 (tmp1, src));
21736 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21737 NULL_RTX, 0);
21738 tmp2 = force_reg (SImode, tmp2);
21739 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21741 else
21743 emit_insn (gen_popcntbdi2 (tmp1, src));
21744 tmp2 = expand_mult (DImode, tmp1,
21745 GEN_INT ((HOST_WIDE_INT)
21746 0x01010101 << 32 | 0x01010101),
21747 NULL_RTX, 0);
21748 tmp2 = force_reg (DImode, tmp2);
21749 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
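/* A scalar model of the popcntb fallback above (illustration only;
   popcount32_model is a hypothetical helper, not a GCC function).
   popcntb leaves each byte holding the population count of the
   corresponding source byte; multiplying by 0x01010101 accumulates
   the byte counts into the most significant byte, which the final
   shift right by 24 (56 for DImode) extracts. */

static unsigned int
popcount32_model (unsigned int x)
{
  /* Per-byte population count, as popcntb would produce. */
  unsigned int bytes = 0;
  for (int i = 0; i < 32; i++)
    if (x & (1u << i))
      bytes += 1u << (8 * (i / 8));

  /* Sum the byte counts into the top byte and extract it. */
  return (bytes * 0x01010101u) >> 24;
}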
21754 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21755 target, and SRC is the argument operand. */
21757 void
21758 rs6000_emit_parity (rtx dst, rtx src)
21760 machine_mode mode = GET_MODE (dst);
21761 rtx tmp;
21763 tmp = gen_reg_rtx (mode);
21765 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21766 if (TARGET_CMPB)
21768 if (mode == SImode)
21770 emit_insn (gen_popcntbsi2 (tmp, src));
21771 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21773 else
21775 emit_insn (gen_popcntbdi2 (tmp, src));
21776 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21778 return;
21781 if (mode == SImode)
21783 /* Is mult+shift >= shift+xor+shift+xor? */
21784 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21786 rtx tmp1, tmp2, tmp3, tmp4;
21788 tmp1 = gen_reg_rtx (SImode);
21789 emit_insn (gen_popcntbsi2 (tmp1, src));
21791 tmp2 = gen_reg_rtx (SImode);
21792 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21793 tmp3 = gen_reg_rtx (SImode);
21794 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21796 tmp4 = gen_reg_rtx (SImode);
21797 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21798 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21800 else
21801 rs6000_emit_popcount (tmp, src);
21802 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21804 else
21806 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21807 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21809 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21811 tmp1 = gen_reg_rtx (DImode);
21812 emit_insn (gen_popcntbdi2 (tmp1, src));
21814 tmp2 = gen_reg_rtx (DImode);
21815 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21816 tmp3 = gen_reg_rtx (DImode);
21817 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21819 tmp4 = gen_reg_rtx (DImode);
21820 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21821 tmp5 = gen_reg_rtx (DImode);
21822 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21824 tmp6 = gen_reg_rtx (DImode);
21825 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21826 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21828 else
21829 rs6000_emit_popcount (tmp, src);
21830 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
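/* A scalar model of the shift/xor fallback above (illustration only;
   parity32_model is a hypothetical helper, not a GCC function). It
   starts from the per-byte counts that popcntb produces and folds
   the value onto itself; bit 0 of the result is the parity of the
   source. The DImode variant folds by 32, 16 and 8 instead. */

static unsigned int
parity32_model (unsigned int bytes)
{
  bytes ^= bytes >> 16;		/* fold the two halfwords together */
  bytes ^= bytes >> 8;		/* fold the remaining two bytes */
  return bytes & 1;		/* parity of all bits of the source */
}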
21834 /* Expand an Altivec constant permutation for little endian mode.
21835 OP0 and OP1 are the input vectors and TARGET is the output vector.
21836 SEL specifies the constant permutation vector.
21838 There are two issues: First, the two input operands must be
21839 swapped so that together they form a double-wide array in LE
21840 order. Second, the vperm instruction has surprising behavior
21841 in LE mode: it interprets the elements of the source vectors
21842 in BE mode ("left to right") and interprets the elements of
21843 the destination vector in LE mode ("right to left"). To
21844 correct for this, we must subtract each element of the permute
21845 control vector from 31.
21847 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21848 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21849 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21850 serve as the permute control vector. Then, in BE mode,
21852 vperm 9,10,11,12
21854 places the desired result in vr9. However, in LE mode the
21855 vector contents will be
21857 vr10 = 00000003 00000002 00000001 00000000
21858 vr11 = 00000007 00000006 00000005 00000004
21860 The result of the vperm using the same permute control vector is
21862 vr9 = 05000000 07000000 01000000 03000000
21864 That is, the leftmost 4 bytes of vr10 are interpreted as the
21865 source for the rightmost 4 bytes of vr9, and so on.
21867 If we change the permute control vector to
21869 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21871 and issue
21873 vperm 9,11,10,12
21875 we get the desired
21877 vr9 = 00000006 00000004 00000002 00000000. */
21879 static void
21880 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21881 const vec_perm_indices &sel)
21883 unsigned int i;
21884 rtx perm[16];
21885 rtx constv, unspec;
21887 /* Unpack and adjust the constant selector. */
21888 for (i = 0; i < 16; ++i)
21890 unsigned int elt = 31 - (sel[i] & 31);
21891 perm[i] = GEN_INT (elt);
21894 /* Expand to a permute, swapping the inputs and using the
21895 adjusted selector. */
21896 if (!REG_P (op0))
21897 op0 = force_reg (V16QImode, op0);
21898 if (!REG_P (op1))
21899 op1 = force_reg (V16QImode, op1);
21901 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21902 constv = force_reg (V16QImode, constv);
21903 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21904 UNSPEC_VPERM);
21905 if (!REG_P (target))
21907 rtx tmp = gen_reg_rtx (V16QImode);
21908 emit_move_insn (tmp, unspec);
21909 unspec = tmp;
21912 emit_move_insn (target, unspec);
21915 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21916 permute control vector. But here it's not a constant, so we must
21917 generate a vector NAND or NOR to do the adjustment. */
21919 void
21920 altivec_expand_vec_perm_le (rtx operands[4])
21922 rtx notx, iorx, unspec;
21923 rtx target = operands[0];
21924 rtx op0 = operands[1];
21925 rtx op1 = operands[2];
21926 rtx sel = operands[3];
21927 rtx tmp = target;
21928 rtx norreg = gen_reg_rtx (V16QImode);
21929 machine_mode mode = GET_MODE (target);
21931 /* Get everything in regs so the pattern matches. */
21932 if (!REG_P (op0))
21933 op0 = force_reg (mode, op0);
21934 if (!REG_P (op1))
21935 op1 = force_reg (mode, op1);
21936 if (!REG_P (sel))
21937 sel = force_reg (V16QImode, sel);
21938 if (!REG_P (target))
21939 tmp = gen_reg_rtx (mode);
21941 if (TARGET_P9_VECTOR)
21943 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21944 UNSPEC_VPERMR);
21946 else
21948 /* Invert the selector with a VNAND if available, else a VNOR.
21949 The VNAND is preferred for future fusion opportunities. */
21950 notx = gen_rtx_NOT (V16QImode, sel);
21951 iorx = (TARGET_P8_VECTOR
21952 ? gen_rtx_IOR (V16QImode, notx, notx)
21953 : gen_rtx_AND (V16QImode, notx, notx));
21954 emit_insn (gen_rtx_SET (norreg, iorx));
21956 /* Permute with operands reversed and adjusted selector. */
21957 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21958 UNSPEC_VPERM);
21961 /* Copy into target, possibly by way of a register. */
21962 if (!REG_P (target))
21964 emit_move_insn (tmp, unspec);
21965 unspec = tmp;
21968 emit_move_insn (target, unspec);
21971 /* Expand an Altivec constant permutation. Return true if we match
21972 an efficient implementation; false to fall back to VPERM.
21974 OP0 and OP1 are the input vectors and TARGET is the output vector.
21975 SEL specifies the constant permutation vector. */
21977 static bool
21978 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21979 const vec_perm_indices &sel)
21981 struct altivec_perm_insn {
21982 HOST_WIDE_INT mask;
21983 enum insn_code impl;
21984 unsigned char perm[16];
21986 static const struct altivec_perm_insn patterns[] = {
21987 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21988 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21989 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21990 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21991 { OPTION_MASK_ALTIVEC,
21992 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21993 : CODE_FOR_altivec_vmrglb_direct),
21994 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21995 { OPTION_MASK_ALTIVEC,
21996 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21997 : CODE_FOR_altivec_vmrglh_direct),
21998 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21999 { OPTION_MASK_ALTIVEC,
22000 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22001 : CODE_FOR_altivec_vmrglw_direct),
22002 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22003 { OPTION_MASK_ALTIVEC,
22004 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22005 : CODE_FOR_altivec_vmrghb_direct),
22006 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22007 { OPTION_MASK_ALTIVEC,
22008 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22009 : CODE_FOR_altivec_vmrghh_direct),
22010 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22011 { OPTION_MASK_ALTIVEC,
22012 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22013 : CODE_FOR_altivec_vmrghw_direct),
22014 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22015 { OPTION_MASK_P8_VECTOR,
22016 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22017 : CODE_FOR_p8_vmrgow_v4sf_direct),
22018 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22019 { OPTION_MASK_P8_VECTOR,
22020 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22021 : CODE_FOR_p8_vmrgew_v4sf_direct),
22022 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22025 unsigned int i, j, elt, which;
22026 unsigned char perm[16];
22027 rtx x;
22028 bool one_vec;
22030 /* Unpack the constant selector. */
22031 for (i = which = 0; i < 16; ++i)
22033 elt = sel[i] & 31;
22034 which |= (elt < 16 ? 1 : 2);
22035 perm[i] = elt;
22038 /* Simplify the constant selector based on operands. */
22039 switch (which)
22041 default:
22042 gcc_unreachable ();
22044 case 3:
22045 one_vec = false;
22046 if (!rtx_equal_p (op0, op1))
22047 break;
22048 /* FALLTHRU */
22050 case 2:
22051 for (i = 0; i < 16; ++i)
22052 perm[i] &= 15;
22053 op0 = op1;
22054 one_vec = true;
22055 break;
22057 case 1:
22058 op1 = op0;
22059 one_vec = true;
22060 break;
22063 /* Look for splat patterns. */
22064 if (one_vec)
22066 elt = perm[0];
22068 for (i = 0; i < 16; ++i)
22069 if (perm[i] != elt)
22070 break;
22071 if (i == 16)
22073 if (!BYTES_BIG_ENDIAN)
22074 elt = 15 - elt;
22075 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22076 return true;
22079 if (elt % 2 == 0)
22081 for (i = 0; i < 16; i += 2)
22082 if (perm[i] != elt || perm[i + 1] != elt + 1)
22083 break;
22084 if (i == 16)
22086 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22087 x = gen_reg_rtx (V8HImode);
22088 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22089 GEN_INT (field)));
22090 emit_move_insn (target, gen_lowpart (V16QImode, x));
22091 return true;
22095 if (elt % 4 == 0)
22097 for (i = 0; i < 16; i += 4)
22098 if (perm[i] != elt
22099 || perm[i + 1] != elt + 1
22100 || perm[i + 2] != elt + 2
22101 || perm[i + 3] != elt + 3)
22102 break;
22103 if (i == 16)
22105 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22106 x = gen_reg_rtx (V4SImode);
22107 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22108 GEN_INT (field)));
22109 emit_move_insn (target, gen_lowpart (V16QImode, x));
22110 return true;
22115 /* Look for merge and pack patterns. */
22116 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22118 bool swapped;
22120 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22121 continue;
22123 elt = patterns[j].perm[0];
22124 if (perm[0] == elt)
22125 swapped = false;
22126 else if (perm[0] == elt + 16)
22127 swapped = true;
22128 else
22129 continue;
22130 for (i = 1; i < 16; ++i)
22132 elt = patterns[j].perm[i];
22133 if (swapped)
22134 elt = (elt >= 16 ? elt - 16 : elt + 16);
22135 else if (one_vec && elt >= 16)
22136 elt -= 16;
22137 if (perm[i] != elt)
22138 break;
22140 if (i == 16)
22142 enum insn_code icode = patterns[j].impl;
22143 machine_mode omode = insn_data[icode].operand[0].mode;
22144 machine_mode imode = insn_data[icode].operand[1].mode;
22146 /* For little-endian, don't use vpkuwum and vpkuhum if the
22147 underlying vector type is not V4SI and V8HI, respectively.
22148 For example, using vpkuwum with a V8HI picks up the even
22149 halfwords (BE numbering) when the even halfwords (LE
22150 numbering) are what we need. */
22151 if (!BYTES_BIG_ENDIAN
22152 && icode == CODE_FOR_altivec_vpkuwum_direct
22153 && ((REG_P (op0)
22154 && GET_MODE (op0) != V4SImode)
22155 || (SUBREG_P (op0)
22156 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22157 continue;
22158 if (!BYTES_BIG_ENDIAN
22159 && icode == CODE_FOR_altivec_vpkuhum_direct
22160 && ((REG_P (op0)
22161 && GET_MODE (op0) != V8HImode)
22162 || (SUBREG_P (op0)
22163 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22164 continue;
22166 /* For little-endian, the two input operands must be swapped
22167 (or swapped back) to ensure proper right-to-left numbering
22168 from 0 to 2N-1. */
22169 if (swapped ^ !BYTES_BIG_ENDIAN)
22170 std::swap (op0, op1);
22171 if (imode != V16QImode)
22173 op0 = gen_lowpart (imode, op0);
22174 op1 = gen_lowpart (imode, op1);
22176 if (omode == V16QImode)
22177 x = target;
22178 else
22179 x = gen_reg_rtx (omode);
22180 emit_insn (GEN_FCN (icode) (x, op0, op1));
22181 if (omode != V16QImode)
22182 emit_move_insn (target, gen_lowpart (V16QImode, x));
22183 return true;
22187 if (!BYTES_BIG_ENDIAN)
22189 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22190 return true;
22193 return false;
22196 /* Expand a VSX Permute Doubleword constant permutation.
22197 Return true if we match an efficient implementation. */
22199 static bool
22200 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22201 unsigned char perm0, unsigned char perm1)
22203 rtx x;
22205 /* If both selectors come from the same operand, fold to single op. */
22206 if ((perm0 & 2) == (perm1 & 2))
22208 if (perm0 & 2)
22209 op0 = op1;
22210 else
22211 op1 = op0;
22213 /* If both operands are equal, fold to simpler permutation. */
22214 if (rtx_equal_p (op0, op1))
22216 perm0 = perm0 & 1;
22217 perm1 = (perm1 & 1) + 2;
22219 /* If the first selector comes from the second operand, swap. */
22220 else if (perm0 & 2)
22222 if (perm1 & 2)
22223 return false;
22224 perm0 -= 2;
22225 perm1 += 2;
22226 std::swap (op0, op1);
22228 /* If the second selector does not come from the second operand, fail. */
22229 else if ((perm1 & 2) == 0)
22230 return false;
22232 /* Success! */
22233 if (target != NULL)
22235 machine_mode vmode, dmode;
22236 rtvec v;
22238 vmode = GET_MODE (target);
22239 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22240 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22241 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22242 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22243 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22244 emit_insn (gen_rtx_SET (target, x));
22246 return true;
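/* Worked example (illustration): selector values 0/1 name the
   doublewords of op0 and 2/3 those of op1. perm0 = 1, perm1 = 2
   selects the second doubleword of op0 and the first of op1 and
   maps directly onto one xxpermdi. perm0 = 3, perm1 = 0 is
   canonicalized by the swap above into perm0 = 1, perm1 = 2 with
   op0 and op1 exchanged. */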
22249 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22251 static bool
22252 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22253 rtx op1, const vec_perm_indices &sel)
22255 bool testing_p = !target;
22257 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22258 if (TARGET_ALTIVEC && testing_p)
22259 return true;
22261 /* Check for ps_merge* or xxpermdi insns. */
22262 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22264 if (testing_p)
22266 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22267 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22269 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22270 return true;
22273 if (TARGET_ALTIVEC)
22275 /* Force the target-independent code to lower to V16QImode. */
22276 if (vmode != V16QImode)
22277 return false;
22278 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22279 return true;
22282 return false;
22285 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22286 OP0 and OP1 are the input vectors and TARGET is the output vector.
22287 PERM specifies the constant permutation vector. */
22289 static void
22290 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22291 machine_mode vmode, const vec_perm_builder &perm)
22293 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22294 if (x != target)
22295 emit_move_insn (target, x);
22298 /* Expand an extract even operation. */
22300 void
22301 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22303 machine_mode vmode = GET_MODE (target);
22304 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22305 vec_perm_builder perm (nelt, nelt, 1);
22307 for (i = 0; i < nelt; i++)
22308 perm.quick_push (i * 2);
22310 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22313 /* Expand a vector interleave operation. */
22315 void
22316 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22318 machine_mode vmode = GET_MODE (target);
22319 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22320 vec_perm_builder perm (nelt, nelt, 1);
22322 high = (highp ? 0 : nelt / 2);
22323 for (i = 0; i < nelt / 2; i++)
22325 perm.quick_push (i + high);
22326 perm.quick_push (i + nelt + high);
22329 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
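/* Worked examples (illustration): for a four-element vector,
   rs6000_expand_extract_even builds the selector {0, 2, 4, 6};
   rs6000_expand_interleave builds {0, 4, 1, 5} when HIGHP and
   {2, 6, 3, 7} otherwise. */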
22332 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22333 void
22334 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22336 HOST_WIDE_INT hwi_scale (scale);
22337 REAL_VALUE_TYPE r_pow;
22338 rtvec v = rtvec_alloc (2);
22339 rtx elt;
22340 rtx scale_vec = gen_reg_rtx (V2DFmode);
22341 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22342 elt = const_double_from_real_value (r_pow, DFmode);
22343 RTVEC_ELT (v, 0) = elt;
22344 RTVEC_ELT (v, 1) = elt;
22345 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22346 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22349 /* Return an RTX representing where to find the function value of a
22350 function returning MODE. */
22351 static rtx
22352 rs6000_complex_function_value (machine_mode mode)
22354 unsigned int regno;
22355 rtx r1, r2;
22356 machine_mode inner = GET_MODE_INNER (mode);
22357 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22359 if (TARGET_FLOAT128_TYPE
22360 && (mode == KCmode
22361 || (mode == TCmode && TARGET_IEEEQUAD)))
22362 regno = ALTIVEC_ARG_RETURN;
22364 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22365 regno = FP_ARG_RETURN;
22367 else
22369 regno = GP_ARG_RETURN;
22371 /* 32-bit is OK since it'll go in r3/r4. */
22372 if (TARGET_32BIT && inner_bytes >= 4)
22373 return gen_rtx_REG (mode, regno);
22376 if (inner_bytes >= 8)
22377 return gen_rtx_REG (mode, regno);
22379 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22380 const0_rtx);
22381 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22382 GEN_INT (inner_bytes));
22383 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22386 /* Return an rtx describing a return value of MODE as a PARALLEL
22387 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22388 stride REG_STRIDE. */
22390 static rtx
22391 rs6000_parallel_return (machine_mode mode,
22392 int n_elts, machine_mode elt_mode,
22393 unsigned int regno, unsigned int reg_stride)
22395 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22397 int i;
22398 for (i = 0; i < n_elts; i++)
22400 rtx r = gen_rtx_REG (elt_mode, regno);
22401 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22402 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22403 regno += reg_stride;
22406 return par;
22409 /* Target hook for TARGET_FUNCTION_VALUE.
22411 An integer value is in r3 and a floating-point value is in fp1,
22412 unless -msoft-float. */
22414 static rtx
22415 rs6000_function_value (const_tree valtype,
22416 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22417 bool outgoing ATTRIBUTE_UNUSED)
22419 machine_mode mode;
22420 unsigned int regno;
22421 machine_mode elt_mode;
22422 int n_elts;
22424 /* Special handling for structs in darwin64. */
22425 if (TARGET_MACHO
22426 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22428 CUMULATIVE_ARGS valcum;
22429 rtx valret;
22431 valcum.words = 0;
22432 valcum.fregno = FP_ARG_MIN_REG;
22433 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22434 /* Do a trial code generation as if this were going to be passed as
22435 an argument; if any part goes in memory, we return NULL. */
22436 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22437 if (valret)
22438 return valret;
22439 /* Otherwise fall through to standard ABI rules. */
22442 mode = TYPE_MODE (valtype);
22444 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22445 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22447 int first_reg, n_regs;
22449 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22451 /* _Decimal128 must use even/odd register pairs. */
22452 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22453 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22455 else
22457 first_reg = ALTIVEC_ARG_RETURN;
22458 n_regs = 1;
22461 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22464 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
22465 if (TARGET_32BIT && TARGET_POWERPC64)
22466 switch (mode)
22468 default:
22469 break;
22470 case E_DImode:
22471 case E_SCmode:
22472 case E_DCmode:
22473 case E_TCmode:
22474 int count = GET_MODE_SIZE (mode) / 4;
22475 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22478 if ((INTEGRAL_TYPE_P (valtype)
22479 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22480 || POINTER_TYPE_P (valtype))
22481 mode = TARGET_32BIT ? SImode : DImode;
22483 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22484 /* _Decimal128 must use an even/odd register pair. */
22485 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22486 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22487 && !FLOAT128_VECTOR_P (mode))
22488 regno = FP_ARG_RETURN;
22489 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22490 && targetm.calls.split_complex_arg)
22491 return rs6000_complex_function_value (mode);
22492 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22493 return register is used in both cases, and we won't see V2DImode/V2DFmode
22494 for pure altivec, combine the two cases. */
22495 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22496 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22497 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22498 regno = ALTIVEC_ARG_RETURN;
22499 else
22500 regno = GP_ARG_RETURN;
22502 return gen_rtx_REG (mode, regno);
22505 /* Define how to find the value returned by a library function
22506 assuming the value has mode MODE. */
22507 rtx
22508 rs6000_libcall_value (machine_mode mode)
22510 unsigned int regno;
22512 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
22513 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22514 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22516 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22517 /* _Decimal128 must use an even/odd register pair. */
22518 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22519 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22520 regno = FP_ARG_RETURN;
22521 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22522 return register is used in both cases, and we won't see V2DImode/V2DFmode
22523 for pure altivec, combine the two cases. */
22524 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22525 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22526 regno = ALTIVEC_ARG_RETURN;
22527 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22528 return rs6000_complex_function_value (mode);
22529 else
22530 regno = GP_ARG_RETURN;
22532 return gen_rtx_REG (mode, regno);
22535 /* Compute register pressure classes. We implement the target hook to avoid
22536 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22537 lead to incorrect estimates of the number of available registers and therefore
22538 increased register pressure/spill. */
22539 static int
22540 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22542 int n;
22544 n = 0;
22545 pressure_classes[n++] = GENERAL_REGS;
22546 if (TARGET_VSX)
22547 pressure_classes[n++] = VSX_REGS;
22548 else
22550 if (TARGET_ALTIVEC)
22551 pressure_classes[n++] = ALTIVEC_REGS;
22552 if (TARGET_HARD_FLOAT)
22553 pressure_classes[n++] = FLOAT_REGS;
22555 pressure_classes[n++] = CR_REGS;
22556 pressure_classes[n++] = SPECIAL_REGS;
22558 return n;
22561 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22562 Frame pointer elimination is automatically handled.
22564 For the RS/6000, if frame pointer elimination is being done, we would like
22565 to convert ap into fp, not sp.
22567 We need r30 if -mminimal-toc was specified, and there are constant pool
22568 references. */
22570 static bool
22571 rs6000_can_eliminate (const int from, const int to)
22573 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22574 ? ! frame_pointer_needed
22575 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22576 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22577 || constant_pool_empty_p ()
22578 : true);
22581 /* Define the offset between two registers, FROM to be eliminated and its
22582 replacement TO, at the start of a routine. */
22583 HOST_WIDE_INT
22584 rs6000_initial_elimination_offset (int from, int to)
22586 rs6000_stack_t *info = rs6000_stack_info ();
22587 HOST_WIDE_INT offset;
22589 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22590 offset = info->push_p ? 0 : -info->total_size;
22591 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22593 offset = info->push_p ? 0 : -info->total_size;
22594 if (FRAME_GROWS_DOWNWARD)
22595 offset += info->fixed_size + info->vars_size + info->parm_size;
22597 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22598 offset = FRAME_GROWS_DOWNWARD
22599 ? info->fixed_size + info->vars_size + info->parm_size
22600 : 0;
22601 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22602 offset = info->total_size;
22603 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22604 offset = info->push_p ? info->total_size : 0;
22605 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22606 offset = 0;
22607 else
22608 gcc_unreachable ();
22610 return offset;
22613 /* Fill in sizes of registers used by the unwinder. */
22615 static void
22616 rs6000_init_dwarf_reg_sizes_extra (tree address)
22618 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22620 int i;
22621 machine_mode mode = TYPE_MODE (char_type_node);
22622 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22623 rtx mem = gen_rtx_MEM (BLKmode, addr);
22624 rtx value = gen_int_mode (16, mode);
22626 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22627 The unwinder still needs to know the size of Altivec registers. */
22629 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22631 int column = DWARF_REG_TO_UNWIND_COLUMN
22632 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22633 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22635 emit_move_insn (adjust_address (mem, mode, offset), value);
22640 /* Map internal gcc register numbers to debug format register numbers.
22641 FORMAT specifies the type of debug register number to use:
22642 0 -- debug information, except for frame-related sections
22643 1 -- DWARF .debug_frame section
22644 2 -- DWARF .eh_frame section */
22646 unsigned int
22647 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22649 /* On some platforms, we use the standard DWARF register
22650 numbering for .debug_info and .debug_frame. */
22651 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22653 #ifdef RS6000_USE_DWARF_NUMBERING
22654 if (regno <= 31)
22655 return regno;
22656 if (FP_REGNO_P (regno))
22657 return regno - FIRST_FPR_REGNO + 32;
22658 if (ALTIVEC_REGNO_P (regno))
22659 return regno - FIRST_ALTIVEC_REGNO + 1124;
22660 if (regno == LR_REGNO)
22661 return 108;
22662 if (regno == CTR_REGNO)
22663 return 109;
22664 if (regno == CA_REGNO)
22665 return 101; /* XER */
22666 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22667 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22668 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22669 to the DWARF reg for CR. */
22670 if (format == 1 && regno == CR2_REGNO)
22671 return 64;
22672 if (CR_REGNO_P (regno))
22673 return regno - CR0_REGNO + 86;
22674 if (regno == VRSAVE_REGNO)
22675 return 356;
22676 if (regno == VSCR_REGNO)
22677 return 67;
22679 /* These do not make much sense. */
22680 if (regno == FRAME_POINTER_REGNUM)
22681 return 111;
22682 if (regno == ARG_POINTER_REGNUM)
22683 return 67;
22684 if (regno == 64)
22685 return 100;
22687 gcc_unreachable ();
22688 #endif
22691 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22692 information, and also for .eh_frame. */
22693 /* Translate the regnos to their numbers in GCC 7 (and before). */
22694 if (regno <= 31)
22695 return regno;
22696 if (FP_REGNO_P (regno))
22697 return regno - FIRST_FPR_REGNO + 32;
22698 if (ALTIVEC_REGNO_P (regno))
22699 return regno - FIRST_ALTIVEC_REGNO + 77;
22700 if (regno == LR_REGNO)
22701 return 65;
22702 if (regno == CTR_REGNO)
22703 return 66;
22704 if (regno == CA_REGNO)
22705 return 76; /* XER */
22706 if (CR_REGNO_P (regno))
22707 return regno - CR0_REGNO + 68;
22708 if (regno == VRSAVE_REGNO)
22709 return 109;
22710 if (regno == VSCR_REGNO)
22711 return 110;
22713 if (regno == FRAME_POINTER_REGNUM)
22714 return 111;
22715 if (regno == ARG_POINTER_REGNUM)
22716 return 67;
22717 if (regno == 64)
22718 return 64;
22720 gcc_unreachable ();
22723 /* Target hook for eh_return_filter_mode. */
22724 static scalar_int_mode
22725 rs6000_eh_return_filter_mode (void)
22727 return TARGET_32BIT ? SImode : word_mode;
22730 /* Target hook for translate_mode_attribute. */
22731 static machine_mode
22732 rs6000_translate_mode_attribute (machine_mode mode)
22734 if ((FLOAT128_IEEE_P (mode)
22735 && ieee128_float_type_node == long_double_type_node)
22736 || (FLOAT128_IBM_P (mode)
22737 && ibm128_float_type_node == long_double_type_node))
22738 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22739 return mode;
22742 /* Target hook for scalar_mode_supported_p. */
22743 static bool
22744 rs6000_scalar_mode_supported_p (scalar_mode mode)
22746 /* -m32 does not support TImode. This is the default, from
22747 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22748 same ABI as for -m32. But default_scalar_mode_supported_p allows
22749 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22750 for -mpowerpc64. */
22751 if (TARGET_32BIT && mode == TImode)
22752 return false;
22754 if (DECIMAL_FLOAT_MODE_P (mode))
22755 return default_decimal_float_supported_p ();
22756 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22757 return true;
22758 else
22759 return default_scalar_mode_supported_p (mode);
22762 /* Target hook for vector_mode_supported_p. */
22763 static bool
22764 rs6000_vector_mode_supported_p (machine_mode mode)
22766 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22767 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22768 double-double. */
22769 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22770 return true;
22772 else
22773 return false;
22776 /* Target hook for floatn_mode. */
22777 static opt_scalar_float_mode
22778 rs6000_floatn_mode (int n, bool extended)
22780 if (extended)
22782 switch (n)
22784 case 32:
22785 return DFmode;
22787 case 64:
22788 if (TARGET_FLOAT128_TYPE)
22789 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22790 else
22791 return opt_scalar_float_mode ();
22793 case 128:
22794 return opt_scalar_float_mode ();
22796 default:
22797 /* Those are the only valid _FloatNx types. */
22798 gcc_unreachable ();
22801 else
22803 switch (n)
22805 case 32:
22806 return SFmode;
22808 case 64:
22809 return DFmode;
22811 case 128:
22812 if (TARGET_FLOAT128_TYPE)
22813 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22814 else
22815 return opt_scalar_float_mode ();
22817 default:
22818 return opt_scalar_float_mode ();
22824 /* Target hook for c_mode_for_suffix. */
22825 static machine_mode
22826 rs6000_c_mode_for_suffix (char suffix)
22828 if (TARGET_FLOAT128_TYPE)
22830 if (suffix == 'q' || suffix == 'Q')
22831 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22833 /* At the moment, we are not defining a suffix for IBM extended double.
22834 If/when the default for -mabi=ieeelongdouble is changed, and we want
22835 to support __ibm128 constants in legacy library code, we may need to
22836 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22837 'q' as machine-dependent suffixes. The x86_64 port uses 'w' for
22838 __float80 constants. */
22841 return VOIDmode;
22844 /* Target hook for invalid_arg_for_unprototyped_fn. */
22845 static const char *
22846 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22848 return (!rs6000_darwin64_abi
22849 && typelist == 0
22850 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22851 && (funcdecl == NULL_TREE
22852 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22853 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22854 ? N_("AltiVec argument passed to unprototyped function")
22855 : NULL;
22858 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22859 setup by using __stack_chk_fail_local hidden function instead of
22860 calling __stack_chk_fail directly. Otherwise it is better to call
22861 __stack_chk_fail directly. */
22863 static tree ATTRIBUTE_UNUSED
22864 rs6000_stack_protect_fail (void)
22866 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22867 ? default_hidden_stack_protect_fail ()
22868 : default_external_stack_protect_fail ();
22871 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22873 #if TARGET_ELF
22874 static unsigned HOST_WIDE_INT
22875 rs6000_asan_shadow_offset (void)
22877 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22879 #endif
22881 /* Mask options that we want to support inside of attribute((target)) and
22882 #pragma GCC target operations. Note, we do not include things like
22883 64/32-bit, endianness, hard/soft floating point, etc. that would have
22884 different calling sequences. */
22886 struct rs6000_opt_mask {
22887 const char *name; /* option name */
22888 HOST_WIDE_INT mask; /* mask to set */
22889 bool invert; /* invert sense of mask */
22890 bool valid_target; /* option is a target option */
22893 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22895 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22896 { "cmpb", OPTION_MASK_CMPB, false, true },
22897 { "crypto", OPTION_MASK_CRYPTO, false, true },
22898 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22899 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22900 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22901 false, true },
22902 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22903 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22904 { "fprnd", OPTION_MASK_FPRND, false, true },
22905 { "future", OPTION_MASK_FUTURE, false, true },
22906 { "hard-dfp", OPTION_MASK_DFP, false, true },
22907 { "htm", OPTION_MASK_HTM, false, true },
22908 { "isel", OPTION_MASK_ISEL, false, true },
22909 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22910 { "mfpgpr", 0, false, true },
22911 { "modulo", OPTION_MASK_MODULO, false, true },
22912 { "mulhw", OPTION_MASK_MULHW, false, true },
22913 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22914 { "pcrel", OPTION_MASK_PCREL, false, true },
22915 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22916 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22917 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22918 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22919 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22920 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22921 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22922 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22923 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22924 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22925 { "prefixed", OPTION_MASK_PREFIXED, false, true },
22926 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22927 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22928 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22929 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22930 { "string", 0, false, true },
22931 { "update", OPTION_MASK_NO_UPDATE, true , true },
22932 { "vsx", OPTION_MASK_VSX, false, true },
22933 #ifdef OPTION_MASK_64BIT
22934 #if TARGET_AIX_OS
22935 { "aix64", OPTION_MASK_64BIT, false, false },
22936 { "aix32", OPTION_MASK_64BIT, true, false },
22937 #else
22938 { "64", OPTION_MASK_64BIT, false, false },
22939 { "32", OPTION_MASK_64BIT, true, false },
22940 #endif
22941 #endif
22942 #ifdef OPTION_MASK_EABI
22943 { "eabi", OPTION_MASK_EABI, false, false },
22944 #endif
22945 #ifdef OPTION_MASK_LITTLE_ENDIAN
22946 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22947 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22948 #endif
22949 #ifdef OPTION_MASK_RELOCATABLE
22950 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22951 #endif
22952 #ifdef OPTION_MASK_STRICT_ALIGN
22953 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22954 #endif
22955 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22956 { "string", 0, false, false },
22959 /* Builtin mask mapping for printing the flags. */
22960 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22962 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22963 { "vsx", RS6000_BTM_VSX, false, false },
22964 { "fre", RS6000_BTM_FRE, false, false },
22965 { "fres", RS6000_BTM_FRES, false, false },
22966 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22967 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22968 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22969 { "cell", RS6000_BTM_CELL, false, false },
22970 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22971 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22972 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22973 { "crypto", RS6000_BTM_CRYPTO, false, false },
22974 { "htm", RS6000_BTM_HTM, false, false },
22975 { "hard-dfp", RS6000_BTM_DFP, false, false },
22976 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22977 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22978 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22979 { "float128", RS6000_BTM_FLOAT128, false, false },
22980 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22983 /* Option variables that we want to support inside attribute((target)) and
22984 #pragma GCC target operations. */
22986 struct rs6000_opt_var {
22987 const char *name; /* option name */
22988 size_t global_offset; /* offset of the option in global_options. */
22989 size_t target_offset; /* offset of the option in target options. */
22992 static struct rs6000_opt_var const rs6000_opt_vars[] =
22994 { "friz",
22995 offsetof (struct gcc_options, x_TARGET_FRIZ),
22996 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22997 { "avoid-indexed-addresses",
22998 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22999 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23000 { "longcall",
23001 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23002 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23003 { "optimize-swaps",
23004 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23005 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23006 { "allow-movmisalign",
23007 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23008 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23009 { "sched-groups",
23010 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23011 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23012 { "always-hint",
23013 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23014 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23015 { "align-branch-targets",
23016 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23017 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23018 { "sched-prolog",
23019 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23020 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23021 { "sched-epilog",
23022 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23023 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23024 { "speculate-indirect-jumps",
23025 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23026 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
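/* These are boolean variables rather than mask bits, so for example:

     __attribute__ ((target ("no-longcall,friz")))
     void g (void);

   stores 0 into x_rs6000_default_long_calls and 1 into x_TARGET_FRIZ
   via the offsets recorded above.  */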
23029 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23030 parsing. Return true if there were no errors. */
23032 static bool
23033 rs6000_inner_target_options (tree args, bool attr_p)
23035 bool ret = true;
23037 if (args == NULL_TREE)
23040 else if (TREE_CODE (args) == STRING_CST)
23042 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23043 char *q;
23045 while ((q = strtok (p, ",")) != NULL)
23047 bool error_p = false;
23048 bool not_valid_p = false;
23049 const char *cpu_opt = NULL;
23051 p = NULL;
23052 if (strncmp (q, "cpu=", 4) == 0)
23054 int cpu_index = rs6000_cpu_name_lookup (q+4);
23055 if (cpu_index >= 0)
23056 rs6000_cpu_index = cpu_index;
23057 else
23059 error_p = true;
23060 cpu_opt = q+4;
23063 else if (strncmp (q, "tune=", 5) == 0)
23065 int tune_index = rs6000_cpu_name_lookup (q+5);
23066 if (tune_index >= 0)
23067 rs6000_tune_index = tune_index;
23068 else
23070 error_p = true;
23071 cpu_opt = q+5;
23074 else
23076 size_t i;
23077 bool invert = false;
23078 char *r = q;
23080 error_p = true;
23081 if (strncmp (r, "no-", 3) == 0)
23083 invert = true;
23084 r += 3;
23087 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23088 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23090 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23092 if (!rs6000_opt_masks[i].valid_target)
23093 not_valid_p = true;
23094 else
23096 error_p = false;
23097 rs6000_isa_flags_explicit |= mask;
23099 /* VSX needs altivec, so -mvsx automagically sets
23100 altivec and disables -mavoid-indexed-addresses. */
23101 if (!invert)
23103 if (mask == OPTION_MASK_VSX)
23105 mask |= OPTION_MASK_ALTIVEC;
23106 TARGET_AVOID_XFORM = 0;
23110 if (rs6000_opt_masks[i].invert)
23111 invert = !invert;
23113 if (invert)
23114 rs6000_isa_flags &= ~mask;
23115 else
23116 rs6000_isa_flags |= mask;
23118 break;
23121 if (error_p && !not_valid_p)
23123 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23124 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23126 size_t j = rs6000_opt_vars[i].global_offset;
23127 *((int *) ((char *)&global_options + j)) = !invert;
23128 error_p = false;
23129 not_valid_p = false;
23130 break;
23135 if (error_p)
23137 const char *eprefix, *esuffix;
23139 ret = false;
23140 if (attr_p)
23142 eprefix = "__attribute__((__target__(";
23143 esuffix = ")))";
23145 else
23147 eprefix = "#pragma GCC target ";
23148 esuffix = "";
23151 if (cpu_opt)
23152 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23153 q, esuffix);
23154 else if (not_valid_p)
23155 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23156 else
23157 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23162 else if (TREE_CODE (args) == TREE_LIST)
23166 tree value = TREE_VALUE (args);
23167 if (value)
23169 bool ret2 = rs6000_inner_target_options (value, attr_p);
23170 if (!ret2)
23171 ret = false;
23173 args = TREE_CHAIN (args);
23175 while (args != NULL_TREE);
23178 else
23180 error ("attribute %<target%> argument not a string");
23181 return false;
23184 return ret;
23187 /* Print out the target options as a list for -mdebug=target. */
23189 static void
23190 rs6000_debug_target_options (tree args, const char *prefix)
23192 if (args == NULL_TREE)
23193 fprintf (stderr, "%s<NULL>", prefix);
23195 else if (TREE_CODE (args) == STRING_CST)
23197 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23198 char *q;
23200 while ((q = strtok (p, ",")) != NULL)
23202 p = NULL;
23203 fprintf (stderr, "%s\"%s\"", prefix, q);
23204 prefix = ", ";
23208 else if (TREE_CODE (args) == TREE_LIST)
23212 tree value = TREE_VALUE (args);
23213 if (value)
23215 rs6000_debug_target_options (value, prefix);
23216 prefix = ", ";
23218 args = TREE_CHAIN (args);
23220 while (args != NULL_TREE);
23223 else
23224 gcc_unreachable ();
23226 return;
23230 /* Hook to validate attribute((target("..."))). */
23232 static bool
23233 rs6000_valid_attribute_p (tree fndecl,
23234 tree ARG_UNUSED (name),
23235 tree args,
23236 int flags)
23238 struct cl_target_option cur_target;
23239 bool ret;
23240 tree old_optimize;
23241 tree new_target, new_optimize;
23242 tree func_optimize;
23244 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23246 if (TARGET_DEBUG_TARGET)
23248 tree tname = DECL_NAME (fndecl);
23249 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23250 if (tname)
23251 fprintf (stderr, "function: %.*s\n",
23252 (int) IDENTIFIER_LENGTH (tname),
23253 IDENTIFIER_POINTER (tname));
23254 else
23255 fprintf (stderr, "function: unknown\n");
23257 fprintf (stderr, "args:");
23258 rs6000_debug_target_options (args, " ");
23259 fprintf (stderr, "\n");
23261 if (flags)
23262 fprintf (stderr, "flags: 0x%x\n", flags);
23264 fprintf (stderr, "--------------------\n");
23267 /* attribute((target("default"))) does nothing, beyond
23268 affecting multi-versioning. */
23269 if (TREE_VALUE (args)
23270 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23271 && TREE_CHAIN (args) == NULL_TREE
23272 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23273 return true;
23275 old_optimize = build_optimization_node (&global_options);
23276 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23278 /* If the function changed the optimization levels as well as setting target
23279 options, start with the optimizations specified. */
23280 if (func_optimize && func_optimize != old_optimize)
23281 cl_optimization_restore (&global_options,
23282 TREE_OPTIMIZATION (func_optimize));
23284 /* The target attributes may also change some optimization flags, so update
23285 the optimization options if necessary. */
23286 cl_target_option_save (&cur_target, &global_options);
23287 rs6000_cpu_index = rs6000_tune_index = -1;
23288 ret = rs6000_inner_target_options (args, true);
23290 /* Set up any additional state. */
23291 if (ret)
23293 ret = rs6000_option_override_internal (false);
23294 new_target = build_target_option_node (&global_options);
23296 else
23297 new_target = NULL;
23299 new_optimize = build_optimization_node (&global_options);
23301 if (!new_target)
23302 ret = false;
23304 else if (fndecl)
23306 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23308 if (old_optimize != new_optimize)
23309 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23312 cl_target_option_restore (&global_options, &cur_target);
23314 if (old_optimize != new_optimize)
23315 cl_optimization_restore (&global_options,
23316 TREE_OPTIMIZATION (old_optimize));
23318 return ret;
23322 /* Hook to validate the current #pragma GCC target and set the state, and
23323 update the macros based on what was changed. If ARGS is NULL, then
23324 POP_TARGET is used to reset the options. */
23326 bool
23327 rs6000_pragma_target_parse (tree args, tree pop_target)
23329 tree prev_tree = build_target_option_node (&global_options);
23330 tree cur_tree;
23331 struct cl_target_option *prev_opt, *cur_opt;
23332 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23333 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23335 if (TARGET_DEBUG_TARGET)
23337 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23338 fprintf (stderr, "args:");
23339 rs6000_debug_target_options (args, " ");
23340 fprintf (stderr, "\n");
23342 if (pop_target)
23344 fprintf (stderr, "pop_target:\n");
23345 debug_tree (pop_target);
23347 else
23348 fprintf (stderr, "pop_target: <NULL>\n");
23350 fprintf (stderr, "--------------------\n");
23353 if (! args)
23355 cur_tree = ((pop_target)
23356 ? pop_target
23357 : target_option_default_node);
23358 cl_target_option_restore (&global_options,
23359 TREE_TARGET_OPTION (cur_tree));
23361 else
23363 rs6000_cpu_index = rs6000_tune_index = -1;
23364 if (!rs6000_inner_target_options (args, false)
23365 || !rs6000_option_override_internal (false)
23366 || (cur_tree = build_target_option_node (&global_options))
23367 == NULL_TREE)
23369 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23370 fprintf (stderr, "invalid pragma\n");
23372 return false;
23376 target_option_current_node = cur_tree;
23377 rs6000_activate_target_options (target_option_current_node);
23379 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23380 change the macros that are defined. */
23381 if (rs6000_target_modify_macros_ptr)
23383 prev_opt = TREE_TARGET_OPTION (prev_tree);
23384 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23385 prev_flags = prev_opt->x_rs6000_isa_flags;
23387 cur_opt = TREE_TARGET_OPTION (cur_tree);
23388 cur_flags = cur_opt->x_rs6000_isa_flags;
23389 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23391 diff_bumask = (prev_bumask ^ cur_bumask);
23392 diff_flags = (prev_flags ^ cur_flags);
23394 if ((diff_flags != 0) || (diff_bumask != 0))
23396 /* Delete old macros. */
23397 rs6000_target_modify_macros_ptr (false,
23398 prev_flags & diff_flags,
23399 prev_bumask & diff_bumask);
23401 /* Define new macros. */
23402 rs6000_target_modify_macros_ptr (true,
23403 cur_flags & diff_flags,
23404 cur_bumask & diff_bumask);
23408 return true;
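/* A typical use, relying on the pop path above to restore state:

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,power9-vector")
     void fast (void) { ... }
     #pragma GCC pop_options
*/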
23412 /* Remember the last target of rs6000_set_current_function. */
23413 static GTY(()) tree rs6000_previous_fndecl;
23415 /* Restore target's globals from NEW_TREE and invalidate the
23416 rs6000_previous_fndecl cache. */
23418 void
23419 rs6000_activate_target_options (tree new_tree)
23421 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23422 if (TREE_TARGET_GLOBALS (new_tree))
23423 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23424 else if (new_tree == target_option_default_node)
23425 restore_target_globals (&default_target_globals);
23426 else
23427 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23428 rs6000_previous_fndecl = NULL_TREE;
23431 /* Establish appropriate back-end context for processing the function
23432 FNDECL. The argument might be NULL to indicate processing at top
23433 level, outside of any function scope. */
23434 static void
23435 rs6000_set_current_function (tree fndecl)
23437 if (TARGET_DEBUG_TARGET)
23439 fprintf (stderr, "\n==================== rs6000_set_current_function");
23441 if (fndecl)
23442 fprintf (stderr, ", fndecl %s (%p)",
23443 (DECL_NAME (fndecl)
23444 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23445 : "<unknown>"), (void *)fndecl);
23447 if (rs6000_previous_fndecl)
23448 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23450 fprintf (stderr, "\n");
23453 /* Only change the context if the function changes. This hook is called
23454 several times in the course of compiling a function, and we don't want to
23455 slow things down too much or call target_reinit when it isn't safe. */
23456 if (fndecl == rs6000_previous_fndecl)
23457 return;
23459 tree old_tree;
23460 if (rs6000_previous_fndecl == NULL_TREE)
23461 old_tree = target_option_current_node;
23462 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23463 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23464 else
23465 old_tree = target_option_default_node;
23467 tree new_tree;
23468 if (fndecl == NULL_TREE)
23470 if (old_tree != target_option_current_node)
23471 new_tree = target_option_current_node;
23472 else
23473 new_tree = NULL_TREE;
23475 else
23477 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23478 if (new_tree == NULL_TREE)
23479 new_tree = target_option_default_node;
23482 if (TARGET_DEBUG_TARGET)
23484 if (new_tree)
23486 fprintf (stderr, "\nnew fndecl target specific options:\n");
23487 debug_tree (new_tree);
23490 if (old_tree)
23492 fprintf (stderr, "\nold fndecl target specific options:\n");
23493 debug_tree (old_tree);
23496 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23497 fprintf (stderr, "--------------------\n");
23500 if (new_tree && old_tree != new_tree)
23501 rs6000_activate_target_options (new_tree);
23503 if (fndecl)
23504 rs6000_previous_fndecl = fndecl;
23508 /* Save the current options */
23510 static void
23511 rs6000_function_specific_save (struct cl_target_option *ptr,
23512 struct gcc_options *opts)
23514 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23515 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23518 /* Restore the current options */
23520 static void
23521 rs6000_function_specific_restore (struct gcc_options *opts,
23522 struct cl_target_option *ptr)
23525 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23526 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23527 (void) rs6000_option_override_internal (false);
23530 /* Print the current options */
23532 static void
23533 rs6000_function_specific_print (FILE *file, int indent,
23534 struct cl_target_option *ptr)
23536 rs6000_print_isa_options (file, indent, "Isa options set",
23537 ptr->x_rs6000_isa_flags);
23539 rs6000_print_isa_options (file, indent, "Isa options explicit",
23540 ptr->x_rs6000_isa_flags_explicit);
23543 /* Helper function to print the current isa or misc options on a line. */
23545 static void
23546 rs6000_print_options_internal (FILE *file,
23547 int indent,
23548 const char *string,
23549 HOST_WIDE_INT flags,
23550 const char *prefix,
23551 const struct rs6000_opt_mask *opts,
23552 size_t num_elements)
23554 size_t i;
23555 size_t start_column = 0;
23556 size_t cur_column;
23557 size_t max_column = 120;
23558 size_t prefix_len = strlen (prefix);
23559 size_t comma_len = 0;
23560 const char *comma = "";
23562 if (indent)
23563 start_column += fprintf (file, "%*s", indent, "");
23565 if (!flags)
23567 fprintf (file, DEBUG_FMT_S, string, "<none>");
23568 return;
23571 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23573 /* Print the various mask options. */
23574 cur_column = start_column;
23575 for (i = 0; i < num_elements; i++)
23577 bool invert = opts[i].invert;
23578 const char *name = opts[i].name;
23579 const char *no_str = "";
23580 HOST_WIDE_INT mask = opts[i].mask;
23581 size_t len = comma_len + prefix_len + strlen (name);
23583 if (!invert)
23585 if ((flags & mask) == 0)
23587 no_str = "no-";
23588 len += strlen ("no-");
23591 flags &= ~mask;
23594 else
23596 if ((flags & mask) != 0)
23598 no_str = "no-";
23599 len += strlen ("no-");
23602 flags |= mask;
23605 cur_column += len;
23606 if (cur_column > max_column)
23608 fprintf (file, ", \\\n%*s", (int)start_column, "");
23609 cur_column = start_column + len;
23610 comma = "";
23613 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23614 comma = ", ";
23615 comma_len = strlen (", ");
23618 fputs ("\n", file);
23621 /* Helper function to print the current isa options on a line. */
23623 static void
23624 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23625 HOST_WIDE_INT flags)
23627 rs6000_print_options_internal (file, indent, string, flags, "-m",
23628 &rs6000_opt_masks[0],
23629 ARRAY_SIZE (rs6000_opt_masks));
23632 static void
23633 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23634 HOST_WIDE_INT flags)
23636 rs6000_print_options_internal (file, indent, string, flags, "",
23637 &rs6000_builtin_mask_names[0],
23638 ARRAY_SIZE (rs6000_builtin_mask_names));
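/* With -mdebug=target the two helpers above print lines of roughly
   the form:

     Isa options set = 0x...: -maltivec, -mvsx, -mno-htm, ...

   wrapping with a trailing backslash once column 120 is reached.  */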
23641 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23642 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23643 -mupper-regs-df, etc.).
23645 If the user used -mno-power8-vector, we need to turn off all of the implicit
23646 ISA 2.07 and 3.0 options that relate to the vector unit.
23648 If the user used -mno-power9-vector, we need to turn off all of the implicit
23649 ISA 3.0 options that relate to the vector unit.
23651 This function does not handle explicit options such as the user specifying
23652 -mdirect-move. These are handled in rs6000_option_override_internal, and
23653 the appropriate error is given if needed.
23655 We return a mask of all of the implicit options that should not be enabled
23656 by default. */
23658 static HOST_WIDE_INT
23659 rs6000_disable_incompatible_switches (void)
23661 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23662 size_t i, j;
23664 static const struct {
23665 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23666 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23667 const char *const name; /* name of the switch. */
23668 } flags[] = {
23669 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23670 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23671 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23672 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23673 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
23676 for (i = 0; i < ARRAY_SIZE (flags); i++)
23678 HOST_WIDE_INT no_flag = flags[i].no_flag;
23680 if ((rs6000_isa_flags & no_flag) == 0
23681 && (rs6000_isa_flags_explicit & no_flag) != 0)
23683 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23684 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23685 & rs6000_isa_flags
23686 & dep_flags);
23688 if (set_flags)
23690 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23691 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23693 set_flags &= ~rs6000_opt_masks[j].mask;
23694 error ("%<-mno-%s%> turns off %<-m%s%>",
23695 flags[i].name,
23696 rs6000_opt_masks[j].name);
23699 gcc_assert (!set_flags);
23702 rs6000_isa_flags &= ~dep_flags;
23703 ignore_masks |= no_flag | dep_flags;
23707 return ignore_masks;
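/* For example, "-mno-vsx -mpower8-vector" (both explicit) reports
   "-mno-vsx turns off -mpower8-vector", and the returned mask stops
   the defaults from silently re-enabling the dependent vector
   options.  */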
23711 /* Helper function for printing the function name when debugging. */
23713 static const char *
23714 get_decl_name (tree fn)
23716 tree name;
23718 if (!fn)
23719 return "<null>";
23721 name = DECL_NAME (fn);
23722 if (!name)
23723 return "<no-name>";
23725 return IDENTIFIER_POINTER (name);
23728 /* Return the clone id of the target we are compiling code for in a target
23729 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23730 the priority list for the target clones (ordered from lowest to
23731 highest). */
23733 static int
23734 rs6000_clone_priority (tree fndecl)
23736 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23737 HOST_WIDE_INT isa_masks;
23738 int ret = CLONE_DEFAULT;
23739 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23740 const char *attrs_str = NULL;
23742 attrs = TREE_VALUE (TREE_VALUE (attrs));
23743 attrs_str = TREE_STRING_POINTER (attrs);
23745 /* Return priority zero for default function. Return the ISA needed for the
23746 function if it is not the default. */
23747 if (strcmp (attrs_str, "default") != 0)
23749 if (fn_opts == NULL_TREE)
23750 fn_opts = target_option_default_node;
23752 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23753 isa_masks = rs6000_isa_flags;
23754 else
23755 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23757 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23758 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23759 break;
23762 if (TARGET_DEBUG_TARGET)
23763 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
23764 get_decl_name (fndecl), ret);
23766 return ret;
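/* For example:

     __attribute__ ((target_clones ("cpu=power9,default")))
     long mod_func (long a, long b) { return (a % b) + b; }

   compiles two clones; the power9 clone gets a higher priority than
   CLONE_DEFAULT and so is tested first at dispatch time.  */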
23769 /* This compares the priority of target features in function DECL1 and DECL2.
23770 It returns positive value if DECL1 is higher priority, negative value if
23771 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23772 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23774 static int
23775 rs6000_compare_version_priority (tree decl1, tree decl2)
23777 int priority1 = rs6000_clone_priority (decl1);
23778 int priority2 = rs6000_clone_priority (decl2);
23779 int ret = priority1 - priority2;
23781 if (TARGET_DEBUG_TARGET)
23782 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23783 get_decl_name (decl1), get_decl_name (decl2), ret);
23785 return ret;
23788 /* Make a dispatcher declaration for the multi-versioned function DECL.
23789 Calls to DECL function will be replaced with calls to the dispatcher
23790 by the front-end. Returns the decl of the dispatcher function. */
23792 static tree
23793 rs6000_get_function_versions_dispatcher (void *decl)
23795 tree fn = (tree) decl;
23796 struct cgraph_node *node = NULL;
23797 struct cgraph_node *default_node = NULL;
23798 struct cgraph_function_version_info *node_v = NULL;
23799 struct cgraph_function_version_info *first_v = NULL;
23801 tree dispatch_decl = NULL;
23803 struct cgraph_function_version_info *default_version_info = NULL;
23804 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23806 if (TARGET_DEBUG_TARGET)
23807 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23808 get_decl_name (fn));
23810 node = cgraph_node::get (fn);
23811 gcc_assert (node != NULL);
23813 node_v = node->function_version ();
23814 gcc_assert (node_v != NULL);
23816 if (node_v->dispatcher_resolver != NULL)
23817 return node_v->dispatcher_resolver;
23819 /* Find the default version and make it the first node. */
23820 first_v = node_v;
23821 /* Go to the beginning of the chain. */
23822 while (first_v->prev != NULL)
23823 first_v = first_v->prev;
23825 default_version_info = first_v;
23826 while (default_version_info != NULL)
23828 const tree decl2 = default_version_info->this_node->decl;
23829 if (is_function_default_version (decl2))
23830 break;
23831 default_version_info = default_version_info->next;
23834 /* If there is no default node, just return NULL. */
23835 if (default_version_info == NULL)
23836 return NULL;
23838 /* Make default info the first node. */
23839 if (first_v != default_version_info)
23841 default_version_info->prev->next = default_version_info->next;
23842 if (default_version_info->next)
23843 default_version_info->next->prev = default_version_info->prev;
23844 first_v->prev = default_version_info;
23845 default_version_info->next = first_v;
23846 default_version_info->prev = NULL;
23849 default_node = default_version_info->this_node;
23851 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23852 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23853 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23854 "exports hardware capability bits");
23855 #else
23857 if (targetm.has_ifunc_p ())
23859 struct cgraph_function_version_info *it_v = NULL;
23860 struct cgraph_node *dispatcher_node = NULL;
23861 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23863 /* Right now, the dispatching is done via ifunc. */
23864 dispatch_decl = make_dispatcher_decl (default_node->decl);
23866 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23867 gcc_assert (dispatcher_node != NULL);
23868 dispatcher_node->dispatcher_function = 1;
23869 dispatcher_version_info
23870 = dispatcher_node->insert_new_function_version ();
23871 dispatcher_version_info->next = default_version_info;
23872 dispatcher_node->definition = 1;
23874 /* Set the dispatcher for all the versions. */
23875 it_v = default_version_info;
23876 while (it_v != NULL)
23878 it_v->dispatcher_resolver = dispatch_decl;
23879 it_v = it_v->next;
23882 else
23884 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23885 "multiversioning needs ifunc which is not supported "
23886 "on this target");
23888 #endif
23890 return dispatch_decl;
23893 /* Make the resolver function decl to dispatch the versions of a multi-
23894 versioned function, DEFAULT_DECL. Create an empty basic block in the
23895 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23896 function. */
23898 static tree
23899 make_resolver_func (const tree default_decl,
23900 const tree dispatch_decl,
23901 basic_block *empty_bb)
23903 /* Make the resolver function static. The resolver function returns
23904 void *. */
23905 tree decl_name = clone_function_name (default_decl, "resolver");
23906 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23907 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23908 tree decl = build_fn_decl (resolver_name, type);
23909 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23911 DECL_NAME (decl) = decl_name;
23912 TREE_USED (decl) = 1;
23913 DECL_ARTIFICIAL (decl) = 1;
23914 DECL_IGNORED_P (decl) = 0;
23915 TREE_PUBLIC (decl) = 0;
23916 DECL_UNINLINABLE (decl) = 1;
23918 /* Resolver is not external, body is generated. */
23919 DECL_EXTERNAL (decl) = 0;
23920 DECL_EXTERNAL (dispatch_decl) = 0;
23922 DECL_CONTEXT (decl) = NULL_TREE;
23923 DECL_INITIAL (decl) = make_node (BLOCK);
23924 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23926 if (DECL_COMDAT_GROUP (default_decl)
23927 || TREE_PUBLIC (default_decl))
23929 /* In this case, each translation unit with a call to this
23930 versioned function will put out a resolver. Ensure it
23931 is comdat to keep just one copy. */
23932 DECL_COMDAT (decl) = 1;
23933 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
23935 else
23936 TREE_PUBLIC (dispatch_decl) = 0;
23938 /* Build result decl and add to function_decl. */
23939 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23940 DECL_CONTEXT (t) = decl;
23941 DECL_ARTIFICIAL (t) = 1;
23942 DECL_IGNORED_P (t) = 1;
23943 DECL_RESULT (decl) = t;
23945 gimplify_function_tree (decl);
23946 push_cfun (DECL_STRUCT_FUNCTION (decl));
23947 *empty_bb = init_lowered_empty_function (decl, false,
23948 profile_count::uninitialized ());
23950 cgraph_node::add_new_function (decl, true);
23951 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23953 pop_cfun ();
23955 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23956 DECL_ATTRIBUTES (dispatch_decl)
23957 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23959 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23961 return decl;
23964 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23965 return a pointer to VERSION_DECL if we are running on a machine that
23966 supports the index CLONE_ISA hardware architecture bits. This function will
23967 be called during version dispatch to decide which function version to
23968 execute. It returns the basic block at the end, to which more conditions
23969 can be added. */
23971 static basic_block
23972 add_condition_to_bb (tree function_decl, tree version_decl,
23973 int clone_isa, basic_block new_bb)
23975 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23977 gcc_assert (new_bb != NULL);
23978 gimple_seq gseq = bb_seq (new_bb);
23981 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23982 build_fold_addr_expr (version_decl));
23983 tree result_var = create_tmp_var (ptr_type_node);
23984 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23985 gimple *return_stmt = gimple_build_return (result_var);
23987 if (clone_isa == CLONE_DEFAULT)
23989 gimple_seq_add_stmt (&gseq, convert_stmt);
23990 gimple_seq_add_stmt (&gseq, return_stmt);
23991 set_bb_seq (new_bb, gseq);
23992 gimple_set_bb (convert_stmt, new_bb);
23993 gimple_set_bb (return_stmt, new_bb);
23994 pop_cfun ();
23995 return new_bb;
23998 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23999 tree cond_var = create_tmp_var (bool_int_type_node);
24000 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24001 const char *arg_str = rs6000_clone_map[clone_isa].name;
24002 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24003 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24004 gimple_call_set_lhs (call_cond_stmt, cond_var);
24006 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24007 gimple_set_bb (call_cond_stmt, new_bb);
24008 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24010 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24011 NULL_TREE, NULL_TREE);
24012 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24013 gimple_set_bb (if_else_stmt, new_bb);
24014 gimple_seq_add_stmt (&gseq, if_else_stmt);
24016 gimple_seq_add_stmt (&gseq, convert_stmt);
24017 gimple_seq_add_stmt (&gseq, return_stmt);
24018 set_bb_seq (new_bb, gseq);
24020 basic_block bb1 = new_bb;
24021 edge e12 = split_block (bb1, if_else_stmt);
24022 basic_block bb2 = e12->dest;
24023 e12->flags &= ~EDGE_FALLTHRU;
24024 e12->flags |= EDGE_TRUE_VALUE;
24026 edge e23 = split_block (bb2, return_stmt);
24027 gimple_set_bb (convert_stmt, bb2);
24028 gimple_set_bb (return_stmt, bb2);
24030 basic_block bb3 = e23->dest;
24031 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24033 remove_edge (e23);
24034 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24036 pop_cfun ();
24037 return bb3;
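/* Put together, the resolver built by make_resolver_func plus the
   chain of conditions added here behaves roughly like:

     static void *foo_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))   // e.g. a power9 clone
         return foo_power9;
       return foo_default;
     }

   where the string comes from rs6000_clone_map[clone_isa].name.  */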
24040 /* This function generates the dispatch function for multi-versioned functions.
24041 DISPATCH_DECL is the function which will contain the dispatch logic.
24042 FNDECLS are the function choices for dispatch, and is a tree chain.
24043 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24044 code is generated. */
24046 static int
24047 dispatch_function_versions (tree dispatch_decl,
24048 void *fndecls_p,
24049 basic_block *empty_bb)
24051 int ix;
24052 tree ele;
24053 vec<tree> *fndecls;
24054 tree clones[CLONE_MAX];
24056 if (TARGET_DEBUG_TARGET)
24057 fputs ("dispatch_function_versions, top\n", stderr);
24059 gcc_assert (dispatch_decl != NULL
24060 && fndecls_p != NULL
24061 && empty_bb != NULL);
24063 /* fndecls_p is actually a vector. */
24064 fndecls = static_cast<vec<tree> *> (fndecls_p);
24066 /* At least one more version other than the default. */
24067 gcc_assert (fndecls->length () >= 2);
24069 /* The first version in the vector is the default decl. */
24070 memset ((void *) clones, '\0', sizeof (clones));
24071 clones[CLONE_DEFAULT] = (*fndecls)[0];
24073 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24074 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24075 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24076 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24077 to insert the code here to do the call. */
24079 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24081 int priority = rs6000_clone_priority (ele);
24082 if (!clones[priority])
24083 clones[priority] = ele;
24086 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24087 if (clones[ix])
24089 if (TARGET_DEBUG_TARGET)
24090 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24091 ix, get_decl_name (clones[ix]));
24093 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24094 *empty_bb);
24097 return 0;
24100 /* Generate the dispatching code body to dispatch multi-versioned function
24101 DECL. The target hook is called to process the "target" attributes and
24102 provide the code to dispatch the right function at run-time. NODE points
24103 to the dispatcher decl whose body will be created. */
24105 static tree
24106 rs6000_generate_version_dispatcher_body (void *node_p)
24108 tree resolver;
24109 basic_block empty_bb;
24110 struct cgraph_node *node = (cgraph_node *) node_p;
24111 struct cgraph_function_version_info *ninfo = node->function_version ();
24113 if (ninfo->dispatcher_resolver)
24114 return ninfo->dispatcher_resolver;
24116 /* node is going to be an alias, so remove the finalized bit. */
24117 node->definition = false;
24119 /* The first version in the chain corresponds to the default version. */
24120 ninfo->dispatcher_resolver = resolver
24121 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24123 if (TARGET_DEBUG_TARGET)
24124 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
24125 get_decl_name (resolver));
24127 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24128 auto_vec<tree, 2> fn_ver_vec;
24130 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24131 vinfo;
24132 vinfo = vinfo->next)
24134 struct cgraph_node *version = vinfo->this_node;
24135 /* Check for virtual functions here again, as by this time it should
24136 have been determined if this function needs a vtable index or
24137 not. This happens for methods in derived classes that override
24138 virtual methods in base classes but are not explicitly marked as
24139 virtual. */
24140 if (DECL_VINDEX (version->decl))
24141 sorry ("Virtual function multiversioning not supported");
24143 fn_ver_vec.safe_push (version->decl);
24146 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24147 cgraph_edge::rebuild_edges ();
24148 pop_cfun ();
24149 return resolver;
24153 /* Hook to determine if one function can safely inline another. */
24155 static bool
24156 rs6000_can_inline_p (tree caller, tree callee)
24158 bool ret = false;
24159 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24160 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24162 /* If the callee has no option attributes, then it is ok to inline. */
24163 if (!callee_tree)
24164 ret = true;
24166 else
24168 HOST_WIDE_INT caller_isa;
24169 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24170 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24171 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24173 /* If the caller has option attributes, then use them.
24174 Otherwise, use the command line options. */
24175 if (caller_tree)
24176 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24177 else
24178 caller_isa = rs6000_isa_flags;
24180 /* The callee's options must be a subset of the caller's options, i.e.
24181 a vsx function may inline an altivec function, but a no-vsx function
24182 must not inline a vsx function. However, for those options that the
24183 callee has explicitly enabled or disabled, we must enforce that
24184 the callee's and caller's options match exactly; see PR70010. */
24185 if (((caller_isa & callee_isa) == callee_isa)
24186 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24187 ret = true;
24190 if (TARGET_DEBUG_TARGET)
24191 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24192 get_decl_name (caller), get_decl_name (callee),
24193 (ret ? "can" : "cannot"));
24195 return ret;
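/* For example, a caller compiled with -mcpu=power9 (so -mvsx) may
   inline a callee marked target("altivec"), because the callee's ISA
   bits are a subset of the caller's and the explicitly set altivec
   bit matches on both sides.  A caller compiled with -mno-vsx must
   not inline a callee marked target("vsx").  */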
24198 /* Allocate a stack temp and fix up the address so it meets the particular
24199 memory requirements (either offsettable or REG+REG addressing). */
24202 rs6000_allocate_stack_temp (machine_mode mode,
24203 bool offsettable_p,
24204 bool reg_reg_p)
24206 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24207 rtx addr = XEXP (stack, 0);
24208 int strict_p = reload_completed;
24210 if (!legitimate_indirect_address_p (addr, strict_p))
24212 if (offsettable_p
24213 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24214 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24216 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24217 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24220 return stack;
24223 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24224 convert to such a form to deal with memory reference instructions
24225 like STFIWX and LDBRX that only take reg+reg addressing. */
24228 rs6000_force_indexed_or_indirect_mem (rtx x)
24230 machine_mode mode = GET_MODE (x);
24232 gcc_assert (MEM_P (x));
24233 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24235 rtx addr = XEXP (x, 0);
24236 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24238 rtx reg = XEXP (addr, 0);
24239 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24240 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24241 gcc_assert (REG_P (reg));
24242 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24243 addr = reg;
24245 else if (GET_CODE (addr) == PRE_MODIFY)
24247 rtx reg = XEXP (addr, 0);
24248 rtx expr = XEXP (addr, 1);
24249 gcc_assert (REG_P (reg));
24250 gcc_assert (GET_CODE (expr) == PLUS);
24251 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24252 addr = reg;
24255 if (GET_CODE (addr) == PLUS)
24257 rtx op0 = XEXP (addr, 0);
24258 rtx op1 = XEXP (addr, 1);
24259 op0 = force_reg (Pmode, op0);
24260 op1 = force_reg (Pmode, op1);
24261 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24263 else
24264 x = replace_equiv_address (x, force_reg (Pmode, addr));
24267 return x;
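/* For example, (mem (plus (reg) (const_int 16))) is rewritten so the
   constant 16 is first loaded into a register, yielding the reg+reg
   form that STFIWX and LDBRX require.  */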
24270 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24272 On the RS/6000, all integer constants are acceptable, most won't be valid
24273 for particular insns, though. Only easy FP constants are acceptable. */
24275 static bool
24276 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24278 if (TARGET_ELF && tls_referenced_p (x))
24279 return false;
24281 if (CONST_DOUBLE_P (x))
24282 return easy_fp_constant (x, mode);
24284 if (GET_CODE (x) == CONST_VECTOR)
24285 return easy_vector_constant (x, mode);
24287 return true;
24291 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24293 static bool
24294 chain_already_loaded (rtx_insn *last)
24296 for (; last != NULL; last = PREV_INSN (last))
24298 if (NONJUMP_INSN_P (last))
24300 rtx patt = PATTERN (last);
24302 if (GET_CODE (patt) == SET)
24304 rtx lhs = XEXP (patt, 0);
24306 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24307 return true;
24311 return false;
24314 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24316 void
24317 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24319 rtx func = func_desc;
24320 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24321 rtx toc_load = NULL_RTX;
24322 rtx toc_restore = NULL_RTX;
24323 rtx func_addr;
24324 rtx abi_reg = NULL_RTX;
24325 rtx call[5];
24326 int n_call;
24327 rtx insn;
24328 bool is_pltseq_longcall;
24330 if (global_tlsarg)
24331 tlsarg = global_tlsarg;
24333 /* Handle longcall attributes. */
24334 is_pltseq_longcall = false;
24335 if ((INTVAL (cookie) & CALL_LONG) != 0
24336 && GET_CODE (func_desc) == SYMBOL_REF)
24338 func = rs6000_longcall_ref (func_desc, tlsarg);
24339 if (TARGET_PLTSEQ)
24340 is_pltseq_longcall = true;
24343 /* Handle indirect calls. */
24344 if (!SYMBOL_REF_P (func)
24345 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24347 if (!rs6000_pcrel_p (cfun))
24349 /* Save the TOC into its reserved slot before the call,
24350 and prepare to restore it after the call. */
24351 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24352 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24353 gen_rtvec (1, stack_toc_offset),
24354 UNSPEC_TOCSLOT);
24355 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24357 /* Can we optimize saving the TOC in the prologue or
24358 do we need to do it at every call? */
24359 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24360 cfun->machine->save_toc_in_prologue = true;
24361 else
24363 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24364 rtx stack_toc_mem = gen_frame_mem (Pmode,
24365 gen_rtx_PLUS (Pmode, stack_ptr,
24366 stack_toc_offset));
24367 MEM_VOLATILE_P (stack_toc_mem) = 1;
24368 if (is_pltseq_longcall)
24370 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24371 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24372 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24374 else
24375 emit_move_insn (stack_toc_mem, toc_reg);
24379 if (DEFAULT_ABI == ABI_ELFv2)
24381 /* A function pointer in the ELFv2 ABI is just a plain address, but
24382 the ABI requires it to be loaded into r12 before the call. */
24383 func_addr = gen_rtx_REG (Pmode, 12);
24384 if (!rtx_equal_p (func_addr, func))
24385 emit_move_insn (func_addr, func);
24386 abi_reg = func_addr;
24387 /* Indirect calls via CTR are strongly preferred over indirect
24388 calls via LR, so move the address there. Needed to mark
24389 this insn for linker plt sequence editing too. */
24390 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24391 if (is_pltseq_longcall)
24393 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24394 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24395 emit_insn (gen_rtx_SET (func_addr, mark_func));
24396 v = gen_rtvec (2, func_addr, func_desc);
24397 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24399 else
24400 emit_move_insn (func_addr, abi_reg);
24402 else
24404 /* A function pointer under AIX is a pointer to a data area whose
24405 first word contains the actual address of the function, whose
24406 second word contains a pointer to its TOC, and whose third word
24407 contains a value to place in the static chain register (r11).
24408 Note that if we load the static chain, our "trampoline" need
24409 not have any executable code. */
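/* Conceptually the data area is:

     struct aix_fn_desc
     {
       void *entry;   // word 0: address of the actual code
       void *toc;     // word 1: the function's TOC pointer
       void *chain;   // word 2: static chain value
     };

   The loads below index off FUNC by GET_MODE_SIZE (Pmode) to reach
   words 1 and 2.  */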
24411 /* Load up address of the actual function. */
24412 func = force_reg (Pmode, func);
24413 func_addr = gen_reg_rtx (Pmode);
24414 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24416 /* Indirect calls via CTR are strongly preferred over indirect
24417 calls via LR, so move the address there. */
24418 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24419 emit_move_insn (ctr_reg, func_addr);
24420 func_addr = ctr_reg;
24422 /* Prepare to load the TOC of the called function. Note that the
24423 TOC load must happen immediately before the actual call so
24424 that unwinding the TOC registers works correctly. See the
24425 comment in frob_update_context. */
24426 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24427 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24428 gen_rtx_PLUS (Pmode, func,
24429 func_toc_offset));
24430 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24432 /* If we have a static chain, load it up. But, if the call was
24433 originally direct, the 3rd word has not been written since no
24434 trampoline has been built, so we ought not to load it, lest we
24435 override a static chain value. */
24436 if (!(GET_CODE (func_desc) == SYMBOL_REF
24437 && SYMBOL_REF_FUNCTION_P (func_desc))
24438 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24439 && !chain_already_loaded (get_current_sequence ()->next->last))
24441 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24442 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24443 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24444 gen_rtx_PLUS (Pmode, func,
24445 func_sc_offset));
24446 emit_move_insn (sc_reg, func_sc_mem);
24447 abi_reg = sc_reg;
24451 else
24453 /* No TOC register needed for calls from PC-relative callers. */
24454 if (!rs6000_pcrel_p (cfun))
24455 /* Direct calls use the TOC: for local calls, the callee will
24456 assume the TOC register is set; for non-local calls, the
24457 PLT stub needs the TOC register. */
24458 abi_reg = toc_reg;
24459 func_addr = func;
24462 /* Create the call. */
24463 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24464 if (value != NULL_RTX)
24465 call[0] = gen_rtx_SET (value, call[0]);
24466 call[1] = gen_rtx_USE (VOIDmode, cookie);
24467 n_call = 2;
24469 if (toc_load)
24470 call[n_call++] = toc_load;
24471 if (toc_restore)
24472 call[n_call++] = toc_restore;
24474 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24476 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24477 insn = emit_call_insn (insn);
24479 /* Mention all registers defined by the ABI to hold information
24480 as uses in CALL_INSN_FUNCTION_USAGE. */
24481 if (abi_reg)
24482 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24485 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24487 void
24488 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24490 rtx call[2];
24491 rtx insn;
24493 gcc_assert (INTVAL (cookie) == 0);
24495 if (global_tlsarg)
24496 tlsarg = global_tlsarg;
24498 /* Create the call. */
24499 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24500 if (value != NULL_RTX)
24501 call[0] = gen_rtx_SET (value, call[0]);
24503 call[1] = simple_return_rtx;
24505 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24506 insn = emit_call_insn (insn);
24508 /* Note use of the TOC register. */
24509 if (!rs6000_pcrel_p (cfun))
24510 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24511 gen_rtx_REG (Pmode, TOC_REGNUM));
24514 /* Expand code to perform a call under the SYSV4 ABI. */
24516 void
24517 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24519 rtx func = func_desc;
24520 rtx func_addr;
24521 rtx call[4];
24522 rtx insn;
24523 rtx abi_reg = NULL_RTX;
24524 int n;
24526 if (global_tlsarg)
24527 tlsarg = global_tlsarg;
24529 /* Handle longcall attributes. */
24530 if ((INTVAL (cookie) & CALL_LONG) != 0
24531 && GET_CODE (func_desc) == SYMBOL_REF)
24533 func = rs6000_longcall_ref (func_desc, tlsarg);
24534 /* If the longcall was implemented as an inline PLT call using
24535 PLT unspecs then func will be REG:r11. If not, func will be
24536 a pseudo reg. The inline PLT call sequence supports lazy
24537 linking (and longcalls to functions in dlopen'd libraries).
24538 The other style of longcalls don't. The lazy linking entry
24539 to the dynamic symbol resolver requires r11 be the function
24540 address (as it is for linker generated PLT stubs). Ensure
24541 r11 stays valid to the bctrl by marking r11 used by the call. */
24542 if (TARGET_PLTSEQ)
24543 abi_reg = func;
24546 /* Handle indirect calls. */
24547 if (GET_CODE (func) != SYMBOL_REF)
24549 func = force_reg (Pmode, func);
24551 /* Indirect calls via CTR are strongly preferred over indirect
24552 calls via LR, so move the address there. That can't be left
24553 to reload because we want to mark every instruction in an
24554 inline PLT call sequence with a reloc, enabling the linker to
24555 edit the sequence back to a direct call when that makes sense. */
24556 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24557 if (abi_reg)
24559 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24560 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24561 emit_insn (gen_rtx_SET (func_addr, mark_func));
24562 v = gen_rtvec (2, func_addr, func_desc);
24563 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24565 else
24566 emit_move_insn (func_addr, func);
24568 else
24569 func_addr = func;
24571 /* Create the call. */
24572 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24573 if (value != NULL_RTX)
24574 call[0] = gen_rtx_SET (value, call[0]);
24576 call[1] = gen_rtx_USE (VOIDmode, cookie);
24577 n = 2;
24578 if (TARGET_SECURE_PLT
24579 && flag_pic
24580 && GET_CODE (func_addr) == SYMBOL_REF
24581 && !SYMBOL_REF_LOCAL_P (func_addr))
24582 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24584 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24586 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24587 insn = emit_call_insn (insn);
24588 if (abi_reg)
24589 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24592 /* Expand code to perform a sibling call under the SysV4 ABI. */
24594 void
24595 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24597 rtx func = func_desc;
24598 rtx func_addr;
24599 rtx call[3];
24600 rtx insn;
24601 rtx abi_reg = NULL_RTX;
24603 if (global_tlsarg)
24604 tlsarg = global_tlsarg;
24606 /* Handle longcall attributes. */
24607 if ((INTVAL (cookie) & CALL_LONG) != 0
24608 && GET_CODE (func_desc) == SYMBOL_REF)
24610 func = rs6000_longcall_ref (func_desc, tlsarg);
24611 /* If the longcall was implemented as an inline PLT call using
24612 PLT unspecs then func will be REG:r11. If not, func will be
24613 a pseudo reg. The inline PLT call sequence supports lazy
24614 linking (and longcalls to functions in dlopen'd libraries).
24615 The other style of longcalls don't. The lazy linking entry
24616 to the dynamic symbol resolver requires r11 be the function
24617 address (as it is for linker generated PLT stubs). Ensure
24618 r11 stays valid to the bctr by marking r11 used by the call. */
24619 if (TARGET_PLTSEQ)
24620 abi_reg = func;
24623 /* Handle indirect calls. */
24624 if (GET_CODE (func) != SYMBOL_REF)
24626 func = force_reg (Pmode, func);
24628 /* Indirect sibcalls must go via CTR. That can't be left to
24629 reload because we want to mark every instruction in an inline
24630 PLT call sequence with a reloc, enabling the linker to edit
24631 the sequence back to a direct call when that makes sense. */
24632 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24633 if (abi_reg)
24635 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24636 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24637 emit_insn (gen_rtx_SET (func_addr, mark_func));
24638 v = gen_rtvec (2, func_addr, func_desc);
24639 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24641 else
24642 emit_move_insn (func_addr, func);
24644 else
24645 func_addr = func;
24647 /* Create the call. */
24648 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24649 if (value != NULL_RTX)
24650 call[0] = gen_rtx_SET (value, call[0]);
24652 call[1] = gen_rtx_USE (VOIDmode, cookie);
24653 call[2] = simple_return_rtx;
24655 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24656 insn = emit_call_insn (insn);
24657 if (abi_reg)
24658 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24661 #if TARGET_MACHO
24663 /* Expand code to perform a call under the Darwin ABI.
24664 Modulo handling of mlongcall, this is much the same as sysv.
24665 If/when the longcall optimisation is removed, we could drop this
24666 code and use the sysv case (taking care to avoid the tls stuff).
24668 We can use this for sibcalls too, if needed. */
24670 void
24671 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24672 rtx cookie, bool sibcall)
24674 rtx func = func_desc;
24675 rtx func_addr;
24676 rtx call[3];
24677 rtx insn;
24678 int cookie_val = INTVAL (cookie);
24679 bool make_island = false;
24681 /* Handle longcall attributes, there are two cases for Darwin:
24682 1) Newer linkers are capable of synthesising any branch islands needed.
24683 2) We need a helper branch island synthesised by the compiler.
24684 The second case has mostly been retired and we don't use it for m64.
24685 In fact, it is an optimisation; we could just indirect as sysv does,
24686 but we keep it for backwards compatibility for now.
24687 If we're going to use this, then we need to keep the CALL_LONG bit set,
24688 so that we can pick up the special insn form later. */
24689 if ((cookie_val & CALL_LONG) != 0
24690 && GET_CODE (func_desc) == SYMBOL_REF)
24692 /* FIXME: the longcall opt should not hang off this flag, it is most
24693 likely incorrect for kernel-mode code-generation. */
24694 if (darwin_symbol_stubs && TARGET_32BIT)
24695 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24696 else
24698 /* The linker is capable of doing this, but the user explicitly
24699 asked for -mlongcall, so we'll do the 'normal' version. */
24700 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24701 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24705 /* Handle indirect calls. */
24706 if (GET_CODE (func) != SYMBOL_REF)
24708 func = force_reg (Pmode, func);
24710 /* Indirect calls via CTR are strongly preferred over indirect
24711 calls via LR, and are required for indirect sibcalls, so move
24712 the address there. */
24713 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24714 emit_move_insn (func_addr, func);
24716 else
24717 func_addr = func;
24719 /* Create the call. */
24720 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24721 if (value != NULL_RTX)
24722 call[0] = gen_rtx_SET (value, call[0]);
24724 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24726 if (sibcall)
24727 call[2] = simple_return_rtx;
24728 else
24729 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24731 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24732 insn = emit_call_insn (insn);
24733 /* Now we have the debug info in the insn, we can set up the branch island
24734 if we're using one. */
24735 if (make_island)
24737 tree funname = get_identifier (XSTR (func_desc, 0));
24739 if (no_previous_def (funname))
24741 rtx label_rtx = gen_label_rtx ();
24742 char *label_buf, temp_buf[256];
24743 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24744 CODE_LABEL_NUMBER (label_rtx));
24745 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24746 tree labelname = get_identifier (label_buf);
24747 add_compiler_branch_island (labelname, funname,
24748 insn_line ((const rtx_insn*)insn));
24752 #endif
24754 void
24755 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24756 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24758 #if TARGET_MACHO
24759 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24760 #else
24761 gcc_unreachable();
24762 #endif
24766 void
24767 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24768 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24770 #if TARGET_MACHO
24771 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24772 #else
24773 gcc_unreachable();
24774 #endif
24777 /* Return whether we should generate PC-relative code for FNDECL. */
24778 bool
24779 rs6000_fndecl_pcrel_p (const_tree fndecl)
24781 if (DEFAULT_ABI != ABI_ELFv2)
24782 return false;
24784 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24786 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24787 && TARGET_CMODEL == CMODEL_MEDIUM);
24790 /* Return whether we should generate PC-relative code for *FN. */
24791 bool
24792 rs6000_pcrel_p (struct function *fn)
24794 if (DEFAULT_ABI != ABI_ELFv2)
24795 return false;
24797 /* Optimize usual case. */
24798 if (fn == cfun)
24799 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24800 && TARGET_CMODEL == CMODEL_MEDIUM);
24802 return rs6000_fndecl_pcrel_p (fn->decl);
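
/* Editor's sketch (illustrative, not part of the original source): when
   rs6000_pcrel_p holds, a load of a local global variable can use a single
   prefixed PC-relative instruction instead of the medium-model TOC pair.
   The register numbers and the symbol below are hypothetical.

	TOC-based medium model:          PC-relative:
	  addis 9,2,x@toc@ha               plwz 3,x@pcrel
	  lwz 3,x@toc@l(9)

   External symbols instead go through the GOT, e.g. pld 9,x@got@pcrel
   followed by a load through r9.  */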
24806 /* Given an address (ADDR), a mode (MODE), and what the format of the
24807 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24808 for the address. */
24810 enum insn_form
24811 address_to_insn_form (rtx addr,
24812 machine_mode mode,
24813 enum non_prefixed_form non_prefixed_format)
24815 /* Single register is easy. */
24816 if (REG_P (addr) || SUBREG_P (addr))
24817 return INSN_FORM_BASE_REG;
24819 /* If the non-prefixed instruction format doesn't support offset addressing,
24820 make sure only indexed addressing is allowed.
24822 We special case SDmode so that the register allocator does not try to move
24823 SDmode through GPR registers, but instead uses the 32-bit integer load and
24824 store instructions for the floating point registers. */
24825 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24827 if (GET_CODE (addr) != PLUS)
24828 return INSN_FORM_BAD;
24830 rtx op0 = XEXP (addr, 0);
24831 rtx op1 = XEXP (addr, 1);
24832 if (!REG_P (op0) && !SUBREG_P (op0))
24833 return INSN_FORM_BAD;
24835 if (!REG_P (op1) && !SUBREG_P (op1))
24836 return INSN_FORM_BAD;
24838 return INSN_FORM_X;
24841 /* Deal with update forms. */
24842 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24843 return INSN_FORM_UPDATE;
24845 /* Handle PC-relative symbols and labels. Check for both local and
24846 external symbols. Assume labels are always local. TLS symbols
24847 are not PC-relative for rs6000. */
24848 if (TARGET_PCREL)
24850 if (LABEL_REF_P (addr))
24851 return INSN_FORM_PCREL_LOCAL;
24853 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
24855 if (!SYMBOL_REF_LOCAL_P (addr))
24856 return INSN_FORM_PCREL_EXTERNAL;
24857 else
24858 return INSN_FORM_PCREL_LOCAL;
24862 if (GET_CODE (addr) == CONST)
24863 addr = XEXP (addr, 0);
24865 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24866 if (GET_CODE (addr) == LO_SUM)
24867 return INSN_FORM_LO_SUM;
24869 /* Everything below must be an offset address of some form. */
24870 if (GET_CODE (addr) != PLUS)
24871 return INSN_FORM_BAD;
24873 rtx op0 = XEXP (addr, 0);
24874 rtx op1 = XEXP (addr, 1);
24876 /* Check for indexed addresses. */
24877 if (REG_P (op1) || SUBREG_P (op1))
24879 if (REG_P (op0) || SUBREG_P (op0))
24880 return INSN_FORM_X;
24882 return INSN_FORM_BAD;
24885 if (!CONST_INT_P (op1))
24886 return INSN_FORM_BAD;
24888 HOST_WIDE_INT offset = INTVAL (op1);
24889 if (!SIGNED_INTEGER_34BIT_P (offset))
24890 return INSN_FORM_BAD;
24892 /* Check for local and external PC-relative addresses. Labels are always
24893 local. TLS symbols are not PC-relative for rs6000. */
24894 if (TARGET_PCREL)
24896 if (LABEL_REF_P (op0))
24897 return INSN_FORM_PCREL_LOCAL;
24899 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
24901 if (!SYMBOL_REF_LOCAL_P (op0))
24902 return INSN_FORM_PCREL_EXTERNAL;
24903 else
24904 return INSN_FORM_PCREL_LOCAL;
24908 /* If it isn't PC-relative, the address must use a base register. */
24909 if (!REG_P (op0) && !SUBREG_P (op0))
24910 return INSN_FORM_BAD;
24912 /* Large offsets must be prefixed. */
24913 if (!SIGNED_INTEGER_16BIT_P (offset))
24915 if (TARGET_PREFIXED)
24916 return INSN_FORM_PREFIXED_NUMERIC;
24918 return INSN_FORM_BAD;
24921 /* We have a 16-bit offset, see what default instruction format to use. */
24922 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24924 unsigned size = GET_MODE_SIZE (mode);
24926 /* On 64-bit systems, assume 64-bit integers need to use DS form
24927 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24928 (for LXV and STXV). TImode is problematic in that its normal usage
24929 is expected to be GPRs where it wants a DS instruction format, but if
24930 it goes into the vector registers, it wants a DQ instruction
24931 format. */
24932 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24933 non_prefixed_format = NON_PREFIXED_DS;
24935 else if (TARGET_VSX && size >= 16
24936 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24937 non_prefixed_format = NON_PREFIXED_DQ;
24939 else
24940 non_prefixed_format = NON_PREFIXED_D;
24943 /* Classify the D/DS/DQ-form addresses. */
24944 switch (non_prefixed_format)
24946 /* Instruction format D, all 16 bits are valid. */
24947 case NON_PREFIXED_D:
24948 return INSN_FORM_D;
24950 /* Instruction format DS, bottom 2 bits must be 0. */
24951 case NON_PREFIXED_DS:
24952 if ((offset & 3) == 0)
24953 return INSN_FORM_DS;
24955 else if (TARGET_PREFIXED)
24956 return INSN_FORM_PREFIXED_NUMERIC;
24958 else
24959 return INSN_FORM_BAD;
24961 /* Instruction format DQ, bottom 4 bits must be 0. */
24962 case NON_PREFIXED_DQ:
24963 if ((offset & 15) == 0)
24964 return INSN_FORM_DQ;
24966 else if (TARGET_PREFIXED)
24967 return INSN_FORM_PREFIXED_NUMERIC;
24969 else
24970 return INSN_FORM_BAD;
24972 default:
24973 break;
24976 return INSN_FORM_BAD;
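
/* Editor's note -- worked examples of the classification above
   (illustrative; assumes a 64-bit target with TARGET_PREFIXED and
   NON_PREFIXED_DEFAULT unless stated otherwise):

	(reg 3)                            -> INSN_FORM_BASE_REG
	(plus (reg 3) (reg 4))             -> INSN_FORM_X
	(plus (reg 3) (const_int 8))       -> INSN_FORM_D (or _DS/_DQ when
					      the mode selects those formats)
	(plus (reg 3) (const_int 9)),
	DImode                             -> INSN_FORM_PREFIXED_NUMERIC,
					      since DS form needs the bottom
					      2 bits of the offset to be 0
	(plus (reg 3) (const_int 0x12345)) -> INSN_FORM_PREFIXED_NUMERIC,
					      since it does not fit in 16 bits
	(symbol_ref "x"), TARGET_PCREL     -> INSN_FORM_PCREL_LOCAL or
					      INSN_FORM_PCREL_EXTERNAL.  */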
24979 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24980 instruction format (D/DS/DQ) used for offset memory. */
24982 static enum non_prefixed_form
24983 reg_to_non_prefixed (rtx reg, machine_mode mode)
24985 /* If it isn't a register, use the defaults. */
24986 if (!REG_P (reg) && !SUBREG_P (reg))
24987 return NON_PREFIXED_DEFAULT;
24989 unsigned int r = reg_or_subregno (reg);
24991 /* If we have a pseudo, use the default instruction format. */
24992 if (!HARD_REGISTER_NUM_P (r))
24993 return NON_PREFIXED_DEFAULT;
24995 unsigned size = GET_MODE_SIZE (mode);
24997 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
24998 128-bit floating point, and 128-bit integers. Before power9, only indexed
24999 addressing was available for vectors. */
25000 if (FP_REGNO_P (r))
25002 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25003 return NON_PREFIXED_D;
25005 else if (size < 8)
25006 return NON_PREFIXED_X;
25008 else if (TARGET_VSX && size >= 16
25009 && (VECTOR_MODE_P (mode)
25010 || FLOAT128_VECTOR_P (mode)
25011 || mode == TImode || mode == CTImode))
25012 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25014 else
25015 return NON_PREFIXED_DEFAULT;
25018 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
25019 128-bit floating point, and 128-bit integers. Before power9, only indexed
25020 addressing was available. */
25021 else if (ALTIVEC_REGNO_P (r))
25023 if (!TARGET_P9_VECTOR)
25024 return NON_PREFIXED_X;
25026 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25027 return NON_PREFIXED_DS;
25029 else if (size < 8)
25030 return NON_PREFIXED_X;
25032 else if (TARGET_VSX && size >= 16
25033 && (VECTOR_MODE_P (mode)
25034 || FLOAT128_VECTOR_P (mode)
25035 || mode == TImode || mode == CTImode))
25036 return NON_PREFIXED_DQ;
25038 else
25039 return NON_PREFIXED_DEFAULT;
25042 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
25043 otherwise. Assume that any other register, such as LR, CRs, etc. will go
25044 through the GPR registers for memory operations. */
25045 else if (TARGET_POWERPC64 && size >= 8)
25046 return NON_PREFIXED_DS;
25048 return NON_PREFIXED_D;
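
/* Editor's note -- sample results of reg_to_non_prefixed (illustrative;
   assumes TARGET_POWERPC64, TARGET_VSX and TARGET_P9_VECTOR):

	GPR,          SImode   -> NON_PREFIXED_D    (lwz/stw)
	GPR,          DImode   -> NON_PREFIXED_DS   (ld/std)
	FPR,          DFmode   -> NON_PREFIXED_D    (lfd/stfd)
	Altivec reg,  DFmode   -> NON_PREFIXED_DS   (lxsd/stxsd)
	FPR/Altivec,  V2DImode -> NON_PREFIXED_DQ   (lxv/stxv)
	pseudo reg,   any mode -> NON_PREFIXED_DEFAULT.  */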
25052 /* Whether a load instruction is a prefixed instruction. This is called from
25053 the prefixed attribute processing. */
25055 bool
25056 prefixed_load_p (rtx_insn *insn)
25058 /* Validate the insn to make sure it is a normal load insn. */
25059 extract_insn_cached (insn);
25060 if (recog_data.n_operands < 2)
25061 return false;
25063 rtx reg = recog_data.operand[0];
25064 rtx mem = recog_data.operand[1];
25066 if (!REG_P (reg) && !SUBREG_P (reg))
25067 return false;
25069 if (!MEM_P (mem))
25070 return false;
25072 /* Prefixed load instructions do not support update or indexed forms. */
25073 if (get_attr_indexed (insn) == INDEXED_YES
25074 || get_attr_update (insn) == UPDATE_YES)
25075 return false;
25077 /* LWA uses the DS format instead of the D format that LWZ uses. */
25078 enum non_prefixed_form non_prefixed;
25079 machine_mode reg_mode = GET_MODE (reg);
25080 machine_mode mem_mode = GET_MODE (mem);
25082 if (mem_mode == SImode && reg_mode == DImode
25083 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25084 non_prefixed = NON_PREFIXED_DS;
25086 else
25087 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25089 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25092 /* Whether a store instruction is a prefixed instruction. This is called from
25093 the prefixed attribute processing. */
25095 bool
25096 prefixed_store_p (rtx_insn *insn)
25098 /* Validate the insn to make sure it is a normal store insn. */
25099 extract_insn_cached (insn);
25100 if (recog_data.n_operands < 2)
25101 return false;
25103 rtx mem = recog_data.operand[0];
25104 rtx reg = recog_data.operand[1];
25106 if (!REG_P (reg) && !SUBREG_P (reg))
25107 return false;
25109 if (!MEM_P (mem))
25110 return false;
25112 /* Prefixed store instructions do not support update or indexed forms. */
25113 if (get_attr_indexed (insn) == INDEXED_YES
25114 || get_attr_update (insn) == UPDATE_YES)
25115 return false;
25117 machine_mode mem_mode = GET_MODE (mem);
25118 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25119 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25122 /* Whether a load immediate or add instruction is a prefixed instruction. This
25123 is called from the prefixed attribute processing. */
25125 bool
25126 prefixed_paddi_p (rtx_insn *insn)
25128 rtx set = single_set (insn);
25129 if (!set)
25130 return false;
25132 rtx dest = SET_DEST (set);
25133 rtx src = SET_SRC (set);
25135 if (!REG_P (dest) && !SUBREG_P (dest))
25136 return false;
25138 /* Is this a load immediate that can't be done with a simple ADDI or
25139 ADDIS? */
25140 if (CONST_INT_P (src))
25141 return (satisfies_constraint_eI (src)
25142 && !satisfies_constraint_I (src)
25143 && !satisfies_constraint_L (src));
25145 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25146 ADDIS? */
25147 if (GET_CODE (src) == PLUS)
25149 rtx op1 = XEXP (src, 1);
25151 return (CONST_INT_P (op1)
25152 && satisfies_constraint_eI (op1)
25153 && !satisfies_constraint_I (op1)
25154 && !satisfies_constraint_L (op1));
25157 /* If not, is it a load of a PC-relative address? */
25158 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25159 return false;
25161 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25162 return false;
25164 enum insn_form iform = address_to_insn_form (src, Pmode,
25165 NON_PREFIXED_DEFAULT);
25167 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
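
/* Editor's sketch of the insns the predicate above accepts (hypothetical
   operands; pli and pla are the extended mnemonics for paddi):

	pli 3,0x12345678        <- 34-bit immediate that fits neither ADDI
				   nor ADDIS
	paddi 3,4,0x123456      <- add of a constant outside the 16-bit forms
	pla 3,sym@pcrel         <- load of a PC-relative address.  */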
25170 /* Whether the next instruction needs a 'p' prefix issued before the
25171 instruction is printed out. */
25172 static bool next_insn_prefixed_p;
25174 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25175 outputting the assembler code. On the PowerPC, we remember if the current
25176 insn is a prefixed insn where we need to emit a 'p' before the insn.
25178 In addition, if the insn is part of a PC-relative reference to an external
25179 label optimization, this is also recorded. */
25180 void
25181 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25183 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25184 return;
25187 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25188 We use it to emit a 'p' for prefixed insns that is set in
25189 FINAL_PRESCAN_INSN. */
25190 void
25191 rs6000_asm_output_opcode (FILE *stream)
25193 if (next_insn_prefixed_p)
25194 fprintf (stream, "p");
25196 return;
25199 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25200 should be adjusted to reflect any required changes. This macro is used when
25201 there is some systematic length adjustment required that would be difficult
25202 to express in the length attribute.
25204 In the PowerPC, we use this to adjust the length of an instruction if one or
25205 more prefixed instructions are generated, using the attribute
25206 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25207 hardware requires that a prefixed instruction does not cross a 64-byte
25208 boundary. This means the compiler has to assume the length of the first
25209 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25210 already set for the non-prefixed instruction, we just need to update for the
25211 difference. */
25213 int
25214 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25216 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
25218 rtx pattern = PATTERN (insn);
25219 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25220 && get_attr_prefixed (insn) == PREFIXED_YES)
25222 int num_prefixed = get_attr_max_prefixed_insns (insn);
25223 length += 4 * (num_prefixed + 1);
25227 return length;
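
/* Editor's worked example: a single prefixed load such as PLD has
   max_prefixed_insns == 1 and was counted as one 4-byte instruction by
   the length attribute, so the code above adds 4 * (1 + 1) = 8 bytes,
   for a total of 12: 8 bytes for the prefixed instruction itself plus 4
   bytes allowing for the NOP the assembler may insert to keep it from
   crossing a 64-byte boundary.  */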
25231 #ifdef HAVE_GAS_HIDDEN
25232 # define USE_HIDDEN_LINKONCE 1
25233 #else
25234 # define USE_HIDDEN_LINKONCE 0
25235 #endif
25237 /* Fills in the label name that should be used for a 476 link stack thunk. */
25239 void
25240 get_ppc476_thunk_name (char name[32])
25242 gcc_assert (TARGET_LINK_STACK);
25244 if (USE_HIDDEN_LINKONCE)
25245 sprintf (name, "__ppc476.get_thunk");
25246 else
25247 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25250 /* This function emits the simple thunk routine that is used to preserve
25251 the link stack on the 476 cpu. */
25253 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25254 static void
25255 rs6000_code_end (void)
25257 char name[32];
25258 tree decl;
25260 if (!TARGET_LINK_STACK)
25261 return;
25263 get_ppc476_thunk_name (name);
25265 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25266 build_function_type_list (void_type_node, NULL_TREE));
25267 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25268 NULL_TREE, void_type_node);
25269 TREE_PUBLIC (decl) = 1;
25270 TREE_STATIC (decl) = 1;
25272 #if RS6000_WEAK
25273 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25275 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25276 targetm.asm_out.unique_section (decl, 0);
25277 switch_to_section (get_named_section (decl, NULL, 0));
25278 DECL_WEAK (decl) = 1;
25279 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25280 targetm.asm_out.globalize_label (asm_out_file, name);
25281 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25282 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25284 else
25285 #endif
25287 switch_to_section (text_section);
25288 ASM_OUTPUT_LABEL (asm_out_file, name);
25291 DECL_INITIAL (decl) = make_node (BLOCK);
25292 current_function_decl = decl;
25293 allocate_struct_function (decl, false);
25294 init_function_start (decl);
25295 first_function_block_is_cold = false;
25296 /* Make sure unwind info is emitted for the thunk if needed. */
25297 final_start_function (emit_barrier (), asm_out_file, 1);
25299 fputs ("\tblr\n", asm_out_file);
25301 final_end_function ();
25302 init_insn_lengths ();
25303 free_after_compilation (cfun);
25304 set_cfun (NULL);
25305 current_function_decl = NULL;
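
/* Editor's sketch (illustrative) of how the thunk is used: instead of the
   usual "bcl 20,31,1f" trick for obtaining the PC, which unbalances the
   476's link stack, a caller does

	bl __ppc476.get_thunk
	mflr 30

   and the "blr" emitted above returns immediately, leaving the address of
   the instruction after the bl in the link register.  */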
25308 /* Add r30 to hard reg set if the prologue sets it up and it is not
25309 pic_offset_table_rtx. */
25311 static void
25312 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25314 if (!TARGET_SINGLE_PIC_BASE
25315 && TARGET_TOC
25316 && TARGET_MINIMAL_TOC
25317 && !constant_pool_empty_p ())
25318 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25319 if (cfun->machine->split_stack_argp_used)
25320 add_to_hard_reg_set (&set->set, Pmode, 12);
25322 /* Make sure the hard reg set doesn't include r2, which was possibly added
25323 via PIC_OFFSET_TABLE_REGNUM. */
25324 if (TARGET_TOC)
25325 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25329 /* Helper function for rs6000_split_logical to emit a logical instruction after
25330 splitting the operation into single GPR registers.
25332 DEST is the destination register.
25333 OP1 and OP2 are the input source registers.
25334 CODE is the base operation (AND, IOR, XOR, NOT).
25335 MODE is the machine mode.
25336 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25337 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25338 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25340 static void
25341 rs6000_split_logical_inner (rtx dest,
25342 rtx op1,
25343 rtx op2,
25344 enum rtx_code code,
25345 machine_mode mode,
25346 bool complement_final_p,
25347 bool complement_op1_p,
25348 bool complement_op2_p)
25350 rtx bool_rtx;
25352 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25353 if (op2 && CONST_INT_P (op2)
25354 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25355 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25357 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25358 HOST_WIDE_INT value = INTVAL (op2) & mask;
25360 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25361 if (code == AND)
25363 if (value == 0)
25365 emit_insn (gen_rtx_SET (dest, const0_rtx));
25366 return;
25369 else if (value == mask)
25371 if (!rtx_equal_p (dest, op1))
25372 emit_insn (gen_rtx_SET (dest, op1));
25373 return;
25377 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25378 into separate ORI/ORIS or XORI/XORIS instructions. */
25379 else if (code == IOR || code == XOR)
25381 if (value == 0)
25383 if (!rtx_equal_p (dest, op1))
25384 emit_insn (gen_rtx_SET (dest, op1));
25385 return;
25390 if (code == AND && mode == SImode
25391 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25393 emit_insn (gen_andsi3 (dest, op1, op2));
25394 return;
25397 if (complement_op1_p)
25398 op1 = gen_rtx_NOT (mode, op1);
25400 if (complement_op2_p)
25401 op2 = gen_rtx_NOT (mode, op2);
25403 /* For canonical RTL, if only one arm is inverted it is the first. */
25404 if (!complement_op1_p && complement_op2_p)
25405 std::swap (op1, op2);
25407 bool_rtx = ((code == NOT)
25408 ? gen_rtx_NOT (mode, op1)
25409 : gen_rtx_fmt_ee (code, mode, op1, op2));
25411 if (complement_final_p)
25412 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25414 emit_insn (gen_rtx_SET (dest, bool_rtx));
25417 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25418 operations are split immediately during RTL generation to allow for more
25419 optimizations of the AND/IOR/XOR.
25421 OPERANDS is an array containing the destination and two input operands.
25422 CODE is the base operation (AND, IOR, XOR, NOT).
25423 MODE is the machine mode.
25424 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25425 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25426 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25427 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25428 formation of the AND instructions. */
25430 static void
25431 rs6000_split_logical_di (rtx operands[3],
25432 enum rtx_code code,
25433 bool complement_final_p,
25434 bool complement_op1_p,
25435 bool complement_op2_p)
25437 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25438 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25439 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25440 enum hi_lo { hi = 0, lo = 1 };
25441 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25442 size_t i;
25444 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25445 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25446 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25447 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25449 if (code == NOT)
25450 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25451 else
25453 if (!CONST_INT_P (operands[2]))
25455 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25456 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25458 else
25460 HOST_WIDE_INT value = INTVAL (operands[2]);
25461 HOST_WIDE_INT value_hi_lo[2];
25463 gcc_assert (!complement_final_p);
25464 gcc_assert (!complement_op1_p);
25465 gcc_assert (!complement_op2_p);
25467 value_hi_lo[hi] = value >> 32;
25468 value_hi_lo[lo] = value & lower_32bits;
25470 for (i = 0; i < 2; i++)
25472 HOST_WIDE_INT sub_value = value_hi_lo[i];
25474 if (sub_value & sign_bit)
25475 sub_value |= upper_32bits;
25477 op2_hi_lo[i] = GEN_INT (sub_value);
25479 /* If this is an AND instruction, check to see if we need to load
25480 the value in a register. */
25481 if (code == AND && sub_value != -1 && sub_value != 0
25482 && !and_operand (op2_hi_lo[i], SImode))
25483 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25488 for (i = 0; i < 2; i++)
25490 /* Split large IOR/XOR operations. */
25491 if ((code == IOR || code == XOR)
25492 && CONST_INT_P (op2_hi_lo[i])
25493 && !complement_final_p
25494 && !complement_op1_p
25495 && !complement_op2_p
25496 && !logical_const_operand (op2_hi_lo[i], SImode))
25498 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25499 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25500 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25501 rtx tmp = gen_reg_rtx (SImode);
25503 /* Make sure the constant is sign extended. */
25504 if ((hi_16bits & sign_bit) != 0)
25505 hi_16bits |= upper_32bits;
25507 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25508 code, SImode, false, false, false);
25510 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25511 code, SImode, false, false, false);
25513 else
25514 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25515 code, SImode, complement_final_p,
25516 complement_op1_p, complement_op2_p);
25519 return;
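
/* Editor's worked example (hypothetical registers): XORing the low word
   of a 32-bit register pair with 0x12345678 is not a single
   logical_const_operand, so the loop above splits it into the two
   16-bit halves

	xoris 4,4,0x1234
	xori  4,4,0x5678

   while a word whose constant is 0 degenerates into a simple move (or
   into nothing when the source and destination already match).  */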
25522 /* Split the insns that make up boolean operations operating on multiple GPR
25523 registers. The boolean MD patterns ensure that the inputs either are
25524 exactly the same as the output registers, or there is no overlap.
25526 OPERANDS is an array containing the destination and two input operands.
25527 CODE is the base operation (AND, IOR, XOR, NOT).
25528 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25529 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25530 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25532 void
25533 rs6000_split_logical (rtx operands[3],
25534 enum rtx_code code,
25535 bool complement_final_p,
25536 bool complement_op1_p,
25537 bool complement_op2_p)
25539 machine_mode mode = GET_MODE (operands[0]);
25540 machine_mode sub_mode;
25541 rtx op0, op1, op2;
25542 int sub_size, regno0, regno1, nregs, i;
25544 /* If this is DImode, use the specialized version that can run before
25545 register allocation. */
25546 if (mode == DImode && !TARGET_POWERPC64)
25548 rs6000_split_logical_di (operands, code, complement_final_p,
25549 complement_op1_p, complement_op2_p);
25550 return;
25553 op0 = operands[0];
25554 op1 = operands[1];
25555 op2 = (code == NOT) ? NULL_RTX : operands[2];
25556 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25557 sub_size = GET_MODE_SIZE (sub_mode);
25558 regno0 = REGNO (op0);
25559 regno1 = REGNO (op1);
25561 gcc_assert (reload_completed);
25562 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25563 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25565 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25566 gcc_assert (nregs > 1);
25568 if (op2 && REG_P (op2))
25569 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25571 for (i = 0; i < nregs; i++)
25573 int offset = i * sub_size;
25574 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25575 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25576 rtx sub_op2 = ((code == NOT)
25577 ? NULL_RTX
25578 : simplify_subreg (sub_mode, op2, mode, offset));
25580 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25581 complement_final_p, complement_op1_p,
25582 complement_op2_p);
25585 return;
25589 /* Return true if the peephole2 can combine a load involving a combination of
25590 an addis instruction and a load with an offset that can be fused together on
25591 a power8. */
25593 bool
25594 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25595 rtx addis_value, /* addis value. */
25596 rtx target, /* target register that is loaded. */
25597 rtx mem) /* bottom part of the memory addr. */
25599 rtx addr;
25600 rtx base_reg;
25602 /* Validate arguments. */
25603 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25604 return false;
25606 if (!base_reg_operand (target, GET_MODE (target)))
25607 return false;
25609 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25610 return false;
25612 /* Allow sign/zero extension. */
25613 if (GET_CODE (mem) == ZERO_EXTEND
25614 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25615 mem = XEXP (mem, 0);
25617 if (!MEM_P (mem))
25618 return false;
25620 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25621 return false;
25623 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25624 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25625 return false;
25627 /* Validate that the register used to load the high value is either the
25628 register being loaded, or we can safely replace its use.
25630 This function is only called from the peephole2 pass and we assume that
25631 there are 2 instructions in the peephole (addis and load), so we want to
25632 check if the target register was not used in the memory address and the
25633 register to hold the addis result is dead after the peephole. */
25634 if (REGNO (addis_reg) != REGNO (target))
25636 if (reg_mentioned_p (target, mem))
25637 return false;
25639 if (!peep2_reg_dead_p (2, addis_reg))
25640 return false;
25642 /* If the target register being loaded is the stack pointer, we must
25643 avoid loading any other value into it, even temporarily. */
25644 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25645 return false;
25648 base_reg = XEXP (addr, 0);
25649 return REGNO (addis_reg) == REGNO (base_reg);
25652 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25653 sequence. We adjust the addis register to use the target register. If the
25654 load sign extends, we adjust the code to do the zero extending load, and an
25655 explicit sign extension later since the fusion only covers zero extending
25656 loads.
25658 The operands are:
25659 operands[0] register set with addis (to be replaced with target)
25660 operands[1] value set via addis
25661 operands[2] target register being loaded
25662 operands[3] D-form memory reference using operands[0]. */
25664 void
25665 expand_fusion_gpr_load (rtx *operands)
25667 rtx addis_value = operands[1];
25668 rtx target = operands[2];
25669 rtx orig_mem = operands[3];
25670 rtx new_addr, new_mem, orig_addr, offset;
25671 enum rtx_code plus_or_lo_sum;
25672 machine_mode target_mode = GET_MODE (target);
25673 machine_mode extend_mode = target_mode;
25674 machine_mode ptr_mode = Pmode;
25675 enum rtx_code extend = UNKNOWN;
25677 if (GET_CODE (orig_mem) == ZERO_EXTEND
25678 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25680 extend = GET_CODE (orig_mem);
25681 orig_mem = XEXP (orig_mem, 0);
25682 target_mode = GET_MODE (orig_mem);
25685 gcc_assert (MEM_P (orig_mem));
25687 orig_addr = XEXP (orig_mem, 0);
25688 plus_or_lo_sum = GET_CODE (orig_addr);
25689 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25691 offset = XEXP (orig_addr, 1);
25692 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25693 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25695 if (extend != UNKNOWN)
25696 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25698 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25699 UNSPEC_FUSION_GPR);
25700 emit_insn (gen_rtx_SET (target, new_mem));
25702 if (extend == SIGN_EXTEND)
25704 int sub_off = ((BYTES_BIG_ENDIAN)
25705 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25706 : 0);
25707 rtx sign_reg
25708 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25710 emit_insn (gen_rtx_SET (target,
25711 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25714 return;
25717 /* Emit the addis instruction that will be part of a fused instruction
25718 sequence. */
25720 void
25721 emit_fusion_addis (rtx target, rtx addis_value)
25723 rtx fuse_ops[10];
25724 const char *addis_str = NULL;
25726 /* Emit the addis instruction. */
25727 fuse_ops[0] = target;
25728 if (satisfies_constraint_L (addis_value))
25730 fuse_ops[1] = addis_value;
25731 addis_str = "lis %0,%v1";
25734 else if (GET_CODE (addis_value) == PLUS)
25736 rtx op0 = XEXP (addis_value, 0);
25737 rtx op1 = XEXP (addis_value, 1);
25739 if (REG_P (op0) && CONST_INT_P (op1)
25740 && satisfies_constraint_L (op1))
25742 fuse_ops[1] = op0;
25743 fuse_ops[2] = op1;
25744 addis_str = "addis %0,%1,%v2";
25748 else if (GET_CODE (addis_value) == HIGH)
25750 rtx value = XEXP (addis_value, 0);
25751 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25753 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25754 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25755 if (TARGET_ELF)
25756 addis_str = "addis %0,%2,%1@toc@ha";
25758 else if (TARGET_XCOFF)
25759 addis_str = "addis %0,%1@u(%2)";
25761 else
25762 gcc_unreachable ();
25765 else if (GET_CODE (value) == PLUS)
25767 rtx op0 = XEXP (value, 0);
25768 rtx op1 = XEXP (value, 1);
25770 if (GET_CODE (op0) == UNSPEC
25771 && XINT (op0, 1) == UNSPEC_TOCREL
25772 && CONST_INT_P (op1))
25774 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25775 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25776 fuse_ops[3] = op1;
25777 if (TARGET_ELF)
25778 addis_str = "addis %0,%2,%1+%3@toc@ha";
25780 else if (TARGET_XCOFF)
25781 addis_str = "addis %0,%1+%3@u(%2)";
25783 else
25784 gcc_unreachable ();
25788 else if (satisfies_constraint_L (value))
25790 fuse_ops[1] = value;
25791 addis_str = "lis %0,%v1";
25794 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25796 fuse_ops[1] = value;
25797 addis_str = "lis %0,%1@ha";
25801 if (!addis_str)
25802 fatal_insn ("Could not generate addis value for fusion", addis_value);
25804 output_asm_insn (addis_str, fuse_ops);
25807 /* Emit a D-form load or store instruction that is the second instruction
25808 of a fusion sequence. */
25810 static void
25811 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25813 rtx fuse_ops[10];
25814 char insn_template[80];
25816 fuse_ops[0] = load_reg;
25817 fuse_ops[1] = addis_reg;
25819 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25821 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25822 fuse_ops[2] = offset;
25823 output_asm_insn (insn_template, fuse_ops);
25826 else if (GET_CODE (offset) == UNSPEC
25827 && XINT (offset, 1) == UNSPEC_TOCREL)
25829 if (TARGET_ELF)
25830 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25832 else if (TARGET_XCOFF)
25833 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25835 else
25836 gcc_unreachable ();
25838 fuse_ops[2] = XVECEXP (offset, 0, 0);
25839 output_asm_insn (insn_template, fuse_ops);
25842 else if (GET_CODE (offset) == PLUS
25843 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25844 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25845 && CONST_INT_P (XEXP (offset, 1)))
25847 rtx tocrel_unspec = XEXP (offset, 0);
25848 if (TARGET_ELF)
25849 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25851 else if (TARGET_XCOFF)
25852 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25854 else
25855 gcc_unreachable ();
25857 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25858 fuse_ops[3] = XEXP (offset, 1);
25859 output_asm_insn (insn_template, fuse_ops);
25862 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25864 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25866 fuse_ops[2] = offset;
25867 output_asm_insn (insn_template, fuse_ops);
25870 else
25871 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25873 return;
25876 /* Given an address, convert it into the addis and load offset parts. Addresses
25877 created during the peephole2 process look like:
25878 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25879 (unspec [(...)] UNSPEC_TOCREL)) */
25881 static void
25882 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25884 rtx hi, lo;
25886 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25888 hi = XEXP (addr, 0);
25889 lo = XEXP (addr, 1);
25891 else
25892 gcc_unreachable ();
25894 *p_hi = hi;
25895 *p_lo = lo;
25898 /* Return a string to fuse an addis instruction with a GPR load into the
25899 same register that the addis instruction set. The address that is used
25900 is the logical address that was formed during peephole2:
25901 (lo_sum (high) (low-part))
25903 The code is complicated, so we call output_asm_insn directly, and just
25904 return "". */
25906 const char *
25907 emit_fusion_gpr_load (rtx target, rtx mem)
25909 rtx addis_value;
25910 rtx addr;
25911 rtx load_offset;
25912 const char *load_str = NULL;
25913 machine_mode mode;
25915 if (GET_CODE (mem) == ZERO_EXTEND)
25916 mem = XEXP (mem, 0);
25918 gcc_assert (REG_P (target) && MEM_P (mem));
25920 addr = XEXP (mem, 0);
25921 fusion_split_address (addr, &addis_value, &load_offset);
25923 /* Now emit the load instruction to the same register. */
25924 mode = GET_MODE (mem);
25925 switch (mode)
25927 case E_QImode:
25928 load_str = "lbz";
25929 break;
25931 case E_HImode:
25932 load_str = "lhz";
25933 break;
25935 case E_SImode:
25936 case E_SFmode:
25937 load_str = "lwz";
25938 break;
25940 case E_DImode:
25941 case E_DFmode:
25942 gcc_assert (TARGET_POWERPC64);
25943 load_str = "ld";
25944 break;
25946 default:
25947 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25950 /* Emit the addis instruction. */
25951 emit_fusion_addis (target, addis_value);
25953 /* Emit the D-form load instruction. */
25954 emit_fusion_load (target, target, load_offset, load_str);
25956 return "";
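
/* Editor's sketch of the emitted pair (illustrative ELF/TOC operands):

	addis 9,2,sym@toc@ha
	lwz 9,sym@toc@l(9)

   Both instructions target the same register, which is the shape the
   power8 fusion hardware recognizes as a single fused operation.  */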
25960 #ifdef RS6000_GLIBC_ATOMIC_FENV
25961 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25962 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25963 #endif
25965 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25967 static void
25968 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25970 if (!TARGET_HARD_FLOAT)
25972 #ifdef RS6000_GLIBC_ATOMIC_FENV
25973 if (atomic_hold_decl == NULL_TREE)
25975 atomic_hold_decl
25976 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25977 get_identifier ("__atomic_feholdexcept"),
25978 build_function_type_list (void_type_node,
25979 double_ptr_type_node,
25980 NULL_TREE));
25981 TREE_PUBLIC (atomic_hold_decl) = 1;
25982 DECL_EXTERNAL (atomic_hold_decl) = 1;
25985 if (atomic_clear_decl == NULL_TREE)
25987 atomic_clear_decl
25988 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25989 get_identifier ("__atomic_feclearexcept"),
25990 build_function_type_list (void_type_node,
25991 NULL_TREE));
25992 TREE_PUBLIC (atomic_clear_decl) = 1;
25993 DECL_EXTERNAL (atomic_clear_decl) = 1;
25996 tree const_double = build_qualified_type (double_type_node,
25997 TYPE_QUAL_CONST);
25998 tree const_double_ptr = build_pointer_type (const_double);
25999 if (atomic_update_decl == NULL_TREE)
26001 atomic_update_decl
26002 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26003 get_identifier ("__atomic_feupdateenv"),
26004 build_function_type_list (void_type_node,
26005 const_double_ptr,
26006 NULL_TREE));
26007 TREE_PUBLIC (atomic_update_decl) = 1;
26008 DECL_EXTERNAL (atomic_update_decl) = 1;
26011 tree fenv_var = create_tmp_var_raw (double_type_node);
26012 TREE_ADDRESSABLE (fenv_var) = 1;
26013 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
26015 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26016 *clear = build_call_expr (atomic_clear_decl, 0);
26017 *update = build_call_expr (atomic_update_decl, 1,
26018 fold_convert (const_double_ptr, fenv_addr));
26019 #endif
26020 return;
26023 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26024 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26025 tree call_mffs = build_call_expr (mffs, 0);
26027 /* Generates the equivalent of feholdexcept (&fenv_var)
26029 *fenv_var = __builtin_mffs ();
26030 double fenv_hold;
26031 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26032 __builtin_mtfsf (0xff, fenv_hold); */
26034 /* Mask to clear everything except for the rounding modes and non-IEEE
26035 arithmetic flag. */
26036 const unsigned HOST_WIDE_INT hold_exception_mask =
26037 HOST_WIDE_INT_C (0xffffffff00000007);
26039 tree fenv_var = create_tmp_var_raw (double_type_node);
26041 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
26043 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26044 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26045 build_int_cst (uint64_type_node,
26046 hold_exception_mask));
26048 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26049 fenv_llu_and);
26051 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26052 build_int_cst (unsigned_type_node, 0xff),
26053 fenv_hold_mtfsf);
26055 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26057 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26059 double fenv_clear = __builtin_mffs ();
26060 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26061 __builtin_mtfsf (0xff, fenv_clear); */
26063 /* Mask to clear the entire lower word of the FPSCR image, i.e. all
26064 exception status and control bits, including the rounding modes. */
26065 const unsigned HOST_WIDE_INT clear_exception_mask =
26066 HOST_WIDE_INT_C (0xffffffff00000000);
26068 tree fenv_clear = create_tmp_var_raw (double_type_node);
26070 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
26072 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26073 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26074 fenv_clean_llu,
26075 build_int_cst (uint64_type_node,
26076 clear_exception_mask));
26078 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26079 fenv_clear_llu_and);
26081 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26082 build_int_cst (unsigned_type_node, 0xff),
26083 fenv_clear_mtfsf);
26085 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26087 /* Generates the equivalent of feupdateenv (&fenv_var)
26089 double old_fenv = __builtin_mffs ();
26090 double fenv_update;
26091 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
26092 (*(uint64_t*)fenv_var & 0x1ff80fff);
26093 __builtin_mtfsf (0xff, fenv_update); */
26095 const unsigned HOST_WIDE_INT update_exception_mask =
26096 HOST_WIDE_INT_C (0xffffffff1fffff00);
26097 const unsigned HOST_WIDE_INT new_exception_mask =
26098 HOST_WIDE_INT_C (0x1ff80fff);
26100 tree old_fenv = create_tmp_var_raw (double_type_node);
26101 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
26103 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26104 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26105 build_int_cst (uint64_type_node,
26106 update_exception_mask));
26108 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26109 build_int_cst (uint64_type_node,
26110 new_exception_mask));
26112 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26113 old_llu_and, new_llu_and);
26115 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26116 new_llu_mask);
26118 tree update_mtfsf = build_call_expr (mtfsf, 2,
26119 build_int_cst (unsigned_type_node, 0xff),
26120 fenv_update_mtfsf);
26122 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
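
/* Editor's summary of the three FPSCR masks used above (illustrative; the
   FPSCR image is viewed as a 64-bit integer, with RN in the low 2 bits
   and NI in bit 2):

	hold:    fpscr & 0xffffffff00000007    keep only RN and NI
	clear:   fpscr & 0xffffffff00000000    zero the whole lower word
	update:  (current & 0xffffffff1fffff00) | (saved & 0x1ff80fff)
		 merge the saved environment's enable and mode bits back
		 while retaining the status bits raised since the hold.  */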
26125 void
26126 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26128 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26130 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26131 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26133 /* The destination of the vmrgew instruction layout is:
26134 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26135 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26136 vmrgew instruction will be correct. */
26137 if (BYTES_BIG_ENDIAN)
26139 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26140 GEN_INT (0)));
26141 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26142 GEN_INT (3)));
26144 else
26146 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26147 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26150 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26151 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26153 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26154 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26156 if (BYTES_BIG_ENDIAN)
26157 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26158 else
26159 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
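
/* Editor's worked example (big-endian, illustrative): for V2DF inputs
   src1 = {a0, a1} and src2 = {b0, b1}, the xxpermdi pair above builds
   rtx_tmp0 = {a0, b0} and rtx_tmp1 = {a1, b1}; xvcvdpsp leaves the
   converted floats in words 0 and 2 of each V4SF temporary, and vmrgew
   then interleaves those even words so that dst = {a0, a1, b0, b1}.  */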
26162 void
26163 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26165 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26167 rtx_tmp0 = gen_reg_rtx (V2DImode);
26168 rtx_tmp1 = gen_reg_rtx (V2DImode);
26170 /* The destination of the vmrgew instruction layout is:
26171 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26172 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26173 vmrgew instruction will be correct. */
26174 if (BYTES_BIG_ENDIAN)
26176 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26177 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26179 else
26181 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26182 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26185 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26186 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26188 if (signed_convert)
26190 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26191 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26193 else
26195 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26196 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26199 if (BYTES_BIG_ENDIAN)
26200 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26201 else
26202 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26205 void
26206 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26207 rtx src2)
26209 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26211 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26212 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26214 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26215 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26217 rtx_tmp2 = gen_reg_rtx (V4SImode);
26218 rtx_tmp3 = gen_reg_rtx (V4SImode);
26220 if (signed_convert)
26222 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26223 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26225 else
26227 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26228 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26231 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26234 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26236 static bool
26237 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26238 optimization_type opt_type)
26240 switch (op)
26242 case rsqrt_optab:
26243 return (opt_type == OPTIMIZE_FOR_SPEED
26244 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26246 default:
26247 return true;
26251 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26253 static HOST_WIDE_INT
26254 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26256 if (TREE_CODE (exp) == STRING_CST
26257 && (STRICT_ALIGNMENT || !optimize_size))
26258 return MAX (align, BITS_PER_WORD);
26259 return align;
26262 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26264 static HOST_WIDE_INT
26265 rs6000_starting_frame_offset (void)
26267 if (FRAME_GROWS_DOWNWARD)
26268 return 0;
26269 return RS6000_STARTING_FRAME_OFFSET;
26273 /* Create an alias for a mangled name where we have changed the mangling (in
26274 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26275 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26277 #if TARGET_ELF && RS6000_WEAK
26278 static void
26279 rs6000_globalize_decl_name (FILE * stream, tree decl)
26281 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26283 targetm.asm_out.globalize_label (stream, name);
26285 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26287 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26288 const char *old_name;
26290 ieee128_mangling_gcc_8_1 = true;
26291 lang_hooks.set_decl_assembler_name (decl);
26292 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26293 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26294 ieee128_mangling_gcc_8_1 = false;
26296 if (strcmp (name, old_name) != 0)
26298 fprintf (stream, "\t.weak %s\n", old_name);
26299 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26303 #endif
26306 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26307 function names from <foo>l to <foo>f128 if the default long double type is
26308 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26309 include file switches the names on systems that support long double as IEEE
26310 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26311 In the future, glibc will export names like __ieee128_sinf128 and we can
26312 switch to using those instead of using sinf128, which pollutes the user's
26313 namespace.
26315 This will switch the names for Fortran math functions as well (which doesn't
26316 use math.h). However, Fortran needs other changes to the compiler and
26317 library before you can switch the real*16 type at compile time.
26319 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26320 only do this if the default is that long double is IBM extended double, and
26321 the user asked for IEEE 128-bit. */
26323 static tree
26324 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26326 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26327 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
26329 size_t len = IDENTIFIER_LENGTH (id);
26330 const char *name = IDENTIFIER_POINTER (id);
26332 if (name[len - 1] == 'l')
26334 bool uses_ieee128_p = false;
26335 tree type = TREE_TYPE (decl);
26336 machine_mode ret_mode = TYPE_MODE (type);
26338 /* See if the function returns a IEEE 128-bit floating point type or
26339 complex type. */
26340 if (ret_mode == TFmode || ret_mode == TCmode)
26341 uses_ieee128_p = true;
26342 else
26344 function_args_iterator args_iter;
26345 tree arg;
26347 /* See if the function passes a IEEE 128-bit floating point type
26348 or complex type. */
26349 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26351 machine_mode arg_mode = TYPE_MODE (arg);
26352 if (arg_mode == TFmode || arg_mode == TCmode)
26354 uses_ieee128_p = true;
26355 break;
26360 /* If we passed or returned an IEEE 128-bit floating point type,
26361 change the name. */
26362 if (uses_ieee128_p)
26364 char *name2 = (char *) alloca (len + 4);
26365 memcpy (name2, name, len - 1);
26366 strcpy (name2 + len - 1, "f128");
26367 id = get_identifier (name2);
26372 return id;
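
/* Editor's example (illustrative): if long double defaults to IBM extended
   double but the user compiles with -mabi=ieeelongdouble -mlong-double-128,
   then __builtin_sinl's decl "sinl" returns TFmode (IEEE 128-bit in that
   configuration), so the hook above rewrites the assembler name to
   "sinf128".  */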
26375 /* Predict whether the given loop in gimple will be transformed in the RTL
26376 doloop_optimize pass. */
26378 static bool
26379 rs6000_predict_doloop_p (struct loop *loop)
26381 gcc_assert (loop);
26383 /* On rs6000, targetm.can_use_doloop_p is actually
26384 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26385 if (loop->inner != NULL)
26387 if (dump_file && (dump_flags & TDF_DETAILS))
26388 fprintf (dump_file, "Predict doloop failure due to"
26389 " loop nesting.\n");
26390 return false;
26393 return true;
26396 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
26398 static bool
26399 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
26401 gcc_assert (MEM_P (mem));
26403 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
26404 type addresses, so don't allow MEMs with those address types to be
26405 substituted as an equivalent expression. See PR93974 for details. */
26406 if (GET_CODE (XEXP (mem, 0)) == AND)
26407 return true;
26409 return false;
26412 struct gcc_target targetm = TARGET_INITIALIZER;
26414 #include "gt-rs6000.h"