rename DBX_REGISTER_NUMBER to DEBUGGER_REGNO
[official-gcc.git] / gcc / config / rs6000 / rs6000.cc
blob8b4edd281cafe6c02944e87883c8428c6221012c
1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2022 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "ipa-fnsummary.h"
77 #include "except.h"
78 #include "case-cfn-macros.h"
79 #include "ppc-auxv.h"
80 #include "rs6000-internal.h"
81 #include "opts.h"
83 /* This file should be included last. */
84 #include "target-def.h"
86 extern tree rs6000_builtin_mask_for_load (void);
87 extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
88 extern tree rs6000_builtin_reciprocal (tree);
90 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
91 systems will also set long double to be IEEE 128-bit. AIX and Darwin
92 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
93 those systems will not pick up this default. This needs to be after all
94 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
95 properly defined. */
96 #ifndef TARGET_IEEEQUAD_DEFAULT
97 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
98 #define TARGET_IEEEQUAD_DEFAULT 1
99 #else
100 #define TARGET_IEEEQUAD_DEFAULT 0
101 #endif
102 #endif
104 /* Don't enable PC-relative addressing if the target does not support it. */
105 #ifndef PCREL_SUPPORTED_BY_OS
106 #define PCREL_SUPPORTED_BY_OS 0
107 #endif
109 #ifdef USING_ELFOS_H
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno = 0;
112 #endif
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
115 int dot_symbols;
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode;
122 /* Track use of r13 in 64bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected = false;
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size;
128 #ifdef HAVE_AS_GNU_ATTRIBUTE
129 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
130 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
131 # endif
132 /* Flag whether floating point values have been passed/returned.
133 Note that this doesn't say whether fprs are used, since the
134 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
135 should be set for soft-float values passed in gprs and ieee128
136 values passed in vsx registers. */
137 bool rs6000_passes_float = false;
138 bool rs6000_passes_long_double = false;
139 /* Flag whether vector values have been passed/returned. */
140 bool rs6000_passes_vector = false;
141 /* Flag whether small (<= 8 byte) structures have been returned. */
142 bool rs6000_returns_struct = false;
143 #endif
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
158 static int dbg_cost_ctrl;
160 /* Flag to say the TOC is initialized */
161 int toc_initialized, need_toc_init;
162 char toc_label_name[10];
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more;
168 static GTY(()) section *read_only_data_section;
169 static GTY(()) section *private_data_section;
170 static GTY(()) section *tls_data_section;
171 static GTY(()) section *tls_private_data_section;
172 static GTY(()) section *read_only_private_data_section;
173 static GTY(()) section *sdata2_section;
175 section *toc_section = 0;
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
179 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
181 /* Register classes for various constraints that are based on the target
182 switches. */
183 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align[NUM_MACHINE_MODES];
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
192 /* Masks to determine which reciprocal esitmate instructions to generate
193 automatically. */
194 enum rs6000_recip_mask {
195 RECIP_SF_DIV = 0x001, /* Use divide estimate */
196 RECIP_DF_DIV = 0x002,
197 RECIP_V4SF_DIV = 0x004,
198 RECIP_V2DF_DIV = 0x008,
200 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
201 RECIP_DF_RSQRT = 0x020,
202 RECIP_V4SF_RSQRT = 0x040,
203 RECIP_V2DF_RSQRT = 0x080,
205 /* Various combination of flags for -mrecip=xxx. */
206 RECIP_NONE = 0,
207 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
208 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
209 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
211 RECIP_HIGH_PRECISION = RECIP_ALL,
213 /* On low precision machines like the power5, don't enable double precision
214 reciprocal square root estimate, since it isn't accurate enough. */
215 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
218 /* -mrecip options. */
219 static struct
221 const char *string; /* option name */
222 unsigned int mask; /* mask bits to set */
223 } recip_options[] = {
224 { "all", RECIP_ALL },
225 { "none", RECIP_NONE },
226 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
227 | RECIP_V2DF_DIV) },
228 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
229 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
230 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT) },
232 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
233 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
236 /* On PowerPC, we have a limited number of target clones that we care about
237 which means we can use an array to hold the options, rather than having more
238 elaborate data structures to identify each possible variation. Order the
239 clones from the default to the highest ISA. */
240 enum {
241 CLONE_DEFAULT = 0, /* default clone. */
242 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
243 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
244 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
245 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
246 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
247 CLONE_MAX
250 /* Map compiler ISA bits into HWCAP names. */
251 struct clone_map {
252 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
253 const char *name; /* name to use in __builtin_cpu_supports. */
256 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p = false;
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
280 /* Simplfy register classes into simpler classifications. We assume
281 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
282 check for standard register classes (gpr/floating/altivec/vsx) and
283 floating/vector classes (float/altivec/vsx). */
285 enum rs6000_reg_type {
286 NO_REG_TYPE,
287 PSEUDO_REG_TYPE,
288 GPR_REG_TYPE,
289 VSX_REG_TYPE,
290 ALTIVEC_REG_TYPE,
291 FPR_REG_TYPE,
292 SPR_REG_TYPE,
293 CR_REG_TYPE
296 /* Map register class to register type. */
297 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
299 /* First/last register type for the 'normal' register types (i.e. general
300 purpose, floating point, altivec, and VSX registers). */
301 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
303 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
306 /* Register classes we care about in secondary reload or go if legitimate
307 address. We only need to worry about GPR, FPR, and Altivec registers here,
308 along an ANY field that is the OR of the 3 register classes. */
310 enum rs6000_reload_reg_type {
311 RELOAD_REG_GPR, /* General purpose registers. */
312 RELOAD_REG_FPR, /* Traditional floating point regs. */
313 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
314 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
315 N_RELOAD_REG
318 /* For setting up register classes, loop through the 3 register classes mapping
319 into real registers, and skip the ANY class, which is just an OR of the
320 bits. */
321 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
322 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
324 /* Map reload register type to a register in the register class. */
325 struct reload_reg_map_type {
326 const char *name; /* Register class name. */
327 int reg; /* Register in the register class. */
330 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
331 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
332 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
333 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
334 { "Any", -1 }, /* RELOAD_REG_ANY. */
337 /* Mask bits for each register class, indexed per mode. Historically the
338 compiler has been more restrictive which types can do PRE_MODIFY instead of
339 PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */
340 typedef unsigned char addr_mask_type;
342 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
343 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
344 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
345 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
346 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
347 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
348 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
349 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
351 /* Register type masks based on the type, of valid addressing modes. */
352 struct rs6000_reg_addr {
353 enum insn_code reload_load; /* INSN to reload for loading. */
354 enum insn_code reload_store; /* INSN to reload for storing. */
355 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
356 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
357 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
358 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
359 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
362 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
364 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
365 static inline bool
366 mode_supports_pre_incdec_p (machine_mode mode)
368 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
369 != 0);
372 /* Helper function to say whether a mode supports PRE_MODIFY. */
373 static inline bool
374 mode_supports_pre_modify_p (machine_mode mode)
376 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
377 != 0);
380 /* Return true if we have D-form addressing in altivec registers. */
381 static inline bool
382 mode_supports_vmx_dform (machine_mode mode)
384 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
387 /* Return true if we have D-form addressing in VSX registers. This addressing
388 is more limited than normal d-form addressing in that the offset must be
389 aligned on a 16-byte boundary. */
390 static inline bool
391 mode_supports_dq_form (machine_mode mode)
393 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
394 != 0);
397 /* Given that there exists at least one variable that is set (produced)
398 by OUT_INSN and read (consumed) by IN_INSN, return true iff
399 IN_INSN represents one or more memory store operations and none of
400 the variables set by OUT_INSN is used by IN_INSN as the address of a
401 store operation. If either IN_INSN or OUT_INSN does not represent
402 a "single" RTL SET expression (as loosely defined by the
403 implementation of the single_set function) or a PARALLEL with only
404 SETs, CLOBBERs, and USEs inside, this function returns false.
406 This rs6000-specific version of store_data_bypass_p checks for
407 certain conditions that result in assertion failures (and internal
408 compiler errors) in the generic store_data_bypass_p function and
409 returns false rather than calling store_data_bypass_p if one of the
410 problematic conditions is detected. */
413 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
415 rtx out_set, in_set;
416 rtx out_pat, in_pat;
417 rtx out_exp, in_exp;
418 int i, j;
420 in_set = single_set (in_insn);
421 if (in_set)
423 if (MEM_P (SET_DEST (in_set)))
425 out_set = single_set (out_insn);
426 if (!out_set)
428 out_pat = PATTERN (out_insn);
429 if (GET_CODE (out_pat) == PARALLEL)
431 for (i = 0; i < XVECLEN (out_pat, 0); i++)
433 out_exp = XVECEXP (out_pat, 0, i);
434 if ((GET_CODE (out_exp) == CLOBBER)
435 || (GET_CODE (out_exp) == USE))
436 continue;
437 else if (GET_CODE (out_exp) != SET)
438 return false;
444 else
446 in_pat = PATTERN (in_insn);
447 if (GET_CODE (in_pat) != PARALLEL)
448 return false;
450 for (i = 0; i < XVECLEN (in_pat, 0); i++)
452 in_exp = XVECEXP (in_pat, 0, i);
453 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
454 continue;
455 else if (GET_CODE (in_exp) != SET)
456 return false;
458 if (MEM_P (SET_DEST (in_exp)))
460 out_set = single_set (out_insn);
461 if (!out_set)
463 out_pat = PATTERN (out_insn);
464 if (GET_CODE (out_pat) != PARALLEL)
465 return false;
466 for (j = 0; j < XVECLEN (out_pat, 0); j++)
468 out_exp = XVECEXP (out_pat, 0, j);
469 if ((GET_CODE (out_exp) == CLOBBER)
470 || (GET_CODE (out_exp) == USE))
471 continue;
472 else if (GET_CODE (out_exp) != SET)
473 return false;
479 return store_data_bypass_p (out_insn, in_insn);
483 /* Processor costs (relative to an add) */
485 const struct processor_costs *rs6000_cost;
487 /* Instruction size costs on 32bit processors. */
488 static const
489 struct processor_costs size32_cost = {
490 COSTS_N_INSNS (1), /* mulsi */
491 COSTS_N_INSNS (1), /* mulsi_const */
492 COSTS_N_INSNS (1), /* mulsi_const9 */
493 COSTS_N_INSNS (1), /* muldi */
494 COSTS_N_INSNS (1), /* divsi */
495 COSTS_N_INSNS (1), /* divdi */
496 COSTS_N_INSNS (1), /* fp */
497 COSTS_N_INSNS (1), /* dmul */
498 COSTS_N_INSNS (1), /* sdiv */
499 COSTS_N_INSNS (1), /* ddiv */
500 32, /* cache line size */
501 0, /* l1 cache */
502 0, /* l2 cache */
503 0, /* streams */
504 0, /* SF->DF convert */
507 /* Instruction size costs on 64bit processors. */
508 static const
509 struct processor_costs size64_cost = {
510 COSTS_N_INSNS (1), /* mulsi */
511 COSTS_N_INSNS (1), /* mulsi_const */
512 COSTS_N_INSNS (1), /* mulsi_const9 */
513 COSTS_N_INSNS (1), /* muldi */
514 COSTS_N_INSNS (1), /* divsi */
515 COSTS_N_INSNS (1), /* divdi */
516 COSTS_N_INSNS (1), /* fp */
517 COSTS_N_INSNS (1), /* dmul */
518 COSTS_N_INSNS (1), /* sdiv */
519 COSTS_N_INSNS (1), /* ddiv */
520 128, /* cache line size */
521 0, /* l1 cache */
522 0, /* l2 cache */
523 0, /* streams */
524 0, /* SF->DF convert */
527 /* Instruction costs on RS64A processors. */
528 static const
529 struct processor_costs rs64a_cost = {
530 COSTS_N_INSNS (20), /* mulsi */
531 COSTS_N_INSNS (12), /* mulsi_const */
532 COSTS_N_INSNS (8), /* mulsi_const9 */
533 COSTS_N_INSNS (34), /* muldi */
534 COSTS_N_INSNS (65), /* divsi */
535 COSTS_N_INSNS (67), /* divdi */
536 COSTS_N_INSNS (4), /* fp */
537 COSTS_N_INSNS (4), /* dmul */
538 COSTS_N_INSNS (31), /* sdiv */
539 COSTS_N_INSNS (31), /* ddiv */
540 128, /* cache line size */
541 128, /* l1 cache */
542 2048, /* l2 cache */
543 1, /* streams */
544 0, /* SF->DF convert */
547 /* Instruction costs on MPCCORE processors. */
548 static const
549 struct processor_costs mpccore_cost = {
550 COSTS_N_INSNS (2), /* mulsi */
551 COSTS_N_INSNS (2), /* mulsi_const */
552 COSTS_N_INSNS (2), /* mulsi_const9 */
553 COSTS_N_INSNS (2), /* muldi */
554 COSTS_N_INSNS (6), /* divsi */
555 COSTS_N_INSNS (6), /* divdi */
556 COSTS_N_INSNS (4), /* fp */
557 COSTS_N_INSNS (5), /* dmul */
558 COSTS_N_INSNS (10), /* sdiv */
559 COSTS_N_INSNS (17), /* ddiv */
560 32, /* cache line size */
561 4, /* l1 cache */
562 16, /* l2 cache */
563 1, /* streams */
564 0, /* SF->DF convert */
567 /* Instruction costs on PPC403 processors. */
568 static const
569 struct processor_costs ppc403_cost = {
570 COSTS_N_INSNS (4), /* mulsi */
571 COSTS_N_INSNS (4), /* mulsi_const */
572 COSTS_N_INSNS (4), /* mulsi_const9 */
573 COSTS_N_INSNS (4), /* muldi */
574 COSTS_N_INSNS (33), /* divsi */
575 COSTS_N_INSNS (33), /* divdi */
576 COSTS_N_INSNS (11), /* fp */
577 COSTS_N_INSNS (11), /* dmul */
578 COSTS_N_INSNS (11), /* sdiv */
579 COSTS_N_INSNS (11), /* ddiv */
580 32, /* cache line size */
581 4, /* l1 cache */
582 16, /* l2 cache */
583 1, /* streams */
584 0, /* SF->DF convert */
587 /* Instruction costs on PPC405 processors. */
588 static const
589 struct processor_costs ppc405_cost = {
590 COSTS_N_INSNS (5), /* mulsi */
591 COSTS_N_INSNS (4), /* mulsi_const */
592 COSTS_N_INSNS (3), /* mulsi_const9 */
593 COSTS_N_INSNS (5), /* muldi */
594 COSTS_N_INSNS (35), /* divsi */
595 COSTS_N_INSNS (35), /* divdi */
596 COSTS_N_INSNS (11), /* fp */
597 COSTS_N_INSNS (11), /* dmul */
598 COSTS_N_INSNS (11), /* sdiv */
599 COSTS_N_INSNS (11), /* ddiv */
600 32, /* cache line size */
601 16, /* l1 cache */
602 128, /* l2 cache */
603 1, /* streams */
604 0, /* SF->DF convert */
607 /* Instruction costs on PPC440 processors. */
608 static const
609 struct processor_costs ppc440_cost = {
610 COSTS_N_INSNS (3), /* mulsi */
611 COSTS_N_INSNS (2), /* mulsi_const */
612 COSTS_N_INSNS (2), /* mulsi_const9 */
613 COSTS_N_INSNS (3), /* muldi */
614 COSTS_N_INSNS (34), /* divsi */
615 COSTS_N_INSNS (34), /* divdi */
616 COSTS_N_INSNS (5), /* fp */
617 COSTS_N_INSNS (5), /* dmul */
618 COSTS_N_INSNS (19), /* sdiv */
619 COSTS_N_INSNS (33), /* ddiv */
620 32, /* cache line size */
621 32, /* l1 cache */
622 256, /* l2 cache */
623 1, /* streams */
624 0, /* SF->DF convert */
627 /* Instruction costs on PPC476 processors. */
628 static const
629 struct processor_costs ppc476_cost = {
630 COSTS_N_INSNS (4), /* mulsi */
631 COSTS_N_INSNS (4), /* mulsi_const */
632 COSTS_N_INSNS (4), /* mulsi_const9 */
633 COSTS_N_INSNS (4), /* muldi */
634 COSTS_N_INSNS (11), /* divsi */
635 COSTS_N_INSNS (11), /* divdi */
636 COSTS_N_INSNS (6), /* fp */
637 COSTS_N_INSNS (6), /* dmul */
638 COSTS_N_INSNS (19), /* sdiv */
639 COSTS_N_INSNS (33), /* ddiv */
640 32, /* l1 cache line size */
641 32, /* l1 cache */
642 512, /* l2 cache */
643 1, /* streams */
644 0, /* SF->DF convert */
647 /* Instruction costs on PPC601 processors. */
648 static const
649 struct processor_costs ppc601_cost = {
650 COSTS_N_INSNS (5), /* mulsi */
651 COSTS_N_INSNS (5), /* mulsi_const */
652 COSTS_N_INSNS (5), /* mulsi_const9 */
653 COSTS_N_INSNS (5), /* muldi */
654 COSTS_N_INSNS (36), /* divsi */
655 COSTS_N_INSNS (36), /* divdi */
656 COSTS_N_INSNS (4), /* fp */
657 COSTS_N_INSNS (5), /* dmul */
658 COSTS_N_INSNS (17), /* sdiv */
659 COSTS_N_INSNS (31), /* ddiv */
660 32, /* cache line size */
661 32, /* l1 cache */
662 256, /* l2 cache */
663 1, /* streams */
664 0, /* SF->DF convert */
667 /* Instruction costs on PPC603 processors. */
668 static const
669 struct processor_costs ppc603_cost = {
670 COSTS_N_INSNS (5), /* mulsi */
671 COSTS_N_INSNS (3), /* mulsi_const */
672 COSTS_N_INSNS (2), /* mulsi_const9 */
673 COSTS_N_INSNS (5), /* muldi */
674 COSTS_N_INSNS (37), /* divsi */
675 COSTS_N_INSNS (37), /* divdi */
676 COSTS_N_INSNS (3), /* fp */
677 COSTS_N_INSNS (4), /* dmul */
678 COSTS_N_INSNS (18), /* sdiv */
679 COSTS_N_INSNS (33), /* ddiv */
680 32, /* cache line size */
681 8, /* l1 cache */
682 64, /* l2 cache */
683 1, /* streams */
684 0, /* SF->DF convert */
687 /* Instruction costs on PPC604 processors. */
688 static const
689 struct processor_costs ppc604_cost = {
690 COSTS_N_INSNS (4), /* mulsi */
691 COSTS_N_INSNS (4), /* mulsi_const */
692 COSTS_N_INSNS (4), /* mulsi_const9 */
693 COSTS_N_INSNS (4), /* muldi */
694 COSTS_N_INSNS (20), /* divsi */
695 COSTS_N_INSNS (20), /* divdi */
696 COSTS_N_INSNS (3), /* fp */
697 COSTS_N_INSNS (3), /* dmul */
698 COSTS_N_INSNS (18), /* sdiv */
699 COSTS_N_INSNS (32), /* ddiv */
700 32, /* cache line size */
701 16, /* l1 cache */
702 512, /* l2 cache */
703 1, /* streams */
704 0, /* SF->DF convert */
707 /* Instruction costs on PPC604e processors. */
708 static const
709 struct processor_costs ppc604e_cost = {
710 COSTS_N_INSNS (2), /* mulsi */
711 COSTS_N_INSNS (2), /* mulsi_const */
712 COSTS_N_INSNS (2), /* mulsi_const9 */
713 COSTS_N_INSNS (2), /* muldi */
714 COSTS_N_INSNS (20), /* divsi */
715 COSTS_N_INSNS (20), /* divdi */
716 COSTS_N_INSNS (3), /* fp */
717 COSTS_N_INSNS (3), /* dmul */
718 COSTS_N_INSNS (18), /* sdiv */
719 COSTS_N_INSNS (32), /* ddiv */
720 32, /* cache line size */
721 32, /* l1 cache */
722 1024, /* l2 cache */
723 1, /* streams */
724 0, /* SF->DF convert */
727 /* Instruction costs on PPC620 processors. */
728 static const
729 struct processor_costs ppc620_cost = {
730 COSTS_N_INSNS (5), /* mulsi */
731 COSTS_N_INSNS (4), /* mulsi_const */
732 COSTS_N_INSNS (3), /* mulsi_const9 */
733 COSTS_N_INSNS (7), /* muldi */
734 COSTS_N_INSNS (21), /* divsi */
735 COSTS_N_INSNS (37), /* divdi */
736 COSTS_N_INSNS (3), /* fp */
737 COSTS_N_INSNS (3), /* dmul */
738 COSTS_N_INSNS (18), /* sdiv */
739 COSTS_N_INSNS (32), /* ddiv */
740 128, /* cache line size */
741 32, /* l1 cache */
742 1024, /* l2 cache */
743 1, /* streams */
744 0, /* SF->DF convert */
747 /* Instruction costs on PPC630 processors. */
748 static const
749 struct processor_costs ppc630_cost = {
750 COSTS_N_INSNS (5), /* mulsi */
751 COSTS_N_INSNS (4), /* mulsi_const */
752 COSTS_N_INSNS (3), /* mulsi_const9 */
753 COSTS_N_INSNS (7), /* muldi */
754 COSTS_N_INSNS (21), /* divsi */
755 COSTS_N_INSNS (37), /* divdi */
756 COSTS_N_INSNS (3), /* fp */
757 COSTS_N_INSNS (3), /* dmul */
758 COSTS_N_INSNS (17), /* sdiv */
759 COSTS_N_INSNS (21), /* ddiv */
760 128, /* cache line size */
761 64, /* l1 cache */
762 1024, /* l2 cache */
763 1, /* streams */
764 0, /* SF->DF convert */
767 /* Instruction costs on Cell processor. */
768 /* COSTS_N_INSNS (1) ~ one add. */
769 static const
770 struct processor_costs ppccell_cost = {
771 COSTS_N_INSNS (9/2)+2, /* mulsi */
772 COSTS_N_INSNS (6/2), /* mulsi_const */
773 COSTS_N_INSNS (6/2), /* mulsi_const9 */
774 COSTS_N_INSNS (15/2)+2, /* muldi */
775 COSTS_N_INSNS (38/2), /* divsi */
776 COSTS_N_INSNS (70/2), /* divdi */
777 COSTS_N_INSNS (10/2), /* fp */
778 COSTS_N_INSNS (10/2), /* dmul */
779 COSTS_N_INSNS (74/2), /* sdiv */
780 COSTS_N_INSNS (74/2), /* ddiv */
781 128, /* cache line size */
782 32, /* l1 cache */
783 512, /* l2 cache */
784 6, /* streams */
785 0, /* SF->DF convert */
788 /* Instruction costs on PPC750 and PPC7400 processors. */
789 static const
790 struct processor_costs ppc750_cost = {
791 COSTS_N_INSNS (5), /* mulsi */
792 COSTS_N_INSNS (3), /* mulsi_const */
793 COSTS_N_INSNS (2), /* mulsi_const9 */
794 COSTS_N_INSNS (5), /* muldi */
795 COSTS_N_INSNS (17), /* divsi */
796 COSTS_N_INSNS (17), /* divdi */
797 COSTS_N_INSNS (3), /* fp */
798 COSTS_N_INSNS (3), /* dmul */
799 COSTS_N_INSNS (17), /* sdiv */
800 COSTS_N_INSNS (31), /* ddiv */
801 32, /* cache line size */
802 32, /* l1 cache */
803 512, /* l2 cache */
804 1, /* streams */
805 0, /* SF->DF convert */
808 /* Instruction costs on PPC7450 processors. */
809 static const
810 struct processor_costs ppc7450_cost = {
811 COSTS_N_INSNS (4), /* mulsi */
812 COSTS_N_INSNS (3), /* mulsi_const */
813 COSTS_N_INSNS (3), /* mulsi_const9 */
814 COSTS_N_INSNS (4), /* muldi */
815 COSTS_N_INSNS (23), /* divsi */
816 COSTS_N_INSNS (23), /* divdi */
817 COSTS_N_INSNS (5), /* fp */
818 COSTS_N_INSNS (5), /* dmul */
819 COSTS_N_INSNS (21), /* sdiv */
820 COSTS_N_INSNS (35), /* ddiv */
821 32, /* cache line size */
822 32, /* l1 cache */
823 1024, /* l2 cache */
824 1, /* streams */
825 0, /* SF->DF convert */
828 /* Instruction costs on PPC8540 processors. */
829 static const
830 struct processor_costs ppc8540_cost = {
831 COSTS_N_INSNS (4), /* mulsi */
832 COSTS_N_INSNS (4), /* mulsi_const */
833 COSTS_N_INSNS (4), /* mulsi_const9 */
834 COSTS_N_INSNS (4), /* muldi */
835 COSTS_N_INSNS (19), /* divsi */
836 COSTS_N_INSNS (19), /* divdi */
837 COSTS_N_INSNS (4), /* fp */
838 COSTS_N_INSNS (4), /* dmul */
839 COSTS_N_INSNS (29), /* sdiv */
840 COSTS_N_INSNS (29), /* ddiv */
841 32, /* cache line size */
842 32, /* l1 cache */
843 256, /* l2 cache */
844 1, /* prefetch streams /*/
845 0, /* SF->DF convert */
848 /* Instruction costs on E300C2 and E300C3 cores. */
849 static const
850 struct processor_costs ppce300c2c3_cost = {
851 COSTS_N_INSNS (4), /* mulsi */
852 COSTS_N_INSNS (4), /* mulsi_const */
853 COSTS_N_INSNS (4), /* mulsi_const9 */
854 COSTS_N_INSNS (4), /* muldi */
855 COSTS_N_INSNS (19), /* divsi */
856 COSTS_N_INSNS (19), /* divdi */
857 COSTS_N_INSNS (3), /* fp */
858 COSTS_N_INSNS (4), /* dmul */
859 COSTS_N_INSNS (18), /* sdiv */
860 COSTS_N_INSNS (33), /* ddiv */
862 16, /* l1 cache */
863 16, /* l2 cache */
864 1, /* prefetch streams /*/
865 0, /* SF->DF convert */
868 /* Instruction costs on PPCE500MC processors. */
869 static const
870 struct processor_costs ppce500mc_cost = {
871 COSTS_N_INSNS (4), /* mulsi */
872 COSTS_N_INSNS (4), /* mulsi_const */
873 COSTS_N_INSNS (4), /* mulsi_const9 */
874 COSTS_N_INSNS (4), /* muldi */
875 COSTS_N_INSNS (14), /* divsi */
876 COSTS_N_INSNS (14), /* divdi */
877 COSTS_N_INSNS (8), /* fp */
878 COSTS_N_INSNS (10), /* dmul */
879 COSTS_N_INSNS (36), /* sdiv */
880 COSTS_N_INSNS (66), /* ddiv */
881 64, /* cache line size */
882 32, /* l1 cache */
883 128, /* l2 cache */
884 1, /* prefetch streams /*/
885 0, /* SF->DF convert */
888 /* Instruction costs on PPCE500MC64 processors. */
889 static const
890 struct processor_costs ppce500mc64_cost = {
891 COSTS_N_INSNS (4), /* mulsi */
892 COSTS_N_INSNS (4), /* mulsi_const */
893 COSTS_N_INSNS (4), /* mulsi_const9 */
894 COSTS_N_INSNS (4), /* muldi */
895 COSTS_N_INSNS (14), /* divsi */
896 COSTS_N_INSNS (14), /* divdi */
897 COSTS_N_INSNS (4), /* fp */
898 COSTS_N_INSNS (10), /* dmul */
899 COSTS_N_INSNS (36), /* sdiv */
900 COSTS_N_INSNS (66), /* ddiv */
901 64, /* cache line size */
902 32, /* l1 cache */
903 128, /* l2 cache */
904 1, /* prefetch streams /*/
905 0, /* SF->DF convert */
908 /* Instruction costs on PPCE5500 processors. */
909 static const
910 struct processor_costs ppce5500_cost = {
911 COSTS_N_INSNS (5), /* mulsi */
912 COSTS_N_INSNS (5), /* mulsi_const */
913 COSTS_N_INSNS (4), /* mulsi_const9 */
914 COSTS_N_INSNS (5), /* muldi */
915 COSTS_N_INSNS (14), /* divsi */
916 COSTS_N_INSNS (14), /* divdi */
917 COSTS_N_INSNS (7), /* fp */
918 COSTS_N_INSNS (10), /* dmul */
919 COSTS_N_INSNS (36), /* sdiv */
920 COSTS_N_INSNS (66), /* ddiv */
921 64, /* cache line size */
922 32, /* l1 cache */
923 128, /* l2 cache */
924 1, /* prefetch streams /*/
925 0, /* SF->DF convert */
928 /* Instruction costs on PPCE6500 processors. */
929 static const
930 struct processor_costs ppce6500_cost = {
931 COSTS_N_INSNS (5), /* mulsi */
932 COSTS_N_INSNS (5), /* mulsi_const */
933 COSTS_N_INSNS (4), /* mulsi_const9 */
934 COSTS_N_INSNS (5), /* muldi */
935 COSTS_N_INSNS (14), /* divsi */
936 COSTS_N_INSNS (14), /* divdi */
937 COSTS_N_INSNS (7), /* fp */
938 COSTS_N_INSNS (10), /* dmul */
939 COSTS_N_INSNS (36), /* sdiv */
940 COSTS_N_INSNS (66), /* ddiv */
941 64, /* cache line size */
942 32, /* l1 cache */
943 128, /* l2 cache */
944 1, /* prefetch streams /*/
945 0, /* SF->DF convert */
948 /* Instruction costs on AppliedMicro Titan processors. */
949 static const
950 struct processor_costs titan_cost = {
951 COSTS_N_INSNS (5), /* mulsi */
952 COSTS_N_INSNS (5), /* mulsi_const */
953 COSTS_N_INSNS (5), /* mulsi_const9 */
954 COSTS_N_INSNS (5), /* muldi */
955 COSTS_N_INSNS (18), /* divsi */
956 COSTS_N_INSNS (18), /* divdi */
957 COSTS_N_INSNS (10), /* fp */
958 COSTS_N_INSNS (10), /* dmul */
959 COSTS_N_INSNS (46), /* sdiv */
960 COSTS_N_INSNS (72), /* ddiv */
961 32, /* cache line size */
962 32, /* l1 cache */
963 512, /* l2 cache */
964 1, /* prefetch streams /*/
965 0, /* SF->DF convert */
968 /* Instruction costs on POWER4 and POWER5 processors. */
969 static const
970 struct processor_costs power4_cost = {
971 COSTS_N_INSNS (3), /* mulsi */
972 COSTS_N_INSNS (2), /* mulsi_const */
973 COSTS_N_INSNS (2), /* mulsi_const9 */
974 COSTS_N_INSNS (4), /* muldi */
975 COSTS_N_INSNS (18), /* divsi */
976 COSTS_N_INSNS (34), /* divdi */
977 COSTS_N_INSNS (3), /* fp */
978 COSTS_N_INSNS (3), /* dmul */
979 COSTS_N_INSNS (17), /* sdiv */
980 COSTS_N_INSNS (17), /* ddiv */
981 128, /* cache line size */
982 32, /* l1 cache */
983 1024, /* l2 cache */
984 8, /* prefetch streams /*/
985 0, /* SF->DF convert */
988 /* Instruction costs on POWER6 processors. */
989 static const
990 struct processor_costs power6_cost = {
991 COSTS_N_INSNS (8), /* mulsi */
992 COSTS_N_INSNS (8), /* mulsi_const */
993 COSTS_N_INSNS (8), /* mulsi_const9 */
994 COSTS_N_INSNS (8), /* muldi */
995 COSTS_N_INSNS (22), /* divsi */
996 COSTS_N_INSNS (28), /* divdi */
997 COSTS_N_INSNS (3), /* fp */
998 COSTS_N_INSNS (3), /* dmul */
999 COSTS_N_INSNS (13), /* sdiv */
1000 COSTS_N_INSNS (16), /* ddiv */
1001 128, /* cache line size */
1002 64, /* l1 cache */
1003 2048, /* l2 cache */
1004 16, /* prefetch streams */
1005 0, /* SF->DF convert */
1008 /* Instruction costs on POWER7 processors. */
1009 static const
1010 struct processor_costs power7_cost = {
1011 COSTS_N_INSNS (2), /* mulsi */
1012 COSTS_N_INSNS (2), /* mulsi_const */
1013 COSTS_N_INSNS (2), /* mulsi_const9 */
1014 COSTS_N_INSNS (2), /* muldi */
1015 COSTS_N_INSNS (18), /* divsi */
1016 COSTS_N_INSNS (34), /* divdi */
1017 COSTS_N_INSNS (3), /* fp */
1018 COSTS_N_INSNS (3), /* dmul */
1019 COSTS_N_INSNS (13), /* sdiv */
1020 COSTS_N_INSNS (16), /* ddiv */
1021 128, /* cache line size */
1022 32, /* l1 cache */
1023 256, /* l2 cache */
1024 12, /* prefetch streams */
1025 COSTS_N_INSNS (3), /* SF->DF convert */
1028 /* Instruction costs on POWER8 processors. */
1029 static const
1030 struct processor_costs power8_cost = {
1031 COSTS_N_INSNS (3), /* mulsi */
1032 COSTS_N_INSNS (3), /* mulsi_const */
1033 COSTS_N_INSNS (3), /* mulsi_const9 */
1034 COSTS_N_INSNS (3), /* muldi */
1035 COSTS_N_INSNS (19), /* divsi */
1036 COSTS_N_INSNS (35), /* divdi */
1037 COSTS_N_INSNS (3), /* fp */
1038 COSTS_N_INSNS (3), /* dmul */
1039 COSTS_N_INSNS (14), /* sdiv */
1040 COSTS_N_INSNS (17), /* ddiv */
1041 128, /* cache line size */
1042 32, /* l1 cache */
1043 512, /* l2 cache */
1044 12, /* prefetch streams */
1045 COSTS_N_INSNS (3), /* SF->DF convert */
1048 /* Instruction costs on POWER9 processors. */
1049 static const
1050 struct processor_costs power9_cost = {
1051 COSTS_N_INSNS (3), /* mulsi */
1052 COSTS_N_INSNS (3), /* mulsi_const */
1053 COSTS_N_INSNS (3), /* mulsi_const9 */
1054 COSTS_N_INSNS (3), /* muldi */
1055 COSTS_N_INSNS (8), /* divsi */
1056 COSTS_N_INSNS (12), /* divdi */
1057 COSTS_N_INSNS (3), /* fp */
1058 COSTS_N_INSNS (3), /* dmul */
1059 COSTS_N_INSNS (13), /* sdiv */
1060 COSTS_N_INSNS (18), /* ddiv */
1061 128, /* cache line size */
1062 32, /* l1 cache */
1063 512, /* l2 cache */
1064 8, /* prefetch streams */
1065 COSTS_N_INSNS (3), /* SF->DF convert */
1068 /* Instruction costs on POWER10 processors. */
1069 static const
1070 struct processor_costs power10_cost = {
1071 COSTS_N_INSNS (2), /* mulsi */
1072 COSTS_N_INSNS (2), /* mulsi_const */
1073 COSTS_N_INSNS (2), /* mulsi_const9 */
1074 COSTS_N_INSNS (2), /* muldi */
1075 COSTS_N_INSNS (6), /* divsi */
1076 COSTS_N_INSNS (6), /* divdi */
1077 COSTS_N_INSNS (2), /* fp */
1078 COSTS_N_INSNS (2), /* dmul */
1079 COSTS_N_INSNS (11), /* sdiv */
1080 COSTS_N_INSNS (13), /* ddiv */
1081 128, /* cache line size */
1082 32, /* l1 cache */
1083 512, /* l2 cache */
1084 16, /* prefetch streams */
1085 COSTS_N_INSNS (2), /* SF->DF convert */
1088 /* Instruction costs on POWER A2 processors. */
1089 static const
1090 struct processor_costs ppca2_cost = {
1091 COSTS_N_INSNS (16), /* mulsi */
1092 COSTS_N_INSNS (16), /* mulsi_const */
1093 COSTS_N_INSNS (16), /* mulsi_const9 */
1094 COSTS_N_INSNS (16), /* muldi */
1095 COSTS_N_INSNS (22), /* divsi */
1096 COSTS_N_INSNS (28), /* divdi */
1097 COSTS_N_INSNS (3), /* fp */
1098 COSTS_N_INSNS (3), /* dmul */
1099 COSTS_N_INSNS (59), /* sdiv */
1100 COSTS_N_INSNS (72), /* ddiv */
1102 16, /* l1 cache */
1103 2048, /* l2 cache */
1104 16, /* prefetch streams */
1105 0, /* SF->DF convert */
1108 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1109 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1112 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1113 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1114 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1115 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1116 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1117 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1118 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1119 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1120 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1121 bool);
1122 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1123 unsigned int);
1124 static bool is_microcoded_insn (rtx_insn *);
1125 static bool is_nonpipeline_insn (rtx_insn *);
1126 static bool is_cracked_insn (rtx_insn *);
1127 static bool is_load_insn (rtx, rtx *);
1128 static bool is_store_insn (rtx, rtx *);
1129 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1130 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1131 static bool insn_must_be_first_in_group (rtx_insn *);
1132 static bool insn_must_be_last_in_group (rtx_insn *);
1133 bool easy_vector_constant (rtx, machine_mode);
1134 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1135 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1136 #if TARGET_MACHO
1137 static tree get_prev_label (tree);
1138 #endif
1139 static bool rs6000_mode_dependent_address (const_rtx);
1140 static bool rs6000_debug_mode_dependent_address (const_rtx);
1141 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1142 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1143 machine_mode, rtx);
1144 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1145 machine_mode,
1146 rtx);
1147 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1148 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1149 enum reg_class);
1150 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1151 reg_class_t,
1152 reg_class_t);
1153 static bool rs6000_debug_can_change_mode_class (machine_mode,
1154 machine_mode,
1155 reg_class_t);
1157 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1158 = rs6000_mode_dependent_address;
1160 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1161 machine_mode, rtx)
1162 = rs6000_secondary_reload_class;
1164 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1165 = rs6000_preferred_reload_class;
1167 const int INSN_NOT_AVAILABLE = -1;
1169 static void rs6000_print_isa_options (FILE *, int, const char *,
1170 HOST_WIDE_INT);
1171 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1173 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1174 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1175 enum rs6000_reg_type,
1176 machine_mode,
1177 secondary_reload_info *,
1178 bool);
1179 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1181 /* Hash table stuff for keeping track of TOC entries. */
1183 struct GTY((for_user)) toc_hash_struct
1185 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1186 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1187 rtx key;
1188 machine_mode key_mode;
1189 int labelno;
1192 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1194 static hashval_t hash (toc_hash_struct *);
1195 static bool equal (toc_hash_struct *, toc_hash_struct *);
1198 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1202 /* Default register names. */
1203 char rs6000_reg_names[][8] =
1205 /* GPRs */
1206 "0", "1", "2", "3", "4", "5", "6", "7",
1207 "8", "9", "10", "11", "12", "13", "14", "15",
1208 "16", "17", "18", "19", "20", "21", "22", "23",
1209 "24", "25", "26", "27", "28", "29", "30", "31",
1210 /* FPRs */
1211 "0", "1", "2", "3", "4", "5", "6", "7",
1212 "8", "9", "10", "11", "12", "13", "14", "15",
1213 "16", "17", "18", "19", "20", "21", "22", "23",
1214 "24", "25", "26", "27", "28", "29", "30", "31",
1215 /* VRs */
1216 "0", "1", "2", "3", "4", "5", "6", "7",
1217 "8", "9", "10", "11", "12", "13", "14", "15",
1218 "16", "17", "18", "19", "20", "21", "22", "23",
1219 "24", "25", "26", "27", "28", "29", "30", "31",
1220 /* lr ctr ca ap */
1221 "lr", "ctr", "ca", "ap",
1222 /* cr0..cr7 */
1223 "0", "1", "2", "3", "4", "5", "6", "7",
1224 /* vrsave vscr sfp */
1225 "vrsave", "vscr", "sfp",
1228 #ifdef TARGET_REGNAMES
1229 static const char alt_reg_names[][8] =
1231 /* GPRs */
1232 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1233 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1234 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1235 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1236 /* FPRs */
1237 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1238 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1239 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1240 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1241 /* VRs */
1242 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1243 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1244 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1245 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1246 /* lr ctr ca ap */
1247 "lr", "ctr", "ca", "ap",
1248 /* cr0..cr7 */
1249 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1250 /* vrsave vscr sfp */
1251 "vrsave", "vscr", "sfp",
1253 #endif
1255 /* Table of valid machine attributes. */
1257 static const struct attribute_spec rs6000_attribute_table[] =
1259 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1260 affects_type_identity, handler, exclude } */
1261 { "altivec", 1, 1, false, true, false, false,
1262 rs6000_handle_altivec_attribute, NULL },
1263 { "longcall", 0, 0, false, true, true, false,
1264 rs6000_handle_longcall_attribute, NULL },
1265 { "shortcall", 0, 0, false, true, true, false,
1266 rs6000_handle_longcall_attribute, NULL },
1267 { "ms_struct", 0, 0, false, false, false, false,
1268 rs6000_handle_struct_attribute, NULL },
1269 { "gcc_struct", 0, 0, false, false, false, false,
1270 rs6000_handle_struct_attribute, NULL },
1271 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1272 SUBTARGET_ATTRIBUTE_TABLE,
1273 #endif
1274 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1277 #ifndef TARGET_PROFILE_KERNEL
1278 #define TARGET_PROFILE_KERNEL 0
1279 #endif
1281 /* Initialize the GCC target structure. */
1282 #undef TARGET_ATTRIBUTE_TABLE
1283 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1284 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1285 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1286 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1287 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1289 #undef TARGET_ASM_ALIGNED_DI_OP
1290 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1292 /* Default unaligned ops are only provided for ELF. Find the ops needed
1293 for non-ELF systems. */
1294 #ifndef OBJECT_FORMAT_ELF
1295 #if TARGET_XCOFF
1296 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1297 64-bit targets. */
1298 #undef TARGET_ASM_UNALIGNED_HI_OP
1299 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1300 #undef TARGET_ASM_UNALIGNED_SI_OP
1301 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1302 #undef TARGET_ASM_UNALIGNED_DI_OP
1303 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1304 #else
1305 /* For Darwin. */
1306 #undef TARGET_ASM_UNALIGNED_HI_OP
1307 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1308 #undef TARGET_ASM_UNALIGNED_SI_OP
1309 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1310 #undef TARGET_ASM_UNALIGNED_DI_OP
1311 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1312 #undef TARGET_ASM_ALIGNED_DI_OP
1313 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1314 #endif
1315 #endif
1317 /* This hook deals with fixups for relocatable code and DI-mode objects
1318 in 64-bit code. */
1319 #undef TARGET_ASM_INTEGER
1320 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1322 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1323 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1324 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1325 #endif
1327 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1328 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1329 rs6000_print_patchable_function_entry
1331 #undef TARGET_SET_UP_BY_PROLOGUE
1332 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1334 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1336 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1337 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1338 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1339 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1340 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1341 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1342 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1343 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1344 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1345 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1347 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1348 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1350 #undef TARGET_INTERNAL_ARG_POINTER
1351 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1353 #undef TARGET_HAVE_TLS
1354 #define TARGET_HAVE_TLS HAVE_AS_TLS
1356 #undef TARGET_CANNOT_FORCE_CONST_MEM
1357 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1359 #undef TARGET_DELEGITIMIZE_ADDRESS
1360 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1362 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1363 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1365 #undef TARGET_LEGITIMATE_COMBINED_INSN
1366 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1368 #undef TARGET_ASM_FUNCTION_PROLOGUE
1369 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1370 #undef TARGET_ASM_FUNCTION_EPILOGUE
1371 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1373 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1374 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1376 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1377 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1379 #undef TARGET_LEGITIMIZE_ADDRESS
1380 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1382 #undef TARGET_SCHED_VARIABLE_ISSUE
1383 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1385 #undef TARGET_SCHED_ISSUE_RATE
1386 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1387 #undef TARGET_SCHED_ADJUST_COST
1388 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1389 #undef TARGET_SCHED_ADJUST_PRIORITY
1390 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1391 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1392 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1393 #undef TARGET_SCHED_INIT
1394 #define TARGET_SCHED_INIT rs6000_sched_init
1395 #undef TARGET_SCHED_FINISH
1396 #define TARGET_SCHED_FINISH rs6000_sched_finish
1397 #undef TARGET_SCHED_REORDER
1398 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1399 #undef TARGET_SCHED_REORDER2
1400 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1402 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1403 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1405 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1406 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1408 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1409 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1410 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1411 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1412 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1413 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1414 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1415 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1417 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1418 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1420 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1421 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1422 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1423 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1424 rs6000_builtin_support_vector_misalignment
1425 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1426 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1427 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1428 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1429 rs6000_builtin_vectorization_cost
1430 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1431 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1432 rs6000_preferred_simd_mode
1433 #undef TARGET_VECTORIZE_CREATE_COSTS
1434 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1436 #undef TARGET_LOOP_UNROLL_ADJUST
1437 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1439 #undef TARGET_INIT_BUILTINS
1440 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1441 #undef TARGET_BUILTIN_DECL
1442 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1444 #undef TARGET_FOLD_BUILTIN
1445 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1446 #undef TARGET_GIMPLE_FOLD_BUILTIN
1447 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1449 #undef TARGET_EXPAND_BUILTIN
1450 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1452 #undef TARGET_MANGLE_TYPE
1453 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1455 #undef TARGET_INIT_LIBFUNCS
1456 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1458 #if TARGET_MACHO
1459 #undef TARGET_BINDS_LOCAL_P
1460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1461 #endif
1463 #undef TARGET_MS_BITFIELD_LAYOUT_P
1464 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1466 #undef TARGET_ASM_OUTPUT_MI_THUNK
1467 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1472 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1473 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1475 #undef TARGET_REGISTER_MOVE_COST
1476 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1477 #undef TARGET_MEMORY_MOVE_COST
1478 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1479 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1480 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1481 rs6000_ira_change_pseudo_allocno_class
1482 #undef TARGET_CANNOT_COPY_INSN_P
1483 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1484 #undef TARGET_RTX_COSTS
1485 #define TARGET_RTX_COSTS rs6000_rtx_costs
1486 #undef TARGET_ADDRESS_COST
1487 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1488 #undef TARGET_INSN_COST
1489 #define TARGET_INSN_COST rs6000_insn_cost
1491 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1492 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1494 #undef TARGET_PROMOTE_FUNCTION_MODE
1495 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1497 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1498 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1500 #undef TARGET_RETURN_IN_MEMORY
1501 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1503 #undef TARGET_RETURN_IN_MSB
1504 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1506 #undef TARGET_SETUP_INCOMING_VARARGS
1507 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1509 /* Always strict argument naming on rs6000. */
1510 #undef TARGET_STRICT_ARGUMENT_NAMING
1511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1512 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1513 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1514 #undef TARGET_SPLIT_COMPLEX_ARG
1515 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1516 #undef TARGET_MUST_PASS_IN_STACK
1517 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1518 #undef TARGET_PASS_BY_REFERENCE
1519 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1520 #undef TARGET_ARG_PARTIAL_BYTES
1521 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1522 #undef TARGET_FUNCTION_ARG_ADVANCE
1523 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1524 #undef TARGET_FUNCTION_ARG
1525 #define TARGET_FUNCTION_ARG rs6000_function_arg
1526 #undef TARGET_FUNCTION_ARG_PADDING
1527 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1528 #undef TARGET_FUNCTION_ARG_BOUNDARY
1529 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1531 #undef TARGET_BUILD_BUILTIN_VA_LIST
1532 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1534 #undef TARGET_EXPAND_BUILTIN_VA_START
1535 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1537 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1538 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1540 #undef TARGET_EH_RETURN_FILTER_MODE
1541 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1543 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1544 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1546 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1547 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1549 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1550 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1551 rs6000_libgcc_floating_mode_supported_p
1553 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1554 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1556 #undef TARGET_FLOATN_MODE
1557 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1559 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1560 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1562 #undef TARGET_MD_ASM_ADJUST
1563 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1565 #undef TARGET_OPTION_OVERRIDE
1566 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1568 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1569 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1570 rs6000_builtin_vectorized_function
1572 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1573 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1574 rs6000_builtin_md_vectorized_function
1576 #undef TARGET_STACK_PROTECT_GUARD
1577 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1579 #if !TARGET_MACHO
1580 #undef TARGET_STACK_PROTECT_FAIL
1581 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1582 #endif
1584 #ifdef HAVE_AS_TLS
1585 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1586 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1587 #endif
1589 /* Use a 32-bit anchor range. This leads to sequences like:
1591 addis tmp,anchor,high
1592 add dest,tmp,low
1594 where tmp itself acts as an anchor, and can be shared between
1595 accesses to the same 64k page. */
1596 #undef TARGET_MIN_ANCHOR_OFFSET
1597 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1598 #undef TARGET_MAX_ANCHOR_OFFSET
1599 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1600 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1601 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1602 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1603 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1605 #undef TARGET_BUILTIN_RECIPROCAL
1606 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1608 #undef TARGET_SECONDARY_RELOAD
1609 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1610 #undef TARGET_SECONDARY_MEMORY_NEEDED
1611 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1612 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1613 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1615 #undef TARGET_LEGITIMATE_ADDRESS_P
1616 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1618 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1619 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1621 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1622 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1624 #undef TARGET_CAN_ELIMINATE
1625 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1628 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1630 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1631 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1633 #undef TARGET_TRAMPOLINE_INIT
1634 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1636 #undef TARGET_FUNCTION_VALUE
1637 #define TARGET_FUNCTION_VALUE rs6000_function_value
1639 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1640 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1642 #undef TARGET_OPTION_SAVE
1643 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1645 #undef TARGET_OPTION_RESTORE
1646 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1648 #undef TARGET_OPTION_PRINT
1649 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1651 #undef TARGET_CAN_INLINE_P
1652 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1654 #undef TARGET_SET_CURRENT_FUNCTION
1655 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1657 #undef TARGET_LEGITIMATE_CONSTANT_P
1658 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1660 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1661 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1663 #undef TARGET_CAN_USE_DOLOOP_P
1664 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1666 #undef TARGET_PREDICT_DOLOOP_P
1667 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1669 #undef TARGET_HAVE_COUNT_REG_DECR_P
1670 #define TARGET_HAVE_COUNT_REG_DECR_P true
1672 /* 1000000000 is infinite cost in IVOPTs. */
1673 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1674 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1676 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1677 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1679 #undef TARGET_PREFERRED_DOLOOP_MODE
1680 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1682 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1683 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1685 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1686 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1687 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1688 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1689 #undef TARGET_UNWIND_WORD_MODE
1690 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1692 #undef TARGET_OFFLOAD_OPTIONS
1693 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1695 #undef TARGET_C_MODE_FOR_SUFFIX
1696 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1698 #undef TARGET_INVALID_BINARY_OP
1699 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1701 #undef TARGET_OPTAB_SUPPORTED_P
1702 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1704 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1705 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1707 #undef TARGET_COMPARE_VERSION_PRIORITY
1708 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1710 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1711 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1712 rs6000_generate_version_dispatcher_body
1714 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1715 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1716 rs6000_get_function_versions_dispatcher
1718 #undef TARGET_OPTION_FUNCTION_VERSIONS
1719 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1721 #undef TARGET_HARD_REGNO_NREGS
1722 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1723 #undef TARGET_HARD_REGNO_MODE_OK
1724 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1726 #undef TARGET_MODES_TIEABLE_P
1727 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1729 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1730 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1731 rs6000_hard_regno_call_part_clobbered
1733 #undef TARGET_SLOW_UNALIGNED_ACCESS
1734 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1736 #undef TARGET_CAN_CHANGE_MODE_CLASS
1737 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1739 #undef TARGET_CONSTANT_ALIGNMENT
1740 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1742 #undef TARGET_STARTING_FRAME_OFFSET
1743 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1745 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1746 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1748 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1749 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1751 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1752 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1753 rs6000_cannot_substitute_mem_equiv_p
1755 #undef TARGET_INVALID_CONVERSION
1756 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1758 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1759 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1761 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1762 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1765 /* Processor table. */
1766 struct rs6000_ptt
1768 const char *const name; /* Canonical processor name. */
1769 const enum processor_type processor; /* Processor type enum value. */
1770 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1773 static struct rs6000_ptt const processor_target_table[] =
1775 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1776 #include "rs6000-cpus.def"
1777 #undef RS6000_CPU
1780 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1781 name is invalid. */
1783 static int
1784 rs6000_cpu_name_lookup (const char *name)
1786 size_t i;
1788 if (name != NULL)
1790 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1791 if (! strcmp (name, processor_target_table[i].name))
1792 return (int)i;
1795 return -1;
1799 /* Return number of consecutive hard regs needed starting at reg REGNO
1800 to hold something of mode MODE.
1801 This is ordinarily the length in words of a value of mode MODE
1802 but can be less for certain modes in special long registers.
1804 POWER and PowerPC GPRs hold 32 bits worth;
1805 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1807 static int
1808 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1810 unsigned HOST_WIDE_INT reg_size;
1812 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1813 128-bit floating point that can go in vector registers, which has VSX
1814 memory addressing. */
1815 if (FP_REGNO_P (regno))
1816 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1817 ? UNITS_PER_VSX_WORD
1818 : UNITS_PER_FP_WORD);
1820 else if (ALTIVEC_REGNO_P (regno))
1821 reg_size = UNITS_PER_ALTIVEC_WORD;
1823 else
1824 reg_size = UNITS_PER_WORD;
1826 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1829 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1830 MODE. */
1831 static int
1832 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1834 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1836 if (COMPLEX_MODE_P (mode))
1837 mode = GET_MODE_INNER (mode);
1839 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1840 registers. */
1841 if (mode == OOmode)
1842 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1844 /* MMA accumulator modes need FPR registers divisible by 4. */
1845 if (mode == XOmode)
1846 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1848 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1849 register combinations, and use PTImode where we need to deal with quad
1850 word memory operations. Don't allow quad words in the argument or frame
1851 pointer registers, just registers 0..31. */
1852 if (mode == PTImode)
1853 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1854 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1855 && ((regno & 1) == 0));
1857 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1858 implementations. Don't allow an item to be split between a FP register
1859 and an Altivec register. Allow TImode in all VSX registers if the user
1860 asked for it. */
1861 if (TARGET_VSX && VSX_REGNO_P (regno)
1862 && (VECTOR_MEM_VSX_P (mode)
1863 || VECTOR_ALIGNMENT_P (mode)
1864 || reg_addr[mode].scalar_in_vmx_p
1865 || mode == TImode
1866 || (TARGET_VADDUQM && mode == V1TImode)))
1868 if (FP_REGNO_P (regno))
1869 return FP_REGNO_P (last_regno);
1871 if (ALTIVEC_REGNO_P (regno))
1873 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1874 return 0;
1876 return ALTIVEC_REGNO_P (last_regno);
1880 /* The GPRs can hold any mode, but values bigger than one register
1881 cannot go past R31. */
1882 if (INT_REGNO_P (regno))
1883 return INT_REGNO_P (last_regno);
1885 /* The float registers (except for VSX vector modes) can only hold floating
1886 modes and DImode. */
1887 if (FP_REGNO_P (regno))
1889 if (VECTOR_ALIGNMENT_P (mode))
1890 return false;
1892 if (SCALAR_FLOAT_MODE_P (mode)
1893 && (mode != TDmode || (regno % 2) == 0)
1894 && FP_REGNO_P (last_regno))
1895 return 1;
1897 if (GET_MODE_CLASS (mode) == MODE_INT)
1899 if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1900 return 1;
1902 if (TARGET_P8_VECTOR && (mode == SImode))
1903 return 1;
1905 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1906 return 1;
1909 return 0;
1912 /* The CR register can only hold CC modes. */
1913 if (CR_REGNO_P (regno))
1914 return GET_MODE_CLASS (mode) == MODE_CC;
1916 if (CA_REGNO_P (regno))
1917 return mode == Pmode || mode == SImode;
1919 /* AltiVec only in AldyVec registers. */
1920 if (ALTIVEC_REGNO_P (regno))
1921 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1922 || mode == V1TImode);
1924 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1925 and it must be able to fit within the register set. */
1927 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1930 /* Implement TARGET_HARD_REGNO_NREGS. */
1932 static unsigned int
1933 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1935 return rs6000_hard_regno_nregs[mode][regno];
1938 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1940 static bool
1941 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1943 return rs6000_hard_regno_mode_ok_p[mode][regno];
1946 /* Implement TARGET_MODES_TIEABLE_P.
1948 PTImode cannot tie with other modes because PTImode is restricted to even
1949 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1950 57744).
1952 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1953 registers) or XOmode (vector quad, restricted to FPR registers divisible
1954 by 4) to tie with other modes.
1956 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1957 128-bit floating point on VSX systems ties with other vectors. */
1959 static bool
1960 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1962 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1963 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1964 return mode1 == mode2;
1966 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1967 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1968 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1969 return false;
1971 if (SCALAR_FLOAT_MODE_P (mode1))
1972 return SCALAR_FLOAT_MODE_P (mode2);
1973 if (SCALAR_FLOAT_MODE_P (mode2))
1974 return false;
1976 if (GET_MODE_CLASS (mode1) == MODE_CC)
1977 return GET_MODE_CLASS (mode2) == MODE_CC;
1978 if (GET_MODE_CLASS (mode2) == MODE_CC)
1979 return false;
1981 return true;
1984 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1986 static bool
1987 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1988 machine_mode mode)
1990 if (TARGET_32BIT
1991 && TARGET_POWERPC64
1992 && GET_MODE_SIZE (mode) > 4
1993 && INT_REGNO_P (regno))
1994 return true;
1996 if (TARGET_VSX
1997 && FP_REGNO_P (regno)
1998 && GET_MODE_SIZE (mode) > 8
1999 && !FLOAT128_2REG_P (mode))
2000 return true;
2002 return false;
2005 /* Print interesting facts about registers. */
2006 static void
2007 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2009 int r, m;
2011 for (r = first_regno; r <= last_regno; ++r)
2013 const char *comma = "";
2014 int len;
2016 if (first_regno == last_regno)
2017 fprintf (stderr, "%s:\t", reg_name);
2018 else
2019 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2021 len = 8;
2022 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2023 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2025 if (len > 70)
2027 fprintf (stderr, ",\n\t");
2028 len = 8;
2029 comma = "";
2032 if (rs6000_hard_regno_nregs[m][r] > 1)
2033 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2034 rs6000_hard_regno_nregs[m][r]);
2035 else
2036 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2038 comma = ", ";
2041 if (call_used_or_fixed_reg_p (r))
2043 if (len > 70)
2045 fprintf (stderr, ",\n\t");
2046 len = 8;
2047 comma = "";
2050 len += fprintf (stderr, "%s%s", comma, "call-used");
2051 comma = ", ";
2054 if (fixed_regs[r])
2056 if (len > 70)
2058 fprintf (stderr, ",\n\t");
2059 len = 8;
2060 comma = "";
2063 len += fprintf (stderr, "%s%s", comma, "fixed");
2064 comma = ", ";
2067 if (len > 70)
2069 fprintf (stderr, ",\n\t");
2070 comma = "";
2073 len += fprintf (stderr, "%sreg-class = %s", comma,
2074 reg_class_names[(int)rs6000_regno_regclass[r]]);
2075 comma = ", ";
2077 if (len > 70)
2079 fprintf (stderr, ",\n\t");
2080 comma = "";
2083 fprintf (stderr, "%sregno = %d\n", comma, r);
2087 static const char *
2088 rs6000_debug_vector_unit (enum rs6000_vector v)
2090 const char *ret;
2092 switch (v)
2094 case VECTOR_NONE: ret = "none"; break;
2095 case VECTOR_ALTIVEC: ret = "altivec"; break;
2096 case VECTOR_VSX: ret = "vsx"; break;
2097 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2098 default: ret = "unknown"; break;
2101 return ret;
2104 /* Inner function printing just the address mask for a particular reload
2105 register class. */
2106 DEBUG_FUNCTION char *
2107 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2109 static char ret[8];
2110 char *p = ret;
2112 if ((mask & RELOAD_REG_VALID) != 0)
2113 *p++ = 'v';
2114 else if (keep_spaces)
2115 *p++ = ' ';
2117 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2118 *p++ = 'm';
2119 else if (keep_spaces)
2120 *p++ = ' ';
2122 if ((mask & RELOAD_REG_INDEXED) != 0)
2123 *p++ = 'i';
2124 else if (keep_spaces)
2125 *p++ = ' ';
2127 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2128 *p++ = 'O';
2129 else if ((mask & RELOAD_REG_OFFSET) != 0)
2130 *p++ = 'o';
2131 else if (keep_spaces)
2132 *p++ = ' ';
2134 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2135 *p++ = '+';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2139 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2140 *p++ = '+';
2141 else if (keep_spaces)
2142 *p++ = ' ';
2144 if ((mask & RELOAD_REG_AND_M16) != 0)
2145 *p++ = '&';
2146 else if (keep_spaces)
2147 *p++ = ' ';
2149 *p = '\0';
2151 return ret;
2154 /* Print the address masks in a human readble fashion. */
2155 DEBUG_FUNCTION void
2156 rs6000_debug_print_mode (ssize_t m)
2158 ssize_t rc;
2159 int spaces = 0;
2161 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2162 for (rc = 0; rc < N_RELOAD_REG; rc++)
2163 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2164 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2166 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2167 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2169 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2170 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2171 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2172 spaces = 0;
2174 else
2175 spaces += strlen (" Reload=sl");
2177 if (reg_addr[m].scalar_in_vmx_p)
2179 fprintf (stderr, "%*s Upper=y", spaces, "");
2180 spaces = 0;
2182 else
2183 spaces += strlen (" Upper=y");
2185 if (rs6000_vector_unit[m] != VECTOR_NONE
2186 || rs6000_vector_mem[m] != VECTOR_NONE)
2188 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2189 spaces, "",
2190 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2191 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2194 fputs ("\n", stderr);
2197 #define DEBUG_FMT_ID "%-32s= "
2198 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2199 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2200 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2202 /* Print various interesting information with -mdebug=reg. */
2203 static void
2204 rs6000_debug_reg_global (void)
2206 static const char *const tf[2] = { "false", "true" };
2207 const char *nl = (const char *)0;
2208 int m;
2209 size_t m1, m2, v;
2210 char costly_num[20];
2211 char nop_num[20];
2212 char flags_buffer[40];
2213 const char *costly_str;
2214 const char *nop_str;
2215 const char *trace_str;
2216 const char *abi_str;
2217 const char *cmodel_str;
2218 struct cl_target_option cl_opts;
2220 /* Modes we want tieable information on. */
2221 static const machine_mode print_tieable_modes[] = {
2222 QImode,
2223 HImode,
2224 SImode,
2225 DImode,
2226 TImode,
2227 PTImode,
2228 SFmode,
2229 DFmode,
2230 TFmode,
2231 IFmode,
2232 KFmode,
2233 SDmode,
2234 DDmode,
2235 TDmode,
2236 V2SImode,
2237 V2SFmode,
2238 V16QImode,
2239 V8HImode,
2240 V4SImode,
2241 V2DImode,
2242 V1TImode,
2243 V32QImode,
2244 V16HImode,
2245 V8SImode,
2246 V4DImode,
2247 V2TImode,
2248 V4SFmode,
2249 V2DFmode,
2250 V8SFmode,
2251 V4DFmode,
2252 OOmode,
2253 XOmode,
2254 CCmode,
2255 CCUNSmode,
2256 CCEQmode,
2257 CCFPmode,
2260 /* Virtual regs we are interested in. */
2261 const static struct {
2262 int regno; /* register number. */
2263 const char *name; /* register name. */
2264 } virtual_regs[] = {
2265 { STACK_POINTER_REGNUM, "stack pointer:" },
2266 { TOC_REGNUM, "toc: " },
2267 { STATIC_CHAIN_REGNUM, "static chain: " },
2268 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2269 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2270 { ARG_POINTER_REGNUM, "arg pointer: " },
2271 { FRAME_POINTER_REGNUM, "frame pointer:" },
2272 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2273 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2274 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2275 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2276 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2277 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2278 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2279 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
2280 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2283 fputs ("\nHard register information:\n", stderr);
2284 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2285 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2286 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2287 LAST_ALTIVEC_REGNO,
2288 "vs");
2289 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2290 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2291 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2292 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2293 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2294 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2296 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2297 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2298 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2300 fprintf (stderr,
2301 "\n"
2302 "d reg_class = %s\n"
2303 "v reg_class = %s\n"
2304 "wa reg_class = %s\n"
2305 "we reg_class = %s\n"
2306 "wr reg_class = %s\n"
2307 "wx reg_class = %s\n"
2308 "wA reg_class = %s\n"
2309 "\n",
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2311 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2312 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2313 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2314 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2315 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2318 nl = "\n";
2319 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2320 rs6000_debug_print_mode (m);
2322 fputs ("\n", stderr);
2324 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2326 machine_mode mode1 = print_tieable_modes[m1];
2327 bool first_time = true;
2329 nl = (const char *)0;
2330 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2332 machine_mode mode2 = print_tieable_modes[m2];
2333 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2335 if (first_time)
2337 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2338 nl = "\n";
2339 first_time = false;
2342 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2346 if (!first_time)
2347 fputs ("\n", stderr);
2350 if (nl)
2351 fputs (nl, stderr);
2353 if (rs6000_recip_control)
2355 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2357 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2358 if (rs6000_recip_bits[m])
2360 fprintf (stderr,
2361 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2362 GET_MODE_NAME (m),
2363 (RS6000_RECIP_AUTO_RE_P (m)
2364 ? "auto"
2365 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2366 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2367 ? "auto"
2368 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2371 fputs ("\n", stderr);
2374 if (rs6000_cpu_index >= 0)
2376 const char *name = processor_target_table[rs6000_cpu_index].name;
2377 HOST_WIDE_INT flags
2378 = processor_target_table[rs6000_cpu_index].target_enable;
2380 sprintf (flags_buffer, "-mcpu=%s flags", name);
2381 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2383 else
2384 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2386 if (rs6000_tune_index >= 0)
2388 const char *name = processor_target_table[rs6000_tune_index].name;
2389 HOST_WIDE_INT flags
2390 = processor_target_table[rs6000_tune_index].target_enable;
2392 sprintf (flags_buffer, "-mtune=%s flags", name);
2393 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2395 else
2396 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2398 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2399 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2400 rs6000_isa_flags);
2402 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2403 rs6000_isa_flags_explicit);
2405 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2407 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2408 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2410 switch (rs6000_sched_costly_dep)
2412 case max_dep_latency:
2413 costly_str = "max_dep_latency";
2414 break;
2416 case no_dep_costly:
2417 costly_str = "no_dep_costly";
2418 break;
2420 case all_deps_costly:
2421 costly_str = "all_deps_costly";
2422 break;
2424 case true_store_to_load_dep_costly:
2425 costly_str = "true_store_to_load_dep_costly";
2426 break;
2428 case store_to_load_dep_costly:
2429 costly_str = "store_to_load_dep_costly";
2430 break;
2432 default:
2433 costly_str = costly_num;
2434 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2435 break;
2438 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2440 switch (rs6000_sched_insert_nops)
2442 case sched_finish_regroup_exact:
2443 nop_str = "sched_finish_regroup_exact";
2444 break;
2446 case sched_finish_pad_groups:
2447 nop_str = "sched_finish_pad_groups";
2448 break;
2450 case sched_finish_none:
2451 nop_str = "sched_finish_none";
2452 break;
2454 default:
2455 nop_str = nop_num;
2456 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2457 break;
2460 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2462 switch (rs6000_sdata)
2464 default:
2465 case SDATA_NONE:
2466 break;
2468 case SDATA_DATA:
2469 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2470 break;
2472 case SDATA_SYSV:
2473 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2474 break;
2476 case SDATA_EABI:
2477 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2478 break;
2482 switch (rs6000_traceback)
2484 case traceback_default: trace_str = "default"; break;
2485 case traceback_none: trace_str = "none"; break;
2486 case traceback_part: trace_str = "part"; break;
2487 case traceback_full: trace_str = "full"; break;
2488 default: trace_str = "unknown"; break;
2491 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2493 switch (rs6000_current_cmodel)
2495 case CMODEL_SMALL: cmodel_str = "small"; break;
2496 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2497 case CMODEL_LARGE: cmodel_str = "large"; break;
2498 default: cmodel_str = "unknown"; break;
2501 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2503 switch (rs6000_current_abi)
2505 case ABI_NONE: abi_str = "none"; break;
2506 case ABI_AIX: abi_str = "aix"; break;
2507 case ABI_ELFv2: abi_str = "ELFv2"; break;
2508 case ABI_V4: abi_str = "V4"; break;
2509 case ABI_DARWIN: abi_str = "darwin"; break;
2510 default: abi_str = "unknown"; break;
2513 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2515 if (rs6000_altivec_abi)
2516 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2518 if (rs6000_aix_extabi)
2519 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2521 if (rs6000_darwin64_abi)
2522 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2524 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2525 (TARGET_SOFT_FLOAT ? "true" : "false"));
2527 if (TARGET_LINK_STACK)
2528 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2530 if (TARGET_P8_FUSION)
2532 char options[80];
2534 strcpy (options, "power8");
2535 if (TARGET_P8_FUSION_SIGN)
2536 strcat (options, ", sign");
2538 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2541 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2542 TARGET_SECURE_PLT ? "secure" : "bss");
2543 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2544 aix_struct_return ? "aix" : "sysv");
2545 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2546 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2547 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2548 tf[!!rs6000_align_branch_targets]);
2549 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2550 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2551 rs6000_long_double_type_size);
2552 if (rs6000_long_double_type_size > 64)
2554 fprintf (stderr, DEBUG_FMT_S, "long double type",
2555 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2556 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2557 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2559 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2560 (int)rs6000_sched_restricted_insns_priority);
2561 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2562 (int)END_BUILTINS);
2564 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2565 (int)TARGET_FLOAT128_ENABLE_TYPE);
2567 if (TARGET_VSX)
2568 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2569 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2571 if (TARGET_DIRECT_MOVE_128)
2572 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2573 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2577 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2578 legitimate address support to figure out the appropriate addressing to
2579 use. */
2581 static void
2582 rs6000_setup_reg_addr_masks (void)
2584 ssize_t rc, reg, m, nregs;
2585 addr_mask_type any_addr_mask, addr_mask;
2587 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2589 machine_mode m2 = (machine_mode) m;
2590 bool complex_p = false;
2591 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2592 size_t msize;
2594 if (COMPLEX_MODE_P (m2))
2596 complex_p = true;
2597 m2 = GET_MODE_INNER (m2);
2600 msize = GET_MODE_SIZE (m2);
2602 /* SDmode is special in that we want to access it only via REG+REG
2603 addressing on power7 and above, since we want to use the LFIWZX and
2604 STFIWZX instructions to load it. */
2605 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2607 any_addr_mask = 0;
2608 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2610 addr_mask = 0;
2611 reg = reload_reg_map[rc].reg;
2613 /* Can mode values go in the GPR/FPR/Altivec registers? */
2614 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2616 bool small_int_vsx_p = (small_int_p
2617 && (rc == RELOAD_REG_FPR
2618 || rc == RELOAD_REG_VMX));
2620 nregs = rs6000_hard_regno_nregs[m][reg];
2621 addr_mask |= RELOAD_REG_VALID;
2623 /* Indicate if the mode takes more than 1 physical register. If
2624 it takes a single register, indicate it can do REG+REG
2625 addressing. Small integers in VSX registers can only do
2626 REG+REG addressing. */
2627 if (small_int_vsx_p)
2628 addr_mask |= RELOAD_REG_INDEXED;
2629 else if (nregs > 1 || m == BLKmode || complex_p)
2630 addr_mask |= RELOAD_REG_MULTIPLE;
2631 else
2632 addr_mask |= RELOAD_REG_INDEXED;
2634 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2635 addressing. If we allow scalars into Altivec registers,
2636 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2638 For VSX systems, we don't allow update addressing for
2639 DFmode/SFmode if those registers can go in both the
2640 traditional floating point registers and Altivec registers.
2641 The load/store instructions for the Altivec registers do not
2642 have update forms. If we allowed update addressing, it seems
2643 to break IV-OPT code using floating point if the index type is
2644 int instead of long (PR target/81550 and target/84042). */
2646 if (TARGET_UPDATE
2647 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2648 && msize <= 8
2649 && !VECTOR_MODE_P (m2)
2650 && !VECTOR_ALIGNMENT_P (m2)
2651 && !complex_p
2652 && (m != E_DFmode || !TARGET_VSX)
2653 && (m != E_SFmode || !TARGET_P8_VECTOR)
2654 && !small_int_vsx_p)
2656 addr_mask |= RELOAD_REG_PRE_INCDEC;
2658 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2659 we don't allow PRE_MODIFY for some multi-register
2660 operations. */
2661 switch (m)
2663 default:
2664 addr_mask |= RELOAD_REG_PRE_MODIFY;
2665 break;
2667 case E_DImode:
2668 if (TARGET_POWERPC64)
2669 addr_mask |= RELOAD_REG_PRE_MODIFY;
2670 break;
2672 case E_DFmode:
2673 case E_DDmode:
2674 if (TARGET_HARD_FLOAT)
2675 addr_mask |= RELOAD_REG_PRE_MODIFY;
2676 break;
2681 /* GPR and FPR registers can do REG+OFFSET addressing, except
2682 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2683 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2684 if ((addr_mask != 0) && !indexed_only_p
2685 && msize <= 8
2686 && (rc == RELOAD_REG_GPR
2687 || ((msize == 8 || m2 == SFmode)
2688 && (rc == RELOAD_REG_FPR
2689 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2690 addr_mask |= RELOAD_REG_OFFSET;
2692 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2693 instructions are enabled. The offset for 128-bit VSX registers is
2694 only 12-bits. While GPRs can handle the full offset range, VSX
2695 registers can only handle the restricted range. */
2696 else if ((addr_mask != 0) && !indexed_only_p
2697 && msize == 16 && TARGET_P9_VECTOR
2698 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2699 || (m2 == TImode && TARGET_VSX)))
2701 addr_mask |= RELOAD_REG_OFFSET;
2702 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2703 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2706 /* Vector pairs can do both indexed and offset loads if the
2707 instructions are enabled, otherwise they can only do offset loads
2708 since it will be broken into two vector moves. Vector quads can
2709 only do offset loads. */
2710 else if ((addr_mask != 0) && TARGET_MMA
2711 && (m2 == OOmode || m2 == XOmode))
2713 addr_mask |= RELOAD_REG_OFFSET;
2714 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2716 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2717 if (m2 == OOmode)
2718 addr_mask |= RELOAD_REG_INDEXED;
2722 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2723 addressing on 128-bit types. */
2724 if (rc == RELOAD_REG_VMX && msize == 16
2725 && (addr_mask & RELOAD_REG_VALID) != 0)
2726 addr_mask |= RELOAD_REG_AND_M16;
2728 reg_addr[m].addr_mask[rc] = addr_mask;
2729 any_addr_mask |= addr_mask;
2732 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2737 /* Initialize the various global tables that are based on register size. */
2738 static void
2739 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2741 ssize_t r, m, c;
2742 int align64;
2743 int align32;
2745 /* Precalculate REGNO_REG_CLASS. */
2746 rs6000_regno_regclass[0] = GENERAL_REGS;
2747 for (r = 1; r < 32; ++r)
2748 rs6000_regno_regclass[r] = BASE_REGS;
2750 for (r = 32; r < 64; ++r)
2751 rs6000_regno_regclass[r] = FLOAT_REGS;
2753 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2754 rs6000_regno_regclass[r] = NO_REGS;
2756 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2757 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2759 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2760 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2761 rs6000_regno_regclass[r] = CR_REGS;
2763 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2764 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2765 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2766 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2767 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2768 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2769 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2771 /* Precalculate register class to simpler reload register class. We don't
2772 need all of the register classes that are combinations of different
2773 classes, just the simple ones that have constraint letters. */
2774 for (c = 0; c < N_REG_CLASSES; c++)
2775 reg_class_to_reg_type[c] = NO_REG_TYPE;
2777 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2779 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2780 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2781 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2782 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2783 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2784 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2785 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2786 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2788 if (TARGET_VSX)
2790 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2791 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2793 else
2795 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2796 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2799 /* Precalculate the valid memory formats as well as the vector information,
2800 this must be set up before the rs6000_hard_regno_nregs_internal calls
2801 below. */
2802 gcc_assert ((int)VECTOR_NONE == 0);
2803 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2804 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2806 gcc_assert ((int)CODE_FOR_nothing == 0);
2807 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2809 gcc_assert ((int)NO_REGS == 0);
2810 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2812 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2813 believes it can use native alignment or still uses 128-bit alignment. */
2814 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2816 align64 = 64;
2817 align32 = 32;
2819 else
2821 align64 = 128;
2822 align32 = 128;
2825 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2826 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2827 if (TARGET_FLOAT128_TYPE)
2829 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2830 rs6000_vector_align[KFmode] = 128;
2832 if (FLOAT128_IEEE_P (TFmode))
2834 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2835 rs6000_vector_align[TFmode] = 128;
2839 /* V2DF mode, VSX only. */
2840 if (TARGET_VSX)
2842 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2843 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2844 rs6000_vector_align[V2DFmode] = align64;
2847 /* V4SF mode, either VSX or Altivec. */
2848 if (TARGET_VSX)
2850 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2851 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2852 rs6000_vector_align[V4SFmode] = align32;
2854 else if (TARGET_ALTIVEC)
2856 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2857 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2858 rs6000_vector_align[V4SFmode] = align32;
2861 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2862 and stores. */
2863 if (TARGET_ALTIVEC)
2865 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2866 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2867 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2868 rs6000_vector_align[V4SImode] = align32;
2869 rs6000_vector_align[V8HImode] = align32;
2870 rs6000_vector_align[V16QImode] = align32;
2872 if (TARGET_VSX)
2874 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2875 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2876 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2878 else
2880 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2881 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2882 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2886 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2887 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2888 if (TARGET_VSX)
2890 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2891 rs6000_vector_unit[V2DImode]
2892 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2893 rs6000_vector_align[V2DImode] = align64;
2895 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2896 rs6000_vector_unit[V1TImode]
2897 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2898 rs6000_vector_align[V1TImode] = 128;
2901 /* DFmode, see if we want to use the VSX unit. Memory is handled
2902 differently, so don't set rs6000_vector_mem. */
2903 if (TARGET_VSX)
2905 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2906 rs6000_vector_align[DFmode] = 64;
2909 /* SFmode, see if we want to use the VSX unit. */
2910 if (TARGET_P8_VECTOR)
2912 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2913 rs6000_vector_align[SFmode] = 32;
2916 /* Allow TImode in VSX register and set the VSX memory macros. */
2917 if (TARGET_VSX)
2919 rs6000_vector_mem[TImode] = VECTOR_VSX;
2920 rs6000_vector_align[TImode] = align64;
2923 /* Add support for vector pairs and vector quad registers. */
2924 if (TARGET_MMA)
2926 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2927 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2928 rs6000_vector_align[OOmode] = 256;
2930 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2931 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2932 rs6000_vector_align[XOmode] = 512;
2935 /* Register class constraints for the constraints that depend on compile
2936 switches. When the VSX code was added, different constraints were added
2937 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2938 of the VSX registers are used. The register classes for scalar floating
2939 point types is set, based on whether we allow that type into the upper
2940 (Altivec) registers. GCC has register classes to target the Altivec
2941 registers for load/store operations, to select using a VSX memory
2942 operation instead of the traditional floating point operation. The
2943 constraints are:
2945 d - Register class to use with traditional DFmode instructions.
2946 v - Altivec register.
2947 wa - Any VSX register.
2948 wc - Reserved to represent individual CR bits (used in LLVM).
2949 wn - always NO_REGS.
2950 wr - GPR if 64-bit mode is permitted.
2951 wx - Float register if we can do 32-bit int stores. */
2953 if (TARGET_HARD_FLOAT)
2954 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2955 if (TARGET_ALTIVEC)
2956 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2957 if (TARGET_VSX)
2958 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2960 if (TARGET_POWERPC64)
2962 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2963 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2966 if (TARGET_STFIWX)
2967 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2969 /* Support for new direct moves (ISA 3.0 + 64bit). */
2970 if (TARGET_DIRECT_MOVE_128)
2971 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2973 /* Set up the reload helper and direct move functions. */
2974 if (TARGET_VSX || TARGET_ALTIVEC)
2976 if (TARGET_64BIT)
2978 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2979 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2980 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2981 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2982 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2983 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2984 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2985 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2986 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2987 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2988 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2989 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2990 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2991 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2992 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2993 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2994 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2995 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2996 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2997 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2999 if (FLOAT128_VECTOR_P (KFmode))
3001 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3002 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3005 if (FLOAT128_VECTOR_P (TFmode))
3007 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3008 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3011 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3012 available. */
3013 if (TARGET_NO_SDMODE_STACK)
3015 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3016 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3019 if (TARGET_VSX)
3021 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3022 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3025 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3027 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3028 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3029 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3030 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3031 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3032 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3033 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3034 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3035 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3037 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3038 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3039 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3040 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3041 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3042 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3043 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3044 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3045 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3047 if (FLOAT128_VECTOR_P (KFmode))
3049 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3050 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3053 if (FLOAT128_VECTOR_P (TFmode))
3055 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3056 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3059 if (TARGET_MMA)
3061 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3062 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3063 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3064 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3068 else
3070 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3071 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3072 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3073 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3074 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3075 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3076 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3077 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3078 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3079 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3080 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3081 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3082 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3083 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3084 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3085 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3086 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3087 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3088 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3089 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3091 if (FLOAT128_VECTOR_P (KFmode))
3093 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3094 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3097 if (FLOAT128_IEEE_P (TFmode))
3099 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3100 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3103 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3104 available. */
3105 if (TARGET_NO_SDMODE_STACK)
3107 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3108 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3111 if (TARGET_VSX)
3113 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3114 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3117 if (TARGET_DIRECT_MOVE)
3119 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3120 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3121 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3125 reg_addr[DFmode].scalar_in_vmx_p = true;
3126 reg_addr[DImode].scalar_in_vmx_p = true;
3128 if (TARGET_P8_VECTOR)
3130 reg_addr[SFmode].scalar_in_vmx_p = true;
3131 reg_addr[SImode].scalar_in_vmx_p = true;
3133 if (TARGET_P9_VECTOR)
3135 reg_addr[HImode].scalar_in_vmx_p = true;
3136 reg_addr[QImode].scalar_in_vmx_p = true;
3141 /* Precalculate HARD_REGNO_NREGS. */
3142 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3143 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3144 rs6000_hard_regno_nregs[m][r]
3145 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3147 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3148 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3149 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3150 rs6000_hard_regno_mode_ok_p[m][r]
3151 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3153 /* Precalculate CLASS_MAX_NREGS sizes. */
3154 for (c = 0; c < LIM_REG_CLASSES; ++c)
3156 int reg_size;
3158 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3159 reg_size = UNITS_PER_VSX_WORD;
3161 else if (c == ALTIVEC_REGS)
3162 reg_size = UNITS_PER_ALTIVEC_WORD;
3164 else if (c == FLOAT_REGS)
3165 reg_size = UNITS_PER_FP_WORD;
3167 else
3168 reg_size = UNITS_PER_WORD;
3170 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3172 machine_mode m2 = (machine_mode)m;
3173 int reg_size2 = reg_size;
3175 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3176 in VSX. */
3177 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3178 reg_size2 = UNITS_PER_FP_WORD;
3180 rs6000_class_max_nregs[m][c]
3181 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3185 /* Calculate which modes to automatically generate code to use a the
3186 reciprocal divide and square root instructions. In the future, possibly
3187 automatically generate the instructions even if the user did not specify
3188 -mrecip. The older machines double precision reciprocal sqrt estimate is
3189 not accurate enough. */
3190 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3191 if (TARGET_FRES)
3192 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3193 if (TARGET_FRE)
3194 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3195 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3196 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3197 if (VECTOR_UNIT_VSX_P (V2DFmode))
3198 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3200 if (TARGET_FRSQRTES)
3201 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3202 if (TARGET_FRSQRTE)
3203 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3204 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3205 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3206 if (VECTOR_UNIT_VSX_P (V2DFmode))
3207 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3209 if (rs6000_recip_control)
3211 if (!flag_finite_math_only)
3212 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3213 "-ffast-math");
3214 if (flag_trapping_math)
3215 warning (0, "%qs requires %qs or %qs", "-mrecip",
3216 "-fno-trapping-math", "-ffast-math");
3217 if (!flag_reciprocal_math)
3218 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3219 "-ffast-math");
3220 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3222 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3223 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3224 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3226 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3227 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3228 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3230 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3231 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3232 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3234 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3235 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3236 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3238 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3239 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3240 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3243 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3244 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3247 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3248 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3251 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3252 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3256 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3257 legitimate address support to figure out the appropriate addressing to
3258 use. */
3259 rs6000_setup_reg_addr_masks ();
3261 if (global_init_p || TARGET_DEBUG_TARGET)
3263 if (TARGET_DEBUG_REG)
3264 rs6000_debug_reg_global ();
3266 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3267 fprintf (stderr,
3268 "SImode variable mult cost = %d\n"
3269 "SImode constant mult cost = %d\n"
3270 "SImode short constant mult cost = %d\n"
3271 "DImode multipliciation cost = %d\n"
3272 "SImode division cost = %d\n"
3273 "DImode division cost = %d\n"
3274 "Simple fp operation cost = %d\n"
3275 "DFmode multiplication cost = %d\n"
3276 "SFmode division cost = %d\n"
3277 "DFmode division cost = %d\n"
3278 "cache line size = %d\n"
3279 "l1 cache size = %d\n"
3280 "l2 cache size = %d\n"
3281 "simultaneous prefetches = %d\n"
3282 "\n",
3283 rs6000_cost->mulsi,
3284 rs6000_cost->mulsi_const,
3285 rs6000_cost->mulsi_const9,
3286 rs6000_cost->muldi,
3287 rs6000_cost->divsi,
3288 rs6000_cost->divdi,
3289 rs6000_cost->fp,
3290 rs6000_cost->dmul,
3291 rs6000_cost->sdiv,
3292 rs6000_cost->ddiv,
3293 rs6000_cost->cache_line_size,
3294 rs6000_cost->l1_cache_size,
3295 rs6000_cost->l2_cache_size,
3296 rs6000_cost->simultaneous_prefetches);
3300 #if TARGET_MACHO
3301 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3303 static void
3304 darwin_rs6000_override_options (void)
3306 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3307 off. */
3308 rs6000_altivec_abi = 1;
3309 TARGET_ALTIVEC_VRSAVE = 1;
3310 rs6000_current_abi = ABI_DARWIN;
3312 if (DEFAULT_ABI == ABI_DARWIN
3313 && TARGET_64BIT)
3314 darwin_one_byte_bool = 1;
3316 if (TARGET_64BIT && ! TARGET_POWERPC64)
3318 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3319 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3322 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3323 optimisation, and will not work with the most generic case (where the
3324 symbol is undefined external, but there is no symbl stub). */
3325 if (TARGET_64BIT)
3326 rs6000_default_long_calls = 0;
3328 /* ld_classic is (so far) still used for kernel (static) code, and supports
3329 the JBSR longcall / branch islands. */
3330 if (flag_mkernel)
3332 rs6000_default_long_calls = 1;
3334 /* Allow a kext author to do -mkernel -mhard-float. */
3335 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3336 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3339 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3340 Altivec. */
3341 if (!flag_mkernel && !flag_apple_kext
3342 && TARGET_64BIT
3343 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3344 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3346 /* Unless the user (not the configurer) has explicitly overridden
3347 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3348 G4 unless targeting the kernel. */
3349 if (!flag_mkernel
3350 && !flag_apple_kext
3351 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3352 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3353 && ! OPTION_SET_P (rs6000_cpu_index))
3355 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3358 #endif
3360 /* If not otherwise specified by a target, make 'long double' equivalent to
3361 'double'. */
3363 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3364 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3365 #endif
3367 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3368 to clobber the XER[CA] bit because clobbering that bit without telling
3369 the compiler worked just fine with versions of GCC before GCC 5, and
3370 breaking a lot of older code in ways that are hard to track down is
3371 not such a great idea. */
3373 static rtx_insn *
3374 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3375 vec<machine_mode> & /*input_modes*/,
3376 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3377 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3379 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3380 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3381 return NULL;
3384 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3385 but is called when the optimize level is changed via an attribute or
3386 pragma or when it is reset at the end of the code affected by the
3387 attribute or pragma. It is not called at the beginning of compilation
3388 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3389 actions then, you should have TARGET_OPTION_OVERRIDE call
3390 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3392 static void
3393 rs6000_override_options_after_change (void)
3395 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3396 turns -frename-registers on. */
3397 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3398 || (OPTION_SET_P (flag_unroll_all_loops)
3399 && flag_unroll_all_loops))
3401 if (!OPTION_SET_P (unroll_only_small_loops))
3402 unroll_only_small_loops = 0;
3403 if (!OPTION_SET_P (flag_rename_registers))
3404 flag_rename_registers = 1;
3405 if (!OPTION_SET_P (flag_cunroll_grow_size))
3406 flag_cunroll_grow_size = 1;
3408 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3409 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3411 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3412 if (rs6000_rop_protect)
3413 flag_shrink_wrap = 0;
3416 #ifdef TARGET_USES_LINUX64_OPT
3417 static void
3418 rs6000_linux64_override_options ()
3420 if (!OPTION_SET_P (rs6000_alignment_flags))
3421 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3422 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3424 if (DEFAULT_ABI != ABI_AIX)
3426 rs6000_current_abi = ABI_AIX;
3427 error (INVALID_64BIT, "call");
3429 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3430 if (ELFv2_ABI_CHECK)
3432 rs6000_current_abi = ABI_ELFv2;
3433 if (dot_symbols)
3434 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3436 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3438 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3439 error (INVALID_64BIT, "relocatable");
3441 if (rs6000_isa_flags & OPTION_MASK_EABI)
3443 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3444 error (INVALID_64BIT, "eabi");
3446 if (TARGET_PROTOTYPE)
3448 target_prototype = 0;
3449 error (INVALID_64BIT, "prototype");
3451 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3453 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3454 error ("%<-m64%> requires a PowerPC64 cpu");
3456 if (!OPTION_SET_P (rs6000_current_cmodel))
3457 SET_CMODEL (CMODEL_MEDIUM);
3458 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3460 if (OPTION_SET_P (rs6000_current_cmodel)
3461 && rs6000_current_cmodel != CMODEL_SMALL)
3462 error ("%<-mcmodel%> incompatible with other toc options");
3463 if (TARGET_MINIMAL_TOC)
3464 SET_CMODEL (CMODEL_SMALL);
3465 else if (TARGET_PCREL
3466 || (PCREL_SUPPORTED_BY_OS
3467 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3468 /* Ignore -mno-minimal-toc. */
3470 else
3471 SET_CMODEL (CMODEL_SMALL);
3473 if (rs6000_current_cmodel != CMODEL_SMALL)
3475 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3476 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3477 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3478 TARGET_NO_SUM_IN_TOC = 0;
3480 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3482 if (OPTION_SET_P (rs6000_pltseq))
3483 warning (0, "%qs unsupported for this ABI",
3484 "-mpltseq");
3485 rs6000_pltseq = false;
3488 else if (TARGET_64BIT)
3489 error (INVALID_32BIT, "32");
3490 else
3492 if (TARGET_PROFILE_KERNEL)
3494 profile_kernel = 0;
3495 error (INVALID_32BIT, "profile-kernel");
3497 if (OPTION_SET_P (rs6000_current_cmodel))
3499 SET_CMODEL (CMODEL_SMALL);
3500 error (INVALID_32BIT, "cmodel");
3504 #endif
3506 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3507 This support is only in little endian GLIBC 2.32 or newer. */
3508 static bool
3509 glibc_supports_ieee_128bit (void)
3511 #ifdef OPTION_GLIBC
3512 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3513 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3514 return true;
3515 #endif /* OPTION_GLIBC. */
3517 return false;
3520 /* Override command line options.
3522 Combine build-specific configuration information with options
3523 specified on the command line to set various state variables which
3524 influence code generation, optimization, and expansion of built-in
3525 functions. Assure that command-line configuration preferences are
3526 compatible with each other and with the build configuration; issue
3527 warnings while adjusting configuration or error messages while
3528 rejecting configuration.
3530 Upon entry to this function:
3532 This function is called once at the beginning of
3533 compilation, and then again at the start and end of compiling
3534 each section of code that has a different configuration, as
3535 indicated, for example, by adding the
3537 __attribute__((__target__("cpu=power9")))
3539 qualifier to a function definition or, for example, by bracketing
3540 code between
3542 #pragma GCC target("altivec")
3546 #pragma GCC reset_options
3548 directives. Parameter global_init_p is true for the initial
3549 invocation, which initializes global variables, and false for all
3550 subsequent invocations.
3553 Various global state information is assumed to be valid. This
3554 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3555 default CPU specified at build configure time, TARGET_DEFAULT,
3556 representing the default set of option flags for the default
3557 target, and OPTION_SET_P (rs6000_isa_flags), representing
3558 which options were requested on the command line.
3560 Upon return from this function:
3562 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3563 was set by name on the command line. Additionally, if certain
3564 attributes are automatically enabled or disabled by this function
3565 in order to assure compatibility between options and
3566 configuration, the flags associated with those attributes are
3567 also set. By setting these "explicit bits", we avoid the risk
3568 that other code might accidentally overwrite these particular
3569 attributes with "default values".
3571 The various bits of rs6000_isa_flags are set to indicate the
3572 target options that have been selected for the most current
3573 compilation efforts. This has the effect of also turning on the
3574 associated TARGET_XXX values since these are macros which are
3575 generally defined to test the corresponding bit of the
3576 rs6000_isa_flags variable.
3578 Various other global variables and fields of global structures
3579 (over 50 in all) are initialized to reflect the desired options
3580 for the most current compilation efforts. */
3582 static bool
3583 rs6000_option_override_internal (bool global_init_p)
3585 bool ret = true;
3587 HOST_WIDE_INT set_masks;
3588 HOST_WIDE_INT ignore_masks;
3589 int cpu_index = -1;
3590 int tune_index;
3591 struct cl_target_option *main_target_opt
3592 = ((global_init_p || target_option_default_node == NULL)
3593 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3595 /* Print defaults. */
3596 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3597 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3599 /* Remember the explicit arguments. */
3600 if (global_init_p)
3601 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3603 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3604 library functions, so warn about it. The flag may be useful for
3605 performance studies from time to time though, so don't disable it
3606 entirely. */
3607 if (OPTION_SET_P (rs6000_alignment_flags)
3608 && rs6000_alignment_flags == MASK_ALIGN_POWER
3609 && DEFAULT_ABI == ABI_DARWIN
3610 && TARGET_64BIT)
3611 warning (0, "%qs is not supported for 64-bit Darwin;"
3612 " it is incompatible with the installed C and C++ libraries",
3613 "-malign-power");
3615 /* Numerous experiment shows that IRA based loop pressure
3616 calculation works better for RTL loop invariant motion on targets
3617 with enough (>= 32) registers. It is an expensive optimization.
3618 So it is on only for peak performance. */
3619 if (optimize >= 3 && global_init_p
3620 && !OPTION_SET_P (flag_ira_loop_pressure))
3621 flag_ira_loop_pressure = 1;
3623 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3624 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3625 options were already specified. */
3626 if (flag_sanitize & SANITIZE_USER_ADDRESS
3627 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3628 flag_asynchronous_unwind_tables = 1;
3630 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3631 loop unroller is active. It is only checked during unrolling, so
3632 we can just set it on by default. */
3633 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3634 flag_variable_expansion_in_unroller = 1;
3636 /* Set the pointer size. */
3637 if (TARGET_64BIT)
3639 rs6000_pmode = DImode;
3640 rs6000_pointer_size = 64;
3642 else
3644 rs6000_pmode = SImode;
3645 rs6000_pointer_size = 32;
3648 /* Some OSs don't support saving the high part of 64-bit registers on context
3649 switch. Other OSs don't support saving Altivec registers. On those OSs,
3650 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3651 if the user wants either, the user must explicitly specify them and we
3652 won't interfere with the user's specification. */
3654 set_masks = POWERPC_MASKS;
3655 #ifdef OS_MISSING_POWERPC64
3656 if (OS_MISSING_POWERPC64)
3657 set_masks &= ~OPTION_MASK_POWERPC64;
3658 #endif
3659 #ifdef OS_MISSING_ALTIVEC
3660 if (OS_MISSING_ALTIVEC)
3661 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3662 | OTHER_VSX_VECTOR_MASKS);
3663 #endif
3665 /* Don't override by the processor default if given explicitly. */
3666 set_masks &= ~rs6000_isa_flags_explicit;
3668 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3669 the cpu in a target attribute or pragma, but did not specify a tuning
3670 option, use the cpu for the tuning option rather than the option specified
3671 with -mtune on the command line. Process a '--with-cpu' configuration
3672 request as an implicit --cpu. */
3673 if (rs6000_cpu_index >= 0)
3674 cpu_index = rs6000_cpu_index;
3675 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3676 cpu_index = main_target_opt->x_rs6000_cpu_index;
3677 else if (OPTION_TARGET_CPU_DEFAULT)
3678 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3680 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3681 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3682 with those from the cpu, except for options that were explicitly set. If
3683 we don't have a cpu, do not override the target bits set in
3684 TARGET_DEFAULT. */
3685 if (cpu_index >= 0)
3687 rs6000_cpu_index = cpu_index;
3688 rs6000_isa_flags &= ~set_masks;
3689 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3690 & set_masks);
3692 else
3694 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3695 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3696 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3697 to using rs6000_isa_flags, we need to do the initialization here.
3699 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3700 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3701 HOST_WIDE_INT flags;
3702 if (TARGET_DEFAULT)
3703 flags = TARGET_DEFAULT;
3704 else
3706 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3707 const char *default_cpu = (!TARGET_POWERPC64
3708 ? "powerpc"
3709 : (BYTES_BIG_ENDIAN
3710 ? "powerpc64"
3711 : "powerpc64le"));
3712 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3713 flags = processor_target_table[default_cpu_index].target_enable;
3715 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3718 if (rs6000_tune_index >= 0)
3719 tune_index = rs6000_tune_index;
3720 else if (cpu_index >= 0)
3721 rs6000_tune_index = tune_index = cpu_index;
3722 else
3724 size_t i;
3725 enum processor_type tune_proc
3726 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3728 tune_index = -1;
3729 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3730 if (processor_target_table[i].processor == tune_proc)
3732 tune_index = i;
3733 break;
3737 if (cpu_index >= 0)
3738 rs6000_cpu = processor_target_table[cpu_index].processor;
3739 else
3740 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3742 gcc_assert (tune_index >= 0);
3743 rs6000_tune = processor_target_table[tune_index].processor;
3745 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3746 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3747 || rs6000_cpu == PROCESSOR_PPCE5500)
3749 if (TARGET_ALTIVEC)
3750 error ("AltiVec not supported in this target");
3753 /* If we are optimizing big endian systems for space, use the load/store
3754 multiple instructions. */
3755 if (BYTES_BIG_ENDIAN && optimize_size)
3756 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3758 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3759 because the hardware doesn't support the instructions used in little
3760 endian mode, and causes an alignment trap. The 750 does not cause an
3761 alignment trap (except when the target is unaligned). */
3763 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3765 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3766 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3767 warning (0, "%qs is not supported on little endian systems",
3768 "-mmultiple");
3771 /* If little-endian, default to -mstrict-align on older processors.
3772 Testing for direct_move matches power8 and later. */
3773 if (!BYTES_BIG_ENDIAN
3774 && !(processor_target_table[tune_index].target_enable
3775 & OPTION_MASK_DIRECT_MOVE))
3776 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3778 /* Add some warnings for VSX. */
3779 if (TARGET_VSX)
3781 const char *msg = NULL;
3782 if (!TARGET_HARD_FLOAT)
3784 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3785 msg = N_("%<-mvsx%> requires hardware floating point");
3786 else
3788 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3789 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3792 else if (TARGET_AVOID_XFORM > 0)
3793 msg = N_("%<-mvsx%> needs indexed addressing");
3794 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3795 & OPTION_MASK_ALTIVEC))
3797 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3798 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3799 else
3800 msg = N_("%<-mno-altivec%> disables vsx");
3803 if (msg)
3805 warning (0, msg);
3806 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3807 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3811 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3812 the -mcpu setting to enable options that conflict. */
3813 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3814 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3815 | OPTION_MASK_ALTIVEC
3816 | OPTION_MASK_VSX)) != 0)
3817 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3818 | OPTION_MASK_DIRECT_MOVE)
3819 & ~rs6000_isa_flags_explicit);
3821 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3822 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3824 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3825 off all of the options that depend on those flags. */
3826 ignore_masks = rs6000_disable_incompatible_switches ();
3828 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3829 unless the user explicitly used the -mno-<option> to disable the code. */
3830 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3831 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3832 else if (TARGET_P9_MINMAX)
3834 if (cpu_index >= 0)
3836 if (cpu_index == PROCESSOR_POWER9)
3838 /* legacy behavior: allow -mcpu=power9 with certain
3839 capabilities explicitly disabled. */
3840 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3842 else
3843 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3844 "for <xxx> less than power9", "-mcpu");
3846 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3847 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3848 & rs6000_isa_flags_explicit))
3849 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3850 were explicitly cleared. */
3851 error ("%qs incompatible with explicitly disabled options",
3852 "-mpower9-minmax");
3853 else
3854 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3856 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3857 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3858 else if (TARGET_VSX)
3859 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3860 else if (TARGET_POPCNTD)
3861 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3862 else if (TARGET_DFP)
3863 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3864 else if (TARGET_CMPB)
3865 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3866 else if (TARGET_FPRND)
3867 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3868 else if (TARGET_POPCNTB)
3869 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3870 else if (TARGET_ALTIVEC)
3871 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3873 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3874 target attribute or pragma which automatically enables both options,
3875 unless the altivec ABI was set. This is set by default for 64-bit, but
3876 not for 32-bit. Don't move this before the above code using ignore_masks,
3877 since it can reset the cleared VSX/ALTIVEC flag again. */
3878 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3879 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3880 & ~rs6000_isa_flags_explicit);
3882 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3884 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3885 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3886 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3889 if (!TARGET_FPRND && TARGET_VSX)
3891 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3892 /* TARGET_VSX = 1 implies Power 7 and newer */
3893 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3894 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3897 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3899 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3900 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3901 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3904 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3906 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3907 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3908 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3911 if (TARGET_P8_VECTOR && !TARGET_VSX)
3913 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3914 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3915 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3916 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3918 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3919 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3920 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3922 else
3924 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3925 not explicit. */
3926 rs6000_isa_flags |= OPTION_MASK_VSX;
3927 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3931 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3933 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3934 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3935 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3938 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3939 silently turn off quad memory mode. */
3940 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3942 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3943 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3945 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3946 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3948 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3949 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3952 /* Non-atomic quad memory load/store are disabled for little endian, since
3953 the words are reversed, but atomic operations can still be done by
3954 swapping the words. */
3955 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3957 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3958 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3959 "mode"));
3961 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3964 /* Assume if the user asked for normal quad memory instructions, they want
3965 the atomic versions as well, unless they explicity told us not to use quad
3966 word atomic instructions. */
3967 if (TARGET_QUAD_MEMORY
3968 && !TARGET_QUAD_MEMORY_ATOMIC
3969 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3970 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3972 /* If we can shrink-wrap the TOC register save separately, then use
3973 -msave-toc-indirect unless explicitly disabled. */
3974 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3975 && flag_shrink_wrap_separate
3976 && optimize_function_for_speed_p (cfun))
3977 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3979 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3980 generating power8 instructions. Power9 does not optimize power8 fusion
3981 cases. */
3982 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3984 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3985 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3986 else
3987 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3990 /* Setting additional fusion flags turns on base fusion. */
3991 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3993 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3995 if (TARGET_P8_FUSION_SIGN)
3996 error ("%qs requires %qs", "-mpower8-fusion-sign",
3997 "-mpower8-fusion");
3999 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4001 else
4002 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4005 /* Power8 does not fuse sign extended loads with the addis. If we are
4006 optimizing at high levels for speed, convert a sign extended load into a
4007 zero extending load, and an explicit sign extension. */
4008 if (TARGET_P8_FUSION
4009 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4010 && optimize_function_for_speed_p (cfun)
4011 && optimize >= 3)
4012 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4014 /* ISA 3.0 vector instructions include ISA 2.07. */
4015 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4017 /* We prefer to not mention undocumented options in
4018 error messages. However, if users have managed to select
4019 power9-vector without selecting power8-vector, they
4020 already know about undocumented flags. */
4021 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
4022 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4023 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4024 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4026 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4027 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4028 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4030 else
4032 /* OPTION_MASK_P9_VECTOR is explicit and
4033 OPTION_MASK_P8_VECTOR is not explicit. */
4034 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4035 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4039 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4040 support. If we only have ISA 2.06 support, and the user did not specify
4041 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4042 but we don't enable the full vectorization support */
4043 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4044 TARGET_ALLOW_MOVMISALIGN = 1;
4046 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4048 if (TARGET_ALLOW_MOVMISALIGN > 0
4049 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4050 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4052 TARGET_ALLOW_MOVMISALIGN = 0;
4055 /* Determine when unaligned vector accesses are permitted, and when
4056 they are preferred over masked Altivec loads. Note that if
4057 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4058 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4059 not true. */
4060 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4062 if (!TARGET_VSX)
4064 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4065 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4067 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4070 else if (!TARGET_ALLOW_MOVMISALIGN)
4072 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4073 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4074 "-mallow-movmisalign");
4076 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4080 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4082 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4083 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4084 else
4085 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4088 /* Use long double size to select the appropriate long double. We use
4089 TYPE_PRECISION to differentiate the 3 different long double types. We map
4090 128 into the precision used for TFmode. */
4091 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4092 ? 64
4093 : FLOAT_PRECISION_TFmode);
4095 /* Set long double size before the IEEE 128-bit tests. */
4096 if (!OPTION_SET_P (rs6000_long_double_type_size))
4098 if (main_target_opt != NULL
4099 && (main_target_opt->x_rs6000_long_double_type_size
4100 != default_long_double_size))
4101 error ("target attribute or pragma changes %<long double%> size");
4102 else
4103 rs6000_long_double_type_size = default_long_double_size;
4105 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4106 ; /* The option value can be seen when cl_target_option_restore is called. */
4107 else if (rs6000_long_double_type_size == 128)
4108 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4110 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4111 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4112 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4113 those systems will not pick up this default. Warn if the user changes the
4114 default unless -Wno-psabi. */
4115 if (!OPTION_SET_P (rs6000_ieeequad))
4116 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4118 else if (TARGET_LONG_DOUBLE_128)
4120 if (global_options.x_rs6000_ieeequad
4121 && (!TARGET_POPCNTD || !TARGET_VSX))
4122 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4124 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4126 /* Determine if the user can change the default long double type at
4127 compilation time. You need GLIBC 2.32 or newer to be able to
4128 change the long double type. Only issue one warning. */
4129 static bool warned_change_long_double;
4131 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4133 warned_change_long_double = true;
4134 if (TARGET_IEEEQUAD)
4135 warning (OPT_Wpsabi, "Using IEEE extended precision "
4136 "%<long double%>");
4137 else
4138 warning (OPT_Wpsabi, "Using IBM extended precision "
4139 "%<long double%>");
4144 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4145 sytems. In GCC 7, we would enable the IEEE 128-bit floating point
4146 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4147 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4148 the keyword as well as the type. */
4149 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4151 /* IEEE 128-bit floating point requires VSX support. */
4152 if (TARGET_FLOAT128_KEYWORD)
4154 if (!TARGET_VSX)
4156 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4157 error ("%qs requires VSX support", "-mfloat128");
4159 TARGET_FLOAT128_TYPE = 0;
4160 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4161 | OPTION_MASK_FLOAT128_HW);
4163 else if (!TARGET_FLOAT128_TYPE)
4165 TARGET_FLOAT128_TYPE = 1;
4166 warning (0, "The %<-mfloat128%> option may not be fully supported");
4170 /* Enable the __float128 keyword under Linux by default. */
4171 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4172 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4173 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4175 /* If we have are supporting the float128 type and full ISA 3.0 support,
4176 enable -mfloat128-hardware by default. However, don't enable the
4177 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4178 because sometimes the compiler wants to put things in an integer
4179 container, and if we don't have __int128 support, it is impossible. */
4180 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4181 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4182 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4183 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4185 if (TARGET_FLOAT128_HW
4186 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4188 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4189 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4191 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4194 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4196 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4197 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4199 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4202 /* Enable -mprefixed by default on power10 systems. */
4203 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4204 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4206 /* -mprefixed requires -mcpu=power10 (or later). */
4207 else if (TARGET_PREFIXED && !TARGET_POWER10)
4209 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4210 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4212 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4215 /* -mpcrel requires prefixed load/store addressing. */
4216 if (TARGET_PCREL && !TARGET_PREFIXED)
4218 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4219 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4221 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4224 /* Print the options after updating the defaults. */
4225 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4226 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4228 /* E500mc does "better" if we inline more aggressively. Respect the
4229 user's opinion, though. */
4230 if (rs6000_block_move_inline_limit == 0
4231 && (rs6000_tune == PROCESSOR_PPCE500MC
4232 || rs6000_tune == PROCESSOR_PPCE500MC64
4233 || rs6000_tune == PROCESSOR_PPCE5500
4234 || rs6000_tune == PROCESSOR_PPCE6500))
4235 rs6000_block_move_inline_limit = 128;
4237 /* store_one_arg depends on expand_block_move to handle at least the
4238 size of reg_parm_stack_space. */
4239 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4240 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4242 if (global_init_p)
4244 /* If the appropriate debug option is enabled, replace the target hooks
4245 with debug versions that call the real version and then prints
4246 debugging information. */
4247 if (TARGET_DEBUG_COST)
4249 targetm.rtx_costs = rs6000_debug_rtx_costs;
4250 targetm.address_cost = rs6000_debug_address_cost;
4251 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4254 if (TARGET_DEBUG_ADDR)
4256 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4257 targetm.legitimize_address = rs6000_debug_legitimize_address;
4258 rs6000_secondary_reload_class_ptr
4259 = rs6000_debug_secondary_reload_class;
4260 targetm.secondary_memory_needed
4261 = rs6000_debug_secondary_memory_needed;
4262 targetm.can_change_mode_class
4263 = rs6000_debug_can_change_mode_class;
4264 rs6000_preferred_reload_class_ptr
4265 = rs6000_debug_preferred_reload_class;
4266 rs6000_mode_dependent_address_ptr
4267 = rs6000_debug_mode_dependent_address;
4270 if (rs6000_veclibabi_name)
4272 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4273 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4274 else
4276 error ("unknown vectorization library ABI type in "
4277 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4278 ret = false;
4283 /* Enable Altivec ABI for AIX -maltivec. */
4284 if (TARGET_XCOFF
4285 && (TARGET_ALTIVEC || TARGET_VSX)
4286 && !OPTION_SET_P (rs6000_altivec_abi))
4288 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4289 error ("target attribute or pragma changes AltiVec ABI");
4290 else
4291 rs6000_altivec_abi = 1;
4294 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4295 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4296 be explicitly overridden in either case. */
4297 if (TARGET_ELF)
4299 if (!OPTION_SET_P (rs6000_altivec_abi)
4300 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4302 if (main_target_opt != NULL &&
4303 !main_target_opt->x_rs6000_altivec_abi)
4304 error ("target attribute or pragma changes AltiVec ABI");
4305 else
4306 rs6000_altivec_abi = 1;
4310 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4311 So far, the only darwin64 targets are also MACH-O. */
4312 if (TARGET_MACHO
4313 && DEFAULT_ABI == ABI_DARWIN
4314 && TARGET_64BIT)
4316 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4317 error ("target attribute or pragma changes darwin64 ABI");
4318 else
4320 rs6000_darwin64_abi = 1;
4321 /* Default to natural alignment, for better performance. */
4322 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4326 /* Place FP constants in the constant pool instead of TOC
4327 if section anchors enabled. */
4328 if (flag_section_anchors
4329 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4330 TARGET_NO_FP_IN_TOC = 1;
4332 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4333 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4335 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4336 SUBTARGET_OVERRIDE_OPTIONS;
4337 #endif
4338 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4339 SUBSUBTARGET_OVERRIDE_OPTIONS;
4340 #endif
4341 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4342 SUB3TARGET_OVERRIDE_OPTIONS;
4343 #endif
4345 /* If the ABI has support for PC-relative relocations, enable it by default.
4346 This test depends on the sub-target tests above setting the code model to
4347 medium for ELF v2 systems. */
4348 if (PCREL_SUPPORTED_BY_OS
4349 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4350 rs6000_isa_flags |= OPTION_MASK_PCREL;
4352 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4353 after the subtarget override options are done. */
4354 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4356 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4357 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4359 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4362 /* Enable -mmma by default on power10 systems. */
4363 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4364 rs6000_isa_flags |= OPTION_MASK_MMA;
4366 if (TARGET_POWER10
4367 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
4368 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4370 /* Turn off vector pair/mma options on non-power10 systems. */
4371 else if (!TARGET_POWER10 && TARGET_MMA)
4373 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4374 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4376 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4379 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4380 such as "*movoo" uses vector pair access which use VSX registers.
4381 So make MMA require VSX support here. */
4382 if (TARGET_MMA && !TARGET_VSX)
4384 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4385 error ("%qs requires %qs", "-mmma", "-mvsx");
4386 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4389 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4390 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4392 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4393 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4395 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4396 && rs6000_tune != PROCESSOR_POWER5
4397 && rs6000_tune != PROCESSOR_POWER6
4398 && rs6000_tune != PROCESSOR_POWER7
4399 && rs6000_tune != PROCESSOR_POWER8
4400 && rs6000_tune != PROCESSOR_POWER9
4401 && rs6000_tune != PROCESSOR_POWER10
4402 && rs6000_tune != PROCESSOR_PPCA2
4403 && rs6000_tune != PROCESSOR_CELL
4404 && rs6000_tune != PROCESSOR_PPC476);
4405 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4406 || rs6000_tune == PROCESSOR_POWER5
4407 || rs6000_tune == PROCESSOR_POWER7
4408 || rs6000_tune == PROCESSOR_POWER8);
4409 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4410 || rs6000_tune == PROCESSOR_POWER5
4411 || rs6000_tune == PROCESSOR_POWER6
4412 || rs6000_tune == PROCESSOR_POWER7
4413 || rs6000_tune == PROCESSOR_POWER8
4414 || rs6000_tune == PROCESSOR_POWER9
4415 || rs6000_tune == PROCESSOR_POWER10
4416 || rs6000_tune == PROCESSOR_PPCE500MC
4417 || rs6000_tune == PROCESSOR_PPCE500MC64
4418 || rs6000_tune == PROCESSOR_PPCE5500
4419 || rs6000_tune == PROCESSOR_PPCE6500);
4421 /* Allow debug switches to override the above settings. These are set to -1
4422 in rs6000.opt to indicate the user hasn't directly set the switch. */
4423 if (TARGET_ALWAYS_HINT >= 0)
4424 rs6000_always_hint = TARGET_ALWAYS_HINT;
4426 if (TARGET_SCHED_GROUPS >= 0)
4427 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4429 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4430 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4432 rs6000_sched_restricted_insns_priority
4433 = (rs6000_sched_groups ? 1 : 0);
4435 /* Handle -msched-costly-dep option. */
4436 rs6000_sched_costly_dep
4437 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4439 if (rs6000_sched_costly_dep_str)
4441 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4442 rs6000_sched_costly_dep = no_dep_costly;
4443 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4444 rs6000_sched_costly_dep = all_deps_costly;
4445 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4446 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4447 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4448 rs6000_sched_costly_dep = store_to_load_dep_costly;
4449 else
4450 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4451 atoi (rs6000_sched_costly_dep_str));
4454 /* Handle -minsert-sched-nops option. */
4455 rs6000_sched_insert_nops
4456 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4458 if (rs6000_sched_insert_nops_str)
4460 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4461 rs6000_sched_insert_nops = sched_finish_none;
4462 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4463 rs6000_sched_insert_nops = sched_finish_pad_groups;
4464 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4465 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4466 else
4467 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4468 atoi (rs6000_sched_insert_nops_str));
4471 /* Handle stack protector */
4472 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4473 #ifdef TARGET_THREAD_SSP_OFFSET
4474 rs6000_stack_protector_guard = SSP_TLS;
4475 #else
4476 rs6000_stack_protector_guard = SSP_GLOBAL;
4477 #endif
4479 #ifdef TARGET_THREAD_SSP_OFFSET
4480 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4481 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4482 #endif
4484 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4486 char *endp;
4487 const char *str = rs6000_stack_protector_guard_offset_str;
4489 errno = 0;
4490 long offset = strtol (str, &endp, 0);
4491 if (!*str || *endp || errno)
4492 error ("%qs is not a valid number in %qs", str,
4493 "-mstack-protector-guard-offset=");
4495 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4496 || (TARGET_64BIT && (offset & 3)))
4497 error ("%qs is not a valid offset in %qs", str,
4498 "-mstack-protector-guard-offset=");
4500 rs6000_stack_protector_guard_offset = offset;
4503 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4505 const char *str = rs6000_stack_protector_guard_reg_str;
4506 int reg = decode_reg_name (str);
4508 if (!IN_RANGE (reg, 1, 31))
4509 error ("%qs is not a valid base register in %qs", str,
4510 "-mstack-protector-guard-reg=");
4512 rs6000_stack_protector_guard_reg = reg;
4515 if (rs6000_stack_protector_guard == SSP_TLS
4516 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4517 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4519 if (global_init_p)
4521 #ifdef TARGET_REGNAMES
4522 /* If the user desires alternate register names, copy in the
4523 alternate names now. */
4524 if (TARGET_REGNAMES)
4525 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4526 #endif
4528 /* Set aix_struct_return last, after the ABI is determined.
4529 If -maix-struct-return or -msvr4-struct-return was explicitly
4530 used, don't override with the ABI default. */
4531 if (!OPTION_SET_P (aix_struct_return))
4532 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4534 #if 0
4535 /* IBM XL compiler defaults to unsigned bitfields. */
4536 if (TARGET_XL_COMPAT)
4537 flag_signed_bitfields = 0;
4538 #endif
4540 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4541 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4543 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4545 /* We can only guarantee the availability of DI pseudo-ops when
4546 assembling for 64-bit targets. */
4547 if (!TARGET_64BIT)
4549 targetm.asm_out.aligned_op.di = NULL;
4550 targetm.asm_out.unaligned_op.di = NULL;
4554 /* Set branch target alignment, if not optimizing for size. */
4555 if (!optimize_size)
4557 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4558 aligned 8byte to avoid misprediction by the branch predictor. */
4559 if (rs6000_tune == PROCESSOR_TITAN
4560 || rs6000_tune == PROCESSOR_CELL)
4562 if (flag_align_functions && !str_align_functions)
4563 str_align_functions = "8";
4564 if (flag_align_jumps && !str_align_jumps)
4565 str_align_jumps = "8";
4566 if (flag_align_loops && !str_align_loops)
4567 str_align_loops = "8";
4569 if (rs6000_align_branch_targets)
4571 if (flag_align_functions && !str_align_functions)
4572 str_align_functions = "16";
4573 if (flag_align_jumps && !str_align_jumps)
4574 str_align_jumps = "16";
4575 if (flag_align_loops && !str_align_loops)
4577 can_override_loop_align = 1;
4578 str_align_loops = "16";
4583 /* Arrange to save and restore machine status around nested functions. */
4584 init_machine_status = rs6000_init_machine_status;
4586 /* We should always be splitting complex arguments, but we can't break
4587 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4588 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4589 targetm.calls.split_complex_arg = NULL;
4591 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4592 if (DEFAULT_ABI == ABI_AIX)
4593 targetm.calls.custom_function_descriptors = 0;
4596 /* Initialize rs6000_cost with the appropriate target costs. */
4597 if (optimize_size)
4598 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4599 else
4600 switch (rs6000_tune)
4602 case PROCESSOR_RS64A:
4603 rs6000_cost = &rs64a_cost;
4604 break;
4606 case PROCESSOR_MPCCORE:
4607 rs6000_cost = &mpccore_cost;
4608 break;
4610 case PROCESSOR_PPC403:
4611 rs6000_cost = &ppc403_cost;
4612 break;
4614 case PROCESSOR_PPC405:
4615 rs6000_cost = &ppc405_cost;
4616 break;
4618 case PROCESSOR_PPC440:
4619 rs6000_cost = &ppc440_cost;
4620 break;
4622 case PROCESSOR_PPC476:
4623 rs6000_cost = &ppc476_cost;
4624 break;
4626 case PROCESSOR_PPC601:
4627 rs6000_cost = &ppc601_cost;
4628 break;
4630 case PROCESSOR_PPC603:
4631 rs6000_cost = &ppc603_cost;
4632 break;
4634 case PROCESSOR_PPC604:
4635 rs6000_cost = &ppc604_cost;
4636 break;
4638 case PROCESSOR_PPC604e:
4639 rs6000_cost = &ppc604e_cost;
4640 break;
4642 case PROCESSOR_PPC620:
4643 rs6000_cost = &ppc620_cost;
4644 break;
4646 case PROCESSOR_PPC630:
4647 rs6000_cost = &ppc630_cost;
4648 break;
4650 case PROCESSOR_CELL:
4651 rs6000_cost = &ppccell_cost;
4652 break;
4654 case PROCESSOR_PPC750:
4655 case PROCESSOR_PPC7400:
4656 rs6000_cost = &ppc750_cost;
4657 break;
4659 case PROCESSOR_PPC7450:
4660 rs6000_cost = &ppc7450_cost;
4661 break;
4663 case PROCESSOR_PPC8540:
4664 case PROCESSOR_PPC8548:
4665 rs6000_cost = &ppc8540_cost;
4666 break;
4668 case PROCESSOR_PPCE300C2:
4669 case PROCESSOR_PPCE300C3:
4670 rs6000_cost = &ppce300c2c3_cost;
4671 break;
4673 case PROCESSOR_PPCE500MC:
4674 rs6000_cost = &ppce500mc_cost;
4675 break;
4677 case PROCESSOR_PPCE500MC64:
4678 rs6000_cost = &ppce500mc64_cost;
4679 break;
4681 case PROCESSOR_PPCE5500:
4682 rs6000_cost = &ppce5500_cost;
4683 break;
4685 case PROCESSOR_PPCE6500:
4686 rs6000_cost = &ppce6500_cost;
4687 break;
4689 case PROCESSOR_TITAN:
4690 rs6000_cost = &titan_cost;
4691 break;
4693 case PROCESSOR_POWER4:
4694 case PROCESSOR_POWER5:
4695 rs6000_cost = &power4_cost;
4696 break;
4698 case PROCESSOR_POWER6:
4699 rs6000_cost = &power6_cost;
4700 break;
4702 case PROCESSOR_POWER7:
4703 rs6000_cost = &power7_cost;
4704 break;
4706 case PROCESSOR_POWER8:
4707 rs6000_cost = &power8_cost;
4708 break;
4710 case PROCESSOR_POWER9:
4711 rs6000_cost = &power9_cost;
4712 break;
4714 case PROCESSOR_POWER10:
4715 rs6000_cost = &power10_cost;
4716 break;
4718 case PROCESSOR_PPCA2:
4719 rs6000_cost = &ppca2_cost;
4720 break;
4722 default:
4723 gcc_unreachable ();
4726 if (global_init_p)
4728 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4729 param_simultaneous_prefetches,
4730 rs6000_cost->simultaneous_prefetches);
4731 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4732 param_l1_cache_size,
4733 rs6000_cost->l1_cache_size);
4734 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4735 param_l1_cache_line_size,
4736 rs6000_cost->cache_line_size);
4737 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4738 param_l2_cache_size,
4739 rs6000_cost->l2_cache_size);
4741 /* Increase loop peeling limits based on performance analysis. */
4742 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4743 param_max_peeled_insns, 400);
4744 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4745 param_max_completely_peeled_insns, 400);
4747 /* The lxvl/stxvl instructions don't perform well before Power10. */
4748 if (TARGET_POWER10)
4749 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4750 param_vect_partial_vector_usage, 1);
4751 else
4752 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4753 param_vect_partial_vector_usage, 0);
4755 /* Use the 'model' -fsched-pressure algorithm by default. */
4756 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4757 param_sched_pressure_algorithm,
4758 SCHED_PRESSURE_MODEL);
4760 /* If using typedef char *va_list, signal that
4761 __builtin_va_start (&ap, 0) can be optimized to
4762 ap = __builtin_next_arg (0). */
4763 if (DEFAULT_ABI != ABI_V4)
4764 targetm.expand_builtin_va_start = NULL;
4767 rs6000_override_options_after_change ();
4769 /* If not explicitly specified via option, decide whether to generate indexed
4770 load/store instructions. A value of -1 indicates that the
4771 initial value of this variable has not been overwritten. During
4772 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4773 if (TARGET_AVOID_XFORM == -1)
4774 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4775 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4776 need indexed accesses and the type used is the scalar type of the element
4777 being loaded or stored. */
4778 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4779 && !TARGET_ALTIVEC);
4781 /* Set the -mrecip options. */
4782 if (rs6000_recip_name)
4784 char *p = ASTRDUP (rs6000_recip_name);
4785 char *q;
4786 unsigned int mask, i;
4787 bool invert;
4789 while ((q = strtok (p, ",")) != NULL)
4791 p = NULL;
4792 if (*q == '!')
4794 invert = true;
4795 q++;
4797 else
4798 invert = false;
4800 if (!strcmp (q, "default"))
4801 mask = ((TARGET_RECIP_PRECISION)
4802 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4803 else
4805 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4806 if (!strcmp (q, recip_options[i].string))
4808 mask = recip_options[i].mask;
4809 break;
4812 if (i == ARRAY_SIZE (recip_options))
4814 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4815 invert = false;
4816 mask = 0;
4817 ret = false;
4821 if (invert)
4822 rs6000_recip_control &= ~mask;
4823 else
4824 rs6000_recip_control |= mask;
4828 /* Initialize all of the registers. */
4829 rs6000_init_hard_regno_mode_ok (global_init_p);
4831 /* Save the initial options in case the user does function specific options */
4832 if (global_init_p)
4833 target_option_default_node = target_option_current_node
4834 = build_target_option_node (&global_options, &global_options_set);
4836 /* If not explicitly specified via option, decide whether to generate the
4837 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4838 if (TARGET_LINK_STACK == -1)
4839 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4841 /* Deprecate use of -mno-speculate-indirect-jumps. */
4842 if (!rs6000_speculate_indirect_jumps)
4843 warning (0, "%qs is deprecated and not recommended in any circumstances",
4844 "-mno-speculate-indirect-jumps");
4846 return ret;
4849 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4850 define the target cpu type. */
4852 static void
4853 rs6000_option_override (void)
4855 (void) rs6000_option_override_internal (true);
4859 /* Implement LOOP_ALIGN. */
4860 align_flags
4861 rs6000_loop_align (rtx label)
4863 basic_block bb;
4864 int ninsns;
4866 /* Don't override loop alignment if -falign-loops was specified. */
4867 if (!can_override_loop_align)
4868 return align_loops;
4870 bb = BLOCK_FOR_INSN (label);
4871 ninsns = num_loop_insns(bb->loop_father);
4873 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4874 if (ninsns > 4 && ninsns <= 8
4875 && (rs6000_tune == PROCESSOR_POWER4
4876 || rs6000_tune == PROCESSOR_POWER5
4877 || rs6000_tune == PROCESSOR_POWER6
4878 || rs6000_tune == PROCESSOR_POWER7
4879 || rs6000_tune == PROCESSOR_POWER8))
4880 return align_flags (5);
4881 else
4882 return align_loops;
4885 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4886 after applying N number of iterations. This routine does not determine
4887 how may iterations are required to reach desired alignment. */
4889 static bool
4890 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4892 if (is_packed)
4893 return false;
4895 if (TARGET_32BIT)
4897 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4898 return true;
4900 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4901 return true;
4903 return false;
4905 else
4907 if (TARGET_MACHO)
4908 return false;
4910 /* Assuming that all other types are naturally aligned. CHECKME! */
4911 return true;
4915 /* Return true if the vector misalignment factor is supported by the
4916 target. */
4917 static bool
4918 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4919 const_tree type,
4920 int misalignment,
4921 bool is_packed)
4923 if (TARGET_VSX)
4925 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4926 return true;
4928 /* Return if movmisalign pattern is not supported for this mode. */
4929 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4930 return false;
4932 if (misalignment == -1)
4934 /* Misalignment factor is unknown at compile time but we know
4935 it's word aligned. */
4936 if (rs6000_vector_alignment_reachable (type, is_packed))
4938 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4940 if (element_size == 64 || element_size == 32)
4941 return true;
4944 return false;
4947 /* VSX supports word-aligned vector. */
4948 if (misalignment % 4 == 0)
4949 return true;
4951 return false;
4954 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4955 static int
4956 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4957 tree vectype, int misalign)
4959 unsigned elements;
4960 tree elem_type;
4962 switch (type_of_cost)
4964 case scalar_stmt:
4965 case scalar_store:
4966 case vector_stmt:
4967 case vector_store:
4968 case vec_to_scalar:
4969 case scalar_to_vec:
4970 case cond_branch_not_taken:
4971 return 1;
4972 case scalar_load:
4973 case vector_load:
4974 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4975 return 2;
4977 case vec_perm:
4978 /* Power7 has only one permute unit, make it a bit expensive. */
4979 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4980 return 3;
4981 else
4982 return 1;
4984 case vec_promote_demote:
4985 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4986 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4987 return 4;
4988 else
4989 return 1;
4991 case cond_branch_taken:
4992 return 3;
4994 case unaligned_load:
4995 case vector_gather_load:
4996 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4997 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4998 return 2;
5000 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5002 elements = TYPE_VECTOR_SUBPARTS (vectype);
5003 /* See PR102767, consider V1TI to keep consistency. */
5004 if (elements == 2 || elements == 1)
5005 /* Double word aligned. */
5006 return 4;
5008 if (elements == 4)
5010 switch (misalign)
5012 case 8:
5013 /* Double word aligned. */
5014 return 4;
5016 case -1:
5017 /* Unknown misalignment. */
5018 case 4:
5019 case 12:
5020 /* Word aligned. */
5021 return 33;
5023 default:
5024 gcc_unreachable ();
5029 if (TARGET_ALTIVEC)
5030 /* Misaligned loads are not supported. */
5031 gcc_unreachable ();
5033 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5034 return 4;
5036 case unaligned_store:
5037 case vector_scatter_store:
5038 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5039 return 1;
5041 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5043 elements = TYPE_VECTOR_SUBPARTS (vectype);
5044 /* See PR102767, consider V1TI to keep consistency. */
5045 if (elements == 2 || elements == 1)
5046 /* Double word aligned. */
5047 return 2;
5049 if (elements == 4)
5051 switch (misalign)
5053 case 8:
5054 /* Double word aligned. */
5055 return 2;
5057 case -1:
5058 /* Unknown misalignment. */
5059 case 4:
5060 case 12:
5061 /* Word aligned. */
5062 return 23;
5064 default:
5065 gcc_unreachable ();
5070 if (TARGET_ALTIVEC)
5071 /* Misaligned stores are not supported. */
5072 gcc_unreachable ();
5074 return 2;
5076 case vec_construct:
5077 /* This is a rough approximation assuming non-constant elements
5078 constructed into a vector via element insertion. FIXME:
5079 vec_construct is not granular enough for uniformly good
5080 decisions. If the initialization is a splat, this is
5081 cheaper than we estimate. Improve this someday. */
5082 elem_type = TREE_TYPE (vectype);
5083 /* 32-bit vectors loaded into registers are stored as double
5084 precision, so we need 2 permutes, 2 converts, and 1 merge
5085 to construct a vector of short floats from them. */
5086 if (SCALAR_FLOAT_TYPE_P (elem_type)
5087 && TYPE_PRECISION (elem_type) == 32)
5088 return 5;
5089 /* On POWER9, integer vector types are built up in GPRs and then
5090 use a direct move (2 cycles). For POWER8 this is even worse,
5091 as we need two direct moves and a merge, and the direct moves
5092 are five cycles. */
5093 else if (INTEGRAL_TYPE_P (elem_type))
5095 if (TARGET_P9_VECTOR)
5096 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5097 else
5098 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5100 else
5101 /* V2DFmode doesn't need a direct move. */
5102 return 2;
5104 default:
5105 gcc_unreachable ();
5109 /* Implement targetm.vectorize.preferred_simd_mode. */
5111 static machine_mode
5112 rs6000_preferred_simd_mode (scalar_mode mode)
5114 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5116 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5117 return vmode.require ();
5119 return word_mode;
5122 class rs6000_cost_data : public vector_costs
5124 public:
5125 using vector_costs::vector_costs;
5127 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5128 stmt_vec_info stmt_info, slp_tree, tree vectype,
5129 int misalign,
5130 vect_cost_model_location where) override;
5131 void finish_cost (const vector_costs *) override;
5133 protected:
5134 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5135 vect_cost_model_location, unsigned int);
5136 void density_test (loop_vec_info);
5137 void adjust_vect_cost_per_loop (loop_vec_info);
5139 /* Total number of vectorized stmts (loop only). */
5140 unsigned m_nstmts = 0;
5141 /* Total number of loads (loop only). */
5142 unsigned m_nloads = 0;
5143 /* Possible extra penalized cost on vector construction (loop only). */
5144 unsigned m_extra_ctor_cost = 0;
5145 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5146 instruction is needed by the vectorization. */
5147 bool m_vect_nonmem = false;
5150 /* Test for likely overcommitment of vector hardware resources. If a
5151 loop iteration is relatively large, and too large a percentage of
5152 instructions in the loop are vectorized, the cost model may not
5153 adequately reflect delays from unavailable vector resources.
5154 Penalize the loop body cost for this case. */
5156 void
5157 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5159 /* This density test only cares about the cost of vector version of the
5160 loop, so immediately return if we are passed costing for the scalar
5161 version (namely computing single scalar iteration cost). */
5162 if (m_costing_for_scalar)
5163 return;
5165 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5166 basic_block *bbs = get_loop_body (loop);
5167 int nbbs = loop->num_nodes;
5168 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5170 for (int i = 0; i < nbbs; i++)
5172 basic_block bb = bbs[i];
5173 gimple_stmt_iterator gsi;
5175 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5177 gimple *stmt = gsi_stmt (gsi);
5178 if (is_gimple_debug (stmt))
5179 continue;
5181 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5183 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5184 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5185 not_vec_cost++;
5189 free (bbs);
5190 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5192 if (density_pct > rs6000_density_pct_threshold
5193 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5195 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5196 if (dump_enabled_p ())
5197 dump_printf_loc (MSG_NOTE, vect_location,
5198 "density %d%%, cost %d exceeds threshold, penalizing "
5199 "loop body cost by %u%%\n", density_pct,
5200 vec_cost + not_vec_cost, rs6000_density_penalty);
5203 /* Check whether we need to penalize the body cost to account
5204 for excess strided or elementwise loads. */
5205 if (m_extra_ctor_cost > 0)
5207 gcc_assert (m_nloads <= m_nstmts);
5208 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5210 /* It's likely to be bounded by latency and execution resources
5211 from many scalar loads which are strided or elementwise loads
5212 into a vector if both conditions below are found:
5213 1. there are many loads, it's easy to result in a long wait
5214 for load units;
5215 2. load has a big proportion of all vectorized statements,
5216 it's not easy to schedule other statements to spread among
5217 the loads.
5218 One typical case is the innermost loop of the hotspot of SPEC2017
5219 503.bwaves_r without loop interchange. */
5220 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5221 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5223 m_costs[vect_body] += m_extra_ctor_cost;
5224 if (dump_enabled_p ())
5225 dump_printf_loc (MSG_NOTE, vect_location,
5226 "Found %u loads and "
5227 "load pct. %u%% exceed "
5228 "the threshold, "
5229 "penalizing loop body "
5230 "cost by extra cost %u "
5231 "for ctor.\n",
5232 m_nloads, load_pct,
5233 m_extra_ctor_cost);
5238 /* Implement targetm.vectorize.create_costs. */
5240 static vector_costs *
5241 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5243 return new rs6000_cost_data (vinfo, costing_for_scalar);
5246 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5247 For some statement, we would like to further fine-grain tweak the cost on
5248 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5249 information on statement operation codes etc. One typical case here is
5250 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5251 for scalar cost, but it should be priced more whatever transformed to either
5252 compare + branch or compare + isel instructions. */
5254 static unsigned
5255 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5256 struct _stmt_vec_info *stmt_info)
5258 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5259 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5261 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5262 if (subcode == COND_EXPR)
5263 return 2;
5266 return 0;
5269 /* Helper function for add_stmt_cost. Check each statement cost
5270 entry, gather information and update the target_cost fields
5271 accordingly. */
5272 void
5273 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5274 stmt_vec_info stmt_info,
5275 vect_cost_model_location where,
5276 unsigned int orig_count)
5279 /* Check whether we're doing something other than just a copy loop.
5280 Not all such loops may be profitably vectorized; see
5281 rs6000_finish_cost. */
5282 if (kind == vec_to_scalar
5283 || kind == vec_perm
5284 || kind == vec_promote_demote
5285 || kind == vec_construct
5286 || kind == scalar_to_vec
5287 || (where == vect_body && kind == vector_stmt))
5288 m_vect_nonmem = true;
5290 /* Gather some information when we are costing the vectorized instruction
5291 for the statements located in a loop body. */
5292 if (!m_costing_for_scalar
5293 && is_a<loop_vec_info> (m_vinfo)
5294 && where == vect_body)
5296 m_nstmts += orig_count;
5298 if (kind == scalar_load || kind == vector_load
5299 || kind == unaligned_load || kind == vector_gather_load)
5300 m_nloads += orig_count;
5302 /* Power processors do not currently have instructions for strided
5303 and elementwise loads, and instead we must generate multiple
5304 scalar loads. This leads to undercounting of the cost. We
5305 account for this by scaling the construction cost by the number
5306 of elements involved, and saving this as extra cost that we may
5307 or may not need to apply. When finalizing the cost of the loop,
5308 the extra penalty is applied when the load density heuristics
5309 are satisfied. */
5310 if (kind == vec_construct && stmt_info
5311 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5312 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5313 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5315 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5316 unsigned int nunits = vect_nunits_for_cost (vectype);
5317 /* As PR103702 shows, it's possible that vectorizer wants to do
5318 costings for only one unit here, it's no need to do any
5319 penalization for it, so simply early return here. */
5320 if (nunits == 1)
5321 return;
5322 /* i386 port adopts nunits * stmt_cost as the penalized cost
5323 for this kind of penalization, we used to follow it but
5324 found it could result in an unreliable body cost especially
5325 for V16QI/V8HI modes. To make it better, we choose this
5326 new heuristic: for each scalar load, we use 2 as penalized
5327 cost for the case with 2 nunits and use 1 for the other
5328 cases. It's without much supporting theory, mainly
5329 concluded from the broad performance evaluations on Power8,
5330 Power9 and Power10. One possibly related point is that:
5331 vector construction for more units would use more insns,
5332 it has more chances to schedule them better (even run in
5333 parallelly when enough available units at that time), so
5334 it seems reasonable not to penalize that much for them. */
5335 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5336 unsigned int extra_cost = nunits * adjusted_cost;
5337 m_extra_ctor_cost += extra_cost;
5342 unsigned
5343 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5344 stmt_vec_info stmt_info, slp_tree,
5345 tree vectype, int misalign,
5346 vect_cost_model_location where)
5348 unsigned retval = 0;
5350 if (flag_vect_cost_model)
5352 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5353 misalign);
5354 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5355 /* Statements in an inner loop relative to the loop being
5356 vectorized are weighted more heavily. The value here is
5357 arbitrary and could potentially be improved with analysis. */
5358 unsigned int orig_count = count;
5359 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5360 m_costs[where] += retval;
5362 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5365 return retval;
5368 /* For some target specific vectorization cost which can't be handled per stmt,
5369 we check the requisite conditions and adjust the vectorization cost
5370 accordingly if satisfied. One typical example is to model shift cost for
5371 vector with length by counting number of required lengths under condition
5372 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5374 void
5375 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5377 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5379 rgroup_controls *rgc;
5380 unsigned int num_vectors_m1;
5381 unsigned int shift_cnt = 0;
5382 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5383 if (rgc->type)
5384 /* Each length needs one shift to fill into bits 0-7. */
5385 shift_cnt += num_vectors_m1 + 1;
5387 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5388 NULL_TREE, 0, vect_body);
5392 void
5393 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5395 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5397 adjust_vect_cost_per_loop (loop_vinfo);
5398 density_test (loop_vinfo);
5400 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5401 that require versioning for any reason. The vectorization is at
5402 best a wash inside the loop, and the versioning checks make
5403 profitability highly unlikely and potentially quite harmful. */
5404 if (!m_vect_nonmem
5405 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5406 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5407 m_costs[vect_body] += 10000;
5410 vector_costs::finish_cost (scalar_costs);
5413 /* Implement targetm.loop_unroll_adjust. */
5415 static unsigned
5416 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5418 if (unroll_only_small_loops)
5420 /* TODO: These are hardcoded values right now. We probably should use
5421 a PARAM here. */
5422 if (loop->ninsns <= 6)
5423 return MIN (4, nunroll);
5424 if (loop->ninsns <= 10)
5425 return MIN (2, nunroll);
5427 return 0;
5430 return nunroll;
5433 /* Returns a function decl for a vectorized version of the builtin function
5434 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5435 if it is not available.
5437 Implement targetm.vectorize.builtin_vectorized_function. */
5439 static tree
5440 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5441 tree type_in)
5443 machine_mode in_mode, out_mode;
5444 int in_n, out_n;
5446 if (TARGET_DEBUG_BUILTIN)
5447 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5448 combined_fn_name (combined_fn (fn)),
5449 GET_MODE_NAME (TYPE_MODE (type_out)),
5450 GET_MODE_NAME (TYPE_MODE (type_in)));
5452 /* TODO: Should this be gcc_assert? */
5453 if (TREE_CODE (type_out) != VECTOR_TYPE
5454 || TREE_CODE (type_in) != VECTOR_TYPE)
5455 return NULL_TREE;
5457 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5458 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5459 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5460 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5462 switch (fn)
5464 CASE_CFN_COPYSIGN:
5465 if (VECTOR_UNIT_VSX_P (V2DFmode)
5466 && out_mode == DFmode && out_n == 2
5467 && in_mode == DFmode && in_n == 2)
5468 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5469 if (VECTOR_UNIT_VSX_P (V4SFmode)
5470 && out_mode == SFmode && out_n == 4
5471 && in_mode == SFmode && in_n == 4)
5472 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5473 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5474 && out_mode == SFmode && out_n == 4
5475 && in_mode == SFmode && in_n == 4)
5476 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5477 break;
5478 CASE_CFN_CEIL:
5479 if (VECTOR_UNIT_VSX_P (V2DFmode)
5480 && out_mode == DFmode && out_n == 2
5481 && in_mode == DFmode && in_n == 2)
5482 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5483 if (VECTOR_UNIT_VSX_P (V4SFmode)
5484 && out_mode == SFmode && out_n == 4
5485 && in_mode == SFmode && in_n == 4)
5486 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5487 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5488 && out_mode == SFmode && out_n == 4
5489 && in_mode == SFmode && in_n == 4)
5490 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5491 break;
5492 CASE_CFN_FLOOR:
5493 if (VECTOR_UNIT_VSX_P (V2DFmode)
5494 && out_mode == DFmode && out_n == 2
5495 && in_mode == DFmode && in_n == 2)
5496 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5497 if (VECTOR_UNIT_VSX_P (V4SFmode)
5498 && out_mode == SFmode && out_n == 4
5499 && in_mode == SFmode && in_n == 4)
5500 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5501 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5502 && out_mode == SFmode && out_n == 4
5503 && in_mode == SFmode && in_n == 4)
5504 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5505 break;
5506 CASE_CFN_FMA:
5507 if (VECTOR_UNIT_VSX_P (V2DFmode)
5508 && out_mode == DFmode && out_n == 2
5509 && in_mode == DFmode && in_n == 2)
5510 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5511 if (VECTOR_UNIT_VSX_P (V4SFmode)
5512 && out_mode == SFmode && out_n == 4
5513 && in_mode == SFmode && in_n == 4)
5514 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5515 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5516 && out_mode == SFmode && out_n == 4
5517 && in_mode == SFmode && in_n == 4)
5518 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5519 break;
5520 CASE_CFN_TRUNC:
5521 if (VECTOR_UNIT_VSX_P (V2DFmode)
5522 && out_mode == DFmode && out_n == 2
5523 && in_mode == DFmode && in_n == 2)
5524 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5525 if (VECTOR_UNIT_VSX_P (V4SFmode)
5526 && out_mode == SFmode && out_n == 4
5527 && in_mode == SFmode && in_n == 4)
5528 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5529 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5530 && out_mode == SFmode && out_n == 4
5531 && in_mode == SFmode && in_n == 4)
5532 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5533 break;
5534 CASE_CFN_NEARBYINT:
5535 if (VECTOR_UNIT_VSX_P (V2DFmode)
5536 && flag_unsafe_math_optimizations
5537 && out_mode == DFmode && out_n == 2
5538 && in_mode == DFmode && in_n == 2)
5539 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5540 if (VECTOR_UNIT_VSX_P (V4SFmode)
5541 && flag_unsafe_math_optimizations
5542 && out_mode == SFmode && out_n == 4
5543 && in_mode == SFmode && in_n == 4)
5544 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5545 break;
5546 CASE_CFN_RINT:
5547 if (VECTOR_UNIT_VSX_P (V2DFmode)
5548 && !flag_trapping_math
5549 && out_mode == DFmode && out_n == 2
5550 && in_mode == DFmode && in_n == 2)
5551 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5552 if (VECTOR_UNIT_VSX_P (V4SFmode)
5553 && !flag_trapping_math
5554 && out_mode == SFmode && out_n == 4
5555 && in_mode == SFmode && in_n == 4)
5556 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5557 break;
5558 default:
5559 break;
5562 /* Generate calls to libmass if appropriate. */
5563 if (rs6000_veclib_handler)
5564 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5566 return NULL_TREE;
5569 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5570 library with vectorized intrinsics. */
5572 static tree
5573 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5574 tree type_in)
5576 char name[32];
5577 const char *suffix = NULL;
5578 tree fntype, new_fndecl, bdecl = NULL_TREE;
5579 int n_args = 1;
5580 const char *bname;
5581 machine_mode el_mode, in_mode;
5582 int n, in_n;
5584 /* Libmass is suitable for unsafe math only as it does not correctly support
5585 parts of IEEE with the required precision such as denormals. Only support
5586 it if we have VSX to use the simd d2 or f4 functions.
5587 XXX: Add variable length support. */
5588 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5589 return NULL_TREE;
5591 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5592 n = TYPE_VECTOR_SUBPARTS (type_out);
5593 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5594 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5595 if (el_mode != in_mode
5596 || n != in_n)
5597 return NULL_TREE;
5599 switch (fn)
5601 CASE_CFN_ATAN2:
5602 CASE_CFN_HYPOT:
5603 CASE_CFN_POW:
5604 n_args = 2;
5605 gcc_fallthrough ();
5607 CASE_CFN_ACOS:
5608 CASE_CFN_ACOSH:
5609 CASE_CFN_ASIN:
5610 CASE_CFN_ASINH:
5611 CASE_CFN_ATAN:
5612 CASE_CFN_ATANH:
5613 CASE_CFN_CBRT:
5614 CASE_CFN_COS:
5615 CASE_CFN_COSH:
5616 CASE_CFN_ERF:
5617 CASE_CFN_ERFC:
5618 CASE_CFN_EXP2:
5619 CASE_CFN_EXP:
5620 CASE_CFN_EXPM1:
5621 CASE_CFN_LGAMMA:
5622 CASE_CFN_LOG10:
5623 CASE_CFN_LOG1P:
5624 CASE_CFN_LOG2:
5625 CASE_CFN_LOG:
5626 CASE_CFN_SIN:
5627 CASE_CFN_SINH:
5628 CASE_CFN_SQRT:
5629 CASE_CFN_TAN:
5630 CASE_CFN_TANH:
5631 if (el_mode == DFmode && n == 2)
5633 bdecl = mathfn_built_in (double_type_node, fn);
5634 suffix = "d2"; /* pow -> powd2 */
5636 else if (el_mode == SFmode && n == 4)
5638 bdecl = mathfn_built_in (float_type_node, fn);
5639 suffix = "4"; /* powf -> powf4 */
5641 else
5642 return NULL_TREE;
5643 if (!bdecl)
5644 return NULL_TREE;
5645 break;
5647 default:
5648 return NULL_TREE;
5651 gcc_assert (suffix != NULL);
5652 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5653 if (!bname)
5654 return NULL_TREE;
5656 strcpy (name, bname + strlen ("__builtin_"));
5657 strcat (name, suffix);
5659 if (n_args == 1)
5660 fntype = build_function_type_list (type_out, type_in, NULL);
5661 else if (n_args == 2)
5662 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5663 else
5664 gcc_unreachable ();
5666 /* Build a function declaration for the vectorized function. */
5667 new_fndecl = build_decl (BUILTINS_LOCATION,
5668 FUNCTION_DECL, get_identifier (name), fntype);
5669 TREE_PUBLIC (new_fndecl) = 1;
5670 DECL_EXTERNAL (new_fndecl) = 1;
5671 DECL_IS_NOVOPS (new_fndecl) = 1;
5672 TREE_READONLY (new_fndecl) = 1;
5674 return new_fndecl;
5678 /* Default CPU string for rs6000*_file_start functions. */
5679 static const char *rs6000_default_cpu;
5681 #ifdef USING_ELFOS_H
5682 const char *rs6000_machine;
5684 const char *
5685 rs6000_machine_from_flags (void)
5687 /* e300 and e500 */
5688 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5689 return "e300";
5690 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5691 return "e500";
5692 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5693 return "e500mc";
5694 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5695 return "e500mc64";
5696 if (rs6000_cpu == PROCESSOR_PPCE5500)
5697 return "e5500";
5698 if (rs6000_cpu == PROCESSOR_PPCE6500)
5699 return "e6500";
5701 /* 400 series */
5702 if (rs6000_cpu == PROCESSOR_PPC403)
5703 return "\"403\"";
5704 if (rs6000_cpu == PROCESSOR_PPC405)
5705 return "\"405\"";
5706 if (rs6000_cpu == PROCESSOR_PPC440)
5707 return "\"440\"";
5708 if (rs6000_cpu == PROCESSOR_PPC476)
5709 return "\"476\"";
5711 /* A2 */
5712 if (rs6000_cpu == PROCESSOR_PPCA2)
5713 return "a2";
5715 /* Cell BE */
5716 if (rs6000_cpu == PROCESSOR_CELL)
5717 return "cell";
5719 /* Titan */
5720 if (rs6000_cpu == PROCESSOR_TITAN)
5721 return "titan";
5723 /* 500 series and 800 series */
5724 if (rs6000_cpu == PROCESSOR_MPCCORE)
5725 return "\"821\"";
5727 #if 0
5728 /* This (and ppc64 below) are disabled here (for now at least) because
5729 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5730 are #define'd as some of these. Untangling that is a job for later. */
5732 /* 600 series and 700 series, "classic" */
5733 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5734 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5735 || rs6000_cpu == PROCESSOR_PPC750)
5736 return "ppc";
5737 #endif
5739 /* Classic with AltiVec, "G4" */
5740 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5741 return "\"7450\"";
5743 #if 0
5744 /* The older 64-bit CPUs */
5745 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5746 || rs6000_cpu == PROCESSOR_RS64A)
5747 return "ppc64";
5748 #endif
5750 HOST_WIDE_INT flags = rs6000_isa_flags;
5752 /* Disable the flags that should never influence the .machine selection. */
5753 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5755 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5756 return "power10";
5757 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5758 return "power9";
5759 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5760 return "power8";
5761 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5762 return "power7";
5763 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5764 return "power6";
5765 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5766 return "power5";
5767 if ((flags & ISA_2_1_MASKS) != 0)
5768 return "power4";
5769 if ((flags & OPTION_MASK_POWERPC64) != 0)
5770 return "ppc64";
5771 return "ppc";
5774 void
5775 emit_asm_machine (void)
5777 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5779 #endif
5781 /* Do anything needed at the start of the asm file. */
5783 static void
5784 rs6000_file_start (void)
5786 char buffer[80];
5787 const char *start = buffer;
5788 FILE *file = asm_out_file;
5790 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5792 default_file_start ();
5794 if (flag_verbose_asm)
5796 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5798 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5800 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5801 start = "";
5804 if (OPTION_SET_P (rs6000_cpu_index))
5806 fprintf (file, "%s -mcpu=%s", start,
5807 processor_target_table[rs6000_cpu_index].name);
5808 start = "";
5811 if (OPTION_SET_P (rs6000_tune_index))
5813 fprintf (file, "%s -mtune=%s", start,
5814 processor_target_table[rs6000_tune_index].name);
5815 start = "";
5818 if (PPC405_ERRATUM77)
5820 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5821 start = "";
5824 #ifdef USING_ELFOS_H
5825 switch (rs6000_sdata)
5827 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5828 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5829 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5830 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5833 if (rs6000_sdata && g_switch_value)
5835 fprintf (file, "%s -G %d", start,
5836 g_switch_value);
5837 start = "";
5839 #endif
5841 if (*start == '\0')
5842 putc ('\n', file);
5845 #ifdef USING_ELFOS_H
5846 rs6000_machine = rs6000_machine_from_flags ();
5847 emit_asm_machine ();
5848 #endif
5850 if (DEFAULT_ABI == ABI_ELFv2)
5851 fprintf (file, "\t.abiversion 2\n");
5855 /* Return nonzero if this function is known to have a null epilogue. */
5858 direct_return (void)
5860 if (reload_completed)
5862 rs6000_stack_t *info = rs6000_stack_info ();
5864 if (info->first_gp_reg_save == 32
5865 && info->first_fp_reg_save == 64
5866 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5867 && ! info->lr_save_p
5868 && ! info->cr_save_p
5869 && info->vrsave_size == 0
5870 && ! info->push_p)
5871 return 1;
5874 return 0;
5877 /* Helper for num_insns_constant. Calculate number of instructions to
5878 load VALUE to a single gpr using combinations of addi, addis, ori,
5879 oris, sldi and rldimi instructions. */
5881 static int
5882 num_insns_constant_gpr (HOST_WIDE_INT value)
5884 /* signed constant loadable with addi */
5885 if (SIGNED_INTEGER_16BIT_P (value))
5886 return 1;
5888 /* constant loadable with addis */
5889 else if ((value & 0xffff) == 0
5890 && (value >> 31 == -1 || value >> 31 == 0))
5891 return 1;
5893 /* PADDI can support up to 34 bit signed integers. */
5894 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5895 return 1;
5897 else if (TARGET_POWERPC64)
5899 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5900 HOST_WIDE_INT high = value >> 31;
5902 if (high == 0 || high == -1)
5903 return 2;
5905 high >>= 1;
5907 if (low == 0 || low == high)
5908 return num_insns_constant_gpr (high) + 1;
5909 else if (high == 0)
5910 return num_insns_constant_gpr (low) + 1;
5911 else
5912 return (num_insns_constant_gpr (high)
5913 + num_insns_constant_gpr (low) + 1);
5916 else
5917 return 2;
5920 /* Helper for num_insns_constant. Allow constants formed by the
5921 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5922 and handle modes that require multiple gprs. */
5924 static int
5925 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5927 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5928 int total = 0;
5929 while (nregs-- > 0)
5931 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5932 int insns = num_insns_constant_gpr (low);
5933 if (insns > 2
5934 /* We won't get more than 2 from num_insns_constant_gpr
5935 except when TARGET_POWERPC64 and mode is DImode or
5936 wider, so the register mode must be DImode. */
5937 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5938 insns = 2;
5939 total += insns;
5940 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5941 it all at once would be UB. */
5942 value >>= (BITS_PER_WORD - 1);
5943 value >>= 1;
5945 return total;
5948 /* Return the number of instructions it takes to form a constant in as
5949 many gprs are needed for MODE. */
5952 num_insns_constant (rtx op, machine_mode mode)
5954 HOST_WIDE_INT val;
5956 switch (GET_CODE (op))
5958 case CONST_INT:
5959 val = INTVAL (op);
5960 break;
5962 case CONST_WIDE_INT:
5964 int insns = 0;
5965 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5966 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5967 DImode);
5968 return insns;
5971 case CONST_DOUBLE:
5973 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5975 if (mode == SFmode || mode == SDmode)
5977 long l;
5979 if (mode == SDmode)
5980 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5981 else
5982 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5983 /* See the first define_split in rs6000.md handling a
5984 const_double_operand. */
5985 val = l;
5986 mode = SImode;
5988 else if (mode == DFmode || mode == DDmode)
5990 long l[2];
5992 if (mode == DDmode)
5993 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5994 else
5995 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5997 /* See the second (32-bit) and third (64-bit) define_split
5998 in rs6000.md handling a const_double_operand. */
5999 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6000 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6001 mode = DImode;
6003 else if (mode == TFmode || mode == TDmode
6004 || mode == KFmode || mode == IFmode)
6006 long l[4];
6007 int insns;
6009 if (mode == TDmode)
6010 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6011 else
6012 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6014 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6015 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6016 insns = num_insns_constant_multi (val, DImode);
6017 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6018 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6019 insns += num_insns_constant_multi (val, DImode);
6020 return insns;
6022 else
6023 gcc_unreachable ();
6025 break;
6027 default:
6028 gcc_unreachable ();
6031 return num_insns_constant_multi (val, mode);
6034 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6035 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6036 corresponding element of the vector, but for V4SFmode, the
6037 corresponding "float" is interpreted as an SImode integer. */
6039 HOST_WIDE_INT
6040 const_vector_elt_as_int (rtx op, unsigned int elt)
6042 rtx tmp;
6044 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6045 gcc_assert (GET_MODE (op) != V2DImode
6046 && GET_MODE (op) != V2DFmode);
6048 tmp = CONST_VECTOR_ELT (op, elt);
6049 if (GET_MODE (op) == V4SFmode)
6050 tmp = gen_lowpart (SImode, tmp);
6051 return INTVAL (tmp);
6054 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6055 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6056 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6057 all items are set to the same value and contain COPIES replicas of the
6058 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6059 operand and the others are set to the value of the operand's msb. */
6061 static bool
6062 vspltis_constant (rtx op, unsigned step, unsigned copies)
6064 machine_mode mode = GET_MODE (op);
6065 machine_mode inner = GET_MODE_INNER (mode);
6067 unsigned i;
6068 unsigned nunits;
6069 unsigned bitsize;
6070 unsigned mask;
6072 HOST_WIDE_INT val;
6073 HOST_WIDE_INT splat_val;
6074 HOST_WIDE_INT msb_val;
6076 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6077 return false;
6079 nunits = GET_MODE_NUNITS (mode);
6080 bitsize = GET_MODE_BITSIZE (inner);
6081 mask = GET_MODE_MASK (inner);
6083 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6084 splat_val = val;
6085 msb_val = val >= 0 ? 0 : -1;
6087 if (val == 0 && step > 1)
6089 /* Special case for loading most significant bit with step > 1.
6090 In that case, match 0s in all but step-1s elements, where match
6091 EASY_VECTOR_MSB. */
6092 for (i = 1; i < nunits; ++i)
6094 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6095 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6096 if ((i & (step - 1)) == step - 1)
6098 if (!EASY_VECTOR_MSB (elt_val, inner))
6099 break;
6101 else if (elt_val)
6102 break;
6104 if (i == nunits)
6105 return true;
6108 /* Construct the value to be splatted, if possible. If not, return 0. */
6109 for (i = 2; i <= copies; i *= 2)
6111 HOST_WIDE_INT small_val;
6112 bitsize /= 2;
6113 small_val = splat_val >> bitsize;
6114 mask >>= bitsize;
6115 if (splat_val != ((HOST_WIDE_INT)
6116 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6117 | (small_val & mask)))
6118 return false;
6119 splat_val = small_val;
6120 inner = smallest_int_mode_for_size (bitsize);
6123 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6124 if (EASY_VECTOR_15 (splat_val))
6127 /* Also check if we can splat, and then add the result to itself. Do so if
6128 the value is positive, of if the splat instruction is using OP's mode;
6129 for splat_val < 0, the splat and the add should use the same mode. */
6130 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6131 && (splat_val >= 0 || (step == 1 && copies == 1)))
6134 /* Also check if are loading up the most significant bit which can be done by
6135 loading up -1 and shifting the value left by -1. Only do this for
6136 step 1 here, for larger steps it is done earlier. */
6137 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6140 else
6141 return false;
6143 /* Check if VAL is present in every STEP-th element, and the
6144 other elements are filled with its most significant bit. */
6145 for (i = 1; i < nunits; ++i)
6147 HOST_WIDE_INT desired_val;
6148 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6149 if ((i & (step - 1)) == 0)
6150 desired_val = val;
6151 else
6152 desired_val = msb_val;
6154 if (desired_val != const_vector_elt_as_int (op, elt))
6155 return false;
6158 return true;
6161 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6162 instruction, filling in the bottom elements with 0 or -1.
6164 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6165 for the number of zeroes to shift in, or negative for the number of 0xff
6166 bytes to shift in.
6168 OP is a CONST_VECTOR. */
6171 vspltis_shifted (rtx op)
6173 machine_mode mode = GET_MODE (op);
6174 machine_mode inner = GET_MODE_INNER (mode);
6176 unsigned i, j;
6177 unsigned nunits;
6178 unsigned mask;
6180 HOST_WIDE_INT val;
6182 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6183 return false;
6185 /* We need to create pseudo registers to do the shift, so don't recognize
6186 shift vector constants after reload. Don't match it even before RA
6187 after split1 is done, because there won't be further splitting pass
6188 before RA to do the splitting. */
6189 if (!can_create_pseudo_p ()
6190 || (cfun->curr_properties & PROP_rtl_split_insns))
6191 return false;
6193 nunits = GET_MODE_NUNITS (mode);
6194 mask = GET_MODE_MASK (inner);
6196 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6198 /* Check if the value can really be the operand of a vspltis[bhw]. */
6199 if (EASY_VECTOR_15 (val))
6202 /* Also check if we are loading up the most significant bit which can be done
6203 by loading up -1 and shifting the value left by -1. */
6204 else if (EASY_VECTOR_MSB (val, inner))
6207 else
6208 return 0;
6210 /* Check if VAL is present in every STEP-th element until we find elements
6211 that are 0 or all 1 bits. */
6212 for (i = 1; i < nunits; ++i)
6214 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6215 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6217 /* If the value isn't the splat value, check for the remaining elements
6218 being 0/-1. */
6219 if (val != elt_val)
6221 if (elt_val == 0)
6223 for (j = i+1; j < nunits; ++j)
6225 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6226 if (const_vector_elt_as_int (op, elt2) != 0)
6227 return 0;
6230 return (nunits - i) * GET_MODE_SIZE (inner);
6233 else if ((elt_val & mask) == mask)
6235 for (j = i+1; j < nunits; ++j)
6237 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6238 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6239 return 0;
6242 return -((nunits - i) * GET_MODE_SIZE (inner));
6245 else
6246 return 0;
6250 /* If all elements are equal, we don't need to do VSLDOI. */
6251 return 0;
6255 /* Return non-zero (element mode byte size) if OP is of the given MODE
6256 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6259 easy_altivec_constant (rtx op, machine_mode mode)
6261 unsigned step, copies;
6263 if (mode == VOIDmode)
6264 mode = GET_MODE (op);
6265 else if (mode != GET_MODE (op))
6266 return 0;
6268 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6269 constants. */
6270 if (mode == V2DFmode)
6271 return zero_constant (op, mode) ? 8 : 0;
6273 else if (mode == V2DImode)
6275 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6276 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6277 return 0;
6279 if (zero_constant (op, mode))
6280 return 8;
6282 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6283 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6284 return 8;
6286 return 0;
6289 /* V1TImode is a special container for TImode. Ignore for now. */
6290 else if (mode == V1TImode)
6291 return 0;
6293 /* Start with a vspltisw. */
6294 step = GET_MODE_NUNITS (mode) / 4;
6295 copies = 1;
6297 if (vspltis_constant (op, step, copies))
6298 return 4;
6300 /* Then try with a vspltish. */
6301 if (step == 1)
6302 copies <<= 1;
6303 else
6304 step >>= 1;
6306 if (vspltis_constant (op, step, copies))
6307 return 2;
6309 /* And finally a vspltisb. */
6310 if (step == 1)
6311 copies <<= 1;
6312 else
6313 step >>= 1;
6315 if (vspltis_constant (op, step, copies))
6316 return 1;
6318 if (vspltis_shifted (op) != 0)
6319 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6321 return 0;
6324 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6325 result is OP. Abort if it is not possible. */
6328 gen_easy_altivec_constant (rtx op)
6330 machine_mode mode = GET_MODE (op);
6331 int nunits = GET_MODE_NUNITS (mode);
6332 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6333 unsigned step = nunits / 4;
6334 unsigned copies = 1;
6336 /* Start with a vspltisw. */
6337 if (vspltis_constant (op, step, copies))
6338 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6340 /* Then try with a vspltish. */
6341 if (step == 1)
6342 copies <<= 1;
6343 else
6344 step >>= 1;
6346 if (vspltis_constant (op, step, copies))
6347 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6349 /* And finally a vspltisb. */
6350 if (step == 1)
6351 copies <<= 1;
6352 else
6353 step >>= 1;
6355 if (vspltis_constant (op, step, copies))
6356 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6358 gcc_unreachable ();
6361 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6362 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6364 Return the number of instructions needed (1 or 2) into the address pointed
6365 via NUM_INSNS_PTR.
6367 Return the constant that is being split via CONSTANT_PTR. */
6369 bool
6370 xxspltib_constant_p (rtx op,
6371 machine_mode mode,
6372 int *num_insns_ptr,
6373 int *constant_ptr)
6375 size_t nunits = GET_MODE_NUNITS (mode);
6376 size_t i;
6377 HOST_WIDE_INT value;
6378 rtx element;
6380 /* Set the returned values to out of bound values. */
6381 *num_insns_ptr = -1;
6382 *constant_ptr = 256;
6384 if (!TARGET_P9_VECTOR)
6385 return false;
6387 if (mode == VOIDmode)
6388 mode = GET_MODE (op);
6390 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6391 return false;
6393 /* Handle (vec_duplicate <constant>). */
6394 if (GET_CODE (op) == VEC_DUPLICATE)
6396 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6397 && mode != V2DImode)
6398 return false;
6400 element = XEXP (op, 0);
6401 if (!CONST_INT_P (element))
6402 return false;
6404 value = INTVAL (element);
6405 if (!IN_RANGE (value, -128, 127))
6406 return false;
6409 /* Handle (const_vector [...]). */
6410 else if (GET_CODE (op) == CONST_VECTOR)
6412 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6413 && mode != V2DImode)
6414 return false;
6416 element = CONST_VECTOR_ELT (op, 0);
6417 if (!CONST_INT_P (element))
6418 return false;
6420 value = INTVAL (element);
6421 if (!IN_RANGE (value, -128, 127))
6422 return false;
6424 for (i = 1; i < nunits; i++)
6426 element = CONST_VECTOR_ELT (op, i);
6427 if (!CONST_INT_P (element))
6428 return false;
6430 if (value != INTVAL (element))
6431 return false;
6435 /* Handle integer constants being loaded into the upper part of the VSX
6436 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6437 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6438 else if (CONST_INT_P (op))
6440 if (!SCALAR_INT_MODE_P (mode))
6441 return false;
6443 value = INTVAL (op);
6444 if (!IN_RANGE (value, -128, 127))
6445 return false;
6447 if (!IN_RANGE (value, -1, 0))
6449 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6450 return false;
6452 if (EASY_VECTOR_15 (value))
6453 return false;
6457 else
6458 return false;
6460 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6461 sign extend. Special case 0/-1 to allow getting any VSX register instead
6462 of an Altivec register. */
6463 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6464 && EASY_VECTOR_15 (value))
6465 return false;
6467 /* Return # of instructions and the constant byte for XXSPLTIB. */
6468 if (mode == V16QImode)
6469 *num_insns_ptr = 1;
6471 else if (IN_RANGE (value, -1, 0))
6472 *num_insns_ptr = 1;
6474 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6475 single XXSPLTIW or XXSPLTIDP instruction. */
6476 else if (vsx_prefixed_constant (op, mode))
6477 return false;
6479 /* Return XXSPLITB followed by a sign extend operation to convert the
6480 constant to V8HImode or V4SImode. */
6481 else
6482 *num_insns_ptr = 2;
6484 *constant_ptr = (int) value;
6485 return true;
6488 const char *
6489 output_vec_const_move (rtx *operands)
6491 int shift;
6492 machine_mode mode;
6493 rtx dest, vec;
6495 dest = operands[0];
6496 vec = operands[1];
6497 mode = GET_MODE (dest);
6499 if (TARGET_VSX)
6501 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6502 int xxspltib_value = 256;
6503 int num_insns = -1;
6505 if (zero_constant (vec, mode))
6507 if (TARGET_P9_VECTOR)
6508 return "xxspltib %x0,0";
6510 else if (dest_vmx_p)
6511 return "vspltisw %0,0";
6513 else
6514 return "xxlxor %x0,%x0,%x0";
6517 if (all_ones_constant (vec, mode))
6519 if (TARGET_P9_VECTOR)
6520 return "xxspltib %x0,255";
6522 else if (dest_vmx_p)
6523 return "vspltisw %0,-1";
6525 else if (TARGET_P8_VECTOR)
6526 return "xxlorc %x0,%x0,%x0";
6528 else
6529 gcc_unreachable ();
6532 vec_const_128bit_type vsx_const;
6533 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6535 unsigned imm = constant_generates_lxvkq (&vsx_const);
6536 if (imm)
6538 operands[2] = GEN_INT (imm);
6539 return "lxvkq %x0,%2";
6542 imm = constant_generates_xxspltiw (&vsx_const);
6543 if (imm)
6545 operands[2] = GEN_INT (imm);
6546 return "xxspltiw %x0,%2";
6549 imm = constant_generates_xxspltidp (&vsx_const);
6550 if (imm)
6552 operands[2] = GEN_INT (imm);
6553 return "xxspltidp %x0,%2";
6557 if (TARGET_P9_VECTOR
6558 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6560 if (num_insns == 1)
6562 operands[2] = GEN_INT (xxspltib_value & 0xff);
6563 return "xxspltib %x0,%2";
6566 return "#";
6570 if (TARGET_ALTIVEC)
6572 rtx splat_vec;
6574 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6575 if (zero_constant (vec, mode))
6576 return "vspltisw %0,0";
6578 if (all_ones_constant (vec, mode))
6579 return "vspltisw %0,-1";
6581 /* Do we need to construct a value using VSLDOI? */
6582 shift = vspltis_shifted (vec);
6583 if (shift != 0)
6584 return "#";
6586 splat_vec = gen_easy_altivec_constant (vec);
6587 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6588 operands[1] = XEXP (splat_vec, 0);
6589 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6590 return "#";
6592 switch (GET_MODE (splat_vec))
6594 case E_V4SImode:
6595 return "vspltisw %0,%1";
6597 case E_V8HImode:
6598 return "vspltish %0,%1";
6600 case E_V16QImode:
6601 return "vspltisb %0,%1";
6603 default:
6604 gcc_unreachable ();
6608 gcc_unreachable ();
6611 /* Initialize vector TARGET to VALS. */
6613 void
6614 rs6000_expand_vector_init (rtx target, rtx vals)
6616 machine_mode mode = GET_MODE (target);
6617 machine_mode inner_mode = GET_MODE_INNER (mode);
6618 unsigned int n_elts = GET_MODE_NUNITS (mode);
6619 int n_var = 0, one_var = -1;
6620 bool all_same = true, all_const_zero = true;
6621 rtx x, mem;
6622 unsigned int i;
6624 for (i = 0; i < n_elts; ++i)
6626 x = XVECEXP (vals, 0, i);
6627 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6628 ++n_var, one_var = i;
6629 else if (x != CONST0_RTX (inner_mode))
6630 all_const_zero = false;
6632 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6633 all_same = false;
6636 if (n_var == 0)
6638 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6639 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6640 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6642 /* Zero register. */
6643 emit_move_insn (target, CONST0_RTX (mode));
6644 return;
6646 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6648 /* Splat immediate. */
6649 emit_insn (gen_rtx_SET (target, const_vec));
6650 return;
6652 else
6654 /* Load from constant pool. */
6655 emit_move_insn (target, const_vec);
6656 return;
6660 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6661 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6663 rtx op[2];
6664 size_t i;
6665 size_t num_elements = all_same ? 1 : 2;
6666 for (i = 0; i < num_elements; i++)
6668 op[i] = XVECEXP (vals, 0, i);
6669 /* Just in case there is a SUBREG with a smaller mode, do a
6670 conversion. */
6671 if (GET_MODE (op[i]) != inner_mode)
6673 rtx tmp = gen_reg_rtx (inner_mode);
6674 convert_move (tmp, op[i], 0);
6675 op[i] = tmp;
6677 /* Allow load with splat double word. */
6678 else if (MEM_P (op[i]))
6680 if (!all_same)
6681 op[i] = force_reg (inner_mode, op[i]);
6683 else if (!REG_P (op[i]))
6684 op[i] = force_reg (inner_mode, op[i]);
6687 if (all_same)
6689 if (mode == V2DFmode)
6690 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6691 else
6692 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6694 else
6696 if (mode == V2DFmode)
6697 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6698 else
6699 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6701 return;
6704 /* Special case initializing vector int if we are on 64-bit systems with
6705 direct move or we have the ISA 3.0 instructions. */
6706 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6707 && TARGET_DIRECT_MOVE_64BIT)
6709 if (all_same)
6711 rtx element0 = XVECEXP (vals, 0, 0);
6712 if (MEM_P (element0))
6713 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6714 else
6715 element0 = force_reg (SImode, element0);
6717 if (TARGET_P9_VECTOR)
6718 emit_insn (gen_vsx_splat_v4si (target, element0));
6719 else
6721 rtx tmp = gen_reg_rtx (DImode);
6722 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6723 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6725 return;
6727 else
6729 rtx elements[4];
6730 size_t i;
6732 for (i = 0; i < 4; i++)
6733 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6735 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6736 elements[2], elements[3]));
6737 return;
6741 /* With single precision floating point on VSX, know that internally single
6742 precision is actually represented as a double, and either make 2 V2DF
6743 vectors, and convert these vectors to single precision, or do one
6744 conversion, and splat the result to the other elements. */
6745 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6747 if (all_same)
6749 rtx element0 = XVECEXP (vals, 0, 0);
6751 if (TARGET_P9_VECTOR)
6753 if (MEM_P (element0))
6754 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6756 emit_insn (gen_vsx_splat_v4sf (target, element0));
6759 else
6761 rtx freg = gen_reg_rtx (V4SFmode);
6762 rtx sreg = force_reg (SFmode, element0);
6763 rtx cvt = (TARGET_XSCVDPSPN
6764 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6765 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6767 emit_insn (cvt);
6768 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6769 const0_rtx));
6772 else
6774 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6776 rtx tmp_sf[4];
6777 rtx tmp_si[4];
6778 rtx tmp_di[4];
6779 rtx mrg_di[4];
6780 for (i = 0; i < 4; i++)
6782 tmp_si[i] = gen_reg_rtx (SImode);
6783 tmp_di[i] = gen_reg_rtx (DImode);
6784 mrg_di[i] = gen_reg_rtx (DImode);
6785 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6786 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6787 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6790 if (!BYTES_BIG_ENDIAN)
6792 std::swap (tmp_di[0], tmp_di[1]);
6793 std::swap (tmp_di[2], tmp_di[3]);
6796 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6797 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6798 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6799 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6801 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6802 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6803 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6805 else
6807 rtx dbl_even = gen_reg_rtx (V2DFmode);
6808 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6809 rtx flt_even = gen_reg_rtx (V4SFmode);
6810 rtx flt_odd = gen_reg_rtx (V4SFmode);
6811 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6812 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6813 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6814 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6816 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6817 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6818 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6819 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6820 rs6000_expand_extract_even (target, flt_even, flt_odd);
6823 return;
6826 /* Special case initializing vector short/char that are splats if we are on
6827 64-bit systems with direct move. */
6828 if (all_same && TARGET_DIRECT_MOVE_64BIT
6829 && (mode == V16QImode || mode == V8HImode))
6831 rtx op0 = XVECEXP (vals, 0, 0);
6832 rtx di_tmp = gen_reg_rtx (DImode);
6834 if (!REG_P (op0))
6835 op0 = force_reg (GET_MODE_INNER (mode), op0);
6837 if (mode == V16QImode)
6839 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6840 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6841 return;
6844 if (mode == V8HImode)
6846 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6847 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6848 return;
6852 /* Store value to stack temp. Load vector element. Splat. However, splat
6853 of 64-bit items is not supported on Altivec. */
6854 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6856 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6857 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6858 XVECEXP (vals, 0, 0));
6859 x = gen_rtx_UNSPEC (VOIDmode,
6860 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6861 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6862 gen_rtvec (2,
6863 gen_rtx_SET (target, mem),
6864 x)));
6865 x = gen_rtx_VEC_SELECT (inner_mode, target,
6866 gen_rtx_PARALLEL (VOIDmode,
6867 gen_rtvec (1, const0_rtx)));
6868 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6869 return;
6872 /* One field is non-constant. Load constant then overwrite
6873 varying field. */
6874 if (n_var == 1)
6876 rtx copy = copy_rtx (vals);
6878 /* Load constant part of vector, substitute neighboring value for
6879 varying element. */
6880 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6881 rs6000_expand_vector_init (target, copy);
6883 /* Insert variable. */
6884 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6885 GEN_INT (one_var));
6886 return;
6889 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6891 rtx op[16];
6892 /* Force the values into word_mode registers. */
6893 for (i = 0; i < n_elts; i++)
6895 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6896 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6897 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6900 /* Take unsigned char big endianness on 64bit as example for below
6901 construction, the input values are: A, B, C, D, ..., O, P. */
6903 if (TARGET_DIRECT_MOVE_128)
6905 /* Move to VSX register with vec_concat, each has 2 values.
6906 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6907 vr1[1] = { xxxxxxxC, xxxxxxxD };
6909 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6910 rtx vr1[8];
6911 for (i = 0; i < n_elts / 2; i++)
6913 vr1[i] = gen_reg_rtx (V2DImode);
6914 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6915 op[i * 2 + 1]));
6918 /* Pack vectors with 2 values into vectors with 4 values.
6919 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6920 vr2[1] = { xxxExxxF, xxxGxxxH };
6921 vr2[1] = { xxxIxxxJ, xxxKxxxL };
6922 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6923 rtx vr2[4];
6924 for (i = 0; i < n_elts / 4; i++)
6926 vr2[i] = gen_reg_rtx (V4SImode);
6927 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6928 vr1[i * 2 + 1]));
6931 /* Pack vectors with 4 values into vectors with 8 values.
6932 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6933 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6934 rtx vr3[2];
6935 for (i = 0; i < n_elts / 8; i++)
6937 vr3[i] = gen_reg_rtx (V8HImode);
6938 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
6939 vr2[i * 2 + 1]));
6942 /* If it's V8HImode, it's done and return it. */
6943 if (mode == V8HImode)
6945 emit_insn (gen_rtx_SET (target, vr3[0]));
6946 return;
6949 /* Pack vectors with 8 values into 16 values. */
6950 rtx res = gen_reg_rtx (V16QImode);
6951 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
6952 emit_insn (gen_rtx_SET (target, res));
6954 else
6956 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
6957 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
6958 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
6959 rtx perm_idx;
6961 /* Set up some common gen routines and values. */
6962 if (BYTES_BIG_ENDIAN)
6964 if (mode == V16QImode)
6966 merge_v16qi = gen_altivec_vmrghb;
6967 merge_v8hi = gen_altivec_vmrglh;
6969 else
6970 merge_v8hi = gen_altivec_vmrghh;
6972 merge_v4si = gen_altivec_vmrglw;
6973 perm_idx = GEN_INT (3);
6975 else
6977 if (mode == V16QImode)
6979 merge_v16qi = gen_altivec_vmrglb;
6980 merge_v8hi = gen_altivec_vmrghh;
6982 else
6983 merge_v8hi = gen_altivec_vmrglh;
6985 merge_v4si = gen_altivec_vmrghw;
6986 perm_idx = GEN_INT (0);
6989 /* Move to VSX register with direct move.
6990 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
6991 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
6993 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
6994 rtx vr_qi[16];
6995 for (i = 0; i < n_elts; i++)
6997 vr_qi[i] = gen_reg_rtx (V16QImode);
6998 if (TARGET_POWERPC64)
6999 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7000 else
7001 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7004 /* Merge/move to vector short.
7005 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7006 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7008 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7009 rtx vr_hi[8];
7010 for (i = 0; i < 8; i++)
7012 rtx tmp = vr_qi[i];
7013 if (mode == V16QImode)
7015 tmp = gen_reg_rtx (V16QImode);
7016 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7018 vr_hi[i] = gen_reg_rtx (V8HImode);
7019 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7022 /* Merge vector short to vector int.
7023 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7024 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7026 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7027 rtx vr_si[4];
7028 for (i = 0; i < 4; i++)
7030 rtx tmp = gen_reg_rtx (V8HImode);
7031 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7032 vr_si[i] = gen_reg_rtx (V4SImode);
7033 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7036 /* Merge vector int to vector long.
7037 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7038 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7039 rtx vr_di[2];
7040 for (i = 0; i < 2; i++)
7042 rtx tmp = gen_reg_rtx (V4SImode);
7043 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7044 vr_di[i] = gen_reg_rtx (V2DImode);
7045 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7048 rtx res = gen_reg_rtx (V2DImode);
7049 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7050 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7053 return;
7056 /* Construct the vector in memory one field at a time
7057 and load the whole vector. */
7058 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7059 for (i = 0; i < n_elts; i++)
7060 emit_move_insn (adjust_address_nv (mem, inner_mode,
7061 i * GET_MODE_SIZE (inner_mode)),
7062 XVECEXP (vals, 0, i));
7063 emit_move_insn (target, mem);
7066 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7067 is variable and also counts by vector element size for p9 and above. */
7069 static void
7070 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7072 machine_mode mode = GET_MODE (target);
7074 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7076 machine_mode inner_mode = GET_MODE (val);
7078 int width = GET_MODE_SIZE (inner_mode);
7080 gcc_assert (width >= 1 && width <= 8);
7082 int shift = exact_log2 (width);
7084 machine_mode idx_mode = GET_MODE (idx);
7086 machine_mode shift_mode;
7087 rtx (*gen_ashl)(rtx, rtx, rtx);
7088 rtx (*gen_lvsl)(rtx, rtx);
7089 rtx (*gen_lvsr)(rtx, rtx);
7091 if (TARGET_POWERPC64)
7093 shift_mode = DImode;
7094 gen_ashl = gen_ashldi3;
7095 gen_lvsl = gen_altivec_lvsl_reg_di;
7096 gen_lvsr = gen_altivec_lvsr_reg_di;
7098 else
7100 shift_mode = SImode;
7101 gen_ashl = gen_ashlsi3;
7102 gen_lvsl = gen_altivec_lvsl_reg_si;
7103 gen_lvsr = gen_altivec_lvsr_reg_si;
7105 /* Generate the IDX for permute shift, width is the vector element size.
7106 idx = idx * width. */
7107 rtx tmp = gen_reg_rtx (shift_mode);
7108 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7110 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7112 /* lvsr v1,0,idx. */
7113 rtx pcvr = gen_reg_rtx (V16QImode);
7114 emit_insn (gen_lvsr (pcvr, tmp));
7116 /* lvsl v2,0,idx. */
7117 rtx pcvl = gen_reg_rtx (V16QImode);
7118 emit_insn (gen_lvsl (pcvl, tmp));
7120 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7122 rtx permr
7123 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
7124 emit_insn (permr);
7126 rs6000_expand_vector_set (target, val, const0_rtx);
7128 rtx perml
7129 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
7130 emit_insn (perml);
7133 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7134 is variable and also counts by vector element size for p7 & p8. */
7136 static void
7137 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7139 machine_mode mode = GET_MODE (target);
7141 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7143 machine_mode inner_mode = GET_MODE (val);
7144 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7146 int width = GET_MODE_SIZE (inner_mode);
7147 gcc_assert (width >= 1 && width <= 4);
7149 int shift = exact_log2 (width);
7151 machine_mode idx_mode = GET_MODE (idx);
7153 machine_mode shift_mode;
7154 rtx (*gen_ashl)(rtx, rtx, rtx);
7155 rtx (*gen_add)(rtx, rtx, rtx);
7156 rtx (*gen_sub)(rtx, rtx, rtx);
7157 rtx (*gen_lvsl)(rtx, rtx);
7159 if (TARGET_POWERPC64)
7161 shift_mode = DImode;
7162 gen_ashl = gen_ashldi3;
7163 gen_add = gen_adddi3;
7164 gen_sub = gen_subdi3;
7165 gen_lvsl = gen_altivec_lvsl_reg_di;
7167 else
7169 shift_mode = SImode;
7170 gen_ashl = gen_ashlsi3;
7171 gen_add = gen_addsi3;
7172 gen_sub = gen_subsi3;
7173 gen_lvsl = gen_altivec_lvsl_reg_si;
7176 /* idx = idx * width. */
7177 rtx tmp = gen_reg_rtx (shift_mode);
7178 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7180 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7182 /* For LE: idx = idx + 8. */
7183 if (!BYTES_BIG_ENDIAN)
7184 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7185 else
7186 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7188 /* lxv vs33, mask.
7189 DImode: 0xffffffffffffffff0000000000000000
7190 SImode: 0x00000000ffffffff0000000000000000
7191 HImode: 0x000000000000ffff0000000000000000.
7192 QImode: 0x00000000000000ff0000000000000000. */
7193 rtx mask = gen_reg_rtx (V16QImode);
7194 rtx mask_v2di = gen_reg_rtx (V2DImode);
7195 rtvec v = rtvec_alloc (2);
7196 if (!BYTES_BIG_ENDIAN)
7198 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7199 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7201 else
7203 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7204 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7206 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7207 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7208 emit_insn (gen_rtx_SET (mask, sub_mask));
7210 /* mtvsrd[wz] f0,tmp_val. */
7211 rtx tmp_val = gen_reg_rtx (SImode);
7212 if (inner_mode == E_SFmode)
7213 if (TARGET_DIRECT_MOVE_64BIT)
7214 emit_insn (gen_movsi_from_sf (tmp_val, val));
7215 else
7217 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7218 emit_insn (gen_movsf_hardfloat (stack, val));
7219 rtx stack2 = copy_rtx (stack);
7220 PUT_MODE (stack2, SImode);
7221 emit_move_insn (tmp_val, stack2);
7223 else
7224 tmp_val = force_reg (SImode, val);
7226 rtx val_v16qi = gen_reg_rtx (V16QImode);
7227 rtx val_v2di = gen_reg_rtx (V2DImode);
7228 rtvec vec_val = rtvec_alloc (2);
7229 if (!BYTES_BIG_ENDIAN)
7231 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7232 RTVEC_ELT (vec_val, 1) = tmp_val;
7234 else
7236 RTVEC_ELT (vec_val, 0) = tmp_val;
7237 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7239 emit_insn (
7240 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7241 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7242 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7244 /* lvsl 13,0,idx. */
7245 rtx pcv = gen_reg_rtx (V16QImode);
7246 emit_insn (gen_lvsl (pcv, tmp));
7248 /* vperm 1,1,1,13. */
7249 /* vperm 0,0,0,13. */
7250 rtx val_perm = gen_reg_rtx (V16QImode);
7251 rtx mask_perm = gen_reg_rtx (V16QImode);
7252 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7253 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7255 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7257 /* xxsel 34,34,32,33. */
7258 emit_insn (
7259 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7262 /* Set field ELT_RTX of TARGET to VAL. */
7264 void
7265 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7267 machine_mode mode = GET_MODE (target);
7268 machine_mode inner_mode = GET_MODE_INNER (mode);
7269 rtx reg = gen_reg_rtx (mode);
7270 rtx mask, mem, x;
7271 int width = GET_MODE_SIZE (inner_mode);
7272 int i;
7274 val = force_reg (GET_MODE (val), val);
7276 if (VECTOR_MEM_VSX_P (mode))
7278 if (!CONST_INT_P (elt_rtx))
7280 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7281 when elt_rtx is variable. */
7282 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7284 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7285 return;
7287 else if (TARGET_VSX)
7289 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7290 return;
7292 else
7293 gcc_assert (CONST_INT_P (elt_rtx));
7296 rtx insn = NULL_RTX;
7298 if (mode == V2DFmode)
7299 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7301 else if (mode == V2DImode)
7302 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7304 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7306 if (mode == V4SImode)
7307 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7308 else if (mode == V8HImode)
7309 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7310 else if (mode == V16QImode)
7311 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7312 else if (mode == V4SFmode)
7313 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7316 if (insn)
7318 emit_insn (insn);
7319 return;
7323 /* Simplify setting single element vectors like V1TImode. */
7324 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7325 && INTVAL (elt_rtx) == 0)
7327 emit_move_insn (target, gen_lowpart (mode, val));
7328 return;
7331 /* Load single variable value. */
7332 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7333 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7334 x = gen_rtx_UNSPEC (VOIDmode,
7335 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7336 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7337 gen_rtvec (2,
7338 gen_rtx_SET (reg, mem),
7339 x)));
7341 /* Linear sequence. */
7342 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7343 for (i = 0; i < 16; ++i)
7344 XVECEXP (mask, 0, i) = GEN_INT (i);
7346 /* Set permute mask to insert element into target. */
7347 for (i = 0; i < width; ++i)
7348 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7349 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7351 if (BYTES_BIG_ENDIAN)
7352 x = gen_rtx_UNSPEC (mode,
7353 gen_rtvec (3, target, reg,
7354 force_reg (V16QImode, x)),
7355 UNSPEC_VPERM);
7356 else
7358 if (TARGET_P9_VECTOR)
7359 x = gen_rtx_UNSPEC (mode,
7360 gen_rtvec (3, reg, target,
7361 force_reg (V16QImode, x)),
7362 UNSPEC_VPERMR);
7363 else
7365 /* Invert selector. We prefer to generate VNAND on P8 so
7366 that future fusion opportunities can kick in, but must
7367 generate VNOR elsewhere. */
7368 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7369 rtx iorx = (TARGET_P8_VECTOR
7370 ? gen_rtx_IOR (V16QImode, notx, notx)
7371 : gen_rtx_AND (V16QImode, notx, notx));
7372 rtx tmp = gen_reg_rtx (V16QImode);
7373 emit_insn (gen_rtx_SET (tmp, iorx));
7375 /* Permute with operands reversed and adjusted selector. */
7376 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7377 UNSPEC_VPERM);
7381 emit_insn (gen_rtx_SET (target, x));
7384 /* Extract field ELT from VEC into TARGET. */
7386 void
7387 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7389 machine_mode mode = GET_MODE (vec);
7390 machine_mode inner_mode = GET_MODE_INNER (mode);
7391 rtx mem;
7393 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7395 switch (mode)
7397 default:
7398 break;
7399 case E_V1TImode:
7400 emit_move_insn (target, gen_lowpart (TImode, vec));
7401 break;
7402 case E_V2DFmode:
7403 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7404 return;
7405 case E_V2DImode:
7406 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7407 return;
7408 case E_V4SFmode:
7409 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7410 return;
7411 case E_V16QImode:
7412 if (TARGET_DIRECT_MOVE_64BIT)
7414 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7415 return;
7417 else
7418 break;
7419 case E_V8HImode:
7420 if (TARGET_DIRECT_MOVE_64BIT)
7422 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7423 return;
7425 else
7426 break;
7427 case E_V4SImode:
7428 if (TARGET_DIRECT_MOVE_64BIT)
7430 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7431 return;
7433 break;
7436 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7437 && TARGET_DIRECT_MOVE_64BIT)
7439 if (GET_MODE (elt) != DImode)
7441 rtx tmp = gen_reg_rtx (DImode);
7442 convert_move (tmp, elt, 0);
7443 elt = tmp;
7445 else if (!REG_P (elt))
7446 elt = force_reg (DImode, elt);
7448 switch (mode)
7450 case E_V1TImode:
7451 emit_move_insn (target, gen_lowpart (TImode, vec));
7452 return;
7454 case E_V2DFmode:
7455 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7456 return;
7458 case E_V2DImode:
7459 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7460 return;
7462 case E_V4SFmode:
7463 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7464 return;
7466 case E_V4SImode:
7467 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7468 return;
7470 case E_V8HImode:
7471 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7472 return;
7474 case E_V16QImode:
7475 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7476 return;
7478 default:
7479 gcc_unreachable ();
7483 /* Allocate mode-sized buffer. */
7484 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7486 emit_move_insn (mem, vec);
7487 if (CONST_INT_P (elt))
7489 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7491 /* Add offset to field within buffer matching vector element. */
7492 mem = adjust_address_nv (mem, inner_mode,
7493 modulo_elt * GET_MODE_SIZE (inner_mode));
7494 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7496 else
7498 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7499 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7500 rtx new_addr = gen_reg_rtx (Pmode);
7502 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7503 if (ele_size > 1)
7504 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7505 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7506 new_addr = change_address (mem, inner_mode, new_addr);
7507 emit_move_insn (target, new_addr);
7511 /* Return the offset within a memory object (MEM) of a vector type to a given
7512 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7513 the element is constant, we return a constant integer.
7515 Otherwise, we use a base register temporary to calculate the offset after
7516 masking it to fit within the bounds of the vector and scaling it. The
7517 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7518 built-in function. */
7520 static rtx
7521 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7523 if (CONST_INT_P (element))
7524 return GEN_INT (INTVAL (element) * scalar_size);
7526 /* All insns should use the 'Q' constraint (address is a single register) if
7527 the element number is not a constant. */
7528 gcc_assert (satisfies_constraint_Q (mem));
7530 /* Mask the element to make sure the element number is between 0 and the
7531 maximum number of elements - 1 so that we don't generate an address
7532 outside the vector. */
7533 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7534 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7535 emit_insn (gen_rtx_SET (base_tmp, and_op));
7537 /* Shift the element to get the byte offset from the element number. */
7538 int shift = exact_log2 (scalar_size);
7539 gcc_assert (shift >= 0);
7541 if (shift > 0)
7543 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7544 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7547 return base_tmp;
7550 /* Helper function update PC-relative addresses when we are adjusting a memory
7551 address (ADDR) to a vector to point to a scalar field within the vector with
7552 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7553 use the base register temporary (BASE_TMP) to form the address. */
7555 static rtx
7556 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7558 rtx new_addr = NULL;
7560 gcc_assert (CONST_INT_P (element_offset));
7562 if (GET_CODE (addr) == CONST)
7563 addr = XEXP (addr, 0);
7565 if (GET_CODE (addr) == PLUS)
7567 rtx op0 = XEXP (addr, 0);
7568 rtx op1 = XEXP (addr, 1);
7570 if (CONST_INT_P (op1))
7572 HOST_WIDE_INT offset
7573 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7575 if (offset == 0)
7576 new_addr = op0;
7578 else
7580 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7581 new_addr = gen_rtx_CONST (Pmode, plus);
7585 else
7587 emit_move_insn (base_tmp, addr);
7588 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7592 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7594 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7595 new_addr = gen_rtx_CONST (Pmode, plus);
7598 else
7599 gcc_unreachable ();
7601 return new_addr;
7604 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7605 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7606 temporary (BASE_TMP) to fixup the address. Return the new memory address
7607 that is valid for reads or writes to a given register (SCALAR_REG).
7609 This function is expected to be called after reload is completed when we are
7610 splitting insns. The temporary BASE_TMP might be set multiple times with
7611 this code. */
7614 rs6000_adjust_vec_address (rtx scalar_reg,
7615 rtx mem,
7616 rtx element,
7617 rtx base_tmp,
7618 machine_mode scalar_mode)
7620 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7621 rtx addr = XEXP (mem, 0);
7622 rtx new_addr;
7624 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7625 gcc_assert (!reg_mentioned_p (base_tmp, element));
7627 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7628 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7630 /* Calculate what we need to add to the address to get the element
7631 address. */
7632 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7634 /* Create the new address pointing to the element within the vector. If we
7635 are adding 0, we don't have to change the address. */
7636 if (element_offset == const0_rtx)
7637 new_addr = addr;
7639 /* A simple indirect address can be converted into a reg + offset
7640 address. */
7641 else if (REG_P (addr) || SUBREG_P (addr))
7642 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7644 /* For references to local static variables, fold a constant offset into the
7645 address. */
7646 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7647 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7649 /* Optimize D-FORM addresses with constant offset with a constant element, to
7650 include the element offset in the address directly. */
7651 else if (GET_CODE (addr) == PLUS)
7653 rtx op0 = XEXP (addr, 0);
7654 rtx op1 = XEXP (addr, 1);
7656 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7657 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7659 /* op0 should never be r0, because r0+offset is not valid. But it
7660 doesn't hurt to make sure it is not r0. */
7661 gcc_assert (reg_or_subregno (op0) != 0);
7663 /* D-FORM address with constant element number. */
7664 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7665 rtx offset_rtx = GEN_INT (offset);
7666 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7668 else
7670 /* If we don't have a D-FORM address with a constant element number,
7671 add the two elements in the current address. Then add the offset.
7673 Previously, we tried to add the offset to OP1 and change the
7674 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7675 complicated because we had to verify that op1 was not GPR0 and we
7676 had a constant element offset (due to the way ADDI is defined).
7677 By doing the add of OP0 and OP1 first, and then adding in the
7678 offset, it has the benefit that if D-FORM instructions are
7679 allowed, the offset is part of the memory access to the vector
7680 element. */
7681 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7682 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7686 else
7688 emit_move_insn (base_tmp, addr);
7689 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7692 /* If the address isn't valid, move the address into the temporary base
7693 register. Some reasons it could not be valid include:
7695 The address offset overflowed the 16 or 34 bit offset size;
7696 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7697 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7698 Only X_FORM loads can be done, and the address is D_FORM. */
7700 enum insn_form iform
7701 = address_to_insn_form (new_addr, scalar_mode,
7702 reg_to_non_prefixed (scalar_reg, scalar_mode));
7704 if (iform == INSN_FORM_BAD)
7706 emit_move_insn (base_tmp, new_addr);
7707 new_addr = base_tmp;
7710 return change_address (mem, scalar_mode, new_addr);
7713 /* Split a variable vec_extract operation into the component instructions. */
7715 void
7716 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7717 rtx tmp_altivec)
7719 machine_mode mode = GET_MODE (src);
7720 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7721 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7722 int byte_shift = exact_log2 (scalar_size);
7724 gcc_assert (byte_shift >= 0);
7726 /* If we are given a memory address, optimize to load just the element. We
7727 don't have to adjust the vector element number on little endian
7728 systems. */
7729 if (MEM_P (src))
7731 emit_move_insn (dest,
7732 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7733 scalar_mode));
7734 return;
7737 else if (REG_P (src) || SUBREG_P (src))
7739 int num_elements = GET_MODE_NUNITS (mode);
7740 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7741 int bit_shift = 7 - exact_log2 (num_elements);
7742 rtx element2;
7743 unsigned int dest_regno = reg_or_subregno (dest);
7744 unsigned int src_regno = reg_or_subregno (src);
7745 unsigned int element_regno = reg_or_subregno (element);
7747 gcc_assert (REG_P (tmp_gpr));
7749 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7750 a general purpose register. */
7751 if (TARGET_P9_VECTOR
7752 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7753 && INT_REGNO_P (dest_regno)
7754 && ALTIVEC_REGNO_P (src_regno)
7755 && INT_REGNO_P (element_regno))
7757 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7758 rtx element_si = gen_rtx_REG (SImode, element_regno);
7760 if (mode == V16QImode)
7761 emit_insn (BYTES_BIG_ENDIAN
7762 ? gen_vextublx (dest_si, element_si, src)
7763 : gen_vextubrx (dest_si, element_si, src));
7765 else if (mode == V8HImode)
7767 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7768 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7769 emit_insn (BYTES_BIG_ENDIAN
7770 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7771 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7775 else
7777 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7778 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7779 emit_insn (BYTES_BIG_ENDIAN
7780 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7781 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7784 return;
7788 gcc_assert (REG_P (tmp_altivec));
7790 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7791 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7792 will shift the element into the upper position (adding 3 to convert a
7793 byte shift into a bit shift). */
7794 if (scalar_size == 8)
7796 if (!BYTES_BIG_ENDIAN)
7798 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7799 element2 = tmp_gpr;
7801 else
7802 element2 = element;
7804 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7805 bit. */
7806 emit_insn (gen_rtx_SET (tmp_gpr,
7807 gen_rtx_AND (DImode,
7808 gen_rtx_ASHIFT (DImode,
7809 element2,
7810 GEN_INT (6)),
7811 GEN_INT (64))));
7813 else
7815 if (!BYTES_BIG_ENDIAN)
7817 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7819 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7820 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7821 element2 = tmp_gpr;
7823 else
7824 element2 = element;
7826 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7829 /* Get the value into the lower byte of the Altivec register where VSLO
7830 expects it. */
7831 if (TARGET_P9_VECTOR)
7832 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7833 else if (can_create_pseudo_p ())
7834 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7835 else
7837 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7838 emit_move_insn (tmp_di, tmp_gpr);
7839 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7842 /* Do the VSLO to get the value into the final location. */
7843 switch (mode)
7845 case E_V2DFmode:
7846 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7847 return;
7849 case E_V2DImode:
7850 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7851 return;
7853 case E_V4SFmode:
7855 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7856 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7857 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7858 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7859 tmp_altivec));
7861 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7862 return;
7865 case E_V4SImode:
7866 case E_V8HImode:
7867 case E_V16QImode:
7869 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7870 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7871 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7872 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7873 tmp_altivec));
7874 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7875 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7876 GEN_INT (64 - bits_in_element)));
7877 return;
7880 default:
7881 gcc_unreachable ();
7884 return;
7886 else
7887 gcc_unreachable ();
7890 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7891 selects whether the alignment is abi mandated, optional, or
7892 both abi and optional alignment. */
7894 unsigned int
7895 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7897 if (how != align_opt)
7899 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7900 align = 128;
7903 if (how != align_abi)
7905 if (TREE_CODE (type) == ARRAY_TYPE
7906 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7908 if (align < BITS_PER_WORD)
7909 align = BITS_PER_WORD;
7913 return align;
7916 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7917 instructions simply ignore the low bits; VSX memory instructions
7918 are aligned to 4 or 8 bytes. */
7920 static bool
7921 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7923 return (STRICT_ALIGNMENT
7924 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7925 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7926 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7927 && (int) align < VECTOR_ALIGN (mode)))));
7930 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
7932 unsigned int
7933 rs6000_special_adjust_field_align (tree type, unsigned int computed)
7935 if (computed <= 32 || TYPE_PACKED (type))
7936 return computed;
7938 /* Strip initial arrays. */
7939 while (TREE_CODE (type) == ARRAY_TYPE)
7940 type = TREE_TYPE (type);
7942 /* If RECORD or UNION, recursively find the first field. */
7943 while (AGGREGATE_TYPE_P (type))
7945 tree field = TYPE_FIELDS (type);
7947 /* Skip all non field decls */
7948 while (field != NULL
7949 && (TREE_CODE (field) != FIELD_DECL
7950 || DECL_FIELD_ABI_IGNORED (field)))
7951 field = DECL_CHAIN (field);
7953 if (! field)
7954 break;
7956 /* A packed field does not contribute any extra alignment. */
7957 if (DECL_PACKED (field))
7958 return computed;
7960 type = TREE_TYPE (field);
7962 /* Strip arrays. */
7963 while (TREE_CODE (type) == ARRAY_TYPE)
7964 type = TREE_TYPE (type);
7967 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
7968 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
7969 computed = MIN (computed, 32);
7971 return computed;
7974 /* AIX increases natural record alignment to doubleword if the innermost first
7975 field is an FP double while the FP fields remain word aligned.
7976 Only called if TYPE initially is a RECORD or UNION. */
7978 unsigned int
7979 rs6000_special_round_type_align (tree type, unsigned int computed,
7980 unsigned int specified)
7982 unsigned int align = MAX (computed, specified);
7984 if (TYPE_PACKED (type) || align >= 64)
7985 return align;
7987 /* If RECORD or UNION, recursively find the first field. */
7990 tree field = TYPE_FIELDS (type);
7992 /* Skip all non field decls */
7993 while (field != NULL
7994 && (TREE_CODE (field) != FIELD_DECL
7995 || DECL_FIELD_ABI_IGNORED (field)))
7996 field = DECL_CHAIN (field);
7998 if (! field)
7999 break;
8001 /* A packed field does not contribute any extra alignment. */
8002 if (DECL_PACKED (field))
8003 return align;
8005 type = TREE_TYPE (field);
8007 /* Strip arrays. */
8008 while (TREE_CODE (type) == ARRAY_TYPE)
8009 type = TREE_TYPE (type);
8010 } while (AGGREGATE_TYPE_P (type));
8012 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8013 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8014 align = MAX (align, 64);
8016 return align;
8019 /* Darwin increases record alignment to the natural alignment of
8020 the first field. */
8022 unsigned int
8023 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8024 unsigned int specified)
8026 unsigned int align = MAX (computed, specified);
8028 if (TYPE_PACKED (type))
8029 return align;
8031 /* Find the first field, looking down into aggregates. */
8032 do {
8033 tree field = TYPE_FIELDS (type);
8034 /* Skip all non field decls */
8035 while (field != NULL
8036 && (TREE_CODE (field) != FIELD_DECL
8037 || DECL_FIELD_ABI_IGNORED (field)))
8038 field = DECL_CHAIN (field);
8039 if (! field)
8040 break;
8041 /* A packed field does not contribute any extra alignment. */
8042 if (DECL_PACKED (field))
8043 return align;
8044 type = TREE_TYPE (field);
8045 while (TREE_CODE (type) == ARRAY_TYPE)
8046 type = TREE_TYPE (type);
8047 } while (AGGREGATE_TYPE_P (type));
8049 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8050 align = MAX (align, TYPE_ALIGN (type));
8052 return align;
8055 /* Return 1 for an operand in small memory on V.4/eabi. */
8058 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8059 machine_mode mode ATTRIBUTE_UNUSED)
8061 #if TARGET_ELF
8062 rtx sym_ref;
8064 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8065 return 0;
8067 if (DEFAULT_ABI != ABI_V4)
8068 return 0;
8070 if (SYMBOL_REF_P (op))
8071 sym_ref = op;
8073 else if (GET_CODE (op) != CONST
8074 || GET_CODE (XEXP (op, 0)) != PLUS
8075 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8076 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8077 return 0;
8079 else
8081 rtx sum = XEXP (op, 0);
8082 HOST_WIDE_INT summand;
8084 /* We have to be careful here, because it is the referenced address
8085 that must be 32k from _SDA_BASE_, not just the symbol. */
8086 summand = INTVAL (XEXP (sum, 1));
8087 if (summand < 0 || summand > g_switch_value)
8088 return 0;
8090 sym_ref = XEXP (sum, 0);
8093 return SYMBOL_REF_SMALL_P (sym_ref);
8094 #else
8095 return 0;
8096 #endif
8099 /* Return true if either operand is a general purpose register. */
8101 bool
8102 gpr_or_gpr_p (rtx op0, rtx op1)
8104 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8105 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8108 /* Return true if this is a move direct operation between GPR registers and
8109 floating point/VSX registers. */
8111 bool
8112 direct_move_p (rtx op0, rtx op1)
8114 if (!REG_P (op0) || !REG_P (op1))
8115 return false;
8117 if (!TARGET_DIRECT_MOVE)
8118 return false;
8120 int regno0 = REGNO (op0);
8121 int regno1 = REGNO (op1);
8122 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8123 return false;
8125 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8126 return true;
8128 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8129 return true;
8131 return false;
8134 /* Return true if the ADDR is an acceptable address for a quad memory
8135 operation of mode MODE (either LQ/STQ for general purpose registers, or
8136 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8137 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8138 3.0 LXV/STXV instruction. */
8140 bool
8141 quad_address_p (rtx addr, machine_mode mode, bool strict)
8143 rtx op0, op1;
8145 if (GET_MODE_SIZE (mode) < 16)
8146 return false;
8148 if (legitimate_indirect_address_p (addr, strict))
8149 return true;
8151 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8152 return false;
8154 /* Is this a valid prefixed address? If the bottom four bits of the offset
8155 are non-zero, we could use a prefixed instruction (which does not have the
8156 DQ-form constraint that the traditional instruction had) instead of
8157 forcing the unaligned offset to a GPR. */
8158 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8159 return true;
8161 if (GET_CODE (addr) != PLUS)
8162 return false;
8164 op0 = XEXP (addr, 0);
8165 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8166 return false;
8168 op1 = XEXP (addr, 1);
8169 if (!CONST_INT_P (op1))
8170 return false;
8172 return quad_address_offset_p (INTVAL (op1));
8175 /* Return true if this is a load or store quad operation. This function does
8176 not handle the atomic quad memory instructions. */
8178 bool
8179 quad_load_store_p (rtx op0, rtx op1)
8181 bool ret;
8183 if (!TARGET_QUAD_MEMORY)
8184 ret = false;
8186 else if (REG_P (op0) && MEM_P (op1))
8187 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8188 && quad_memory_operand (op1, GET_MODE (op1))
8189 && !reg_overlap_mentioned_p (op0, op1));
8191 else if (MEM_P (op0) && REG_P (op1))
8192 ret = (quad_memory_operand (op0, GET_MODE (op0))
8193 && quad_int_reg_operand (op1, GET_MODE (op1)));
8195 else
8196 ret = false;
8198 if (TARGET_DEBUG_ADDR)
8200 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8201 ret ? "true" : "false");
8202 debug_rtx (gen_rtx_SET (op0, op1));
8205 return ret;
8208 /* Given an address, return a constant offset term if one exists. */
8210 static rtx
8211 address_offset (rtx op)
8213 if (GET_CODE (op) == PRE_INC
8214 || GET_CODE (op) == PRE_DEC)
8215 op = XEXP (op, 0);
8216 else if (GET_CODE (op) == PRE_MODIFY
8217 || GET_CODE (op) == LO_SUM)
8218 op = XEXP (op, 1);
8220 if (GET_CODE (op) == CONST)
8221 op = XEXP (op, 0);
8223 if (GET_CODE (op) == PLUS)
8224 op = XEXP (op, 1);
8226 if (CONST_INT_P (op))
8227 return op;
8229 return NULL_RTX;
8232 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8233 the mode. If we can't find (or don't know) the alignment of the symbol
8234 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8235 should be pessimistic]. Offsets are validated in the same way as for
8236 reg + offset. */
8237 static bool
8238 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8240 /* We should not get here with this. */
8241 gcc_checking_assert (! mode_supports_dq_form (mode));
8243 if (GET_CODE (x) == CONST)
8244 x = XEXP (x, 0);
8246 /* If we are building PIC code, then any symbol must be wrapped in an
8247 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8248 bool machopic_offs_p = false;
8249 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8251 x = XVECEXP (x, 0, 0);
8252 machopic_offs_p = true;
8255 rtx sym = NULL_RTX;
8256 unsigned HOST_WIDE_INT offset = 0;
8258 if (GET_CODE (x) == PLUS)
8260 sym = XEXP (x, 0);
8261 if (! SYMBOL_REF_P (sym))
8262 return false;
8263 if (!CONST_INT_P (XEXP (x, 1)))
8264 return false;
8265 offset = INTVAL (XEXP (x, 1));
8267 else if (SYMBOL_REF_P (x))
8268 sym = x;
8269 else if (CONST_INT_P (x))
8270 offset = INTVAL (x);
8271 else if (GET_CODE (x) == LABEL_REF)
8272 offset = 0; // We assume code labels are Pmode aligned
8273 else
8274 return false; // not sure what we have here.
8276 /* If we don't know the alignment of the thing to which the symbol refers,
8277 we assume optimistically it is "enough".
8278 ??? maybe we should be pessimistic instead. */
8279 unsigned align = 0;
8281 if (sym)
8283 tree decl = SYMBOL_REF_DECL (sym);
8284 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8285 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8286 return false;
8287 #if TARGET_MACHO
8288 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8289 /* The decl in an indirection symbol is the original one, which might
8290 be less aligned than the indirection. Our indirections are always
8291 pointer-aligned. */
8293 else
8294 #endif
8295 if (decl && DECL_ALIGN (decl))
8296 align = DECL_ALIGN_UNIT (decl);
8299 unsigned int extra = 0;
8300 switch (mode)
8302 case E_DFmode:
8303 case E_DDmode:
8304 case E_DImode:
8305 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8306 addressing. */
8307 if (VECTOR_MEM_VSX_P (mode))
8308 return false;
8310 if (!TARGET_POWERPC64)
8311 extra = 4;
8312 else if ((offset & 3) || (align & 3))
8313 return false;
8314 break;
8316 case E_TFmode:
8317 case E_IFmode:
8318 case E_KFmode:
8319 case E_TDmode:
8320 case E_TImode:
8321 case E_PTImode:
8322 extra = 8;
8323 if (!TARGET_POWERPC64)
8324 extra = 12;
8325 else if ((offset & 3) || (align & 3))
8326 return false;
8327 break;
8329 default:
8330 break;
8333 /* We only care if the access(es) would cause a change to the high part. */
8334 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8335 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8338 /* Return true if the MEM operand is a memory operand suitable for use
8339 with a (full width, possibly multiple) gpr load/store. On
8340 powerpc64 this means the offset must be divisible by 4.
8341 Implements 'Y' constraint.
8343 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8344 a constraint function we know the operand has satisfied a suitable
8345 memory predicate.
8347 Offsetting a lo_sum should not be allowed, except where we know by
8348 alignment that a 32k boundary is not crossed. Note that by
8349 "offsetting" here we mean a further offset to access parts of the
8350 MEM. It's fine to have a lo_sum where the inner address is offset
8351 from a sym, since the same sym+offset will appear in the high part
8352 of the address calculation. */
8354 bool
8355 mem_operand_gpr (rtx op, machine_mode mode)
8357 unsigned HOST_WIDE_INT offset;
8358 int extra;
8359 rtx addr = XEXP (op, 0);
8361 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8362 if (TARGET_UPDATE
8363 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8364 && mode_supports_pre_incdec_p (mode)
8365 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8366 return true;
8368 /* Allow prefixed instructions if supported. If the bottom two bits of the
8369 offset are non-zero, we could use a prefixed instruction (which does not
8370 have the DS-form constraint that the traditional instruction had) instead
8371 of forcing the unaligned offset to a GPR. */
8372 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8373 return true;
8375 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8376 really OK. Doing this early avoids teaching all the other machinery
8377 about them. */
8378 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8379 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8381 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8382 if (!rs6000_offsettable_memref_p (op, mode, false))
8383 return false;
8385 op = address_offset (addr);
8386 if (op == NULL_RTX)
8387 return true;
8389 offset = INTVAL (op);
8390 if (TARGET_POWERPC64 && (offset & 3) != 0)
8391 return false;
8393 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8394 if (extra < 0)
8395 extra = 0;
8397 if (GET_CODE (addr) == LO_SUM)
8398 /* For lo_sum addresses, we must allow any offset except one that
8399 causes a wrap, so test only the low 16 bits. */
8400 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8402 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8405 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8406 enforce an offset divisible by 4 even for 32-bit. */
8408 bool
8409 mem_operand_ds_form (rtx op, machine_mode mode)
8411 unsigned HOST_WIDE_INT offset;
8412 int extra;
8413 rtx addr = XEXP (op, 0);
8415 /* Allow prefixed instructions if supported. If the bottom two bits of the
8416 offset are non-zero, we could use a prefixed instruction (which does not
8417 have the DS-form constraint that the traditional instruction had) instead
8418 of forcing the unaligned offset to a GPR. */
8419 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8420 return true;
8422 if (!offsettable_address_p (false, mode, addr))
8423 return false;
8425 op = address_offset (addr);
8426 if (op == NULL_RTX)
8427 return true;
8429 offset = INTVAL (op);
8430 if ((offset & 3) != 0)
8431 return false;
8433 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8434 if (extra < 0)
8435 extra = 0;
8437 if (GET_CODE (addr) == LO_SUM)
8438 /* For lo_sum addresses, we must allow any offset except one that
8439 causes a wrap, so test only the low 16 bits. */
8440 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8442 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8445 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8447 static bool
8448 reg_offset_addressing_ok_p (machine_mode mode)
8450 switch (mode)
8452 case E_V16QImode:
8453 case E_V8HImode:
8454 case E_V4SFmode:
8455 case E_V4SImode:
8456 case E_V2DFmode:
8457 case E_V2DImode:
8458 case E_V1TImode:
8459 case E_TImode:
8460 case E_TFmode:
8461 case E_KFmode:
8462 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8463 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8464 a vector mode, if we want to use the VSX registers to move it around,
8465 we need to restrict ourselves to reg+reg addressing. Similarly for
8466 IEEE 128-bit floating point that is passed in a single vector
8467 register. */
8468 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8469 return mode_supports_dq_form (mode);
8470 break;
8472 /* The vector pair/quad types support offset addressing if the
8473 underlying vectors support offset addressing. */
8474 case E_OOmode:
8475 case E_XOmode:
8476 return TARGET_MMA;
8478 case E_SDmode:
8479 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8480 addressing for the LFIWZX and STFIWX instructions. */
8481 if (TARGET_NO_SDMODE_STACK)
8482 return false;
8483 break;
8485 default:
8486 break;
8489 return true;
8492 static bool
8493 virtual_stack_registers_memory_p (rtx op)
8495 int regnum;
8497 if (REG_P (op))
8498 regnum = REGNO (op);
8500 else if (GET_CODE (op) == PLUS
8501 && REG_P (XEXP (op, 0))
8502 && CONST_INT_P (XEXP (op, 1)))
8503 regnum = REGNO (XEXP (op, 0));
8505 else
8506 return false;
8508 return (regnum >= FIRST_VIRTUAL_REGISTER
8509 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8512 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8513 is known to not straddle a 32k boundary. This function is used
8514 to determine whether -mcmodel=medium code can use TOC pointer
8515 relative addressing for OP. This means the alignment of the TOC
8516 pointer must also be taken into account, and unfortunately that is
8517 only 8 bytes. */
8519 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8520 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8521 #endif
8523 static bool
8524 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8525 machine_mode mode)
8527 tree decl;
8528 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8530 if (!SYMBOL_REF_P (op))
8531 return false;
8533 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8534 SYMBOL_REF. */
8535 if (mode_supports_dq_form (mode))
8536 return false;
8538 dsize = GET_MODE_SIZE (mode);
8539 decl = SYMBOL_REF_DECL (op);
8540 if (!decl)
8542 if (dsize == 0)
8543 return false;
8545 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8546 replacing memory addresses with an anchor plus offset. We
8547 could find the decl by rummaging around in the block->objects
8548 VEC for the given offset but that seems like too much work. */
8549 dalign = BITS_PER_UNIT;
8550 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8551 && SYMBOL_REF_ANCHOR_P (op)
8552 && SYMBOL_REF_BLOCK (op) != NULL)
8554 struct object_block *block = SYMBOL_REF_BLOCK (op);
8556 dalign = block->alignment;
8557 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8559 else if (CONSTANT_POOL_ADDRESS_P (op))
8561 /* It would be nice to have get_pool_align().. */
8562 machine_mode cmode = get_pool_mode (op);
8564 dalign = GET_MODE_ALIGNMENT (cmode);
8567 else if (DECL_P (decl))
8569 dalign = DECL_ALIGN (decl);
8571 if (dsize == 0)
8573 /* Allow BLKmode when the entire object is known to not
8574 cross a 32k boundary. */
8575 if (!DECL_SIZE_UNIT (decl))
8576 return false;
8578 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8579 return false;
8581 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8582 if (dsize > 32768)
8583 return false;
8585 dalign /= BITS_PER_UNIT;
8586 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8587 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8588 return dalign >= dsize;
8591 else
8592 gcc_unreachable ();
8594 /* Find how many bits of the alignment we know for this access. */
8595 dalign /= BITS_PER_UNIT;
8596 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8597 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8598 mask = dalign - 1;
8599 lsb = offset & -offset;
8600 mask &= lsb - 1;
8601 dalign = mask + 1;
8603 return dalign >= dsize;
8606 static bool
8607 constant_pool_expr_p (rtx op)
8609 rtx base, offset;
8611 split_const (op, &base, &offset);
8612 return (SYMBOL_REF_P (base)
8613 && CONSTANT_POOL_ADDRESS_P (base)
8614 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8617 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8618 use that as the register to put the HIGH value into if register allocation
8619 is already done. */
8622 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8624 rtx tocrel, tocreg, hi;
8626 gcc_assert (TARGET_TOC);
8628 if (TARGET_DEBUG_ADDR)
8630 if (SYMBOL_REF_P (symbol))
8631 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8632 XSTR (symbol, 0));
8633 else
8635 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8636 GET_RTX_NAME (GET_CODE (symbol)));
8637 debug_rtx (symbol);
8641 if (!can_create_pseudo_p ())
8642 df_set_regs_ever_live (TOC_REGISTER, true);
8644 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8645 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8646 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8647 return tocrel;
8649 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8650 if (largetoc_reg != NULL)
8652 emit_move_insn (largetoc_reg, hi);
8653 hi = largetoc_reg;
8655 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8658 /* These are only used to pass through from print_operand/print_operand_address
8659 to rs6000_output_addr_const_extra over the intervening function
8660 output_addr_const which is not target code. */
8661 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8663 /* Return true if OP is a toc pointer relative address (the output
8664 of create_TOC_reference). If STRICT, do not match non-split
8665 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8666 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8667 TOCREL_OFFSET_RET respectively. */
8669 bool
8670 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8671 const_rtx *tocrel_offset_ret)
8673 if (!TARGET_TOC)
8674 return false;
8676 if (TARGET_CMODEL != CMODEL_SMALL)
8678 /* When strict ensure we have everything tidy. */
8679 if (strict
8680 && !(GET_CODE (op) == LO_SUM
8681 && REG_P (XEXP (op, 0))
8682 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8683 return false;
8685 /* When not strict, allow non-split TOC addresses and also allow
8686 (lo_sum (high ..)) TOC addresses created during reload. */
8687 if (GET_CODE (op) == LO_SUM)
8688 op = XEXP (op, 1);
8691 const_rtx tocrel_base = op;
8692 const_rtx tocrel_offset = const0_rtx;
8694 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8696 tocrel_base = XEXP (op, 0);
8697 tocrel_offset = XEXP (op, 1);
8700 if (tocrel_base_ret)
8701 *tocrel_base_ret = tocrel_base;
8702 if (tocrel_offset_ret)
8703 *tocrel_offset_ret = tocrel_offset;
8705 return (GET_CODE (tocrel_base) == UNSPEC
8706 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8707 && REG_P (XVECEXP (tocrel_base, 0, 1))
8708 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8711 /* Return true if X is a constant pool address, and also for cmodel=medium
8712 if X is a toc-relative address known to be offsettable within MODE. */
8714 bool
8715 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8716 bool strict)
8718 const_rtx tocrel_base, tocrel_offset;
8719 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8720 && (TARGET_CMODEL != CMODEL_MEDIUM
8721 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8722 || mode == QImode
8723 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8724 INTVAL (tocrel_offset), mode)));
8727 static bool
8728 legitimate_small_data_p (machine_mode mode, rtx x)
8730 return (DEFAULT_ABI == ABI_V4
8731 && !flag_pic && !TARGET_TOC
8732 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8733 && small_data_operand (x, mode));
8736 bool
8737 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8738 bool strict, bool worst_case)
8740 unsigned HOST_WIDE_INT offset;
8741 unsigned int extra;
8743 if (GET_CODE (x) != PLUS)
8744 return false;
8745 if (!REG_P (XEXP (x, 0)))
8746 return false;
8747 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8748 return false;
8749 if (mode_supports_dq_form (mode))
8750 return quad_address_p (x, mode, strict);
8751 if (!reg_offset_addressing_ok_p (mode))
8752 return virtual_stack_registers_memory_p (x);
8753 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8754 return true;
8755 if (!CONST_INT_P (XEXP (x, 1)))
8756 return false;
8758 offset = INTVAL (XEXP (x, 1));
8759 extra = 0;
8760 switch (mode)
8762 case E_DFmode:
8763 case E_DDmode:
8764 case E_DImode:
8765 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8766 addressing. */
8767 if (VECTOR_MEM_VSX_P (mode))
8768 return false;
8770 if (!worst_case)
8771 break;
8772 if (!TARGET_POWERPC64)
8773 extra = 4;
8774 else if (offset & 3)
8775 return false;
8776 break;
8778 case E_TFmode:
8779 case E_IFmode:
8780 case E_KFmode:
8781 case E_TDmode:
8782 case E_TImode:
8783 case E_PTImode:
8784 extra = 8;
8785 if (!worst_case)
8786 break;
8787 if (!TARGET_POWERPC64)
8788 extra = 12;
8789 else if (offset & 3)
8790 return false;
8791 break;
8793 default:
8794 break;
8797 if (TARGET_PREFIXED)
8798 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8799 else
8800 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8803 bool
8804 legitimate_indexed_address_p (rtx x, int strict)
8806 rtx op0, op1;
8808 if (GET_CODE (x) != PLUS)
8809 return false;
8811 op0 = XEXP (x, 0);
8812 op1 = XEXP (x, 1);
8814 return (REG_P (op0) && REG_P (op1)
8815 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8816 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8817 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8818 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8821 bool
8822 avoiding_indexed_address_p (machine_mode mode)
8824 unsigned int msize = GET_MODE_SIZE (mode);
8826 /* Avoid indexed addressing for modes that have non-indexed load/store
8827 instruction forms. On power10, vector pairs have an indexed
8828 form, but vector quads don't. */
8829 if (msize > 16)
8830 return msize != 32;
8832 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8835 bool
8836 legitimate_indirect_address_p (rtx x, int strict)
8838 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8841 bool
8842 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8844 if (!TARGET_MACHO || !flag_pic
8845 || mode != SImode || !MEM_P (x))
8846 return false;
8847 x = XEXP (x, 0);
8849 if (GET_CODE (x) != LO_SUM)
8850 return false;
8851 if (!REG_P (XEXP (x, 0)))
8852 return false;
8853 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8854 return false;
8855 x = XEXP (x, 1);
8857 return CONSTANT_P (x);
8860 static bool
8861 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8863 if (GET_CODE (x) != LO_SUM)
8864 return false;
8865 if (!REG_P (XEXP (x, 0)))
8866 return false;
8867 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8868 return false;
8869 /* quad word addresses are restricted, and we can't use LO_SUM. */
8870 if (mode_supports_dq_form (mode))
8871 return false;
8872 x = XEXP (x, 1);
8874 if (TARGET_ELF)
8876 bool large_toc_ok;
8878 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8879 return false;
8880 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8881 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8882 recognizes some LO_SUM addresses as valid although this
8883 function says opposite. In most cases, LRA through different
8884 transformations can generate correct code for address reloads.
8885 It cannot manage only some LO_SUM cases. So we need to add
8886 code here saying that some addresses are still valid. */
8887 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8888 && small_toc_ref (x, VOIDmode));
8889 if (TARGET_TOC && ! large_toc_ok)
8890 return false;
8891 if (GET_MODE_NUNITS (mode) != 1)
8892 return false;
8893 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8894 && !(/* ??? Assume floating point reg based on mode? */
8895 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8896 return false;
8898 return CONSTANT_P (x) || large_toc_ok;
8900 else if (TARGET_MACHO)
8902 if (GET_MODE_NUNITS (mode) != 1)
8903 return false;
8904 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8905 && !(/* see above */
8906 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8907 return false;
8908 #if TARGET_MACHO
8909 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
8910 return CONSTANT_P (x);
8911 #endif
8912 /* Macho-O PIC code from here. */
8913 if (GET_CODE (x) == CONST)
8914 x = XEXP (x, 0);
8916 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
8917 if (SYMBOL_REF_P (x))
8918 return false;
8920 /* So this is OK if the wrapped object is const. */
8921 if (GET_CODE (x) == UNSPEC
8922 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8923 return CONSTANT_P (XVECEXP (x, 0, 0));
8924 return CONSTANT_P (x);
8926 return false;
8930 /* Try machine-dependent ways of modifying an illegitimate address
8931 to be legitimate. If we find one, return the new, valid address.
8932 This is used from only one place: `memory_address' in explow.cc.
8934 OLDX is the address as it was before break_out_memory_refs was
8935 called. In some cases it is useful to look at this to decide what
8936 needs to be done.
8938 It is always safe for this function to do nothing. It exists to
8939 recognize opportunities to optimize the output.
8941 On RS/6000, first check for the sum of a register with a constant
8942 integer that is out of range. If so, generate code to add the
8943 constant with the low-order 16 bits masked to the register and force
8944 this result into another register (this can be done with `cau').
8945 Then generate an address of REG+(CONST&0xffff), allowing for the
8946 possibility of bit 16 being a one.
8948 Then check for the sum of a register and something not constant, try to
8949 load the other things into a register and return the sum. */
8951 static rtx
8952 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8953 machine_mode mode)
8955 unsigned int extra;
8957 if (!reg_offset_addressing_ok_p (mode)
8958 || mode_supports_dq_form (mode))
8960 if (virtual_stack_registers_memory_p (x))
8961 return x;
8963 /* In theory we should not be seeing addresses of the form reg+0,
8964 but just in case it is generated, optimize it away. */
8965 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8966 return force_reg (Pmode, XEXP (x, 0));
8968 /* For TImode with load/store quad, restrict addresses to just a single
8969 pointer, so it works with both GPRs and VSX registers. */
8970 /* Make sure both operands are registers. */
8971 else if (GET_CODE (x) == PLUS
8972 && (mode != TImode || !TARGET_VSX))
8973 return gen_rtx_PLUS (Pmode,
8974 force_reg (Pmode, XEXP (x, 0)),
8975 force_reg (Pmode, XEXP (x, 1)));
8976 else
8977 return force_reg (Pmode, x);
8979 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
8981 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8982 if (model != 0)
8983 return rs6000_legitimize_tls_address (x, model);
8986 extra = 0;
8987 switch (mode)
8989 case E_TFmode:
8990 case E_TDmode:
8991 case E_TImode:
8992 case E_PTImode:
8993 case E_IFmode:
8994 case E_KFmode:
8995 /* As in legitimate_offset_address_p we do not assume
8996 worst-case. The mode here is just a hint as to the registers
8997 used. A TImode is usually in gprs, but may actually be in
8998 fprs. Leave worst-case scenario for reload to handle via
8999 insn constraints. PTImode is only GPRs. */
9000 extra = 8;
9001 break;
9002 default:
9003 break;
9006 if (GET_CODE (x) == PLUS
9007 && REG_P (XEXP (x, 0))
9008 && CONST_INT_P (XEXP (x, 1))
9009 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9010 >= 0x10000 - extra))
9012 HOST_WIDE_INT high_int, low_int;
9013 rtx sum;
9014 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9015 if (low_int >= 0x8000 - extra)
9016 low_int = 0;
9017 high_int = INTVAL (XEXP (x, 1)) - low_int;
9018 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9019 gen_int_mode (high_int, Pmode)), 0);
9020 return plus_constant (Pmode, sum, low_int);
9022 else if (GET_CODE (x) == PLUS
9023 && REG_P (XEXP (x, 0))
9024 && !CONST_INT_P (XEXP (x, 1))
9025 && GET_MODE_NUNITS (mode) == 1
9026 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9027 || (/* ??? Assume floating point reg based on mode? */
9028 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9029 && !avoiding_indexed_address_p (mode))
9031 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9032 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9034 else if ((TARGET_ELF
9035 #if TARGET_MACHO
9036 || !MACHO_DYNAMIC_NO_PIC_P
9037 #endif
9039 && TARGET_32BIT
9040 && TARGET_NO_TOC_OR_PCREL
9041 && !flag_pic
9042 && !CONST_INT_P (x)
9043 && !CONST_WIDE_INT_P (x)
9044 && !CONST_DOUBLE_P (x)
9045 && CONSTANT_P (x)
9046 && GET_MODE_NUNITS (mode) == 1
9047 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9048 || (/* ??? Assume floating point reg based on mode? */
9049 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9051 rtx reg = gen_reg_rtx (Pmode);
9052 if (TARGET_ELF)
9053 emit_insn (gen_elf_high (reg, x));
9054 else
9055 emit_insn (gen_macho_high (Pmode, reg, x));
9056 return gen_rtx_LO_SUM (Pmode, reg, x);
9058 else if (TARGET_TOC
9059 && SYMBOL_REF_P (x)
9060 && constant_pool_expr_p (x)
9061 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9062 return create_TOC_reference (x, NULL_RTX);
9063 else
9064 return x;
9067 /* Debug version of rs6000_legitimize_address. */
9068 static rtx
9069 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9071 rtx ret;
9072 rtx_insn *insns;
9074 start_sequence ();
9075 ret = rs6000_legitimize_address (x, oldx, mode);
9076 insns = get_insns ();
9077 end_sequence ();
9079 if (ret != x)
9081 fprintf (stderr,
9082 "\nrs6000_legitimize_address: mode %s, old code %s, "
9083 "new code %s, modified\n",
9084 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9085 GET_RTX_NAME (GET_CODE (ret)));
9087 fprintf (stderr, "Original address:\n");
9088 debug_rtx (x);
9090 fprintf (stderr, "oldx:\n");
9091 debug_rtx (oldx);
9093 fprintf (stderr, "New address:\n");
9094 debug_rtx (ret);
9096 if (insns)
9098 fprintf (stderr, "Insns added:\n");
9099 debug_rtx_list (insns, 20);
9102 else
9104 fprintf (stderr,
9105 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9106 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9108 debug_rtx (x);
9111 if (insns)
9112 emit_insn (insns);
9114 return ret;
9117 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9118 We need to emit DTP-relative relocations. */
9120 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9121 static void
9122 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9124 switch (size)
9126 case 4:
9127 fputs ("\t.long\t", file);
9128 break;
9129 case 8:
9130 fputs (DOUBLE_INT_ASM_OP, file);
9131 break;
9132 default:
9133 gcc_unreachable ();
9135 output_addr_const (file, x);
9136 if (TARGET_ELF)
9137 fputs ("@dtprel+0x8000", file);
9140 /* Return true if X is a symbol that refers to real (rather than emulated)
9141 TLS. */
9143 static bool
9144 rs6000_real_tls_symbol_ref_p (rtx x)
9146 return (SYMBOL_REF_P (x)
9147 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9150 /* In the name of slightly smaller debug output, and to cater to
9151 general assembler lossage, recognize various UNSPEC sequences
9152 and turn them back into a direct symbol reference. */
9154 static rtx
9155 rs6000_delegitimize_address (rtx orig_x)
9157 rtx x, y, offset;
9159 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9160 encodes loading up the high part of the address of a TOC reference along
9161 with a load of a GPR using the same base register used for the load. We
9162 return the original SYMBOL_REF.
9164 (set (reg:INT1 <reg>
9165 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9167 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9168 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9169 We return the original SYMBOL_REF.
9171 (parallel [(set (reg:DI <base-reg>)
9172 (unspec:DI [(symbol_ref <symbol>)
9173 (const_int <marker>)]
9174 UNSPEC_PCREL_OPT_LD_ADDR))
9175 (set (reg:DI <load-reg>)
9176 (unspec:DI [(const_int 0)]
9177 UNSPEC_PCREL_OPT_LD_DATA))])
9179 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9180 GPR being loaded is the same as the GPR used to hold the external address.
9182 (set (reg:DI <base-reg>)
9183 (unspec:DI [(symbol_ref <symbol>)
9184 (const_int <marker>)]
9185 UNSPEC_PCREL_OPT_LD_SAME_REG))
9187 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9188 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9189 We return the original SYMBOL_REF.
9191 (parallel [(set (reg:DI <base-reg>)
9192 (unspec:DI [(symbol_ref <symbol>)
9193 (const_int <marker>)]
9194 UNSPEC_PCREL_OPT_ST_ADDR))
9195 (use (reg <store-reg>))]) */
9197 if (GET_CODE (orig_x) == UNSPEC)
9198 switch (XINT (orig_x, 1))
9200 case UNSPEC_FUSION_GPR:
9201 case UNSPEC_PCREL_OPT_LD_ADDR:
9202 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9203 case UNSPEC_PCREL_OPT_ST_ADDR:
9204 orig_x = XVECEXP (orig_x, 0, 0);
9205 break;
9207 default:
9208 break;
9211 orig_x = delegitimize_mem_from_attrs (orig_x);
9213 x = orig_x;
9214 if (MEM_P (x))
9215 x = XEXP (x, 0);
9217 y = x;
9218 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9219 y = XEXP (y, 1);
9221 offset = NULL_RTX;
9222 if (GET_CODE (y) == PLUS
9223 && GET_MODE (y) == Pmode
9224 && CONST_INT_P (XEXP (y, 1)))
9226 offset = XEXP (y, 1);
9227 y = XEXP (y, 0);
9230 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9232 y = XVECEXP (y, 0, 0);
9234 #ifdef HAVE_AS_TLS
9235 /* Do not associate thread-local symbols with the original
9236 constant pool symbol. */
9237 if (TARGET_XCOFF
9238 && SYMBOL_REF_P (y)
9239 && CONSTANT_POOL_ADDRESS_P (y)
9240 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9241 return orig_x;
9242 #endif
9244 if (offset != NULL_RTX)
9245 y = gen_rtx_PLUS (Pmode, y, offset);
9246 if (!MEM_P (orig_x))
9247 return y;
9248 else
9249 return replace_equiv_address_nv (orig_x, y);
9252 if (TARGET_MACHO
9253 && GET_CODE (orig_x) == LO_SUM
9254 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9256 y = XEXP (XEXP (orig_x, 1), 0);
9257 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9258 return XVECEXP (y, 0, 0);
9261 return orig_x;
9264 /* Return true if X shouldn't be emitted into the debug info.
9265 The linker doesn't like .toc section references from
9266 .debug_* sections, so reject .toc section symbols. */
9268 static bool
9269 rs6000_const_not_ok_for_debug_p (rtx x)
9271 if (GET_CODE (x) == UNSPEC)
9272 return true;
9273 if (SYMBOL_REF_P (x)
9274 && CONSTANT_POOL_ADDRESS_P (x))
9276 rtx c = get_pool_constant (x);
9277 machine_mode cmode = get_pool_mode (x);
9278 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9279 return true;
9282 return false;
9285 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9287 static bool
9288 rs6000_legitimate_combined_insn (rtx_insn *insn)
9290 int icode = INSN_CODE (insn);
9292 /* Reject creating doloop insns. Combine should not be allowed
9293 to create these for a number of reasons:
9294 1) In a nested loop, if combine creates one of these in an
9295 outer loop and the register allocator happens to allocate ctr
9296 to the outer loop insn, then the inner loop can't use ctr.
9297 Inner loops ought to be more highly optimized.
9298 2) Combine often wants to create one of these from what was
9299 originally a three insn sequence, first combining the three
9300 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9301 allocated ctr, the splitter takes use back to the three insn
9302 sequence. It's better to stop combine at the two insn
9303 sequence.
9304 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9305 insns, the register allocator sometimes uses floating point
9306 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9307 jump insn and output reloads are not implemented for jumps,
9308 the ctrsi/ctrdi splitters need to handle all possible cases.
9309 That's a pain, and it gets to be seriously difficult when a
9310 splitter that runs after reload needs memory to transfer from
9311 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9312 for the difficult case. It's better to not create problems
9313 in the first place. */
9314 if (icode != CODE_FOR_nothing
9315 && (icode == CODE_FOR_bdz_si
9316 || icode == CODE_FOR_bdz_di
9317 || icode == CODE_FOR_bdnz_si
9318 || icode == CODE_FOR_bdnz_di
9319 || icode == CODE_FOR_bdztf_si
9320 || icode == CODE_FOR_bdztf_di
9321 || icode == CODE_FOR_bdnztf_si
9322 || icode == CODE_FOR_bdnztf_di))
9323 return false;
9325 return true;
9328 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9330 static GTY(()) rtx rs6000_tls_symbol;
9331 static rtx
9332 rs6000_tls_get_addr (void)
9334 if (!rs6000_tls_symbol)
9335 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9337 return rs6000_tls_symbol;
9340 /* Construct the SYMBOL_REF for TLS GOT references. */
9342 static GTY(()) rtx rs6000_got_symbol;
9344 rs6000_got_sym (void)
9346 if (!rs6000_got_symbol)
9348 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9349 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9350 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9353 return rs6000_got_symbol;
9356 /* AIX Thread-Local Address support. */
9358 static rtx
9359 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9361 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9362 const char *name;
9363 char *tlsname;
9365 /* Place addr into TOC constant pool. */
9366 sym = force_const_mem (GET_MODE (addr), addr);
9368 /* Output the TOC entry and create the MEM referencing the value. */
9369 if (constant_pool_expr_p (XEXP (sym, 0))
9370 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9372 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9373 mem = gen_const_mem (Pmode, tocref);
9374 set_mem_alias_set (mem, get_TOC_alias_set ());
9376 else
9377 return sym;
9379 /* Use global-dynamic for local-dynamic. */
9380 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9381 || model == TLS_MODEL_LOCAL_DYNAMIC)
9383 /* Create new TOC reference for @m symbol. */
9384 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9385 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9386 strcpy (tlsname, "*LCM");
9387 strcat (tlsname, name + 3);
9388 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9389 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9390 tocref = create_TOC_reference (modaddr, NULL_RTX);
9391 rtx modmem = gen_const_mem (Pmode, tocref);
9392 set_mem_alias_set (modmem, get_TOC_alias_set ());
9394 rtx modreg = gen_reg_rtx (Pmode);
9395 emit_insn (gen_rtx_SET (modreg, modmem));
9397 tmpreg = gen_reg_rtx (Pmode);
9398 emit_insn (gen_rtx_SET (tmpreg, mem));
9400 dest = gen_reg_rtx (Pmode);
9401 if (TARGET_32BIT)
9402 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9403 else
9404 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9405 return dest;
9407 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9408 else if (TARGET_32BIT)
9410 tlsreg = gen_reg_rtx (SImode);
9411 emit_insn (gen_tls_get_tpointer (tlsreg));
9413 else
9415 tlsreg = gen_rtx_REG (DImode, 13);
9416 xcoff_tls_exec_model_detected = true;
9419 /* Load the TOC value into temporary register. */
9420 tmpreg = gen_reg_rtx (Pmode);
9421 emit_insn (gen_rtx_SET (tmpreg, mem));
9422 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9423 gen_rtx_MINUS (Pmode, addr, tlsreg));
9425 /* Add TOC symbol value to TLS pointer. */
9426 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9428 return dest;
9431 /* Passes the tls arg value for global dynamic and local dynamic
9432 emit_library_call_value in rs6000_legitimize_tls_address to
9433 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9434 marker relocs put on __tls_get_addr calls. */
9435 static rtx global_tlsarg;
9437 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9438 this (thread-local) address. */
9440 static rtx
9441 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9443 rtx dest, insn;
9445 if (TARGET_XCOFF)
9446 return rs6000_legitimize_tls_address_aix (addr, model);
9448 dest = gen_reg_rtx (Pmode);
9449 if (model == TLS_MODEL_LOCAL_EXEC
9450 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9452 rtx tlsreg;
9454 if (TARGET_64BIT)
9456 tlsreg = gen_rtx_REG (Pmode, 13);
9457 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9459 else
9461 tlsreg = gen_rtx_REG (Pmode, 2);
9462 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9464 emit_insn (insn);
9466 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9468 rtx tlsreg, tmp;
9470 tmp = gen_reg_rtx (Pmode);
9471 if (TARGET_64BIT)
9473 tlsreg = gen_rtx_REG (Pmode, 13);
9474 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9476 else
9478 tlsreg = gen_rtx_REG (Pmode, 2);
9479 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9481 emit_insn (insn);
9482 if (TARGET_64BIT)
9483 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9484 else
9485 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9486 emit_insn (insn);
9488 else
9490 rtx got, tga, tmp1, tmp2;
9492 /* We currently use relocations like @got@tlsgd for tls, which
9493 means the linker will handle allocation of tls entries, placing
9494 them in the .got section. So use a pointer to the .got section,
9495 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9496 or to secondary GOT sections used by 32-bit -fPIC. */
9497 if (rs6000_pcrel_p ())
9498 got = const0_rtx;
9499 else if (TARGET_64BIT)
9500 got = gen_rtx_REG (Pmode, 2);
9501 else
9503 if (flag_pic == 1)
9504 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9505 else
9507 rtx gsym = rs6000_got_sym ();
9508 got = gen_reg_rtx (Pmode);
9509 if (flag_pic == 0)
9510 rs6000_emit_move (got, gsym, Pmode);
9511 else
9513 rtx mem, lab;
9515 tmp1 = gen_reg_rtx (Pmode);
9516 tmp2 = gen_reg_rtx (Pmode);
9517 mem = gen_const_mem (Pmode, tmp1);
9518 lab = gen_label_rtx ();
9519 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9520 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9521 if (TARGET_LINK_STACK)
9522 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9523 emit_move_insn (tmp2, mem);
9524 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9525 set_unique_reg_note (last, REG_EQUAL, gsym);
9530 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9532 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9533 UNSPEC_TLSGD);
9534 tga = rs6000_tls_get_addr ();
9535 rtx argreg = gen_rtx_REG (Pmode, 3);
9536 emit_insn (gen_rtx_SET (argreg, arg));
9537 global_tlsarg = arg;
9538 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9539 global_tlsarg = NULL_RTX;
9541 /* Make a note so that the result of this call can be CSEd. */
9542 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9543 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9544 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9546 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9548 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9549 tga = rs6000_tls_get_addr ();
9550 tmp1 = gen_reg_rtx (Pmode);
9551 rtx argreg = gen_rtx_REG (Pmode, 3);
9552 emit_insn (gen_rtx_SET (argreg, arg));
9553 global_tlsarg = arg;
9554 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9555 global_tlsarg = NULL_RTX;
9557 /* Make a note so that the result of this call can be CSEd. */
9558 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9559 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9560 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9562 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9564 if (TARGET_64BIT)
9565 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9566 else
9567 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9569 else if (rs6000_tls_size == 32)
9571 tmp2 = gen_reg_rtx (Pmode);
9572 if (TARGET_64BIT)
9573 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9574 else
9575 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9576 emit_insn (insn);
9577 if (TARGET_64BIT)
9578 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9579 else
9580 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9582 else
9584 tmp2 = gen_reg_rtx (Pmode);
9585 if (TARGET_64BIT)
9586 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9587 else
9588 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9589 emit_insn (insn);
9590 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9592 emit_insn (insn);
9594 else
9596 /* IE, or 64-bit offset LE. */
9597 tmp2 = gen_reg_rtx (Pmode);
9598 if (TARGET_64BIT)
9599 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9600 else
9601 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9602 emit_insn (insn);
9603 if (rs6000_pcrel_p ())
9605 if (TARGET_64BIT)
9606 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9607 else
9608 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9610 else if (TARGET_64BIT)
9611 insn = gen_tls_tls_64 (dest, tmp2, addr);
9612 else
9613 insn = gen_tls_tls_32 (dest, tmp2, addr);
9614 emit_insn (insn);
9618 return dest;
9621 /* Only create the global variable for the stack protect guard if we are using
9622 the global flavor of that guard. */
9623 static tree
9624 rs6000_init_stack_protect_guard (void)
9626 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9627 return default_stack_protect_guard ();
9629 return NULL_TREE;
9632 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9634 static bool
9635 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9637 if (GET_CODE (x) == HIGH
9638 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9639 return true;
9641 /* A TLS symbol in the TOC cannot contain a sum. */
9642 if (GET_CODE (x) == CONST
9643 && GET_CODE (XEXP (x, 0)) == PLUS
9644 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9645 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9646 return true;
9648 /* Allow AIX TOC TLS symbols in the constant pool,
9649 but not ELF TLS symbols. */
9650 return TARGET_ELF && tls_referenced_p (x);
9653 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9654 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9655 can be addressed relative to the toc pointer. */
9657 static bool
9658 use_toc_relative_ref (rtx sym, machine_mode mode)
9660 return ((constant_pool_expr_p (sym)
9661 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9662 get_pool_mode (sym)))
9663 || (TARGET_CMODEL == CMODEL_MEDIUM
9664 && SYMBOL_REF_LOCAL_P (sym)
9665 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9668 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9669 that is a valid memory address for an instruction.
9670 The MODE argument is the machine mode for the MEM expression
9671 that wants to use this address.
9673 On the RS/6000, there are four valid address: a SYMBOL_REF that
9674 refers to a constant pool entry of an address (or the sum of it
9675 plus a constant), a short (16-bit signed) constant plus a register,
9676 the sum of two registers, or a register indirect, possibly with an
9677 auto-increment. For DFmode, DDmode and DImode with a constant plus
9678 register, we must ensure that both words are addressable or PowerPC64
9679 with offset word aligned.
9681 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9682 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9683 because adjacent memory cells are accessed by adding word-sized offsets
9684 during assembly output. */
9685 static bool
9686 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9688 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9689 bool quad_offset_p = mode_supports_dq_form (mode);
9691 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9692 return 0;
9694 /* Handle unaligned altivec lvx/stvx type addresses. */
9695 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9696 && GET_CODE (x) == AND
9697 && CONST_INT_P (XEXP (x, 1))
9698 && INTVAL (XEXP (x, 1)) == -16)
9700 x = XEXP (x, 0);
9701 return (legitimate_indirect_address_p (x, reg_ok_strict)
9702 || legitimate_indexed_address_p (x, reg_ok_strict)
9703 || virtual_stack_registers_memory_p (x));
9706 if (legitimate_indirect_address_p (x, reg_ok_strict))
9707 return 1;
9708 if (TARGET_UPDATE
9709 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9710 && mode_supports_pre_incdec_p (mode)
9711 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9712 return 1;
9714 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9715 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9716 return 1;
9718 /* Handle restricted vector d-form offsets in ISA 3.0. */
9719 if (quad_offset_p)
9721 if (quad_address_p (x, mode, reg_ok_strict))
9722 return 1;
9724 else if (virtual_stack_registers_memory_p (x))
9725 return 1;
9727 else if (reg_offset_p)
9729 if (legitimate_small_data_p (mode, x))
9730 return 1;
9731 if (legitimate_constant_pool_address_p (x, mode,
9732 reg_ok_strict || lra_in_progress))
9733 return 1;
9736 /* For TImode, if we have TImode in VSX registers, only allow register
9737 indirect addresses. This will allow the values to go in either GPRs
9738 or VSX registers without reloading. The vector types would tend to
9739 go into VSX registers, so we allow REG+REG, while TImode seems
9740 somewhat split, in that some uses are GPR based, and some VSX based. */
9741 /* FIXME: We could loosen this by changing the following to
9742 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9743 but currently we cannot allow REG+REG addressing for TImode. See
9744 PR72827 for complete details on how this ends up hoodwinking DSE. */
9745 if (mode == TImode && TARGET_VSX)
9746 return 0;
9747 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9748 if (! reg_ok_strict
9749 && reg_offset_p
9750 && GET_CODE (x) == PLUS
9751 && REG_P (XEXP (x, 0))
9752 && (XEXP (x, 0) == virtual_stack_vars_rtx
9753 || XEXP (x, 0) == arg_pointer_rtx)
9754 && CONST_INT_P (XEXP (x, 1)))
9755 return 1;
9756 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9757 return 1;
9758 if (!FLOAT128_2REG_P (mode)
9759 && (TARGET_HARD_FLOAT
9760 || TARGET_POWERPC64
9761 || (mode != DFmode && mode != DDmode))
9762 && (TARGET_POWERPC64 || mode != DImode)
9763 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9764 && mode != PTImode
9765 && !avoiding_indexed_address_p (mode)
9766 && legitimate_indexed_address_p (x, reg_ok_strict))
9767 return 1;
9768 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9769 && mode_supports_pre_modify_p (mode)
9770 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9771 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9772 reg_ok_strict, false)
9773 || (!avoiding_indexed_address_p (mode)
9774 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9775 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9777 /* There is no prefixed version of the load/store with update. */
9778 rtx addr = XEXP (x, 1);
9779 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9781 if (reg_offset_p && !quad_offset_p
9782 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9783 return 1;
9784 return 0;
9787 /* Debug version of rs6000_legitimate_address_p. */
9788 static bool
9789 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9790 bool reg_ok_strict)
9792 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9793 fprintf (stderr,
9794 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9795 "strict = %d, reload = %s, code = %s\n",
9796 ret ? "true" : "false",
9797 GET_MODE_NAME (mode),
9798 reg_ok_strict,
9799 (reload_completed ? "after" : "before"),
9800 GET_RTX_NAME (GET_CODE (x)));
9801 debug_rtx (x);
9803 return ret;
9806 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9808 static bool
9809 rs6000_mode_dependent_address_p (const_rtx addr,
9810 addr_space_t as ATTRIBUTE_UNUSED)
9812 return rs6000_mode_dependent_address_ptr (addr);
9815 /* Go to LABEL if ADDR (a legitimate address expression)
9816 has an effect that depends on the machine mode it is used for.
9818 On the RS/6000 this is true of all integral offsets (since AltiVec
9819 and VSX modes don't allow them) or is a pre-increment or decrement.
9821 ??? Except that due to conceptual problems in offsettable_address_p
9822 we can't really report the problems of integral offsets. So leave
9823 this assuming that the adjustable offset must be valid for the
9824 sub-words of a TFmode operand, which is what we had before. */
9826 static bool
9827 rs6000_mode_dependent_address (const_rtx addr)
9829 switch (GET_CODE (addr))
9831 case PLUS:
9832 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9833 is considered a legitimate address before reload, so there
9834 are no offset restrictions in that case. Note that this
9835 condition is safe in strict mode because any address involving
9836 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9837 been rejected as illegitimate. */
9838 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9839 && XEXP (addr, 0) != arg_pointer_rtx
9840 && CONST_INT_P (XEXP (addr, 1)))
9842 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9843 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9844 if (TARGET_PREFIXED)
9845 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9846 else
9847 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9849 break;
9851 case LO_SUM:
9852 /* Anything in the constant pool is sufficiently aligned that
9853 all bytes have the same high part address. */
9854 return !legitimate_constant_pool_address_p (addr, QImode, false);
9856 /* Auto-increment cases are now treated generically in recog.cc. */
9857 case PRE_MODIFY:
9858 return TARGET_UPDATE;
9860 /* AND is only allowed in Altivec loads. */
9861 case AND:
9862 return true;
9864 default:
9865 break;
9868 return false;
9871 /* Debug version of rs6000_mode_dependent_address. */
9872 static bool
9873 rs6000_debug_mode_dependent_address (const_rtx addr)
9875 bool ret = rs6000_mode_dependent_address (addr);
9877 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9878 ret ? "true" : "false");
9879 debug_rtx (addr);
9881 return ret;
9884 /* Implement FIND_BASE_TERM. */
9887 rs6000_find_base_term (rtx op)
9889 rtx base;
9891 base = op;
9892 if (GET_CODE (base) == CONST)
9893 base = XEXP (base, 0);
9894 if (GET_CODE (base) == PLUS)
9895 base = XEXP (base, 0);
9896 if (GET_CODE (base) == UNSPEC)
9897 switch (XINT (base, 1))
9899 case UNSPEC_TOCREL:
9900 case UNSPEC_MACHOPIC_OFFSET:
9901 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9902 for aliasing purposes. */
9903 return XVECEXP (base, 0, 0);
9906 return op;
9909 /* More elaborate version of recog's offsettable_memref_p predicate
9910 that works around the ??? note of rs6000_mode_dependent_address.
9911 In particular it accepts
9913 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9915 in 32-bit mode, that the recog predicate rejects. */
9917 static bool
9918 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9920 bool worst_case;
9922 if (!MEM_P (op))
9923 return false;
9925 /* First mimic offsettable_memref_p. */
9926 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9927 return true;
9929 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9930 the latter predicate knows nothing about the mode of the memory
9931 reference and, therefore, assumes that it is the largest supported
9932 mode (TFmode). As a consequence, legitimate offsettable memory
9933 references are rejected. rs6000_legitimate_offset_address_p contains
9934 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9935 at least with a little bit of help here given that we know the
9936 actual registers used. */
9937 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9938 || GET_MODE_SIZE (reg_mode) == 4);
9939 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9940 strict, worst_case);
9943 /* Determine the reassociation width to be used in reassociate_bb.
9944 This takes into account how many parallel operations we
9945 can actually do of a given type, and also the latency.
9947 int add/sub 6/cycle
9948 mul 2/cycle
9949 vect add/sub/mul 2/cycle
9950 fp add/sub/mul 2/cycle
9951 dfp 1/cycle
9954 static int
9955 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9956 machine_mode mode)
9958 switch (rs6000_tune)
9960 case PROCESSOR_POWER8:
9961 case PROCESSOR_POWER9:
9962 case PROCESSOR_POWER10:
9963 if (DECIMAL_FLOAT_MODE_P (mode))
9964 return 1;
9965 if (VECTOR_MODE_P (mode))
9966 return 4;
9967 if (INTEGRAL_MODE_P (mode))
9968 return 1;
9969 if (FLOAT_MODE_P (mode))
9970 return 4;
9971 break;
9972 default:
9973 break;
9975 return 1;
9978 /* Change register usage conditional on target flags. */
9979 static void
9980 rs6000_conditional_register_usage (void)
9982 int i;
9984 if (TARGET_DEBUG_TARGET)
9985 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9987 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9988 if (TARGET_64BIT)
9989 fixed_regs[13] = call_used_regs[13] = 1;
9991 /* Conditionally disable FPRs. */
9992 if (TARGET_SOFT_FLOAT)
9993 for (i = 32; i < 64; i++)
9994 fixed_regs[i] = call_used_regs[i] = 1;
9996 /* The TOC register is not killed across calls in a way that is
9997 visible to the compiler. */
9998 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9999 call_used_regs[2] = 0;
10001 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10002 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10004 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10005 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10006 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10008 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10009 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10010 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10012 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10013 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10015 if (!TARGET_ALTIVEC && !TARGET_VSX)
10017 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10018 fixed_regs[i] = call_used_regs[i] = 1;
10019 call_used_regs[VRSAVE_REGNO] = 1;
10022 if (TARGET_ALTIVEC || TARGET_VSX)
10023 global_regs[VSCR_REGNO] = 1;
10025 if (TARGET_ALTIVEC_ABI)
10027 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10028 call_used_regs[i] = 1;
10030 /* AIX reserves VR20:31 in non-extended ABI mode. */
10031 if (TARGET_XCOFF && !rs6000_aix_extabi)
10032 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10033 fixed_regs[i] = call_used_regs[i] = 1;
10038 /* Output insns to set DEST equal to the constant SOURCE as a series of
10039 lis, ori and shl instructions and return TRUE. */
10041 bool
10042 rs6000_emit_set_const (rtx dest, rtx source)
10044 machine_mode mode = GET_MODE (dest);
10045 rtx temp, set;
10046 rtx_insn *insn;
10047 HOST_WIDE_INT c;
10049 gcc_checking_assert (CONST_INT_P (source));
10050 c = INTVAL (source);
10051 switch (mode)
10053 case E_QImode:
10054 case E_HImode:
10055 emit_insn (gen_rtx_SET (dest, source));
10056 return true;
10058 case E_SImode:
10059 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10061 emit_insn (gen_rtx_SET (copy_rtx (temp),
10062 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10063 emit_insn (gen_rtx_SET (dest,
10064 gen_rtx_IOR (SImode, copy_rtx (temp),
10065 GEN_INT (c & 0xffff))));
10066 break;
10068 case E_DImode:
10069 if (!TARGET_POWERPC64)
10071 rtx hi, lo;
10073 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10074 DImode);
10075 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10076 DImode);
10077 emit_move_insn (hi, GEN_INT (c >> 32));
10078 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10079 emit_move_insn (lo, GEN_INT (c));
10081 else
10082 rs6000_emit_set_long_const (dest, c);
10083 break;
10085 default:
10086 gcc_unreachable ();
10089 insn = get_last_insn ();
10090 set = single_set (insn);
10091 if (! CONSTANT_P (SET_SRC (set)))
10092 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10094 return true;
10097 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10098 Output insns to set DEST equal to the constant C as a series of
10099 lis, ori and shl instructions. */
10101 static void
10102 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10104 rtx temp;
10105 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10107 ud1 = c & 0xffff;
10108 c = c >> 16;
10109 ud2 = c & 0xffff;
10110 c = c >> 16;
10111 ud3 = c & 0xffff;
10112 c = c >> 16;
10113 ud4 = c & 0xffff;
10115 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10116 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10117 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10119 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10120 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10122 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10124 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10125 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10126 if (ud1 != 0)
10127 emit_move_insn (dest,
10128 gen_rtx_IOR (DImode, copy_rtx (temp),
10129 GEN_INT (ud1)));
10131 else if (ud3 == 0 && ud4 == 0)
10133 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10135 gcc_assert (ud2 & 0x8000);
10136 emit_move_insn (copy_rtx (temp),
10137 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10138 if (ud1 != 0)
10139 emit_move_insn (copy_rtx (temp),
10140 gen_rtx_IOR (DImode, copy_rtx (temp),
10141 GEN_INT (ud1)));
10142 emit_move_insn (dest,
10143 gen_rtx_ZERO_EXTEND (DImode,
10144 gen_lowpart (SImode,
10145 copy_rtx (temp))));
10147 else if (ud1 == ud3 && ud2 == ud4)
10149 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10150 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10151 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10152 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10153 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10154 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10156 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10157 || (ud4 == 0 && ! (ud3 & 0x8000)))
10159 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10161 emit_move_insn (copy_rtx (temp),
10162 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10163 if (ud2 != 0)
10164 emit_move_insn (copy_rtx (temp),
10165 gen_rtx_IOR (DImode, copy_rtx (temp),
10166 GEN_INT (ud2)));
10167 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10168 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10169 GEN_INT (16)));
10170 if (ud1 != 0)
10171 emit_move_insn (dest,
10172 gen_rtx_IOR (DImode, copy_rtx (temp),
10173 GEN_INT (ud1)));
10175 else
10177 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10179 emit_move_insn (copy_rtx (temp),
10180 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10181 if (ud3 != 0)
10182 emit_move_insn (copy_rtx (temp),
10183 gen_rtx_IOR (DImode, copy_rtx (temp),
10184 GEN_INT (ud3)));
10186 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10187 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10188 GEN_INT (32)));
10189 if (ud2 != 0)
10190 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10191 gen_rtx_IOR (DImode, copy_rtx (temp),
10192 GEN_INT (ud2 << 16)));
10193 if (ud1 != 0)
10194 emit_move_insn (dest,
10195 gen_rtx_IOR (DImode, copy_rtx (temp),
10196 GEN_INT (ud1)));
10200 /* Helper for the following. Get rid of [r+r] memory refs
10201 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10203 static void
10204 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10206 if (MEM_P (operands[0])
10207 && !REG_P (XEXP (operands[0], 0))
10208 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10209 GET_MODE (operands[0]), false))
10210 operands[0]
10211 = replace_equiv_address (operands[0],
10212 copy_addr_to_reg (XEXP (operands[0], 0)));
10214 if (MEM_P (operands[1])
10215 && !REG_P (XEXP (operands[1], 0))
10216 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10217 GET_MODE (operands[1]), false))
10218 operands[1]
10219 = replace_equiv_address (operands[1],
10220 copy_addr_to_reg (XEXP (operands[1], 0)));
10223 /* Generate a vector of constants to permute MODE for a little-endian
10224 storage operation by swapping the two halves of a vector. */
10225 static rtvec
10226 rs6000_const_vec (machine_mode mode)
10228 int i, subparts;
10229 rtvec v;
10231 switch (mode)
10233 case E_V1TImode:
10234 subparts = 1;
10235 break;
10236 case E_V2DFmode:
10237 case E_V2DImode:
10238 subparts = 2;
10239 break;
10240 case E_V4SFmode:
10241 case E_V4SImode:
10242 subparts = 4;
10243 break;
10244 case E_V8HImode:
10245 subparts = 8;
10246 break;
10247 case E_V16QImode:
10248 subparts = 16;
10249 break;
10250 default:
10251 gcc_unreachable();
10254 v = rtvec_alloc (subparts);
10256 for (i = 0; i < subparts / 2; ++i)
10257 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10258 for (i = subparts / 2; i < subparts; ++i)
10259 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10261 return v;
10264 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10265 store operation. */
10266 void
10267 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10269 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10270 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10272 /* Scalar permutations are easier to express in integer modes rather than
10273 floating-point modes, so cast them here. We use V1TImode instead
10274 of TImode to ensure that the values don't go through GPRs. */
10275 if (FLOAT128_VECTOR_P (mode))
10277 dest = gen_lowpart (V1TImode, dest);
10278 source = gen_lowpart (V1TImode, source);
10279 mode = V1TImode;
10282 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10283 scalar. */
10284 if (mode == TImode || mode == V1TImode)
10285 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10286 GEN_INT (64))));
10287 else
10289 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10290 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10294 /* Emit a little-endian load from vector memory location SOURCE to VSX
10295 register DEST in mode MODE. The load is done with two permuting
10296 insn's that represent an lxvd2x and xxpermdi. */
10297 void
10298 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10300 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10301 V1TImode). */
10302 if (mode == TImode || mode == V1TImode)
10304 mode = V2DImode;
10305 dest = gen_lowpart (V2DImode, dest);
10306 source = adjust_address (source, V2DImode, 0);
10309 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10310 rs6000_emit_le_vsx_permute (tmp, source, mode);
10311 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10314 /* Emit a little-endian store to vector memory location DEST from VSX
10315 register SOURCE in mode MODE. The store is done with two permuting
10316 insn's that represent an xxpermdi and an stxvd2x. */
10317 void
10318 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10320 /* This should never be called after LRA. */
10321 gcc_assert (can_create_pseudo_p ());
10323 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10324 V1TImode). */
10325 if (mode == TImode || mode == V1TImode)
10327 mode = V2DImode;
10328 dest = adjust_address (dest, V2DImode, 0);
10329 source = gen_lowpart (V2DImode, source);
10332 rtx tmp = gen_reg_rtx_and_attrs (source);
10333 rs6000_emit_le_vsx_permute (tmp, source, mode);
10334 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10337 /* Emit a sequence representing a little-endian VSX load or store,
10338 moving data from SOURCE to DEST in mode MODE. This is done
10339 separately from rs6000_emit_move to ensure it is called only
10340 during expand. LE VSX loads and stores introduced later are
10341 handled with a split. The expand-time RTL generation allows
10342 us to optimize away redundant pairs of register-permutes. */
10343 void
10344 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10346 gcc_assert (!BYTES_BIG_ENDIAN
10347 && VECTOR_MEM_VSX_P (mode)
10348 && !TARGET_P9_VECTOR
10349 && !gpr_or_gpr_p (dest, source)
10350 && (MEM_P (source) ^ MEM_P (dest)));
10352 if (MEM_P (source))
10354 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10355 rs6000_emit_le_vsx_load (dest, source, mode);
10357 else
10359 if (!REG_P (source))
10360 source = force_reg (mode, source);
10361 rs6000_emit_le_vsx_store (dest, source, mode);
10365 /* Return whether a SFmode or SImode move can be done without converting one
10366 mode to another. This arrises when we have:
10368 (SUBREG:SF (REG:SI ...))
10369 (SUBREG:SI (REG:SF ...))
10371 and one of the values is in a floating point/vector register, where SFmode
10372 scalars are stored in DFmode format. */
10374 bool
10375 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10377 if (TARGET_ALLOW_SF_SUBREG)
10378 return true;
10380 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10381 return true;
10383 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10384 return true;
10386 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10387 if (SUBREG_P (dest))
10389 rtx dest_subreg = SUBREG_REG (dest);
10390 rtx src_subreg = SUBREG_REG (src);
10391 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10394 return false;
10398 /* Helper function to change moves with:
10400 (SUBREG:SF (REG:SI)) and
10401 (SUBREG:SI (REG:SF))
10403 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10404 values are stored as DFmode values in the VSX registers. We need to convert
10405 the bits before we can use a direct move or operate on the bits in the
10406 vector register as an integer type.
10408 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10410 static bool
10411 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10413 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10414 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10415 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10417 rtx inner_source = SUBREG_REG (source);
10418 machine_mode inner_mode = GET_MODE (inner_source);
10420 if (mode == SImode && inner_mode == SFmode)
10422 emit_insn (gen_movsi_from_sf (dest, inner_source));
10423 return true;
10426 if (mode == SFmode && inner_mode == SImode)
10428 emit_insn (gen_movsf_from_si (dest, inner_source));
10429 return true;
10433 return false;
10436 /* Emit a move from SOURCE to DEST in mode MODE. */
10437 void
10438 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10440 rtx operands[2];
10441 operands[0] = dest;
10442 operands[1] = source;
10444 if (TARGET_DEBUG_ADDR)
10446 fprintf (stderr,
10447 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10448 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10449 GET_MODE_NAME (mode),
10450 lra_in_progress,
10451 reload_completed,
10452 can_create_pseudo_p ());
10453 debug_rtx (dest);
10454 fprintf (stderr, "source:\n");
10455 debug_rtx (source);
10458 /* Check that we get CONST_WIDE_INT only when we should. */
10459 if (CONST_WIDE_INT_P (operands[1])
10460 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10461 gcc_unreachable ();
10463 #ifdef HAVE_AS_GNU_ATTRIBUTE
10464 /* If we use a long double type, set the flags in .gnu_attribute that say
10465 what the long double type is. This is to allow the linker's warning
10466 message for the wrong long double to be useful, even if the function does
10467 not do a call (for example, doing a 128-bit add on power9 if the long
10468 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10469 used if they aren't the default long dobule type. */
10470 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10472 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10473 rs6000_passes_float = rs6000_passes_long_double = true;
10475 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10476 rs6000_passes_float = rs6000_passes_long_double = true;
10478 #endif
10480 /* See if we need to special case SImode/SFmode SUBREG moves. */
10481 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10482 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10483 return;
10485 /* Check if GCC is setting up a block move that will end up using FP
10486 registers as temporaries. We must make sure this is acceptable. */
10487 if (MEM_P (operands[0])
10488 && MEM_P (operands[1])
10489 && mode == DImode
10490 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10491 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10492 && ! (rs6000_slow_unaligned_access (SImode,
10493 (MEM_ALIGN (operands[0]) > 32
10494 ? 32 : MEM_ALIGN (operands[0])))
10495 || rs6000_slow_unaligned_access (SImode,
10496 (MEM_ALIGN (operands[1]) > 32
10497 ? 32 : MEM_ALIGN (operands[1]))))
10498 && ! MEM_VOLATILE_P (operands [0])
10499 && ! MEM_VOLATILE_P (operands [1]))
10501 emit_move_insn (adjust_address (operands[0], SImode, 0),
10502 adjust_address (operands[1], SImode, 0));
10503 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10504 adjust_address (copy_rtx (operands[1]), SImode, 4));
10505 return;
10508 if (can_create_pseudo_p () && MEM_P (operands[0])
10509 && !gpc_reg_operand (operands[1], mode))
10510 operands[1] = force_reg (mode, operands[1]);
10512 /* Recognize the case where operand[1] is a reference to thread-local
10513 data and load its address to a register. */
10514 if (tls_referenced_p (operands[1]))
10516 enum tls_model model;
10517 rtx tmp = operands[1];
10518 rtx addend = NULL;
10520 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10522 addend = XEXP (XEXP (tmp, 0), 1);
10523 tmp = XEXP (XEXP (tmp, 0), 0);
10526 gcc_assert (SYMBOL_REF_P (tmp));
10527 model = SYMBOL_REF_TLS_MODEL (tmp);
10528 gcc_assert (model != 0);
10530 tmp = rs6000_legitimize_tls_address (tmp, model);
10531 if (addend)
10533 tmp = gen_rtx_PLUS (mode, tmp, addend);
10534 tmp = force_operand (tmp, operands[0]);
10536 operands[1] = tmp;
10539 /* 128-bit constant floating-point values on Darwin should really be loaded
10540 as two parts. However, this premature splitting is a problem when DFmode
10541 values can go into Altivec registers. */
10542 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10543 && !reg_addr[DFmode].scalar_in_vmx_p)
10545 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10546 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10547 DFmode);
10548 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10549 GET_MODE_SIZE (DFmode)),
10550 simplify_gen_subreg (DFmode, operands[1], mode,
10551 GET_MODE_SIZE (DFmode)),
10552 DFmode);
10553 return;
10556 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10557 p1:SD) if p1 is not of floating point class and p0 is spilled as
10558 we can have no analogous movsd_store for this. */
10559 if (lra_in_progress && mode == DDmode
10560 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10561 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10562 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10563 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10565 enum reg_class cl;
10566 int regno = REGNO (SUBREG_REG (operands[1]));
10568 if (!HARD_REGISTER_NUM_P (regno))
10570 cl = reg_preferred_class (regno);
10571 regno = reg_renumber[regno];
10572 if (regno < 0)
10573 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10575 if (regno >= 0 && ! FP_REGNO_P (regno))
10577 mode = SDmode;
10578 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10579 operands[1] = SUBREG_REG (operands[1]);
10582 if (lra_in_progress
10583 && mode == SDmode
10584 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10585 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10586 && (REG_P (operands[1])
10587 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10589 int regno = reg_or_subregno (operands[1]);
10590 enum reg_class cl;
10592 if (!HARD_REGISTER_NUM_P (regno))
10594 cl = reg_preferred_class (regno);
10595 gcc_assert (cl != NO_REGS);
10596 regno = reg_renumber[regno];
10597 if (regno < 0)
10598 regno = ira_class_hard_regs[cl][0];
10600 if (FP_REGNO_P (regno))
10602 if (GET_MODE (operands[0]) != DDmode)
10603 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10604 emit_insn (gen_movsd_store (operands[0], operands[1]));
10606 else if (INT_REGNO_P (regno))
10607 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10608 else
10609 gcc_unreachable();
10610 return;
10612 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10613 p:DD)) if p0 is not of floating point class and p1 is spilled as
10614 we can have no analogous movsd_load for this. */
10615 if (lra_in_progress && mode == DDmode
10616 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10617 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10618 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10619 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10621 enum reg_class cl;
10622 int regno = REGNO (SUBREG_REG (operands[0]));
10624 if (!HARD_REGISTER_NUM_P (regno))
10626 cl = reg_preferred_class (regno);
10627 regno = reg_renumber[regno];
10628 if (regno < 0)
10629 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10631 if (regno >= 0 && ! FP_REGNO_P (regno))
10633 mode = SDmode;
10634 operands[0] = SUBREG_REG (operands[0]);
10635 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10638 if (lra_in_progress
10639 && mode == SDmode
10640 && (REG_P (operands[0])
10641 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10642 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10643 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10645 int regno = reg_or_subregno (operands[0]);
10646 enum reg_class cl;
10648 if (!HARD_REGISTER_NUM_P (regno))
10650 cl = reg_preferred_class (regno);
10651 gcc_assert (cl != NO_REGS);
10652 regno = reg_renumber[regno];
10653 if (regno < 0)
10654 regno = ira_class_hard_regs[cl][0];
10656 if (FP_REGNO_P (regno))
10658 if (GET_MODE (operands[1]) != DDmode)
10659 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10660 emit_insn (gen_movsd_load (operands[0], operands[1]));
10662 else if (INT_REGNO_P (regno))
10663 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10664 else
10665 gcc_unreachable();
10666 return;
10669 /* FIXME: In the long term, this switch statement should go away
10670 and be replaced by a sequence of tests based on things like
10671 mode == Pmode. */
10672 switch (mode)
10674 case E_HImode:
10675 case E_QImode:
10676 if (CONSTANT_P (operands[1])
10677 && !CONST_INT_P (operands[1]))
10678 operands[1] = force_const_mem (mode, operands[1]);
10679 break;
10681 case E_TFmode:
10682 case E_TDmode:
10683 case E_IFmode:
10684 case E_KFmode:
10685 if (FLOAT128_2REG_P (mode))
10686 rs6000_eliminate_indexed_memrefs (operands);
10687 /* fall through */
10689 case E_DFmode:
10690 case E_DDmode:
10691 case E_SFmode:
10692 case E_SDmode:
10693 if (CONSTANT_P (operands[1])
10694 && ! easy_fp_constant (operands[1], mode))
10695 operands[1] = force_const_mem (mode, operands[1]);
10696 break;
10698 case E_V16QImode:
10699 case E_V8HImode:
10700 case E_V4SFmode:
10701 case E_V4SImode:
10702 case E_V2DFmode:
10703 case E_V2DImode:
10704 case E_V1TImode:
10705 if (CONSTANT_P (operands[1])
10706 && !easy_vector_constant (operands[1], mode))
10707 operands[1] = force_const_mem (mode, operands[1]);
10708 break;
10710 case E_OOmode:
10711 case E_XOmode:
10712 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10713 error ("%qs is an opaque type, and you cannot set it to other values",
10714 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10715 break;
10717 case E_SImode:
10718 case E_DImode:
10719 /* Use default pattern for address of ELF small data */
10720 if (TARGET_ELF
10721 && mode == Pmode
10722 && DEFAULT_ABI == ABI_V4
10723 && (SYMBOL_REF_P (operands[1])
10724 || GET_CODE (operands[1]) == CONST)
10725 && small_data_operand (operands[1], mode))
10727 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10728 return;
10731 /* Use the default pattern for loading up PC-relative addresses. */
10732 if (TARGET_PCREL && mode == Pmode
10733 && pcrel_local_or_external_address (operands[1], Pmode))
10735 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10736 return;
10739 if (DEFAULT_ABI == ABI_V4
10740 && mode == Pmode && mode == SImode
10741 && flag_pic == 1 && got_operand (operands[1], mode))
10743 emit_insn (gen_movsi_got (operands[0], operands[1]));
10744 return;
10747 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10748 && TARGET_NO_TOC_OR_PCREL
10749 && ! flag_pic
10750 && mode == Pmode
10751 && CONSTANT_P (operands[1])
10752 && GET_CODE (operands[1]) != HIGH
10753 && !CONST_INT_P (operands[1]))
10755 rtx target = (!can_create_pseudo_p ()
10756 ? operands[0]
10757 : gen_reg_rtx (mode));
10759 /* If this is a function address on -mcall-aixdesc,
10760 convert it to the address of the descriptor. */
10761 if (DEFAULT_ABI == ABI_AIX
10762 && SYMBOL_REF_P (operands[1])
10763 && XSTR (operands[1], 0)[0] == '.')
10765 const char *name = XSTR (operands[1], 0);
10766 rtx new_ref;
10767 while (*name == '.')
10768 name++;
10769 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10770 CONSTANT_POOL_ADDRESS_P (new_ref)
10771 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10772 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10773 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10774 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10775 operands[1] = new_ref;
10778 if (DEFAULT_ABI == ABI_DARWIN)
10780 #if TARGET_MACHO
10781 /* This is not PIC code, but could require the subset of
10782 indirections used by mdynamic-no-pic. */
10783 if (MACHO_DYNAMIC_NO_PIC_P)
10785 /* Take care of any required data indirection. */
10786 operands[1] = rs6000_machopic_legitimize_pic_address (
10787 operands[1], mode, operands[0]);
10788 if (operands[0] != operands[1])
10789 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10790 return;
10792 #endif
10793 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10794 emit_insn (gen_macho_low (Pmode, operands[0],
10795 target, operands[1]));
10796 return;
10799 emit_insn (gen_elf_high (target, operands[1]));
10800 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10801 return;
10804 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10805 and we have put it in the TOC, we just need to make a TOC-relative
10806 reference to it. */
10807 if (TARGET_TOC
10808 && SYMBOL_REF_P (operands[1])
10809 && use_toc_relative_ref (operands[1], mode))
10810 operands[1] = create_TOC_reference (operands[1], operands[0]);
10811 else if (mode == Pmode
10812 && CONSTANT_P (operands[1])
10813 && GET_CODE (operands[1]) != HIGH
10814 && ((REG_P (operands[0])
10815 && FP_REGNO_P (REGNO (operands[0])))
10816 || !CONST_INT_P (operands[1])
10817 || (num_insns_constant (operands[1], mode)
10818 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10819 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10820 && (TARGET_CMODEL == CMODEL_SMALL
10821 || can_create_pseudo_p ()
10822 || (REG_P (operands[0])
10823 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10826 #if TARGET_MACHO
10827 /* Darwin uses a special PIC legitimizer. */
10828 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10830 operands[1] =
10831 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10832 operands[0]);
10833 if (operands[0] != operands[1])
10834 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10835 return;
10837 #endif
10839 /* If we are to limit the number of things we put in the TOC and
10840 this is a symbol plus a constant we can add in one insn,
10841 just put the symbol in the TOC and add the constant. */
10842 if (GET_CODE (operands[1]) == CONST
10843 && TARGET_NO_SUM_IN_TOC
10844 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10845 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10846 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10847 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10848 && ! side_effects_p (operands[0]))
10850 rtx sym =
10851 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10852 rtx other = XEXP (XEXP (operands[1], 0), 1);
10854 sym = force_reg (mode, sym);
10855 emit_insn (gen_add3_insn (operands[0], sym, other));
10856 return;
10859 operands[1] = force_const_mem (mode, operands[1]);
10861 if (TARGET_TOC
10862 && SYMBOL_REF_P (XEXP (operands[1], 0))
10863 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10865 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10866 operands[0]);
10867 operands[1] = gen_const_mem (mode, tocref);
10868 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10871 break;
10873 case E_TImode:
10874 if (!VECTOR_MEM_VSX_P (TImode))
10875 rs6000_eliminate_indexed_memrefs (operands);
10876 break;
10878 case E_PTImode:
10879 rs6000_eliminate_indexed_memrefs (operands);
10880 break;
10882 default:
10883 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10886 /* Above, we may have called force_const_mem which may have returned
10887 an invalid address. If we can, fix this up; otherwise, reload will
10888 have to deal with it. */
10889 if (MEM_P (operands[1]))
10890 operands[1] = validize_mem (operands[1]);
10892 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10896 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10897 static void
10898 init_float128_ibm (machine_mode mode)
10900 if (!TARGET_XL_COMPAT)
10902 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10903 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10904 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10905 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10907 if (!TARGET_HARD_FLOAT)
10909 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10910 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10911 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10912 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10913 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10914 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10915 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10916 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10918 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10919 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10920 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10921 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10922 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10923 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10924 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10925 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10928 else
10930 set_optab_libfunc (add_optab, mode, "_xlqadd");
10931 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10932 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10933 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10936 /* Add various conversions for IFmode to use the traditional TFmode
10937 names. */
10938 if (mode == IFmode)
10940 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10941 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10942 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10943 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10944 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10945 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10947 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
10948 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
10950 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
10951 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
10953 if (TARGET_POWERPC64)
10955 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10956 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10957 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10958 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10963 /* Create a decl for either complex long double multiply or complex long double
10964 divide when long double is IEEE 128-bit floating point. We can't use
10965 __multc3 and __divtc3 because the original long double using IBM extended
10966 double used those names. The complex multiply/divide functions are encoded
10967 as builtin functions with a complex result and 4 scalar inputs. */
10969 static void
10970 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10972 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10973 name, NULL_TREE);
10975 set_builtin_decl (fncode, fndecl, true);
10977 if (TARGET_DEBUG_BUILTIN)
10978 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10980 return;
10983 /* Set up IEEE 128-bit floating point routines. Use different names if the
10984 arguments can be passed in a vector register. The historical PowerPC
10985 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10986 continue to use that if we aren't using vector registers to pass IEEE
10987 128-bit floating point. */
10989 static void
10990 init_float128_ieee (machine_mode mode)
10992 if (FLOAT128_VECTOR_P (mode))
10994 static bool complex_muldiv_init_p = false;
10996 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10997 we have clone or target attributes, this will be called a second
10998 time. We want to create the built-in function only once. */
10999 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
11001 complex_muldiv_init_p = true;
11002 built_in_function fncode_mul =
11003 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
11004 - MIN_MODE_COMPLEX_FLOAT);
11005 built_in_function fncode_div =
11006 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
11007 - MIN_MODE_COMPLEX_FLOAT);
11009 tree fntype = build_function_type_list (complex_long_double_type_node,
11010 long_double_type_node,
11011 long_double_type_node,
11012 long_double_type_node,
11013 long_double_type_node,
11014 NULL_TREE);
11016 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
11017 create_complex_muldiv ("__divkc3", fncode_div, fntype);
11020 set_optab_libfunc (add_optab, mode, "__addkf3");
11021 set_optab_libfunc (sub_optab, mode, "__subkf3");
11022 set_optab_libfunc (neg_optab, mode, "__negkf2");
11023 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11024 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11025 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11026 set_optab_libfunc (abs_optab, mode, "__abskf2");
11027 set_optab_libfunc (powi_optab, mode, "__powikf2");
11029 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11030 set_optab_libfunc (ne_optab, mode, "__nekf2");
11031 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11032 set_optab_libfunc (ge_optab, mode, "__gekf2");
11033 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11034 set_optab_libfunc (le_optab, mode, "__lekf2");
11035 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11037 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11038 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11039 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11040 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11042 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11043 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11044 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11046 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11047 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11048 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11050 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11051 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11052 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11053 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11054 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11055 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11057 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11058 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11059 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11060 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11062 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11063 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11064 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11065 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11067 if (TARGET_POWERPC64)
11069 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11070 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11071 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11072 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11076 else
11078 set_optab_libfunc (add_optab, mode, "_q_add");
11079 set_optab_libfunc (sub_optab, mode, "_q_sub");
11080 set_optab_libfunc (neg_optab, mode, "_q_neg");
11081 set_optab_libfunc (smul_optab, mode, "_q_mul");
11082 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11083 if (TARGET_PPC_GPOPT)
11084 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11086 set_optab_libfunc (eq_optab, mode, "_q_feq");
11087 set_optab_libfunc (ne_optab, mode, "_q_fne");
11088 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11089 set_optab_libfunc (ge_optab, mode, "_q_fge");
11090 set_optab_libfunc (lt_optab, mode, "_q_flt");
11091 set_optab_libfunc (le_optab, mode, "_q_fle");
11093 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11094 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11095 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11096 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11097 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11098 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11099 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11100 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11104 static void
11105 rs6000_init_libfuncs (void)
11107 /* __float128 support. */
11108 if (TARGET_FLOAT128_TYPE)
11110 init_float128_ibm (IFmode);
11111 init_float128_ieee (KFmode);
11114 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11115 if (TARGET_LONG_DOUBLE_128)
11117 if (!TARGET_IEEEQUAD)
11118 init_float128_ibm (TFmode);
11120 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11121 else
11122 init_float128_ieee (TFmode);
11126 /* Emit a potentially record-form instruction, setting DST from SRC.
11127 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11128 signed comparison of DST with zero. If DOT is 1, the generated RTL
11129 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11130 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11131 a separate COMPARE. */
11133 void
11134 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11136 if (dot == 0)
11138 emit_move_insn (dst, src);
11139 return;
11142 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11144 emit_move_insn (dst, src);
11145 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11146 return;
11149 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11150 if (dot == 1)
11152 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11153 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11155 else
11157 rtx set = gen_rtx_SET (dst, src);
11158 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11163 /* A validation routine: say whether CODE, a condition code, and MODE
11164 match. The other alternatives either don't make sense or should
11165 never be generated. */
11167 void
11168 validate_condition_mode (enum rtx_code code, machine_mode mode)
11170 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11171 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11172 && GET_MODE_CLASS (mode) == MODE_CC);
11174 /* These don't make sense. */
11175 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11176 || mode != CCUNSmode);
11178 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11179 || mode == CCUNSmode);
11181 gcc_assert (mode == CCFPmode
11182 || (code != ORDERED && code != UNORDERED
11183 && code != UNEQ && code != LTGT
11184 && code != UNGT && code != UNLT
11185 && code != UNGE && code != UNLE));
11187 /* These are invalid; the information is not there. */
11188 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11192 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11193 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11194 not zero, store there the bit offset (counted from the right) where
11195 the single stretch of 1 bits begins; and similarly for B, the bit
11196 offset where it ends. */
11198 bool
11199 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11201 unsigned HOST_WIDE_INT val = INTVAL (mask);
11202 unsigned HOST_WIDE_INT bit;
11203 int nb, ne;
11204 int n = GET_MODE_PRECISION (mode);
11206 if (mode != DImode && mode != SImode)
11207 return false;
11209 if (INTVAL (mask) >= 0)
11211 bit = val & -val;
11212 ne = exact_log2 (bit);
11213 nb = exact_log2 (val + bit);
11215 else if (val + 1 == 0)
11217 nb = n;
11218 ne = 0;
11220 else if (val & 1)
11222 val = ~val;
11223 bit = val & -val;
11224 nb = exact_log2 (bit);
11225 ne = exact_log2 (val + bit);
11227 else
11229 bit = val & -val;
11230 ne = exact_log2 (bit);
11231 if (val + bit == 0)
11232 nb = n;
11233 else
11234 nb = 0;
11237 nb--;
11239 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11240 return false;
11242 if (b)
11243 *b = nb;
11244 if (e)
11245 *e = ne;
11247 return true;
11250 bool
11251 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11253 int nb, ne;
11254 return rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0;
11257 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11258 or rldicr instruction, to implement an AND with it in mode MODE. */
11260 bool
11261 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11263 int nb, ne;
11265 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11266 return false;
11268 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11269 does not wrap. */
11270 if (mode == DImode)
11271 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11273 /* For SImode, rlwinm can do everything. */
11274 if (mode == SImode)
11275 return (nb < 32 && ne < 32);
11277 return false;
11280 /* Return the instruction template for an AND with mask in mode MODE, with
11281 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11283 const char *
11284 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11286 int nb, ne;
11288 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11289 gcc_unreachable ();
11291 if (mode == DImode && ne == 0)
11293 operands[3] = GEN_INT (63 - nb);
11294 if (dot)
11295 return "rldicl. %0,%1,0,%3";
11296 return "rldicl %0,%1,0,%3";
11299 if (mode == DImode && nb == 63)
11301 operands[3] = GEN_INT (63 - ne);
11302 if (dot)
11303 return "rldicr. %0,%1,0,%3";
11304 return "rldicr %0,%1,0,%3";
11307 if (nb < 32 && ne < 32)
11309 operands[3] = GEN_INT (31 - nb);
11310 operands[4] = GEN_INT (31 - ne);
11311 if (dot)
11312 return "rlwinm. %0,%1,0,%3,%4";
11313 return "rlwinm %0,%1,0,%3,%4";
11316 gcc_unreachable ();
11319 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11320 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11321 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11323 bool
11324 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11326 int nb, ne;
11328 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11329 return false;
11331 int n = GET_MODE_PRECISION (mode);
11332 int sh = -1;
11334 if (CONST_INT_P (XEXP (shift, 1)))
11336 sh = INTVAL (XEXP (shift, 1));
11337 if (sh < 0 || sh >= n)
11338 return false;
11341 rtx_code code = GET_CODE (shift);
11343 /* Convert any shift by 0 to a rotate, to simplify below code. */
11344 if (sh == 0)
11345 code = ROTATE;
11347 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11348 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11349 code = ASHIFT;
11350 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11352 code = LSHIFTRT;
11353 sh = n - sh;
11356 /* DImode rotates need rld*. */
11357 if (mode == DImode && code == ROTATE)
11358 return (nb == 63 || ne == 0 || ne == sh);
11360 /* SImode rotates need rlw*. */
11361 if (mode == SImode && code == ROTATE)
11362 return (nb < 32 && ne < 32 && sh < 32);
11364 /* Wrap-around masks are only okay for rotates. */
11365 if (ne > nb)
11366 return false;
11368 /* Variable shifts are only okay for rotates. */
11369 if (sh < 0)
11370 return false;
11372 /* Don't allow ASHIFT if the mask is wrong for that. */
11373 if (code == ASHIFT && ne < sh)
11374 return false;
11376 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11377 if the mask is wrong for that. */
11378 if (nb < 32 && ne < 32 && sh < 32
11379 && !(code == LSHIFTRT && nb >= 32 - sh))
11380 return true;
11382 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11383 if the mask is wrong for that. */
11384 if (code == LSHIFTRT)
11385 sh = 64 - sh;
11386 if (nb == 63 || ne == 0 || ne == sh)
11387 return !(code == LSHIFTRT && nb >= sh);
11389 return false;
11392 /* Return the instruction template for a shift with mask in mode MODE, with
11393 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11395 const char *
11396 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11398 int nb, ne;
11400 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11401 gcc_unreachable ();
11403 if (mode == DImode && ne == 0)
11405 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11406 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11407 operands[3] = GEN_INT (63 - nb);
11408 if (dot)
11409 return "rld%I2cl. %0,%1,%2,%3";
11410 return "rld%I2cl %0,%1,%2,%3";
11413 if (mode == DImode && nb == 63)
11415 operands[3] = GEN_INT (63 - ne);
11416 if (dot)
11417 return "rld%I2cr. %0,%1,%2,%3";
11418 return "rld%I2cr %0,%1,%2,%3";
11421 if (mode == DImode
11422 && GET_CODE (operands[4]) != LSHIFTRT
11423 && CONST_INT_P (operands[2])
11424 && ne == INTVAL (operands[2]))
11426 operands[3] = GEN_INT (63 - nb);
11427 if (dot)
11428 return "rld%I2c. %0,%1,%2,%3";
11429 return "rld%I2c %0,%1,%2,%3";
11432 if (nb < 32 && ne < 32)
11434 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11435 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11436 operands[3] = GEN_INT (31 - nb);
11437 operands[4] = GEN_INT (31 - ne);
11438 /* This insn can also be a 64-bit rotate with mask that really makes
11439 it just a shift right (with mask); the %h below are to adjust for
11440 that situation (shift count is >= 32 in that case). */
11441 if (dot)
11442 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11443 return "rlw%I2nm %0,%1,%h2,%3,%4";
11446 gcc_unreachable ();
11449 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11450 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11451 ASHIFT, or LSHIFTRT) in mode MODE. */
11453 bool
11454 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11456 int nb, ne;
11458 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11459 return false;
11461 int n = GET_MODE_PRECISION (mode);
11463 int sh = INTVAL (XEXP (shift, 1));
11464 if (sh < 0 || sh >= n)
11465 return false;
11467 rtx_code code = GET_CODE (shift);
11469 /* Convert any shift by 0 to a rotate, to simplify below code. */
11470 if (sh == 0)
11471 code = ROTATE;
11473 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11474 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11475 code = ASHIFT;
11476 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11478 code = LSHIFTRT;
11479 sh = n - sh;
11482 /* DImode rotates need rldimi. */
11483 if (mode == DImode && code == ROTATE)
11484 return (ne == sh);
11486 /* SImode rotates need rlwimi. */
11487 if (mode == SImode && code == ROTATE)
11488 return (nb < 32 && ne < 32 && sh < 32);
11490 /* Wrap-around masks are only okay for rotates. */
11491 if (ne > nb)
11492 return false;
11494 /* Don't allow ASHIFT if the mask is wrong for that. */
11495 if (code == ASHIFT && ne < sh)
11496 return false;
11498 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11499 if the mask is wrong for that. */
11500 if (nb < 32 && ne < 32 && sh < 32
11501 && !(code == LSHIFTRT && nb >= 32 - sh))
11502 return true;
11504 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11505 if the mask is wrong for that. */
11506 if (code == LSHIFTRT)
11507 sh = 64 - sh;
11508 if (ne == sh)
11509 return !(code == LSHIFTRT && nb >= sh);
11511 return false;
11514 /* Return the instruction template for an insert with mask in mode MODE, with
11515 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11517 const char *
11518 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11520 int nb, ne;
11522 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11523 gcc_unreachable ();
11525 /* Prefer rldimi because rlwimi is cracked. */
11526 if (TARGET_POWERPC64
11527 && (!dot || mode == DImode)
11528 && GET_CODE (operands[4]) != LSHIFTRT
11529 && ne == INTVAL (operands[2]))
11531 operands[3] = GEN_INT (63 - nb);
11532 if (dot)
11533 return "rldimi. %0,%1,%2,%3";
11534 return "rldimi %0,%1,%2,%3";
11537 if (nb < 32 && ne < 32)
11539 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11540 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11541 operands[3] = GEN_INT (31 - nb);
11542 operands[4] = GEN_INT (31 - ne);
11543 if (dot)
11544 return "rlwimi. %0,%1,%2,%3,%4";
11545 return "rlwimi %0,%1,%2,%3,%4";
11548 gcc_unreachable ();
11551 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11552 using two machine instructions. */
11554 bool
11555 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11557 /* There are two kinds of AND we can handle with two insns:
11558 1) those we can do with two rl* insn;
11559 2) ori[s];xori[s].
11561 We do not handle that last case yet. */
11563 /* If there is just one stretch of ones, we can do it. */
11564 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11565 return true;
11567 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11568 one insn, we can do the whole thing with two. */
11569 unsigned HOST_WIDE_INT val = INTVAL (c);
11570 unsigned HOST_WIDE_INT bit1 = val & -val;
11571 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11572 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11573 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11574 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11577 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11578 If EXPAND is true, split rotate-and-mask instructions we generate to
11579 their constituent parts as well (this is used during expand); if DOT
11580 is 1, make the last insn a record-form instruction clobbering the
11581 destination GPR and setting the CC reg (from operands[3]); if 2, set
11582 that GPR as well as the CC reg. */
11584 void
11585 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11587 gcc_assert (!(expand && dot));
11589 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11591 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11592 shift right. This generates better code than doing the masks without
11593 shifts, or shifting first right and then left. */
11594 int nb, ne;
11595 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11597 gcc_assert (mode == DImode);
11599 int shift = 63 - nb;
11600 if (expand)
11602 rtx tmp1 = gen_reg_rtx (DImode);
11603 rtx tmp2 = gen_reg_rtx (DImode);
11604 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11605 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11606 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11608 else
11610 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11611 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11612 emit_move_insn (operands[0], tmp);
11613 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11614 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11616 return;
11619 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11620 that does the rest. */
11621 unsigned HOST_WIDE_INT bit1 = val & -val;
11622 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11623 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11624 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11626 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11627 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11629 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11631 /* Two "no-rotate"-and-mask instructions, for SImode. */
11632 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11634 gcc_assert (mode == SImode);
11636 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11637 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11638 emit_move_insn (reg, tmp);
11639 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11640 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11641 return;
11644 gcc_assert (mode == DImode);
11646 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11647 insns; we have to do the first in SImode, because it wraps. */
11648 if (mask2 <= 0xffffffff
11649 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11651 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11652 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11653 GEN_INT (mask1));
11654 rtx reg_low = gen_lowpart (SImode, reg);
11655 emit_move_insn (reg_low, tmp);
11656 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11657 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11658 return;
11661 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11662 at the top end), rotate back and clear the other hole. */
11663 int right = exact_log2 (bit3);
11664 int left = 64 - right;
11666 /* Rotate the mask too. */
11667 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11669 if (expand)
11671 rtx tmp1 = gen_reg_rtx (DImode);
11672 rtx tmp2 = gen_reg_rtx (DImode);
11673 rtx tmp3 = gen_reg_rtx (DImode);
11674 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11675 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11676 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11677 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11679 else
11681 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11682 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11683 emit_move_insn (operands[0], tmp);
11684 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11685 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11686 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11690 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11691 for lfq and stfq insns iff the registers are hard registers. */
11694 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11696 /* We might have been passed a SUBREG. */
11697 if (!REG_P (reg1) || !REG_P (reg2))
11698 return 0;
11700 /* We might have been passed non floating point registers. */
11701 if (!FP_REGNO_P (REGNO (reg1))
11702 || !FP_REGNO_P (REGNO (reg2)))
11703 return 0;
11705 return (REGNO (reg1) == REGNO (reg2) - 1);
11708 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11709 addr1 and addr2 must be in consecutive memory locations
11710 (addr2 == addr1 + 8). */
11713 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11715 rtx addr1, addr2;
11716 unsigned int reg1, reg2;
11717 int offset1, offset2;
11719 /* The mems cannot be volatile. */
11720 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11721 return 0;
11723 addr1 = XEXP (mem1, 0);
11724 addr2 = XEXP (mem2, 0);
11726 /* Extract an offset (if used) from the first addr. */
11727 if (GET_CODE (addr1) == PLUS)
11729 /* If not a REG, return zero. */
11730 if (!REG_P (XEXP (addr1, 0)))
11731 return 0;
11732 else
11734 reg1 = REGNO (XEXP (addr1, 0));
11735 /* The offset must be constant! */
11736 if (!CONST_INT_P (XEXP (addr1, 1)))
11737 return 0;
11738 offset1 = INTVAL (XEXP (addr1, 1));
11741 else if (!REG_P (addr1))
11742 return 0;
11743 else
11745 reg1 = REGNO (addr1);
11746 /* This was a simple (mem (reg)) expression. Offset is 0. */
11747 offset1 = 0;
11750 /* And now for the second addr. */
11751 if (GET_CODE (addr2) == PLUS)
11753 /* If not a REG, return zero. */
11754 if (!REG_P (XEXP (addr2, 0)))
11755 return 0;
11756 else
11758 reg2 = REGNO (XEXP (addr2, 0));
11759 /* The offset must be constant. */
11760 if (!CONST_INT_P (XEXP (addr2, 1)))
11761 return 0;
11762 offset2 = INTVAL (XEXP (addr2, 1));
11765 else if (!REG_P (addr2))
11766 return 0;
11767 else
11769 reg2 = REGNO (addr2);
11770 /* This was a simple (mem (reg)) expression. Offset is 0. */
11771 offset2 = 0;
11774 /* Both of these must have the same base register. */
11775 if (reg1 != reg2)
11776 return 0;
11778 /* The offset for the second addr must be 8 more than the first addr. */
11779 if (offset2 != offset1 + 8)
11780 return 0;
11782 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11783 instructions. */
11784 return 1;
11787 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
11788 need to use DDmode, in all other cases we can use the same mode. */
11789 static machine_mode
11790 rs6000_secondary_memory_needed_mode (machine_mode mode)
11792 if (lra_in_progress && mode == SDmode)
11793 return DDmode;
11794 return mode;
11797 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11798 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11799 only work on the traditional altivec registers, note if an altivec register
11800 was chosen. */
11802 static enum rs6000_reg_type
11803 register_to_reg_type (rtx reg, bool *is_altivec)
11805 HOST_WIDE_INT regno;
11806 enum reg_class rclass;
11808 if (SUBREG_P (reg))
11809 reg = SUBREG_REG (reg);
11811 if (!REG_P (reg))
11812 return NO_REG_TYPE;
11814 regno = REGNO (reg);
11815 if (!HARD_REGISTER_NUM_P (regno))
11817 if (!lra_in_progress && !reload_completed)
11818 return PSEUDO_REG_TYPE;
11820 regno = true_regnum (reg);
11821 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11822 return PSEUDO_REG_TYPE;
11825 gcc_assert (regno >= 0);
11827 if (is_altivec && ALTIVEC_REGNO_P (regno))
11828 *is_altivec = true;
11830 rclass = rs6000_regno_regclass[regno];
11831 return reg_class_to_reg_type[(int)rclass];
11834 /* Helper function to return the cost of adding a TOC entry address. */
11836 static inline int
11837 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11839 int ret;
11841 if (TARGET_CMODEL != CMODEL_SMALL)
11842 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11844 else
11845 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11847 return ret;
11850 /* Helper function for rs6000_secondary_reload to determine whether the memory
11851 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11852 needs reloading. Return negative if the memory is not handled by the memory
11853 helper functions and to try a different reload method, 0 if no additional
11854 instructions are need, and positive to give the extra cost for the
11855 memory. */
11857 static int
11858 rs6000_secondary_reload_memory (rtx addr,
11859 enum reg_class rclass,
11860 machine_mode mode)
11862 int extra_cost = 0;
11863 rtx reg, and_arg, plus_arg0, plus_arg1;
11864 addr_mask_type addr_mask;
11865 const char *type = NULL;
11866 const char *fail_msg = NULL;
11868 if (GPR_REG_CLASS_P (rclass))
11869 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11871 else if (rclass == FLOAT_REGS)
11872 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11874 else if (rclass == ALTIVEC_REGS)
11875 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11877 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11878 else if (rclass == VSX_REGS)
11879 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11880 & ~RELOAD_REG_AND_M16);
11882 /* If the register allocator hasn't made up its mind yet on the register
11883 class to use, settle on defaults to use. */
11884 else if (rclass == NO_REGS)
11886 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11887 & ~RELOAD_REG_AND_M16);
11889 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11890 addr_mask &= ~(RELOAD_REG_INDEXED
11891 | RELOAD_REG_PRE_INCDEC
11892 | RELOAD_REG_PRE_MODIFY);
11895 else
11896 addr_mask = 0;
11898 /* If the register isn't valid in this register class, just return now. */
11899 if ((addr_mask & RELOAD_REG_VALID) == 0)
11901 if (TARGET_DEBUG_ADDR)
11903 fprintf (stderr,
11904 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11905 "not valid in class\n",
11906 GET_MODE_NAME (mode), reg_class_names[rclass]);
11907 debug_rtx (addr);
11910 return -1;
11913 switch (GET_CODE (addr))
11915 /* Does the register class supports auto update forms for this mode? We
11916 don't need a scratch register, since the powerpc only supports
11917 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11918 case PRE_INC:
11919 case PRE_DEC:
11920 reg = XEXP (addr, 0);
11921 if (!base_reg_operand (addr, GET_MODE (reg)))
11923 fail_msg = "no base register #1";
11924 extra_cost = -1;
11927 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11929 extra_cost = 1;
11930 type = "update";
11932 break;
11934 case PRE_MODIFY:
11935 reg = XEXP (addr, 0);
11936 plus_arg1 = XEXP (addr, 1);
11937 if (!base_reg_operand (reg, GET_MODE (reg))
11938 || GET_CODE (plus_arg1) != PLUS
11939 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11941 fail_msg = "bad PRE_MODIFY";
11942 extra_cost = -1;
11945 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11947 extra_cost = 1;
11948 type = "update";
11950 break;
11952 /* Do we need to simulate AND -16 to clear the bottom address bits used
11953 in VMX load/stores? Only allow the AND for vector sizes. */
11954 case AND:
11955 and_arg = XEXP (addr, 0);
11956 if (GET_MODE_SIZE (mode) != 16
11957 || !CONST_INT_P (XEXP (addr, 1))
11958 || INTVAL (XEXP (addr, 1)) != -16)
11960 fail_msg = "bad Altivec AND #1";
11961 extra_cost = -1;
11964 if (rclass != ALTIVEC_REGS)
11966 if (legitimate_indirect_address_p (and_arg, false))
11967 extra_cost = 1;
11969 else if (legitimate_indexed_address_p (and_arg, false))
11970 extra_cost = 2;
11972 else
11974 fail_msg = "bad Altivec AND #2";
11975 extra_cost = -1;
11978 type = "and";
11980 break;
11982 /* If this is an indirect address, make sure it is a base register. */
11983 case REG:
11984 case SUBREG:
11985 if (!legitimate_indirect_address_p (addr, false))
11987 extra_cost = 1;
11988 type = "move";
11990 break;
11992 /* If this is an indexed address, make sure the register class can handle
11993 indexed addresses for this mode. */
11994 case PLUS:
11995 plus_arg0 = XEXP (addr, 0);
11996 plus_arg1 = XEXP (addr, 1);
11998 /* (plus (plus (reg) (constant)) (constant)) is generated during
11999 push_reload processing, so handle it now. */
12000 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12002 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12004 extra_cost = 1;
12005 type = "offset";
12009 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12010 push_reload processing, so handle it now. */
12011 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12013 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12015 extra_cost = 1;
12016 type = "indexed #2";
12020 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12022 fail_msg = "no base register #2";
12023 extra_cost = -1;
12026 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12028 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12029 || !legitimate_indexed_address_p (addr, false))
12031 extra_cost = 1;
12032 type = "indexed";
12036 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12037 && CONST_INT_P (plus_arg1))
12039 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12041 extra_cost = 1;
12042 type = "vector d-form offset";
12046 /* Make sure the register class can handle offset addresses. */
12047 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12049 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12051 extra_cost = 1;
12052 type = "offset #2";
12056 else
12058 fail_msg = "bad PLUS";
12059 extra_cost = -1;
12062 break;
12064 case LO_SUM:
12065 /* Quad offsets are restricted and can't handle normal addresses. */
12066 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12068 extra_cost = -1;
12069 type = "vector d-form lo_sum";
12072 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12074 fail_msg = "bad LO_SUM";
12075 extra_cost = -1;
12078 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12080 extra_cost = 1;
12081 type = "lo_sum";
12083 break;
12085 /* Static addresses need to create a TOC entry. */
12086 case CONST:
12087 case SYMBOL_REF:
12088 case LABEL_REF:
12089 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12091 extra_cost = -1;
12092 type = "vector d-form lo_sum #2";
12095 else
12097 type = "address";
12098 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12100 break;
12102 /* TOC references look like offsetable memory. */
12103 case UNSPEC:
12104 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12106 fail_msg = "bad UNSPEC";
12107 extra_cost = -1;
12110 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12112 extra_cost = -1;
12113 type = "vector d-form lo_sum #3";
12116 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12118 extra_cost = 1;
12119 type = "toc reference";
12121 break;
12123 default:
12125 fail_msg = "bad address";
12126 extra_cost = -1;
12130 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12132 if (extra_cost < 0)
12133 fprintf (stderr,
12134 "rs6000_secondary_reload_memory error: mode = %s, "
12135 "class = %s, addr_mask = '%s', %s\n",
12136 GET_MODE_NAME (mode),
12137 reg_class_names[rclass],
12138 rs6000_debug_addr_mask (addr_mask, false),
12139 (fail_msg != NULL) ? fail_msg : "<bad address>");
12141 else
12142 fprintf (stderr,
12143 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12144 "addr_mask = '%s', extra cost = %d, %s\n",
12145 GET_MODE_NAME (mode),
12146 reg_class_names[rclass],
12147 rs6000_debug_addr_mask (addr_mask, false),
12148 extra_cost,
12149 (type) ? type : "<none>");
12151 debug_rtx (addr);
12154 return extra_cost;
12157 /* Helper function for rs6000_secondary_reload to return true if a move to a
12158 different register classe is really a simple move. */
12160 static bool
12161 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12162 enum rs6000_reg_type from_type,
12163 machine_mode mode)
12165 int size = GET_MODE_SIZE (mode);
12167 /* Add support for various direct moves available. In this function, we only
12168 look at cases where we don't need any extra registers, and one or more
12169 simple move insns are issued. Originally small integers are not allowed
12170 in FPR/VSX registers. Single precision binary floating is not a simple
12171 move because we need to convert to the single precision memory layout.
12172 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12173 need special direct move handling, which we do not support yet. */
12174 if (TARGET_DIRECT_MOVE
12175 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12176 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12178 if (TARGET_POWERPC64)
12180 /* ISA 2.07: MTVSRD or MVFVSRD. */
12181 if (size == 8)
12182 return true;
12184 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12185 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12186 return true;
12189 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12190 if (TARGET_P8_VECTOR)
12192 if (mode == SImode)
12193 return true;
12195 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12196 return true;
12199 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12200 if (mode == SDmode)
12201 return true;
12204 /* Move to/from SPR. */
12205 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12206 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12207 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12208 return true;
12210 return false;
12213 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12214 special direct moves that involve allocating an extra register, return the
12215 insn code of the helper function if there is such a function or
12216 CODE_FOR_nothing if not. */
12218 static bool
12219 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12220 enum rs6000_reg_type from_type,
12221 machine_mode mode,
12222 secondary_reload_info *sri,
12223 bool altivec_p)
12225 bool ret = false;
12226 enum insn_code icode = CODE_FOR_nothing;
12227 int cost = 0;
12228 int size = GET_MODE_SIZE (mode);
12230 if (TARGET_POWERPC64 && size == 16)
12232 /* Handle moving 128-bit values from GPRs to VSX point registers on
12233 ISA 2.07 (power8, power9) when running in 64-bit mode using
12234 XXPERMDI to glue the two 64-bit values back together. */
12235 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12237 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12238 icode = reg_addr[mode].reload_vsx_gpr;
12241 /* Handle moving 128-bit values from VSX point registers to GPRs on
12242 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12243 bottom 64-bit value. */
12244 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12246 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12247 icode = reg_addr[mode].reload_gpr_vsx;
12251 else if (TARGET_POWERPC64 && mode == SFmode)
12253 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12255 cost = 3; /* xscvdpspn, mfvsrd, and. */
12256 icode = reg_addr[mode].reload_gpr_vsx;
12259 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12261 cost = 2; /* mtvsrz, xscvspdpn. */
12262 icode = reg_addr[mode].reload_vsx_gpr;
12266 else if (!TARGET_POWERPC64 && size == 8)
12268 /* Handle moving 64-bit values from GPRs to floating point registers on
12269 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12270 32-bit values back together. Altivec register classes must be handled
12271 specially since a different instruction is used, and the secondary
12272 reload support requires a single instruction class in the scratch
12273 register constraint. However, right now TFmode is not allowed in
12274 Altivec registers, so the pattern will never match. */
12275 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12277 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12278 icode = reg_addr[mode].reload_fpr_gpr;
12282 if (icode != CODE_FOR_nothing)
12284 ret = true;
12285 if (sri)
12287 sri->icode = icode;
12288 sri->extra_cost = cost;
12292 return ret;
12295 /* Return whether a move between two register classes can be done either
12296 directly (simple move) or via a pattern that uses a single extra temporary
12297 (using ISA 2.07's direct move in this case. */
12299 static bool
12300 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12301 enum rs6000_reg_type from_type,
12302 machine_mode mode,
12303 secondary_reload_info *sri,
12304 bool altivec_p)
12306 /* Fall back to load/store reloads if either type is not a register. */
12307 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12308 return false;
12310 /* If we haven't allocated registers yet, assume the move can be done for the
12311 standard register types. */
12312 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12313 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12314 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12315 return true;
12317 /* Moves to the same set of registers is a simple move for non-specialized
12318 registers. */
12319 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12320 return true;
12322 /* Check whether a simple move can be done directly. */
12323 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12325 if (sri)
12327 sri->icode = CODE_FOR_nothing;
12328 sri->extra_cost = 0;
12330 return true;
12333 /* Now check if we can do it in a few steps. */
12334 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12335 altivec_p);
12338 /* Inform reload about cases where moving X with a mode MODE to a register in
12339 RCLASS requires an extra scratch or immediate register. Return the class
12340 needed for the immediate register.
12342 For VSX and Altivec, we may need a register to convert sp+offset into
12343 reg+sp.
12345 For misaligned 64-bit gpr loads and stores we need a register to
12346 convert an offset address to indirect. */
12348 static reg_class_t
12349 rs6000_secondary_reload (bool in_p,
12350 rtx x,
12351 reg_class_t rclass_i,
12352 machine_mode mode,
12353 secondary_reload_info *sri)
12355 enum reg_class rclass = (enum reg_class) rclass_i;
12356 reg_class_t ret = ALL_REGS;
12357 enum insn_code icode;
12358 bool default_p = false;
12359 bool done_p = false;
12361 /* Allow subreg of memory before/during reload. */
12362 bool memory_p = (MEM_P (x)
12363 || (!reload_completed && SUBREG_P (x)
12364 && MEM_P (SUBREG_REG (x))));
12366 sri->icode = CODE_FOR_nothing;
12367 sri->t_icode = CODE_FOR_nothing;
12368 sri->extra_cost = 0;
12369 icode = ((in_p)
12370 ? reg_addr[mode].reload_load
12371 : reg_addr[mode].reload_store);
12373 if (REG_P (x) || register_operand (x, mode))
12375 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12376 bool altivec_p = (rclass == ALTIVEC_REGS);
12377 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12379 if (!in_p)
12380 std::swap (to_type, from_type);
12382 /* Can we do a direct move of some sort? */
12383 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12384 altivec_p))
12386 icode = (enum insn_code)sri->icode;
12387 default_p = false;
12388 done_p = true;
12389 ret = NO_REGS;
12393 /* Make sure 0.0 is not reloaded or forced into memory. */
12394 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12396 ret = NO_REGS;
12397 default_p = false;
12398 done_p = true;
12401 /* If this is a scalar floating point value and we want to load it into the
12402 traditional Altivec registers, do it via a move via a traditional floating
12403 point register, unless we have D-form addressing. Also make sure that
12404 non-zero constants use a FPR. */
12405 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12406 && !mode_supports_vmx_dform (mode)
12407 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12408 && (memory_p || CONST_DOUBLE_P (x)))
12410 ret = FLOAT_REGS;
12411 default_p = false;
12412 done_p = true;
12415 /* Handle reload of load/stores if we have reload helper functions. */
12416 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12418 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12419 mode);
12421 if (extra_cost >= 0)
12423 done_p = true;
12424 ret = NO_REGS;
12425 if (extra_cost > 0)
12427 sri->extra_cost = extra_cost;
12428 sri->icode = icode;
12433 /* Handle unaligned loads and stores of integer registers. */
12434 if (!done_p && TARGET_POWERPC64
12435 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12436 && memory_p
12437 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12439 rtx addr = XEXP (x, 0);
12440 rtx off = address_offset (addr);
12442 if (off != NULL_RTX)
12444 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12445 unsigned HOST_WIDE_INT offset = INTVAL (off);
12447 /* We need a secondary reload when our legitimate_address_p
12448 says the address is good (as otherwise the entire address
12449 will be reloaded), and the offset is not a multiple of
12450 four or we have an address wrap. Address wrap will only
12451 occur for LO_SUMs since legitimate_offset_address_p
12452 rejects addresses for 16-byte mems that will wrap. */
12453 if (GET_CODE (addr) == LO_SUM
12454 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12455 && ((offset & 3) != 0
12456 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12457 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12458 && (offset & 3) != 0))
12460 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12461 if (in_p)
12462 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12463 : CODE_FOR_reload_di_load);
12464 else
12465 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12466 : CODE_FOR_reload_di_store);
12467 sri->extra_cost = 2;
12468 ret = NO_REGS;
12469 done_p = true;
12471 else
12472 default_p = true;
12474 else
12475 default_p = true;
12478 if (!done_p && !TARGET_POWERPC64
12479 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12480 && memory_p
12481 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12483 rtx addr = XEXP (x, 0);
12484 rtx off = address_offset (addr);
12486 if (off != NULL_RTX)
12488 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12489 unsigned HOST_WIDE_INT offset = INTVAL (off);
12491 /* We need a secondary reload when our legitimate_address_p
12492 says the address is good (as otherwise the entire address
12493 will be reloaded), and we have a wrap.
12495 legitimate_lo_sum_address_p allows LO_SUM addresses to
12496 have any offset so test for wrap in the low 16 bits.
12498 legitimate_offset_address_p checks for the range
12499 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12500 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12501 [0x7ff4,0x7fff] respectively, so test for the
12502 intersection of these ranges, [0x7ffc,0x7fff] and
12503 [0x7ff4,0x7ff7] respectively.
12505 Note that the address we see here may have been
12506 manipulated by legitimize_reload_address. */
12507 if (GET_CODE (addr) == LO_SUM
12508 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12509 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12511 if (in_p)
12512 sri->icode = CODE_FOR_reload_si_load;
12513 else
12514 sri->icode = CODE_FOR_reload_si_store;
12515 sri->extra_cost = 2;
12516 ret = NO_REGS;
12517 done_p = true;
12519 else
12520 default_p = true;
12522 else
12523 default_p = true;
12526 if (!done_p)
12527 default_p = true;
12529 if (default_p)
12530 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12532 gcc_assert (ret != ALL_REGS);
12534 if (TARGET_DEBUG_ADDR)
12536 fprintf (stderr,
12537 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12538 "mode = %s",
12539 reg_class_names[ret],
12540 in_p ? "true" : "false",
12541 reg_class_names[rclass],
12542 GET_MODE_NAME (mode));
12544 if (reload_completed)
12545 fputs (", after reload", stderr);
12547 if (!done_p)
12548 fputs (", done_p not set", stderr);
12550 if (default_p)
12551 fputs (", default secondary reload", stderr);
12553 if (sri->icode != CODE_FOR_nothing)
12554 fprintf (stderr, ", reload func = %s, extra cost = %d",
12555 insn_data[sri->icode].name, sri->extra_cost);
12557 else if (sri->extra_cost > 0)
12558 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12560 fputs ("\n", stderr);
12561 debug_rtx (x);
12564 return ret;
12567 /* Better tracing for rs6000_secondary_reload_inner. */
12569 static void
12570 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12571 bool store_p)
12573 rtx set, clobber;
12575 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12577 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12578 store_p ? "store" : "load");
12580 if (store_p)
12581 set = gen_rtx_SET (mem, reg);
12582 else
12583 set = gen_rtx_SET (reg, mem);
12585 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12586 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12589 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12590 ATTRIBUTE_NORETURN;
12592 static void
12593 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12594 bool store_p)
12596 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12597 gcc_unreachable ();
12600 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12601 reload helper functions. These were identified in
12602 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12603 reload, it calls the insns:
12604 reload_<RELOAD:mode>_<P:mptrsize>_store
12605 reload_<RELOAD:mode>_<P:mptrsize>_load
12607 which in turn calls this function, to do whatever is necessary to create
12608 valid addresses. */
12610 void
12611 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12613 int regno = true_regnum (reg);
12614 machine_mode mode = GET_MODE (reg);
12615 addr_mask_type addr_mask;
12616 rtx addr;
12617 rtx new_addr;
12618 rtx op_reg, op0, op1;
12619 rtx and_op;
12620 rtx cc_clobber;
12621 rtvec rv;
12623 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12624 || !base_reg_operand (scratch, GET_MODE (scratch)))
12625 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12627 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12628 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12630 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12631 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12633 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12634 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12636 else
12637 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12639 /* Make sure the mode is valid in this register class. */
12640 if ((addr_mask & RELOAD_REG_VALID) == 0)
12641 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12643 if (TARGET_DEBUG_ADDR)
12644 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12646 new_addr = addr = XEXP (mem, 0);
12647 switch (GET_CODE (addr))
12649 /* Does the register class support auto update forms for this mode? If
12650 not, do the update now. We don't need a scratch register, since the
12651 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12652 case PRE_INC:
12653 case PRE_DEC:
12654 op_reg = XEXP (addr, 0);
12655 if (!base_reg_operand (op_reg, Pmode))
12656 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12658 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12660 int delta = GET_MODE_SIZE (mode);
12661 if (GET_CODE (addr) == PRE_DEC)
12662 delta = -delta;
12663 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12664 new_addr = op_reg;
12666 break;
12668 case PRE_MODIFY:
12669 op0 = XEXP (addr, 0);
12670 op1 = XEXP (addr, 1);
12671 if (!base_reg_operand (op0, Pmode)
12672 || GET_CODE (op1) != PLUS
12673 || !rtx_equal_p (op0, XEXP (op1, 0)))
12674 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12676 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12678 emit_insn (gen_rtx_SET (op0, op1));
12679 new_addr = reg;
12681 break;
12683 /* Do we need to simulate AND -16 to clear the bottom address bits used
12684 in VMX load/stores? */
12685 case AND:
12686 op0 = XEXP (addr, 0);
12687 op1 = XEXP (addr, 1);
12688 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12690 if (REG_P (op0) || SUBREG_P (op0))
12691 op_reg = op0;
12693 else if (GET_CODE (op1) == PLUS)
12695 emit_insn (gen_rtx_SET (scratch, op1));
12696 op_reg = scratch;
12699 else
12700 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12702 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12703 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12704 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12705 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12706 new_addr = scratch;
12708 break;
12710 /* If this is an indirect address, make sure it is a base register. */
12711 case REG:
12712 case SUBREG:
12713 if (!base_reg_operand (addr, GET_MODE (addr)))
12715 emit_insn (gen_rtx_SET (scratch, addr));
12716 new_addr = scratch;
12718 break;
12720 /* If this is an indexed address, make sure the register class can handle
12721 indexed addresses for this mode. */
12722 case PLUS:
12723 op0 = XEXP (addr, 0);
12724 op1 = XEXP (addr, 1);
12725 if (!base_reg_operand (op0, Pmode))
12726 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12728 else if (int_reg_operand (op1, Pmode))
12730 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12732 emit_insn (gen_rtx_SET (scratch, addr));
12733 new_addr = scratch;
12737 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12739 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12740 || !quad_address_p (addr, mode, false))
12742 emit_insn (gen_rtx_SET (scratch, addr));
12743 new_addr = scratch;
12747 /* Make sure the register class can handle offset addresses. */
12748 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12750 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12752 emit_insn (gen_rtx_SET (scratch, addr));
12753 new_addr = scratch;
12757 else
12758 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12760 break;
12762 case LO_SUM:
12763 op0 = XEXP (addr, 0);
12764 op1 = XEXP (addr, 1);
12765 if (!base_reg_operand (op0, Pmode))
12766 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12768 else if (int_reg_operand (op1, Pmode))
12770 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12772 emit_insn (gen_rtx_SET (scratch, addr));
12773 new_addr = scratch;
12777 /* Quad offsets are restricted and can't handle normal addresses. */
12778 else if (mode_supports_dq_form (mode))
12780 emit_insn (gen_rtx_SET (scratch, addr));
12781 new_addr = scratch;
12784 /* Make sure the register class can handle offset addresses. */
12785 else if (legitimate_lo_sum_address_p (mode, addr, false))
12787 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12789 emit_insn (gen_rtx_SET (scratch, addr));
12790 new_addr = scratch;
12794 else
12795 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12797 break;
12799 case SYMBOL_REF:
12800 case CONST:
12801 case LABEL_REF:
12802 rs6000_emit_move (scratch, addr, Pmode);
12803 new_addr = scratch;
12804 break;
12806 default:
12807 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12810 /* Adjust the address if it changed. */
12811 if (addr != new_addr)
12813 mem = replace_equiv_address_nv (mem, new_addr);
12814 if (TARGET_DEBUG_ADDR)
12815 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12818 /* Now create the move. */
12819 if (store_p)
12820 emit_insn (gen_rtx_SET (mem, reg));
12821 else
12822 emit_insn (gen_rtx_SET (reg, mem));
12824 return;
12827 /* Convert reloads involving 64-bit gprs and misaligned offset
12828 addressing, or multiple 32-bit gprs and offsets that are too large,
12829 to use indirect addressing. */
12831 void
12832 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12834 int regno = true_regnum (reg);
12835 enum reg_class rclass;
12836 rtx addr;
12837 rtx scratch_or_premodify = scratch;
12839 if (TARGET_DEBUG_ADDR)
12841 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12842 store_p ? "store" : "load");
12843 fprintf (stderr, "reg:\n");
12844 debug_rtx (reg);
12845 fprintf (stderr, "mem:\n");
12846 debug_rtx (mem);
12847 fprintf (stderr, "scratch:\n");
12848 debug_rtx (scratch);
12851 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12852 gcc_assert (MEM_P (mem));
12853 rclass = REGNO_REG_CLASS (regno);
12854 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12855 addr = XEXP (mem, 0);
12857 if (GET_CODE (addr) == PRE_MODIFY)
12859 gcc_assert (REG_P (XEXP (addr, 0))
12860 && GET_CODE (XEXP (addr, 1)) == PLUS
12861 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12862 scratch_or_premodify = XEXP (addr, 0);
12863 addr = XEXP (addr, 1);
12865 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12867 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12869 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12871 /* Now create the move. */
12872 if (store_p)
12873 emit_insn (gen_rtx_SET (mem, reg));
12874 else
12875 emit_insn (gen_rtx_SET (reg, mem));
12877 return;
12880 /* Given an rtx X being reloaded into a reg required to be
12881 in class CLASS, return the class of reg to actually use.
12882 In general this is just CLASS; but on some machines
12883 in some cases it is preferable to use a more restrictive class.
12885 On the RS/6000, we have to return NO_REGS when we want to reload a
12886 floating-point CONST_DOUBLE to force it to be copied to memory.
12888 We also don't want to reload integer values into floating-point
12889 registers if we can at all help it. In fact, this can
12890 cause reload to die, if it tries to generate a reload of CTR
12891 into a FP register and discovers it doesn't have the memory location
12892 required.
12894 ??? Would it be a good idea to have reload do the converse, that is
12895 try to reload floating modes into FP registers if possible?
12898 static enum reg_class
12899 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12901 machine_mode mode = GET_MODE (x);
12902 bool is_constant = CONSTANT_P (x);
12904 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12905 reload class for it. */
12906 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12907 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12908 return NO_REGS;
12910 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12911 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12912 return NO_REGS;
12914 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12915 the reloading of address expressions using PLUS into floating point
12916 registers. */
12917 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12919 if (is_constant)
12921 /* Zero is always allowed in all VSX registers. */
12922 if (x == CONST0_RTX (mode))
12923 return rclass;
12925 /* If this is a vector constant that can be formed with a few Altivec
12926 instructions, we want altivec registers. */
12927 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12928 return ALTIVEC_REGS;
12930 /* If this is an integer constant that can easily be loaded into
12931 vector registers, allow it. */
12932 if (CONST_INT_P (x))
12934 HOST_WIDE_INT value = INTVAL (x);
12936 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12937 2.06 can generate it in the Altivec registers with
12938 VSPLTI<x>. */
12939 if (value == -1)
12941 if (TARGET_P8_VECTOR)
12942 return rclass;
12943 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12944 return ALTIVEC_REGS;
12945 else
12946 return NO_REGS;
12949 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12950 a sign extend in the Altivec registers. */
12951 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12952 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12953 return ALTIVEC_REGS;
12956 /* Force constant to memory. */
12957 return NO_REGS;
12960 /* D-form addressing can easily reload the value. */
12961 if (mode_supports_vmx_dform (mode)
12962 || mode_supports_dq_form (mode))
12963 return rclass;
12965 /* If this is a scalar floating point value and we don't have D-form
12966 addressing, prefer the traditional floating point registers so that we
12967 can use D-form (register+offset) addressing. */
12968 if (rclass == VSX_REGS
12969 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12970 return FLOAT_REGS;
12972 /* Prefer the Altivec registers if Altivec is handling the vector
12973 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12974 loads. */
12975 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12976 || mode == V1TImode)
12977 return ALTIVEC_REGS;
12979 return rclass;
12982 if (is_constant || GET_CODE (x) == PLUS)
12984 if (reg_class_subset_p (GENERAL_REGS, rclass))
12985 return GENERAL_REGS;
12986 if (reg_class_subset_p (BASE_REGS, rclass))
12987 return BASE_REGS;
12988 return NO_REGS;
12991 /* For the vector pair and vector quad modes, prefer their natural register
12992 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12993 the GPR registers. */
12994 if (rclass == GEN_OR_FLOAT_REGS)
12996 if (mode == OOmode)
12997 return VSX_REGS;
12999 if (mode == XOmode)
13000 return FLOAT_REGS;
13002 if (GET_MODE_CLASS (mode) == MODE_INT)
13003 return GENERAL_REGS;
13006 return rclass;
13009 /* Debug version of rs6000_preferred_reload_class. */
13010 static enum reg_class
13011 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13013 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13015 fprintf (stderr,
13016 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13017 "mode = %s, x:\n",
13018 reg_class_names[ret], reg_class_names[rclass],
13019 GET_MODE_NAME (GET_MODE (x)));
13020 debug_rtx (x);
13022 return ret;
13025 /* If we are copying between FP or AltiVec registers and anything else, we need
13026 a memory location. The exception is when we are targeting ppc64 and the
13027 move to/from fpr to gpr instructions are available. Also, under VSX, you
13028 can copy vector registers from the FP register set to the Altivec register
13029 set and vice versa. */
13031 static bool
13032 rs6000_secondary_memory_needed (machine_mode mode,
13033 reg_class_t from_class,
13034 reg_class_t to_class)
13036 enum rs6000_reg_type from_type, to_type;
13037 bool altivec_p = ((from_class == ALTIVEC_REGS)
13038 || (to_class == ALTIVEC_REGS));
13040 /* If a simple/direct move is available, we don't need secondary memory */
13041 from_type = reg_class_to_reg_type[(int)from_class];
13042 to_type = reg_class_to_reg_type[(int)to_class];
13044 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13045 (secondary_reload_info *)0, altivec_p))
13046 return false;
13048 /* If we have a floating point or vector register class, we need to use
13049 memory to transfer the data. */
13050 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13051 return true;
13053 return false;
13056 /* Debug version of rs6000_secondary_memory_needed. */
13057 static bool
13058 rs6000_debug_secondary_memory_needed (machine_mode mode,
13059 reg_class_t from_class,
13060 reg_class_t to_class)
13062 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13064 fprintf (stderr,
13065 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13066 "to_class = %s, mode = %s\n",
13067 ret ? "true" : "false",
13068 reg_class_names[from_class],
13069 reg_class_names[to_class],
13070 GET_MODE_NAME (mode));
13072 return ret;
13075 /* Return the register class of a scratch register needed to copy IN into
13076 or out of a register in RCLASS in MODE. If it can be done directly,
13077 NO_REGS is returned. */
13079 static enum reg_class
13080 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13081 rtx in)
13083 int regno;
13085 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13086 #if TARGET_MACHO
13087 && MACHOPIC_INDIRECT
13088 #endif
13091 /* We cannot copy a symbolic operand directly into anything
13092 other than BASE_REGS for TARGET_ELF. So indicate that a
13093 register from BASE_REGS is needed as an intermediate
13094 register.
13096 On Darwin, pic addresses require a load from memory, which
13097 needs a base register. */
13098 if (rclass != BASE_REGS
13099 && (SYMBOL_REF_P (in)
13100 || GET_CODE (in) == HIGH
13101 || GET_CODE (in) == LABEL_REF
13102 || GET_CODE (in) == CONST))
13103 return BASE_REGS;
13106 if (REG_P (in))
13108 regno = REGNO (in);
13109 if (!HARD_REGISTER_NUM_P (regno))
13111 regno = true_regnum (in);
13112 if (!HARD_REGISTER_NUM_P (regno))
13113 regno = -1;
13116 else if (SUBREG_P (in))
13118 regno = true_regnum (in);
13119 if (!HARD_REGISTER_NUM_P (regno))
13120 regno = -1;
13122 else
13123 regno = -1;
13125 /* If we have VSX register moves, prefer moving scalar values between
13126 Altivec registers and GPR by going via an FPR (and then via memory)
13127 instead of reloading the secondary memory address for Altivec moves. */
13128 if (TARGET_VSX
13129 && GET_MODE_SIZE (mode) < 16
13130 && !mode_supports_vmx_dform (mode)
13131 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13132 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13133 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13134 && (regno >= 0 && INT_REGNO_P (regno)))))
13135 return FLOAT_REGS;
13137 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13138 into anything. */
13139 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13140 || (regno >= 0 && INT_REGNO_P (regno)))
13141 return NO_REGS;
13143 /* Constants, memory, and VSX registers can go into VSX registers (both the
13144 traditional floating point and the altivec registers). */
13145 if (rclass == VSX_REGS
13146 && (regno == -1 || VSX_REGNO_P (regno)))
13147 return NO_REGS;
13149 /* Constants, memory, and FP registers can go into FP registers. */
13150 if ((regno == -1 || FP_REGNO_P (regno))
13151 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13152 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13154 /* Memory, and AltiVec registers can go into AltiVec registers. */
13155 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13156 && rclass == ALTIVEC_REGS)
13157 return NO_REGS;
13159 /* We can copy among the CR registers. */
13160 if ((rclass == CR_REGS || rclass == CR0_REGS)
13161 && regno >= 0 && CR_REGNO_P (regno))
13162 return NO_REGS;
13164 /* Otherwise, we need GENERAL_REGS. */
13165 return GENERAL_REGS;
13168 /* Debug version of rs6000_secondary_reload_class. */
13169 static enum reg_class
13170 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13171 machine_mode mode, rtx in)
13173 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13174 fprintf (stderr,
13175 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13176 "mode = %s, input rtx:\n",
13177 reg_class_names[ret], reg_class_names[rclass],
13178 GET_MODE_NAME (mode));
13179 debug_rtx (in);
13181 return ret;
13184 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13186 static bool
13187 rs6000_can_change_mode_class (machine_mode from,
13188 machine_mode to,
13189 reg_class_t rclass)
13191 unsigned from_size = GET_MODE_SIZE (from);
13192 unsigned to_size = GET_MODE_SIZE (to);
13194 if (from_size != to_size)
13196 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13198 if (reg_classes_intersect_p (xclass, rclass))
13200 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13201 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13202 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13203 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13205 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13206 single register under VSX because the scalar part of the register
13207 is in the upper 64-bits, and not the lower 64-bits. Types like
13208 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13209 IEEE floating point can't overlap, and neither can small
13210 values. */
13212 if (to_float128_vector_p && from_float128_vector_p)
13213 return true;
13215 else if (to_float128_vector_p || from_float128_vector_p)
13216 return false;
13218 /* TDmode in floating-mode registers must always go into a register
13219 pair with the most significant word in the even-numbered register
13220 to match ISA requirements. In little-endian mode, this does not
13221 match subreg numbering, so we cannot allow subregs. */
13222 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13223 return false;
13225 /* Allow SD<->DD changes, since SDmode values are stored in
13226 the low half of the DDmode, just like target-independent
13227 code expects. We need to allow at least SD->DD since
13228 rs6000_secondary_memory_needed_mode asks for that change
13229 to be made for SD reloads. */
13230 if ((to == DDmode && from == SDmode)
13231 || (to == SDmode && from == DDmode))
13232 return true;
13234 if (from_size < 8 || to_size < 8)
13235 return false;
13237 if (from_size == 8 && (8 * to_nregs) != to_size)
13238 return false;
13240 if (to_size == 8 && (8 * from_nregs) != from_size)
13241 return false;
13243 return true;
13245 else
13246 return true;
13249 /* Since the VSX register set includes traditional floating point registers
13250 and altivec registers, just check for the size being different instead of
13251 trying to check whether the modes are vector modes. Otherwise it won't
13252 allow say DF and DI to change classes. For types like TFmode and TDmode
13253 that take 2 64-bit registers, rather than a single 128-bit register, don't
13254 allow subregs of those types to other 128 bit types. */
13255 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13257 unsigned num_regs = (from_size + 15) / 16;
13258 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13259 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13260 return false;
13262 return (from_size == 8 || from_size == 16);
13265 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13266 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13267 return false;
13269 return true;
13272 /* Debug version of rs6000_can_change_mode_class. */
13273 static bool
13274 rs6000_debug_can_change_mode_class (machine_mode from,
13275 machine_mode to,
13276 reg_class_t rclass)
13278 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13280 fprintf (stderr,
13281 "rs6000_can_change_mode_class, return %s, from = %s, "
13282 "to = %s, rclass = %s\n",
13283 ret ? "true" : "false",
13284 GET_MODE_NAME (from), GET_MODE_NAME (to),
13285 reg_class_names[rclass]);
13287 return ret;
13290 /* Return a string to do a move operation of 128 bits of data. */
13292 const char *
13293 rs6000_output_move_128bit (rtx operands[])
13295 rtx dest = operands[0];
13296 rtx src = operands[1];
13297 machine_mode mode = GET_MODE (dest);
13298 int dest_regno;
13299 int src_regno;
13300 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13301 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13303 if (REG_P (dest))
13305 dest_regno = REGNO (dest);
13306 dest_gpr_p = INT_REGNO_P (dest_regno);
13307 dest_fp_p = FP_REGNO_P (dest_regno);
13308 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13309 dest_vsx_p = dest_fp_p | dest_vmx_p;
13311 else
13313 dest_regno = -1;
13314 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13317 if (REG_P (src))
13319 src_regno = REGNO (src);
13320 src_gpr_p = INT_REGNO_P (src_regno);
13321 src_fp_p = FP_REGNO_P (src_regno);
13322 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13323 src_vsx_p = src_fp_p | src_vmx_p;
13325 else
13327 src_regno = -1;
13328 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13331 /* Register moves. */
13332 if (dest_regno >= 0 && src_regno >= 0)
13334 if (dest_gpr_p)
13336 if (src_gpr_p)
13337 return "#";
13339 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13340 return (WORDS_BIG_ENDIAN
13341 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13342 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13344 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13345 return "#";
13348 else if (TARGET_VSX && dest_vsx_p)
13350 if (src_vsx_p)
13351 return "xxlor %x0,%x1,%x1";
13353 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13354 return (WORDS_BIG_ENDIAN
13355 ? "mtvsrdd %x0,%1,%L1"
13356 : "mtvsrdd %x0,%L1,%1");
13358 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13359 return "#";
13362 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13363 return "vor %0,%1,%1";
13365 else if (dest_fp_p && src_fp_p)
13366 return "#";
13369 /* Loads. */
13370 else if (dest_regno >= 0 && MEM_P (src))
13372 if (dest_gpr_p)
13374 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13375 return "lq %0,%1";
13376 else
13377 return "#";
13380 else if (TARGET_ALTIVEC && dest_vmx_p
13381 && altivec_indexed_or_indirect_operand (src, mode))
13382 return "lvx %0,%y1";
13384 else if (TARGET_VSX && dest_vsx_p)
13386 if (mode_supports_dq_form (mode)
13387 && quad_address_p (XEXP (src, 0), mode, true))
13388 return "lxv %x0,%1";
13390 else if (TARGET_P9_VECTOR)
13391 return "lxvx %x0,%y1";
13393 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13394 return "lxvw4x %x0,%y1";
13396 else
13397 return "lxvd2x %x0,%y1";
13400 else if (TARGET_ALTIVEC && dest_vmx_p)
13401 return "lvx %0,%y1";
13403 else if (dest_fp_p)
13404 return "#";
13407 /* Stores. */
13408 else if (src_regno >= 0 && MEM_P (dest))
13410 if (src_gpr_p)
13412 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13413 return "stq %1,%0";
13414 else
13415 return "#";
13418 else if (TARGET_ALTIVEC && src_vmx_p
13419 && altivec_indexed_or_indirect_operand (dest, mode))
13420 return "stvx %1,%y0";
13422 else if (TARGET_VSX && src_vsx_p)
13424 if (mode_supports_dq_form (mode)
13425 && quad_address_p (XEXP (dest, 0), mode, true))
13426 return "stxv %x1,%0";
13428 else if (TARGET_P9_VECTOR)
13429 return "stxvx %x1,%y0";
13431 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13432 return "stxvw4x %x1,%y0";
13434 else
13435 return "stxvd2x %x1,%y0";
13438 else if (TARGET_ALTIVEC && src_vmx_p)
13439 return "stvx %1,%y0";
13441 else if (src_fp_p)
13442 return "#";
13445 /* Constants. */
13446 else if (dest_regno >= 0
13447 && (CONST_INT_P (src)
13448 || CONST_WIDE_INT_P (src)
13449 || CONST_DOUBLE_P (src)
13450 || GET_CODE (src) == CONST_VECTOR))
13452 if (dest_gpr_p)
13453 return "#";
13455 else if ((dest_vmx_p && TARGET_ALTIVEC)
13456 || (dest_vsx_p && TARGET_VSX))
13457 return output_vec_const_move (operands);
13460 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13463 /* Validate a 128-bit move. */
13464 bool
13465 rs6000_move_128bit_ok_p (rtx operands[])
13467 machine_mode mode = GET_MODE (operands[0]);
13468 return (gpc_reg_operand (operands[0], mode)
13469 || gpc_reg_operand (operands[1], mode));
13472 /* Return true if a 128-bit move needs to be split. */
13473 bool
13474 rs6000_split_128bit_ok_p (rtx operands[])
13476 if (!reload_completed)
13477 return false;
13479 if (!gpr_or_gpr_p (operands[0], operands[1]))
13480 return false;
13482 if (quad_load_store_p (operands[0], operands[1]))
13483 return false;
13485 return true;
13489 /* Given a comparison operation, return the bit number in CCR to test. We
13490 know this is a valid comparison.
13492 SCC_P is 1 if this is for an scc. That means that %D will have been
13493 used instead of %C, so the bits will be in different places.
13495 Return -1 if OP isn't a valid comparison for some reason. */
13498 ccr_bit (rtx op, int scc_p)
13500 enum rtx_code code = GET_CODE (op);
13501 machine_mode cc_mode;
13502 int cc_regnum;
13503 int base_bit;
13504 rtx reg;
13506 if (!COMPARISON_P (op))
13507 return -1;
13509 reg = XEXP (op, 0);
13511 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13512 return -1;
13514 cc_mode = GET_MODE (reg);
13515 cc_regnum = REGNO (reg);
13516 base_bit = 4 * (cc_regnum - CR0_REGNO);
13518 validate_condition_mode (code, cc_mode);
13520 /* When generating a sCOND operation, only positive conditions are
13521 allowed. */
13522 if (scc_p)
13523 switch (code)
13525 case EQ:
13526 case GT:
13527 case LT:
13528 case UNORDERED:
13529 case GTU:
13530 case LTU:
13531 break;
13532 default:
13533 return -1;
13536 switch (code)
13538 case NE:
13539 return scc_p ? base_bit + 3 : base_bit + 2;
13540 case EQ:
13541 return base_bit + 2;
13542 case GT: case GTU: case UNLE:
13543 return base_bit + 1;
13544 case LT: case LTU: case UNGE:
13545 return base_bit;
13546 case ORDERED: case UNORDERED:
13547 return base_bit + 3;
13549 case GE: case GEU:
13550 /* If scc, we will have done a cror to put the bit in the
13551 unordered position. So test that bit. For integer, this is ! LT
13552 unless this is an scc insn. */
13553 return scc_p ? base_bit + 3 : base_bit;
13555 case LE: case LEU:
13556 return scc_p ? base_bit + 3 : base_bit + 1;
13558 default:
13559 return -1;
13563 /* Return the GOT register. */
13566 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13568 /* The second flow pass currently (June 1999) can't update
13569 regs_ever_live without disturbing other parts of the compiler, so
13570 update it here to make the prolog/epilogue code happy. */
13571 if (!can_create_pseudo_p ()
13572 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13573 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13575 crtl->uses_pic_offset_table = 1;
13577 return pic_offset_table_rtx;
13580 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13582 /* Write out a function code label. */
13584 void
13585 rs6000_output_function_entry (FILE *file, const char *fname)
13587 if (fname[0] != '.')
13589 switch (DEFAULT_ABI)
13591 default:
13592 gcc_unreachable ();
13594 case ABI_AIX:
13595 if (DOT_SYMBOLS)
13596 putc ('.', file);
13597 else
13598 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13599 break;
13601 case ABI_ELFv2:
13602 case ABI_V4:
13603 case ABI_DARWIN:
13604 break;
13608 RS6000_OUTPUT_BASENAME (file, fname);
13611 /* Print an operand. Recognize special options, documented below. */
13613 #if TARGET_ELF
13614 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13615 only introduced by the linker, when applying the sda21
13616 relocation. */
13617 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13618 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13619 #else
13620 #define SMALL_DATA_RELOC "sda21"
13621 #define SMALL_DATA_REG 0
13622 #endif
13624 void
13625 print_operand (FILE *file, rtx x, int code)
13627 int i;
13628 unsigned HOST_WIDE_INT uval;
13630 switch (code)
13632 /* %a is output_address. */
13634 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13635 output_operand. */
13637 case 'A':
13638 /* Write the MMA accumulator number associated with VSX register X. */
13639 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13640 output_operand_lossage ("invalid %%A value");
13641 else
13642 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13643 return;
13645 case 'D':
13646 /* Like 'J' but get to the GT bit only. */
13647 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13649 output_operand_lossage ("invalid %%D value");
13650 return;
13653 /* Bit 1 is GT bit. */
13654 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13656 /* Add one for shift count in rlinm for scc. */
13657 fprintf (file, "%d", i + 1);
13658 return;
13660 case 'e':
13661 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13662 if (! INT_P (x))
13664 output_operand_lossage ("invalid %%e value");
13665 return;
13668 uval = INTVAL (x);
13669 if ((uval & 0xffff) == 0 && uval != 0)
13670 putc ('s', file);
13671 return;
13673 case 'E':
13674 /* X is a CR register. Print the number of the EQ bit of the CR */
13675 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13676 output_operand_lossage ("invalid %%E value");
13677 else
13678 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13679 return;
13681 case 'f':
13682 /* X is a CR register. Print the shift count needed to move it
13683 to the high-order four bits. */
13684 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13685 output_operand_lossage ("invalid %%f value");
13686 else
13687 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13688 return;
13690 case 'F':
13691 /* Similar, but print the count for the rotate in the opposite
13692 direction. */
13693 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13694 output_operand_lossage ("invalid %%F value");
13695 else
13696 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13697 return;
13699 case 'G':
13700 /* X is a constant integer. If it is negative, print "m",
13701 otherwise print "z". This is to make an aze or ame insn. */
13702 if (!CONST_INT_P (x))
13703 output_operand_lossage ("invalid %%G value");
13704 else if (INTVAL (x) >= 0)
13705 putc ('z', file);
13706 else
13707 putc ('m', file);
13708 return;
13710 case 'h':
13711 /* If constant, output low-order five bits. Otherwise, write
13712 normally. */
13713 if (INT_P (x))
13714 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13715 else
13716 print_operand (file, x, 0);
13717 return;
13719 case 'H':
13720 /* If constant, output low-order six bits. Otherwise, write
13721 normally. */
13722 if (INT_P (x))
13723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13724 else
13725 print_operand (file, x, 0);
13726 return;
13728 case 'I':
13729 /* Print `i' if this is a constant, else nothing. */
13730 if (INT_P (x))
13731 putc ('i', file);
13732 return;
13734 case 'j':
13735 /* Write the bit number in CCR for jump. */
13736 i = ccr_bit (x, 0);
13737 if (i == -1)
13738 output_operand_lossage ("invalid %%j code");
13739 else
13740 fprintf (file, "%d", i);
13741 return;
13743 case 'J':
13744 /* Similar, but add one for shift count in rlinm for scc and pass
13745 scc flag to `ccr_bit'. */
13746 i = ccr_bit (x, 1);
13747 if (i == -1)
13748 output_operand_lossage ("invalid %%J code");
13749 else
13750 /* If we want bit 31, write a shift count of zero, not 32. */
13751 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13752 return;
13754 case 'k':
13755 /* X must be a constant. Write the 1's complement of the
13756 constant. */
13757 if (! INT_P (x))
13758 output_operand_lossage ("invalid %%k value");
13759 else
13760 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13761 return;
13763 case 'K':
13764 /* X must be a symbolic constant on ELF. Write an
13765 expression suitable for an 'addi' that adds in the low 16
13766 bits of the MEM. */
13767 if (GET_CODE (x) == CONST)
13769 if (GET_CODE (XEXP (x, 0)) != PLUS
13770 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13771 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13772 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13773 output_operand_lossage ("invalid %%K value");
13775 print_operand_address (file, x);
13776 fputs ("@l", file);
13777 return;
13779 /* %l is output_asm_label. */
13781 case 'L':
13782 /* Write second word of DImode or DFmode reference. Works on register
13783 or non-indexed memory only. */
13784 if (REG_P (x))
13785 fputs (reg_names[REGNO (x) + 1], file);
13786 else if (MEM_P (x))
13788 machine_mode mode = GET_MODE (x);
13789 /* Handle possible auto-increment. Since it is pre-increment and
13790 we have already done it, we can just use an offset of word. */
13791 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13792 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13793 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13794 UNITS_PER_WORD));
13795 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13796 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13797 UNITS_PER_WORD));
13798 else
13799 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13800 UNITS_PER_WORD),
13801 0));
13803 if (small_data_operand (x, GET_MODE (x)))
13804 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13805 reg_names[SMALL_DATA_REG]);
13807 return;
13809 case 'N': /* Unused */
13810 /* Write the number of elements in the vector times 4. */
13811 if (GET_CODE (x) != PARALLEL)
13812 output_operand_lossage ("invalid %%N value");
13813 else
13814 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13815 return;
13817 case 'O': /* Unused */
13818 /* Similar, but subtract 1 first. */
13819 if (GET_CODE (x) != PARALLEL)
13820 output_operand_lossage ("invalid %%O value");
13821 else
13822 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13823 return;
13825 case 'p':
13826 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13827 if (! INT_P (x)
13828 || INTVAL (x) < 0
13829 || (i = exact_log2 (INTVAL (x))) < 0)
13830 output_operand_lossage ("invalid %%p value");
13831 else
13832 fprintf (file, "%d", i);
13833 return;
13835 case 'P':
13836 /* The operand must be an indirect memory reference. The result
13837 is the register name. */
13838 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13839 || REGNO (XEXP (x, 0)) >= 32)
13840 output_operand_lossage ("invalid %%P value");
13841 else
13842 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13843 return;
13845 case 'q':
13846 /* This outputs the logical code corresponding to a boolean
13847 expression. The expression may have one or both operands
13848 negated (if one, only the first one). For condition register
13849 logical operations, it will also treat the negated
13850 CR codes as NOTs, but not handle NOTs of them. */
13852 const char *const *t = 0;
13853 const char *s;
13854 enum rtx_code code = GET_CODE (x);
13855 static const char * const tbl[3][3] = {
13856 { "and", "andc", "nor" },
13857 { "or", "orc", "nand" },
13858 { "xor", "eqv", "xor" } };
13860 if (code == AND)
13861 t = tbl[0];
13862 else if (code == IOR)
13863 t = tbl[1];
13864 else if (code == XOR)
13865 t = tbl[2];
13866 else
13867 output_operand_lossage ("invalid %%q value");
13869 if (GET_CODE (XEXP (x, 0)) != NOT)
13870 s = t[0];
13871 else
13873 if (GET_CODE (XEXP (x, 1)) == NOT)
13874 s = t[2];
13875 else
13876 s = t[1];
13879 fputs (s, file);
13881 return;
13883 case 'Q':
13884 if (! TARGET_MFCRF)
13885 return;
13886 fputc (',', file);
13887 /* FALLTHRU */
13889 case 'R':
13890 /* X is a CR register. Print the mask for `mtcrf'. */
13891 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13892 output_operand_lossage ("invalid %%R value");
13893 else
13894 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13895 return;
13897 case 's':
13898 /* Low 5 bits of 32 - value */
13899 if (! INT_P (x))
13900 output_operand_lossage ("invalid %%s value");
13901 else
13902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13903 return;
13905 case 't':
13906 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13907 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13909 output_operand_lossage ("invalid %%t value");
13910 return;
13913 /* Bit 3 is OV bit. */
13914 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13916 /* If we want bit 31, write a shift count of zero, not 32. */
13917 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13918 return;
13920 case 'T':
13921 /* Print the symbolic name of a branch target register. */
13922 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13923 x = XVECEXP (x, 0, 0);
13924 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13925 && REGNO (x) != CTR_REGNO))
13926 output_operand_lossage ("invalid %%T value");
13927 else if (REGNO (x) == LR_REGNO)
13928 fputs ("lr", file);
13929 else
13930 fputs ("ctr", file);
13931 return;
13933 case 'u':
13934 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13935 for use in unsigned operand. */
13936 if (! INT_P (x))
13938 output_operand_lossage ("invalid %%u value");
13939 return;
13942 uval = INTVAL (x);
13943 if ((uval & 0xffff) == 0)
13944 uval >>= 16;
13946 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13947 return;
13949 case 'v':
13950 /* High-order 16 bits of constant for use in signed operand. */
13951 if (! INT_P (x))
13952 output_operand_lossage ("invalid %%v value");
13953 else
13954 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13955 (INTVAL (x) >> 16) & 0xffff);
13956 return;
13958 case 'U':
13959 /* Print `u' if this has an auto-increment or auto-decrement. */
13960 if (MEM_P (x)
13961 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13962 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13963 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13964 putc ('u', file);
13965 return;
13967 case 'V':
13968 /* Print the trap code for this operand. */
13969 switch (GET_CODE (x))
13971 case EQ:
13972 fputs ("eq", file); /* 4 */
13973 break;
13974 case NE:
13975 fputs ("ne", file); /* 24 */
13976 break;
13977 case LT:
13978 fputs ("lt", file); /* 16 */
13979 break;
13980 case LE:
13981 fputs ("le", file); /* 20 */
13982 break;
13983 case GT:
13984 fputs ("gt", file); /* 8 */
13985 break;
13986 case GE:
13987 fputs ("ge", file); /* 12 */
13988 break;
13989 case LTU:
13990 fputs ("llt", file); /* 2 */
13991 break;
13992 case LEU:
13993 fputs ("lle", file); /* 6 */
13994 break;
13995 case GTU:
13996 fputs ("lgt", file); /* 1 */
13997 break;
13998 case GEU:
13999 fputs ("lge", file); /* 5 */
14000 break;
14001 default:
14002 output_operand_lossage ("invalid %%V value");
14004 break;
14006 case 'w':
14007 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14008 normally. */
14009 if (INT_P (x))
14010 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
14011 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
14012 else
14013 print_operand (file, x, 0);
14014 return;
14016 case 'x':
14017 /* X is a FPR or Altivec register used in a VSX context. */
14018 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14019 output_operand_lossage ("invalid %%x value");
14020 else
14022 int reg = REGNO (x);
14023 int vsx_reg = (FP_REGNO_P (reg)
14024 ? reg - 32
14025 : reg - FIRST_ALTIVEC_REGNO + 32);
14027 #ifdef TARGET_REGNAMES
14028 if (TARGET_REGNAMES)
14029 fprintf (file, "%%vs%d", vsx_reg);
14030 else
14031 #endif
14032 fprintf (file, "%d", vsx_reg);
14034 return;
14036 case 'X':
14037 if (MEM_P (x)
14038 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14039 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14040 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14041 putc ('x', file);
14042 return;
14044 case 'Y':
14045 /* Like 'L', for third word of TImode/PTImode */
14046 if (REG_P (x))
14047 fputs (reg_names[REGNO (x) + 2], file);
14048 else if (MEM_P (x))
14050 machine_mode mode = GET_MODE (x);
14051 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14052 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14053 output_address (mode, plus_constant (Pmode,
14054 XEXP (XEXP (x, 0), 0), 8));
14055 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14056 output_address (mode, plus_constant (Pmode,
14057 XEXP (XEXP (x, 0), 0), 8));
14058 else
14059 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14060 if (small_data_operand (x, GET_MODE (x)))
14061 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14062 reg_names[SMALL_DATA_REG]);
14064 return;
14066 case 'z':
14067 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14068 x = XVECEXP (x, 0, 1);
14069 /* X is a SYMBOL_REF. Write out the name preceded by a
14070 period and without any trailing data in brackets. Used for function
14071 names. If we are configured for System V (or the embedded ABI) on
14072 the PowerPC, do not emit the period, since those systems do not use
14073 TOCs and the like. */
14074 if (!SYMBOL_REF_P (x))
14076 output_operand_lossage ("invalid %%z value");
14077 return;
14080 /* For macho, check to see if we need a stub. */
14081 if (TARGET_MACHO)
14083 const char *name = XSTR (x, 0);
14084 #if TARGET_MACHO
14085 if (darwin_symbol_stubs
14086 && MACHOPIC_INDIRECT
14087 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14088 name = machopic_indirection_name (x, /*stub_p=*/true);
14089 #endif
14090 assemble_name (file, name);
14092 else if (!DOT_SYMBOLS)
14093 assemble_name (file, XSTR (x, 0));
14094 else
14095 rs6000_output_function_entry (file, XSTR (x, 0));
14096 return;
14098 case 'Z':
14099 /* Like 'L', for last word of TImode/PTImode. */
14100 if (REG_P (x))
14101 fputs (reg_names[REGNO (x) + 3], file);
14102 else if (MEM_P (x))
14104 machine_mode mode = GET_MODE (x);
14105 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14106 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14107 output_address (mode, plus_constant (Pmode,
14108 XEXP (XEXP (x, 0), 0), 12));
14109 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14110 output_address (mode, plus_constant (Pmode,
14111 XEXP (XEXP (x, 0), 0), 12));
14112 else
14113 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14114 if (small_data_operand (x, GET_MODE (x)))
14115 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14116 reg_names[SMALL_DATA_REG]);
14118 return;
14120 /* Print AltiVec memory operand. */
14121 case 'y':
14123 rtx tmp;
14125 gcc_assert (MEM_P (x));
14127 tmp = XEXP (x, 0);
14129 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14130 && GET_CODE (tmp) == AND
14131 && CONST_INT_P (XEXP (tmp, 1))
14132 && INTVAL (XEXP (tmp, 1)) == -16)
14133 tmp = XEXP (tmp, 0);
14134 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14135 && GET_CODE (tmp) == PRE_MODIFY)
14136 tmp = XEXP (tmp, 1);
14137 if (REG_P (tmp))
14138 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14139 else
14141 if (GET_CODE (tmp) != PLUS
14142 || !REG_P (XEXP (tmp, 0))
14143 || !REG_P (XEXP (tmp, 1)))
14145 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14146 break;
14149 if (REGNO (XEXP (tmp, 0)) == 0)
14150 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14151 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14152 else
14153 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14154 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14156 break;
14159 case 0:
14160 if (REG_P (x))
14161 fprintf (file, "%s", reg_names[REGNO (x)]);
14162 else if (MEM_P (x))
14164 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14165 know the width from the mode. */
14166 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14167 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14168 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14169 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14170 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14171 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14172 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14173 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14174 else
14175 output_address (GET_MODE (x), XEXP (x, 0));
14177 else if (toc_relative_expr_p (x, false,
14178 &tocrel_base_oac, &tocrel_offset_oac))
14179 /* This hack along with a corresponding hack in
14180 rs6000_output_addr_const_extra arranges to output addends
14181 where the assembler expects to find them. eg.
14182 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14183 without this hack would be output as "x@toc+4". We
14184 want "x+4@toc". */
14185 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14186 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14187 output_addr_const (file, XVECEXP (x, 0, 0));
14188 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14189 output_addr_const (file, XVECEXP (x, 0, 1));
14190 else
14191 output_addr_const (file, x);
14192 return;
14194 case '&':
14195 if (const char *name = get_some_local_dynamic_name ())
14196 assemble_name (file, name);
14197 else
14198 output_operand_lossage ("'%%&' used without any "
14199 "local dynamic TLS references");
14200 return;
14202 default:
14203 output_operand_lossage ("invalid %%xn code");
14207 /* Print the address of an operand. */
14209 void
14210 print_operand_address (FILE *file, rtx x)
14212 if (REG_P (x))
14213 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14215 /* Is it a PC-relative address? */
14216 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14218 HOST_WIDE_INT offset;
14220 if (GET_CODE (x) == CONST)
14221 x = XEXP (x, 0);
14223 if (GET_CODE (x) == PLUS)
14225 offset = INTVAL (XEXP (x, 1));
14226 x = XEXP (x, 0);
14228 else
14229 offset = 0;
14231 output_addr_const (file, x);
14233 if (offset)
14234 fprintf (file, "%+" PRId64, offset);
14236 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14237 fprintf (file, "@got");
14239 fprintf (file, "@pcrel");
14241 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14242 || GET_CODE (x) == LABEL_REF)
14244 output_addr_const (file, x);
14245 if (small_data_operand (x, GET_MODE (x)))
14246 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14247 reg_names[SMALL_DATA_REG]);
14248 else
14249 gcc_assert (!TARGET_TOC);
14251 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14252 && REG_P (XEXP (x, 1)))
14254 if (REGNO (XEXP (x, 0)) == 0)
14255 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14256 reg_names[ REGNO (XEXP (x, 0)) ]);
14257 else
14258 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14259 reg_names[ REGNO (XEXP (x, 1)) ]);
14261 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14262 && CONST_INT_P (XEXP (x, 1)))
14263 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14264 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14265 #if TARGET_MACHO
14266 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14267 && CONSTANT_P (XEXP (x, 1)))
14269 fprintf (file, "lo16(");
14270 output_addr_const (file, XEXP (x, 1));
14271 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14273 #endif
14274 #if TARGET_ELF
14275 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14276 && CONSTANT_P (XEXP (x, 1)))
14278 output_addr_const (file, XEXP (x, 1));
14279 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14281 #endif
14282 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14284 /* This hack along with a corresponding hack in
14285 rs6000_output_addr_const_extra arranges to output addends
14286 where the assembler expects to find them. eg.
14287 (lo_sum (reg 9)
14288 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14289 without this hack would be output as "x@toc+8@l(9)". We
14290 want "x+8@toc@l(9)". */
14291 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14292 if (GET_CODE (x) == LO_SUM)
14293 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14294 else
14295 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14297 else
14298 output_addr_const (file, x);
14301 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14303 bool
14304 rs6000_output_addr_const_extra (FILE *file, rtx x)
14306 if (GET_CODE (x) == UNSPEC)
14307 switch (XINT (x, 1))
14309 case UNSPEC_TOCREL:
14310 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14311 && REG_P (XVECEXP (x, 0, 1))
14312 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14313 output_addr_const (file, XVECEXP (x, 0, 0));
14314 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14316 if (INTVAL (tocrel_offset_oac) >= 0)
14317 fprintf (file, "+");
14318 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14320 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14322 putc ('-', file);
14323 assemble_name (file, toc_label_name);
14324 need_toc_init = 1;
14326 else if (TARGET_ELF)
14327 fputs ("@toc", file);
14328 return true;
14330 #if TARGET_MACHO
14331 case UNSPEC_MACHOPIC_OFFSET:
14332 output_addr_const (file, XVECEXP (x, 0, 0));
14333 putc ('-', file);
14334 machopic_output_function_base_name (file);
14335 return true;
14336 #endif
14338 return false;
14341 /* Target hook for assembling integer objects. The PowerPC version has
14342 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14343 is defined. It also needs to handle DI-mode objects on 64-bit
14344 targets. */
14346 static bool
14347 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14349 #ifdef RELOCATABLE_NEEDS_FIXUP
14350 /* Special handling for SI values. */
14351 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14353 static int recurse = 0;
14355 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14356 the .fixup section. Since the TOC section is already relocated, we
14357 don't need to mark it here. We used to skip the text section, but it
14358 should never be valid for relocated addresses to be placed in the text
14359 section. */
14360 if (DEFAULT_ABI == ABI_V4
14361 && (TARGET_RELOCATABLE || flag_pic > 1)
14362 && in_section != toc_section
14363 && !recurse
14364 && !CONST_SCALAR_INT_P (x)
14365 && CONSTANT_P (x))
14367 char buf[256];
14369 recurse = 1;
14370 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14371 fixuplabelno++;
14372 ASM_OUTPUT_LABEL (asm_out_file, buf);
14373 fprintf (asm_out_file, "\t.long\t(");
14374 output_addr_const (asm_out_file, x);
14375 fprintf (asm_out_file, ")@fixup\n");
14376 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14377 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14378 fprintf (asm_out_file, "\t.long\t");
14379 assemble_name (asm_out_file, buf);
14380 fprintf (asm_out_file, "\n\t.previous\n");
14381 recurse = 0;
14382 return true;
14384 /* Remove initial .'s to turn a -mcall-aixdesc function
14385 address into the address of the descriptor, not the function
14386 itself. */
14387 else if (SYMBOL_REF_P (x)
14388 && XSTR (x, 0)[0] == '.'
14389 && DEFAULT_ABI == ABI_AIX)
14391 const char *name = XSTR (x, 0);
14392 while (*name == '.')
14393 name++;
14395 fprintf (asm_out_file, "\t.long\t%s\n", name);
14396 return true;
14399 #endif /* RELOCATABLE_NEEDS_FIXUP */
14400 return default_assemble_integer (x, size, aligned_p);
14403 /* Return a template string for assembly to emit when making an
14404 external call. FUNOP is the call mem argument operand number. */
14406 static const char *
14407 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14409 /* -Wformat-overflow workaround, without which gcc thinks that %u
14410 might produce 10 digits. */
14411 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14413 char arg[12];
14414 arg[0] = 0;
14415 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14417 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14418 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14419 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14420 sprintf (arg, "(%%&@tlsld)");
14423 /* The magic 32768 offset here corresponds to the offset of
14424 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14425 char z[11];
14426 sprintf (z, "%%z%u%s", funop,
14427 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14428 ? "+32768" : ""));
14430 static char str[32]; /* 1 spare */
14431 if (rs6000_pcrel_p ())
14432 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14433 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14434 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14435 sibcall ? "" : "\n\tnop");
14436 else if (DEFAULT_ABI == ABI_V4)
14437 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14438 flag_pic ? "@plt" : "");
14439 #if TARGET_MACHO
14440 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
14441 else if (DEFAULT_ABI == ABI_DARWIN)
14443 /* The cookie is in operand func+2. */
14444 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14445 int cookie = INTVAL (operands[funop + 2]);
14446 if (cookie & CALL_LONG)
14448 tree funname = get_identifier (XSTR (operands[funop], 0));
14449 tree labelname = get_prev_label (funname);
14450 gcc_checking_assert (labelname && !sibcall);
14452 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14453 instruction will reach 'foo', otherwise link as 'bl L42'".
14454 "L42" should be a 'branch island', that will do a far jump to
14455 'foo'. Branch islands are generated in
14456 macho_branch_islands(). */
14457 sprintf (str, "jbsr %%z%u,%.10s", funop,
14458 IDENTIFIER_POINTER (labelname));
14460 else
14461 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14462 after the call. */
14463 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14465 #endif
14466 else
14467 gcc_unreachable ();
14468 return str;
14471 const char *
14472 rs6000_call_template (rtx *operands, unsigned int funop)
14474 return rs6000_call_template_1 (operands, funop, false);
14477 const char *
14478 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14480 return rs6000_call_template_1 (operands, funop, true);
14483 /* As above, for indirect calls. */
14485 static const char *
14486 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14487 bool sibcall)
14489 /* -Wformat-overflow workaround, without which gcc thinks that %u
14490 might produce 10 digits. Note that -Wformat-overflow will not
14491 currently warn here for str[], so do not rely on a warning to
14492 ensure str[] is correctly sized. */
14493 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14495 /* Currently, funop is either 0 or 1. The maximum string is always
14496 a !speculate 64-bit __tls_get_addr call.
14498 ABI_ELFv2, pcrel:
14499 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14500 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14501 . 9 crset 2\n\t
14502 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14503 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14504 . 8 beq%T1l-
14505 .---
14506 .142
14508 ABI_AIX:
14509 . 9 ld 2,%3\n\t
14510 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14511 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14512 . 9 crset 2\n\t
14513 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14514 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14515 . 10 beq%T1l-\n\t
14516 . 10 ld 2,%4(1)
14517 .---
14518 .151
14520 ABI_ELFv2:
14521 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14522 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14523 . 9 crset 2\n\t
14524 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14525 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14526 . 10 beq%T1l-\n\t
14527 . 10 ld 2,%3(1)
14528 .---
14529 .142
14531 ABI_V4:
14532 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14533 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14534 . 9 crset 2\n\t
14535 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14536 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14537 . 8 beq%T1l-
14538 .---
14539 .141 */
14540 static char str[160]; /* 8 spare */
14541 char *s = str;
14542 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14544 if (DEFAULT_ABI == ABI_AIX)
14545 s += sprintf (s,
14546 "l%s 2,%%%u\n\t",
14547 ptrload, funop + 3);
14549 /* We don't need the extra code to stop indirect call speculation if
14550 calling via LR. */
14551 bool speculate = (TARGET_MACHO
14552 || rs6000_speculate_indirect_jumps
14553 || (REG_P (operands[funop])
14554 && REGNO (operands[funop]) == LR_REGNO));
14556 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14558 const char *rel64 = TARGET_64BIT ? "64" : "";
14559 char tls[29];
14560 tls[0] = 0;
14561 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14563 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14564 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14565 rel64, funop + 1);
14566 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14567 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14568 rel64);
14571 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14572 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14573 && flag_pic == 2 ? "+32768" : "");
14574 if (!speculate)
14576 s += sprintf (s,
14577 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14578 tls, rel64, notoc, funop, addend);
14579 s += sprintf (s, "crset 2\n\t");
14581 s += sprintf (s,
14582 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14583 tls, rel64, notoc, funop, addend);
14585 else if (!speculate)
14586 s += sprintf (s, "crset 2\n\t");
14588 if (rs6000_pcrel_p ())
14590 if (speculate)
14591 sprintf (s, "b%%T%ul", funop);
14592 else
14593 sprintf (s, "beq%%T%ul-", funop);
14595 else if (DEFAULT_ABI == ABI_AIX)
14597 if (speculate)
14598 sprintf (s,
14599 "b%%T%ul\n\t"
14600 "l%s 2,%%%u(1)",
14601 funop, ptrload, funop + 4);
14602 else
14603 sprintf (s,
14604 "beq%%T%ul-\n\t"
14605 "l%s 2,%%%u(1)",
14606 funop, ptrload, funop + 4);
14608 else if (DEFAULT_ABI == ABI_ELFv2)
14610 if (speculate)
14611 sprintf (s,
14612 "b%%T%ul\n\t"
14613 "l%s 2,%%%u(1)",
14614 funop, ptrload, funop + 3);
14615 else
14616 sprintf (s,
14617 "beq%%T%ul-\n\t"
14618 "l%s 2,%%%u(1)",
14619 funop, ptrload, funop + 3);
14621 else
14623 if (speculate)
14624 sprintf (s,
14625 "b%%T%u%s",
14626 funop, sibcall ? "" : "l");
14627 else
14628 sprintf (s,
14629 "beq%%T%u%s-%s",
14630 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14632 return str;
14635 const char *
14636 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14638 return rs6000_indirect_call_template_1 (operands, funop, false);
14641 const char *
14642 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14644 return rs6000_indirect_call_template_1 (operands, funop, true);
14647 #if HAVE_AS_PLTSEQ
14648 /* Output indirect call insns. WHICH identifies the type of sequence. */
14649 const char *
14650 rs6000_pltseq_template (rtx *operands, int which)
14652 const char *rel64 = TARGET_64BIT ? "64" : "";
14653 char tls[30];
14654 tls[0] = 0;
14655 if (GET_CODE (operands[3]) == UNSPEC)
14657 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14658 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14659 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14660 off, rel64);
14661 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14662 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14663 off, rel64);
14666 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14667 static char str[96]; /* 10 spare */
14668 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14669 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14670 && flag_pic == 2 ? "+32768" : "");
14671 switch (which)
14673 case RS6000_PLTSEQ_TOCSAVE:
14674 sprintf (str,
14675 "st%s\n\t"
14676 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14677 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14678 tls, rel64);
14679 break;
14680 case RS6000_PLTSEQ_PLT16_HA:
14681 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14682 sprintf (str,
14683 "lis %%0,0\n\t"
14684 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14685 tls, off, rel64);
14686 else
14687 sprintf (str,
14688 "addis %%0,%%1,0\n\t"
14689 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14690 tls, off, rel64, addend);
14691 break;
14692 case RS6000_PLTSEQ_PLT16_LO:
14693 sprintf (str,
14694 "l%s %%0,0(%%1)\n\t"
14695 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14696 TARGET_64BIT ? "d" : "wz",
14697 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14698 break;
14699 case RS6000_PLTSEQ_MTCTR:
14700 sprintf (str,
14701 "mtctr %%1\n\t"
14702 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14703 tls, rel64, addend);
14704 break;
14705 case RS6000_PLTSEQ_PLT_PCREL34:
14706 sprintf (str,
14707 "pl%s %%0,0(0),1\n\t"
14708 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14709 TARGET_64BIT ? "d" : "wz",
14710 tls, rel64);
14711 break;
14712 default:
14713 gcc_unreachable ();
14715 return str;
14717 #endif
14719 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14720 /* Emit an assembler directive to set symbol visibility for DECL to
14721 VISIBILITY_TYPE. */
14723 static void
14724 rs6000_assemble_visibility (tree decl, int vis)
14726 if (TARGET_XCOFF)
14727 return;
14729 /* Functions need to have their entry point symbol visibility set as
14730 well as their descriptor symbol visibility. */
14731 if (DEFAULT_ABI == ABI_AIX
14732 && DOT_SYMBOLS
14733 && TREE_CODE (decl) == FUNCTION_DECL)
14735 static const char * const visibility_types[] = {
14736 NULL, "protected", "hidden", "internal"
14739 const char *name, *type;
14741 name = ((* targetm.strip_name_encoding)
14742 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14743 type = visibility_types[vis];
14745 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14746 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14748 else
14749 default_assemble_visibility (decl, vis);
14751 #endif
14753 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14754 entry. If RECORD_P is true and the target supports named sections,
14755 the location of the NOPs will be recorded in a special object section
14756 called "__patchable_function_entries". This routine may be called
14757 twice per function to put NOPs before and after the function
14758 entry. */
14760 void
14761 rs6000_print_patchable_function_entry (FILE *file,
14762 unsigned HOST_WIDE_INT patch_area_size,
14763 bool record_p)
14765 unsigned int flags = SECTION_WRITE | SECTION_RELRO;
14766 /* When .opd section is emitted, the function symbol
14767 default_print_patchable_function_entry_1 is emitted into the .opd section
14768 while the patchable area is emitted into the function section.
14769 Don't use SECTION_LINK_ORDER in that case. */
14770 if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14771 && HAVE_GAS_SECTION_LINK_ORDER)
14772 flags |= SECTION_LINK_ORDER;
14773 default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14774 flags);
14777 enum rtx_code
14778 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14780 /* Reversal of FP compares takes care -- an ordered compare
14781 becomes an unordered compare and vice versa. */
14782 if (mode == CCFPmode
14783 && (!flag_finite_math_only
14784 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14785 || code == UNEQ || code == LTGT))
14786 return reverse_condition_maybe_unordered (code);
14787 else
14788 return reverse_condition (code);
14791 /* Generate a compare for CODE. Return a brand-new rtx that
14792 represents the result of the compare. */
14794 static rtx
14795 rs6000_generate_compare (rtx cmp, machine_mode mode)
14797 machine_mode comp_mode;
14798 rtx compare_result;
14799 enum rtx_code code = GET_CODE (cmp);
14800 rtx op0 = XEXP (cmp, 0);
14801 rtx op1 = XEXP (cmp, 1);
14803 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14804 comp_mode = CCmode;
14805 else if (FLOAT_MODE_P (mode))
14806 comp_mode = CCFPmode;
14807 else if (code == GTU || code == LTU
14808 || code == GEU || code == LEU)
14809 comp_mode = CCUNSmode;
14810 else if ((code == EQ || code == NE)
14811 && unsigned_reg_p (op0)
14812 && (unsigned_reg_p (op1)
14813 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14814 /* These are unsigned values, perhaps there will be a later
14815 ordering compare that can be shared with this one. */
14816 comp_mode = CCUNSmode;
14817 else
14818 comp_mode = CCmode;
14820 /* If we have an unsigned compare, make sure we don't have a signed value as
14821 an immediate. */
14822 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14823 && INTVAL (op1) < 0)
14825 op0 = copy_rtx_if_shared (op0);
14826 op1 = force_reg (GET_MODE (op0), op1);
14827 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14830 /* First, the compare. */
14831 compare_result = gen_reg_rtx (comp_mode);
14833 /* IEEE 128-bit support in VSX registers when we do not have hardware
14834 support. */
14835 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14837 rtx libfunc = NULL_RTX;
14838 bool check_nan = false;
14839 rtx dest;
14841 switch (code)
14843 case EQ:
14844 case NE:
14845 libfunc = optab_libfunc (eq_optab, mode);
14846 break;
14848 case GT:
14849 case GE:
14850 libfunc = optab_libfunc (ge_optab, mode);
14851 break;
14853 case LT:
14854 case LE:
14855 libfunc = optab_libfunc (le_optab, mode);
14856 break;
14858 case UNORDERED:
14859 case ORDERED:
14860 libfunc = optab_libfunc (unord_optab, mode);
14861 code = (code == UNORDERED) ? NE : EQ;
14862 break;
14864 case UNGE:
14865 case UNGT:
14866 check_nan = true;
14867 libfunc = optab_libfunc (ge_optab, mode);
14868 code = (code == UNGE) ? GE : GT;
14869 break;
14871 case UNLE:
14872 case UNLT:
14873 check_nan = true;
14874 libfunc = optab_libfunc (le_optab, mode);
14875 code = (code == UNLE) ? LE : LT;
14876 break;
14878 case UNEQ:
14879 case LTGT:
14880 check_nan = true;
14881 libfunc = optab_libfunc (eq_optab, mode);
14882 code = (code = UNEQ) ? EQ : NE;
14883 break;
14885 default:
14886 gcc_unreachable ();
14889 gcc_assert (libfunc);
14891 if (!check_nan)
14892 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14893 SImode, op0, mode, op1, mode);
14895 /* The library signals an exception for signalling NaNs, so we need to
14896 handle isgreater, etc. by first checking isordered. */
14897 else
14899 rtx ne_rtx, normal_dest, unord_dest;
14900 rtx unord_func = optab_libfunc (unord_optab, mode);
14901 rtx join_label = gen_label_rtx ();
14902 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14903 rtx unord_cmp = gen_reg_rtx (comp_mode);
14906 /* Test for either value being a NaN. */
14907 gcc_assert (unord_func);
14908 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14909 SImode, op0, mode, op1, mode);
14911 /* Set value (0) if either value is a NaN, and jump to the join
14912 label. */
14913 dest = gen_reg_rtx (SImode);
14914 emit_move_insn (dest, const1_rtx);
14915 emit_insn (gen_rtx_SET (unord_cmp,
14916 gen_rtx_COMPARE (comp_mode, unord_dest,
14917 const0_rtx)));
14919 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14920 emit_jump_insn (gen_rtx_SET (pc_rtx,
14921 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14922 join_ref,
14923 pc_rtx)));
14925 /* Do the normal comparison, knowing that the values are not
14926 NaNs. */
14927 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14928 SImode, op0, mode, op1, mode);
14930 emit_insn (gen_cstoresi4 (dest,
14931 gen_rtx_fmt_ee (code, SImode, normal_dest,
14932 const0_rtx),
14933 normal_dest, const0_rtx));
14935 /* Join NaN and non-Nan paths. Compare dest against 0. */
14936 emit_label (join_label);
14937 code = NE;
14940 emit_insn (gen_rtx_SET (compare_result,
14941 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14944 else
14946 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14947 CLOBBERs to match cmptf_internal2 pattern. */
14948 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14949 && FLOAT128_IBM_P (GET_MODE (op0))
14950 && TARGET_HARD_FLOAT)
14951 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14952 gen_rtvec (10,
14953 gen_rtx_SET (compare_result,
14954 gen_rtx_COMPARE (comp_mode, op0, op1)),
14955 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14956 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14957 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14958 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14959 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14960 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14961 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14962 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14963 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14964 else if (GET_CODE (op1) == UNSPEC
14965 && XINT (op1, 1) == UNSPEC_SP_TEST)
14967 rtx op1b = XVECEXP (op1, 0, 0);
14968 comp_mode = CCEQmode;
14969 compare_result = gen_reg_rtx (CCEQmode);
14970 if (TARGET_64BIT)
14971 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14972 else
14973 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14975 else
14976 emit_insn (gen_rtx_SET (compare_result,
14977 gen_rtx_COMPARE (comp_mode, op0, op1)));
14980 validate_condition_mode (code, GET_MODE (compare_result));
14982 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14986 /* Return the diagnostic message string if the binary operation OP is
14987 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14989 static const char*
14990 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14991 const_tree type1,
14992 const_tree type2)
14994 machine_mode mode1 = TYPE_MODE (type1);
14995 machine_mode mode2 = TYPE_MODE (type2);
14997 /* For complex modes, use the inner type. */
14998 if (COMPLEX_MODE_P (mode1))
14999 mode1 = GET_MODE_INNER (mode1);
15001 if (COMPLEX_MODE_P (mode2))
15002 mode2 = GET_MODE_INNER (mode2);
15004 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15005 double to intermix unless -mfloat128-convert. */
15006 if (mode1 == mode2)
15007 return NULL;
15009 if (!TARGET_FLOAT128_CVT)
15011 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15012 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15013 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15014 "point types");
15017 return NULL;
15021 /* Expand floating point conversion to/from __float128 and __ibm128. */
15023 void
15024 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15026 machine_mode dest_mode = GET_MODE (dest);
15027 machine_mode src_mode = GET_MODE (src);
15028 convert_optab cvt = unknown_optab;
15029 bool do_move = false;
15030 rtx libfunc = NULL_RTX;
15031 rtx dest2;
15032 typedef rtx (*rtx_2func_t) (rtx, rtx);
15033 rtx_2func_t hw_convert = (rtx_2func_t)0;
15034 size_t kf_or_tf;
15036 struct hw_conv_t {
15037 rtx_2func_t from_df;
15038 rtx_2func_t from_sf;
15039 rtx_2func_t from_si_sign;
15040 rtx_2func_t from_si_uns;
15041 rtx_2func_t from_di_sign;
15042 rtx_2func_t from_di_uns;
15043 rtx_2func_t to_df;
15044 rtx_2func_t to_sf;
15045 rtx_2func_t to_si_sign;
15046 rtx_2func_t to_si_uns;
15047 rtx_2func_t to_di_sign;
15048 rtx_2func_t to_di_uns;
15049 } hw_conversions[2] = {
15050 /* convertions to/from KFmode */
15052 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15053 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15054 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15055 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15056 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15057 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15058 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15059 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15060 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15061 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15062 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15063 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15066 /* convertions to/from TFmode */
15068 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15069 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15070 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15071 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15072 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15073 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15074 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15075 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15076 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15077 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15078 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15079 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15083 if (dest_mode == src_mode)
15084 gcc_unreachable ();
15086 /* Eliminate memory operations. */
15087 if (MEM_P (src))
15088 src = force_reg (src_mode, src);
15090 if (MEM_P (dest))
15092 rtx tmp = gen_reg_rtx (dest_mode);
15093 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15094 rs6000_emit_move (dest, tmp, dest_mode);
15095 return;
15098 /* Convert to IEEE 128-bit floating point. */
15099 if (FLOAT128_IEEE_P (dest_mode))
15101 if (dest_mode == KFmode)
15102 kf_or_tf = 0;
15103 else if (dest_mode == TFmode)
15104 kf_or_tf = 1;
15105 else
15106 gcc_unreachable ();
15108 switch (src_mode)
15110 case E_DFmode:
15111 cvt = sext_optab;
15112 hw_convert = hw_conversions[kf_or_tf].from_df;
15113 break;
15115 case E_SFmode:
15116 cvt = sext_optab;
15117 hw_convert = hw_conversions[kf_or_tf].from_sf;
15118 break;
15120 case E_KFmode:
15121 case E_IFmode:
15122 case E_TFmode:
15123 if (FLOAT128_IBM_P (src_mode))
15124 cvt = sext_optab;
15125 else
15126 do_move = true;
15127 break;
15129 case E_SImode:
15130 if (unsigned_p)
15132 cvt = ufloat_optab;
15133 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15135 else
15137 cvt = sfloat_optab;
15138 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15140 break;
15142 case E_DImode:
15143 if (unsigned_p)
15145 cvt = ufloat_optab;
15146 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15148 else
15150 cvt = sfloat_optab;
15151 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15153 break;
15155 default:
15156 gcc_unreachable ();
15160 /* Convert from IEEE 128-bit floating point. */
15161 else if (FLOAT128_IEEE_P (src_mode))
15163 if (src_mode == KFmode)
15164 kf_or_tf = 0;
15165 else if (src_mode == TFmode)
15166 kf_or_tf = 1;
15167 else
15168 gcc_unreachable ();
15170 switch (dest_mode)
15172 case E_DFmode:
15173 cvt = trunc_optab;
15174 hw_convert = hw_conversions[kf_or_tf].to_df;
15175 break;
15177 case E_SFmode:
15178 cvt = trunc_optab;
15179 hw_convert = hw_conversions[kf_or_tf].to_sf;
15180 break;
15182 case E_KFmode:
15183 case E_IFmode:
15184 case E_TFmode:
15185 if (FLOAT128_IBM_P (dest_mode))
15186 cvt = trunc_optab;
15187 else
15188 do_move = true;
15189 break;
15191 case E_SImode:
15192 if (unsigned_p)
15194 cvt = ufix_optab;
15195 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15197 else
15199 cvt = sfix_optab;
15200 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15202 break;
15204 case E_DImode:
15205 if (unsigned_p)
15207 cvt = ufix_optab;
15208 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15210 else
15212 cvt = sfix_optab;
15213 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15215 break;
15217 default:
15218 gcc_unreachable ();
15222 /* Both IBM format. */
15223 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15224 do_move = true;
15226 else
15227 gcc_unreachable ();
15229 /* Handle conversion between TFmode/KFmode/IFmode. */
15230 if (do_move)
15231 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15233 /* Handle conversion if we have hardware support. */
15234 else if (TARGET_FLOAT128_HW && hw_convert)
15235 emit_insn ((hw_convert) (dest, src));
15237 /* Call an external function to do the conversion. */
15238 else if (cvt != unknown_optab)
15240 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15241 gcc_assert (libfunc != NULL_RTX);
15243 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15244 src, src_mode);
15246 gcc_assert (dest2 != NULL_RTX);
15247 if (!rtx_equal_p (dest, dest2))
15248 emit_move_insn (dest, dest2);
15251 else
15252 gcc_unreachable ();
15254 return;
15258 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15259 can be used as that dest register. Return the dest register. */
15262 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15264 if (op2 == const0_rtx)
15265 return op1;
15267 if (GET_CODE (scratch) == SCRATCH)
15268 scratch = gen_reg_rtx (mode);
15270 if (logical_operand (op2, mode))
15271 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15272 else
15273 emit_insn (gen_rtx_SET (scratch,
15274 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15276 return scratch;
15279 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15280 requires this. The result is mode MODE. */
15282 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15284 rtx cond[2];
15285 int n = 0;
15286 if (code == LTGT || code == LE || code == UNLT)
15287 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15288 if (code == LTGT || code == GE || code == UNGT)
15289 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15290 if (code == LE || code == GE || code == UNEQ)
15291 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15292 if (code == UNLT || code == UNGT || code == UNEQ)
15293 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15295 gcc_assert (n == 2);
15297 rtx cc = gen_reg_rtx (CCEQmode);
15298 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15299 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15301 return cc;
15304 void
15305 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15307 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15308 rtx_code cond_code = GET_CODE (condition_rtx);
15310 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15311 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15313 else if (cond_code == NE
15314 || cond_code == GE || cond_code == LE
15315 || cond_code == GEU || cond_code == LEU
15316 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15318 rtx not_result = gen_reg_rtx (CCEQmode);
15319 rtx not_op, rev_cond_rtx;
15320 machine_mode cc_mode;
15322 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15324 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15325 SImode, XEXP (condition_rtx, 0), const0_rtx);
15326 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15327 emit_insn (gen_rtx_SET (not_result, not_op));
15328 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15331 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15332 if (op_mode == VOIDmode)
15333 op_mode = GET_MODE (XEXP (operands[1], 1));
15335 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15337 PUT_MODE (condition_rtx, DImode);
15338 convert_move (operands[0], condition_rtx, 0);
15340 else
15342 PUT_MODE (condition_rtx, SImode);
15343 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15347 /* Emit a branch of kind CODE to location LOC. */
15349 void
15350 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15352 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15353 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15354 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15355 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15358 /* Return the string to output a conditional branch to LABEL, which is
15359 the operand template of the label, or NULL if the branch is really a
15360 conditional return.
15362 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15363 condition code register and its mode specifies what kind of
15364 comparison we made.
15366 REVERSED is nonzero if we should reverse the sense of the comparison.
15368 INSN is the insn. */
15370 char *
15371 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15373 static char string[64];
15374 enum rtx_code code = GET_CODE (op);
15375 rtx cc_reg = XEXP (op, 0);
15376 machine_mode mode = GET_MODE (cc_reg);
15377 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15378 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15379 int really_reversed = reversed ^ need_longbranch;
15380 char *s = string;
15381 const char *ccode;
15382 const char *pred;
15383 rtx note;
15385 validate_condition_mode (code, mode);
15387 /* Work out which way this really branches. We could use
15388 reverse_condition_maybe_unordered here always but this
15389 makes the resulting assembler clearer. */
15390 if (really_reversed)
15392 /* Reversal of FP compares takes care -- an ordered compare
15393 becomes an unordered compare and vice versa. */
15394 if (mode == CCFPmode)
15395 code = reverse_condition_maybe_unordered (code);
15396 else
15397 code = reverse_condition (code);
15400 switch (code)
15402 /* Not all of these are actually distinct opcodes, but
15403 we distinguish them for clarity of the resulting assembler. */
15404 case NE: case LTGT:
15405 ccode = "ne"; break;
15406 case EQ: case UNEQ:
15407 ccode = "eq"; break;
15408 case GE: case GEU:
15409 ccode = "ge"; break;
15410 case GT: case GTU: case UNGT:
15411 ccode = "gt"; break;
15412 case LE: case LEU:
15413 ccode = "le"; break;
15414 case LT: case LTU: case UNLT:
15415 ccode = "lt"; break;
15416 case UNORDERED: ccode = "un"; break;
15417 case ORDERED: ccode = "nu"; break;
15418 case UNGE: ccode = "nl"; break;
15419 case UNLE: ccode = "ng"; break;
15420 default:
15421 gcc_unreachable ();
15424 /* Maybe we have a guess as to how likely the branch is. */
15425 pred = "";
15426 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15427 if (note != NULL_RTX)
15429 /* PROB is the difference from 50%. */
15430 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15431 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15433 /* Only hint for highly probable/improbable branches on newer cpus when
15434 we have real profile data, as static prediction overrides processor
15435 dynamic prediction. For older cpus we may as well always hint, but
15436 assume not taken for branches that are very close to 50% as a
15437 mispredicted taken branch is more expensive than a
15438 mispredicted not-taken branch. */
15439 if (rs6000_always_hint
15440 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15441 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15442 && br_prob_note_reliable_p (note)))
15444 if (abs (prob) > REG_BR_PROB_BASE / 20
15445 && ((prob > 0) ^ need_longbranch))
15446 pred = "+";
15447 else
15448 pred = "-";
15452 if (label == NULL)
15453 s += sprintf (s, "b%slr%s ", ccode, pred);
15454 else
15455 s += sprintf (s, "b%s%s ", ccode, pred);
15457 /* We need to escape any '%' characters in the reg_names string.
15458 Assume they'd only be the first character.... */
15459 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15460 *s++ = '%';
15461 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15463 if (label != NULL)
15465 /* If the branch distance was too far, we may have to use an
15466 unconditional branch to go the distance. */
15467 if (need_longbranch)
15468 s += sprintf (s, ",$+8\n\tb %s", label);
15469 else
15470 s += sprintf (s, ",%s", label);
15473 return string;
15476 /* Return insn for VSX or Altivec comparisons. */
15478 static rtx
15479 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15481 rtx mask;
15482 machine_mode mode = GET_MODE (op0);
15484 switch (code)
15486 default:
15487 break;
15489 case GE:
15490 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15491 return NULL_RTX;
15492 /* FALLTHRU */
15494 case EQ:
15495 case GT:
15496 case GTU:
15497 case ORDERED:
15498 case UNORDERED:
15499 case UNEQ:
15500 case LTGT:
15501 mask = gen_reg_rtx (mode);
15502 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15503 return mask;
15506 return NULL_RTX;
15509 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15510 DMODE is expected destination mode. This is a recursive function. */
15512 static rtx
15513 rs6000_emit_vector_compare (enum rtx_code rcode,
15514 rtx op0, rtx op1,
15515 machine_mode dmode)
15517 rtx mask;
15518 bool swap_operands = false;
15519 bool try_again = false;
15521 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15522 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15524 /* See if the comparison works as is. */
15525 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15526 if (mask)
15527 return mask;
15529 switch (rcode)
15531 case LT:
15532 rcode = GT;
15533 swap_operands = true;
15534 try_again = true;
15535 break;
15536 case LTU:
15537 rcode = GTU;
15538 swap_operands = true;
15539 try_again = true;
15540 break;
15541 case NE:
15542 case UNLE:
15543 case UNLT:
15544 case UNGE:
15545 case UNGT:
15546 /* Invert condition and try again.
15547 e.g., A != B becomes ~(A==B). */
15549 enum rtx_code rev_code;
15550 enum insn_code nor_code;
15551 rtx mask2;
15553 rev_code = reverse_condition_maybe_unordered (rcode);
15554 if (rev_code == UNKNOWN)
15555 return NULL_RTX;
15557 nor_code = optab_handler (one_cmpl_optab, dmode);
15558 if (nor_code == CODE_FOR_nothing)
15559 return NULL_RTX;
15561 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15562 if (!mask2)
15563 return NULL_RTX;
15565 mask = gen_reg_rtx (dmode);
15566 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15567 return mask;
15569 break;
15570 case GE:
15571 case GEU:
15572 case LE:
15573 case LEU:
15574 /* Try GT/GTU/LT/LTU OR EQ */
15576 rtx c_rtx, eq_rtx;
15577 enum insn_code ior_code;
15578 enum rtx_code new_code;
15580 switch (rcode)
15582 case GE:
15583 new_code = GT;
15584 break;
15586 case GEU:
15587 new_code = GTU;
15588 break;
15590 case LE:
15591 new_code = LT;
15592 break;
15594 case LEU:
15595 new_code = LTU;
15596 break;
15598 default:
15599 gcc_unreachable ();
15602 ior_code = optab_handler (ior_optab, dmode);
15603 if (ior_code == CODE_FOR_nothing)
15604 return NULL_RTX;
15606 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15607 if (!c_rtx)
15608 return NULL_RTX;
15610 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15611 if (!eq_rtx)
15612 return NULL_RTX;
15614 mask = gen_reg_rtx (dmode);
15615 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15616 return mask;
15618 break;
15619 default:
15620 return NULL_RTX;
15623 if (try_again)
15625 if (swap_operands)
15626 std::swap (op0, op1);
15628 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15629 if (mask)
15630 return mask;
15633 /* You only get two chances. */
15634 return NULL_RTX;
15637 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15638 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15639 operands for the relation operation COND. */
15642 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15643 rtx cond, rtx cc_op0, rtx cc_op1)
15645 machine_mode dest_mode = GET_MODE (dest);
15646 machine_mode mask_mode = GET_MODE (cc_op0);
15647 enum rtx_code rcode = GET_CODE (cond);
15648 rtx mask;
15649 bool invert_move = false;
15651 if (VECTOR_UNIT_NONE_P (dest_mode))
15652 return 0;
15654 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15655 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15657 switch (rcode)
15659 /* Swap operands if we can, and fall back to doing the operation as
15660 specified, and doing a NOR to invert the test. */
15661 case NE:
15662 case UNLE:
15663 case UNLT:
15664 case UNGE:
15665 case UNGT:
15666 /* Invert condition and try again.
15667 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15668 invert_move = true;
15669 rcode = reverse_condition_maybe_unordered (rcode);
15670 if (rcode == UNKNOWN)
15671 return 0;
15672 break;
15674 case GE:
15675 case LE:
15676 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15678 /* Invert condition to avoid compound test. */
15679 invert_move = true;
15680 rcode = reverse_condition (rcode);
15682 break;
15684 case GTU:
15685 case GEU:
15686 case LTU:
15687 case LEU:
15689 /* Invert condition to avoid compound test if necessary. */
15690 if (rcode == GEU || rcode == LEU)
15692 invert_move = true;
15693 rcode = reverse_condition (rcode);
15695 break;
15697 default:
15698 break;
15701 /* Get the vector mask for the given relational operations. */
15702 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15704 if (!mask)
15705 return 0;
15707 if (mask_mode != dest_mode)
15708 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
15710 if (invert_move)
15711 std::swap (op_true, op_false);
15713 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
15714 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15715 && (GET_CODE (op_true) == CONST_VECTOR
15716 || GET_CODE (op_false) == CONST_VECTOR))
15718 rtx constant_0 = CONST0_RTX (dest_mode);
15719 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15721 if (op_true == constant_m1 && op_false == constant_0)
15723 emit_move_insn (dest, mask);
15724 return 1;
15727 else if (op_true == constant_0 && op_false == constant_m1)
15729 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15730 return 1;
15733 /* If we can't use the vector comparison directly, perhaps we can use
15734 the mask for the true or false fields, instead of loading up a
15735 constant. */
15736 if (op_true == constant_m1)
15737 op_true = mask;
15739 if (op_false == constant_0)
15740 op_false = mask;
15743 if (!REG_P (op_true) && !SUBREG_P (op_true))
15744 op_true = force_reg (dest_mode, op_true);
15746 if (!REG_P (op_false) && !SUBREG_P (op_false))
15747 op_false = force_reg (dest_mode, op_false);
15749 rtx tmp = gen_rtx_IOR (dest_mode,
15750 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
15751 op_false),
15752 gen_rtx_AND (dest_mode, mask, op_true));
15753 emit_insn (gen_rtx_SET (dest, tmp));
15754 return 1;
15757 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
15758 maximum or minimum with "C" semantics.
15760 Unless you use -ffast-math, you can't use these instructions to replace
15761 conditions that implicitly reverse the condition because the comparison
15762 might generate a NaN or signed zer0.
15764 I.e. the following can be replaced all of the time
15765 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15766 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15767 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15768 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15770 The following can be replaced only if -ffast-math is used:
15771 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15772 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15773 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15774 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15776 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15777 nonzero/true, FALSE_COND if it is zero/false.
15779 Return false if we can't generate the appropriate minimum or maximum, and
15780 true if we can did the minimum or maximum. */
15782 static bool
15783 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15785 enum rtx_code code = GET_CODE (op);
15786 rtx op0 = XEXP (op, 0);
15787 rtx op1 = XEXP (op, 1);
15788 machine_mode compare_mode = GET_MODE (op0);
15789 machine_mode result_mode = GET_MODE (dest);
15791 if (result_mode != compare_mode)
15792 return false;
15794 /* See the comments of this function, it simply expects GE/GT/LE/LT in
15795 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
15796 we need to do the reversions first to make the following checks
15797 support fewer cases, like:
15799 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
15800 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
15801 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
15802 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
15804 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
15805 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
15806 have to check for fast-math or the like. */
15807 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
15809 code = reverse_condition_maybe_unordered (code);
15810 std::swap (true_cond, false_cond);
15813 bool max_p;
15814 if (code == GE || code == GT)
15815 max_p = true;
15816 else if (code == LE || code == LT)
15817 max_p = false;
15818 else
15819 return false;
15821 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15824 /* Only when NaNs and signed-zeros are not in effect, smax could be
15825 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15826 `op0 > op1 ? op1 : op0`. */
15827 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15828 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15829 max_p = !max_p;
15831 else
15832 return false;
15834 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15835 return true;
15838 /* Possibly emit a floating point conditional move by generating a compare that
15839 sets a mask instruction and a XXSEL select instruction.
15841 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15842 nonzero/true, FALSE_COND if it is zero/false.
15844 Return false if the operation cannot be generated, and true if we could
15845 generate the instruction. */
15847 static bool
15848 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15850 enum rtx_code code = GET_CODE (op);
15851 rtx op0 = XEXP (op, 0);
15852 rtx op1 = XEXP (op, 1);
15853 machine_mode compare_mode = GET_MODE (op0);
15854 machine_mode result_mode = GET_MODE (dest);
15855 rtx compare_rtx;
15856 rtx cmove_rtx;
15857 rtx clobber_rtx;
15859 if (!can_create_pseudo_p ())
15860 return 0;
15862 /* We allow the comparison to be either SFmode/DFmode and the true/false
15863 condition to be either SFmode/DFmode. I.e. we allow:
15865 float a, b;
15866 double c, d, r;
15868 r = (a == b) ? c : d;
15870 and:
15872 double a, b;
15873 float c, d, r;
15875 r = (a == b) ? c : d;
15877 but we don't allow intermixing the IEEE 128-bit floating point types with
15878 the 32/64-bit scalar types. */
15880 if (!(compare_mode == result_mode
15881 || (compare_mode == SFmode && result_mode == DFmode)
15882 || (compare_mode == DFmode && result_mode == SFmode)))
15883 return false;
15885 switch (code)
15887 case EQ:
15888 case GE:
15889 case GT:
15890 break;
15892 case NE:
15893 case LT:
15894 case LE:
15895 code = swap_condition (code);
15896 std::swap (op0, op1);
15897 break;
15899 default:
15900 return false;
15903 /* Generate: [(parallel [(set (dest)
15904 (if_then_else (op (cmp1) (cmp2))
15905 (true)
15906 (false)))
15907 (clobber (scratch))])]. */
15909 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15910 cmove_rtx = gen_rtx_SET (dest,
15911 gen_rtx_IF_THEN_ELSE (result_mode,
15912 compare_rtx,
15913 true_cond,
15914 false_cond));
15916 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15917 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15918 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15920 return true;
15923 /* Helper function to return true if the target has instructions to do a
15924 compare and set mask instruction that can be used with XXSEL to implement a
15925 conditional move. It is also assumed that such a target also supports the
15926 "C" minimum and maximum instructions. */
15928 static bool
15929 have_compare_and_set_mask (machine_mode mode)
15931 switch (mode)
15933 case E_SFmode:
15934 case E_DFmode:
15935 return TARGET_P9_MINMAX;
15937 case E_KFmode:
15938 case E_TFmode:
15939 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
15941 default:
15942 break;
15945 return false;
15948 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15949 operands of the last comparison is nonzero/true, FALSE_COND if it
15950 is zero/false. Return 0 if the hardware has no such operation. */
15952 bool
15953 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15955 enum rtx_code code = GET_CODE (op);
15956 rtx op0 = XEXP (op, 0);
15957 rtx op1 = XEXP (op, 1);
15958 machine_mode compare_mode = GET_MODE (op0);
15959 machine_mode result_mode = GET_MODE (dest);
15960 rtx temp;
15961 bool is_against_zero;
15963 /* These modes should always match. */
15964 if (GET_MODE (op1) != compare_mode
15965 /* In the isel case however, we can use a compare immediate, so
15966 op1 may be a small constant. */
15967 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15968 return false;
15969 if (GET_MODE (true_cond) != result_mode)
15970 return false;
15971 if (GET_MODE (false_cond) != result_mode)
15972 return false;
15974 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15975 instructions. */
15976 if (have_compare_and_set_mask (compare_mode)
15977 && have_compare_and_set_mask (result_mode))
15979 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15980 return true;
15982 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15983 return true;
15986 /* Don't allow using floating point comparisons for integer results for
15987 now. */
15988 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15989 return false;
15991 /* First, work out if the hardware can do this at all, or
15992 if it's too slow.... */
15993 if (!FLOAT_MODE_P (compare_mode))
15995 if (TARGET_ISEL)
15996 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15997 return false;
16000 is_against_zero = op1 == CONST0_RTX (compare_mode);
16002 /* A floating-point subtract might overflow, underflow, or produce
16003 an inexact result, thus changing the floating-point flags, so it
16004 can't be generated if we care about that. It's safe if one side
16005 of the construct is zero, since then no subtract will be
16006 generated. */
16007 if (SCALAR_FLOAT_MODE_P (compare_mode)
16008 && flag_trapping_math && ! is_against_zero)
16009 return false;
16011 /* Eliminate half of the comparisons by switching operands, this
16012 makes the remaining code simpler. */
16013 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16014 || code == LTGT || code == LT || code == UNLE)
16016 code = reverse_condition_maybe_unordered (code);
16017 temp = true_cond;
16018 true_cond = false_cond;
16019 false_cond = temp;
16022 /* UNEQ and LTGT take four instructions for a comparison with zero,
16023 it'll probably be faster to use a branch here too. */
16024 if (code == UNEQ && HONOR_NANS (compare_mode))
16025 return false;
16027 /* We're going to try to implement comparisons by performing
16028 a subtract, then comparing against zero. Unfortunately,
16029 Inf - Inf is NaN which is not zero, and so if we don't
16030 know that the operand is finite and the comparison
16031 would treat EQ different to UNORDERED, we can't do it. */
16032 if (HONOR_INFINITIES (compare_mode)
16033 && code != GT && code != UNGE
16034 && (!CONST_DOUBLE_P (op1)
16035 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16036 /* Constructs of the form (a OP b ? a : b) are safe. */
16037 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16038 || (! rtx_equal_p (op0, true_cond)
16039 && ! rtx_equal_p (op1, true_cond))))
16040 return false;
16042 /* At this point we know we can use fsel. */
16044 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16045 is no fsel instruction. */
16046 if (compare_mode != SFmode && compare_mode != DFmode)
16047 return false;
16049 /* Reduce the comparison to a comparison against zero. */
16050 if (! is_against_zero)
16052 temp = gen_reg_rtx (compare_mode);
16053 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16054 op0 = temp;
16055 op1 = CONST0_RTX (compare_mode);
16058 /* If we don't care about NaNs we can reduce some of the comparisons
16059 down to faster ones. */
16060 if (! HONOR_NANS (compare_mode))
16061 switch (code)
16063 case GT:
16064 code = LE;
16065 temp = true_cond;
16066 true_cond = false_cond;
16067 false_cond = temp;
16068 break;
16069 case UNGE:
16070 code = GE;
16071 break;
16072 case UNEQ:
16073 code = EQ;
16074 break;
16075 default:
16076 break;
16079 /* Now, reduce everything down to a GE. */
16080 switch (code)
16082 case GE:
16083 break;
16085 case LE:
16086 temp = gen_reg_rtx (compare_mode);
16087 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16088 op0 = temp;
16089 break;
16091 case ORDERED:
16092 temp = gen_reg_rtx (compare_mode);
16093 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16094 op0 = temp;
16095 break;
16097 case EQ:
16098 temp = gen_reg_rtx (compare_mode);
16099 emit_insn (gen_rtx_SET (temp,
16100 gen_rtx_NEG (compare_mode,
16101 gen_rtx_ABS (compare_mode, op0))));
16102 op0 = temp;
16103 break;
16105 case UNGE:
16106 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16107 temp = gen_reg_rtx (result_mode);
16108 emit_insn (gen_rtx_SET (temp,
16109 gen_rtx_IF_THEN_ELSE (result_mode,
16110 gen_rtx_GE (VOIDmode,
16111 op0, op1),
16112 true_cond, false_cond)));
16113 false_cond = true_cond;
16114 true_cond = temp;
16116 temp = gen_reg_rtx (compare_mode);
16117 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16118 op0 = temp;
16119 break;
16121 case GT:
16122 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16123 temp = gen_reg_rtx (result_mode);
16124 emit_insn (gen_rtx_SET (temp,
16125 gen_rtx_IF_THEN_ELSE (result_mode,
16126 gen_rtx_GE (VOIDmode,
16127 op0, op1),
16128 true_cond, false_cond)));
16129 true_cond = false_cond;
16130 false_cond = temp;
16132 temp = gen_reg_rtx (compare_mode);
16133 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16134 op0 = temp;
16135 break;
16137 default:
16138 gcc_unreachable ();
16141 emit_insn (gen_rtx_SET (dest,
16142 gen_rtx_IF_THEN_ELSE (result_mode,
16143 gen_rtx_GE (VOIDmode,
16144 op0, op1),
16145 true_cond, false_cond)));
16146 return true;
16149 /* Same as above, but for ints (isel). */
16151 bool
16152 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16154 rtx condition_rtx, cr;
16155 machine_mode mode = GET_MODE (dest);
16156 enum rtx_code cond_code;
16157 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16158 bool signedp;
16160 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16161 return false;
16163 /* PR104335: We now need to expect CC-mode "comparisons"
16164 coming from ifcvt. The following code expects proper
16165 comparisons so better abort here. */
16166 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16167 return false;
16169 /* We still have to do the compare, because isel doesn't do a
16170 compare, it just looks at the CRx bits set by a previous compare
16171 instruction. */
16172 condition_rtx = rs6000_generate_compare (op, mode);
16173 cond_code = GET_CODE (condition_rtx);
16174 cr = XEXP (condition_rtx, 0);
16175 signedp = GET_MODE (cr) == CCmode;
16177 isel_func = (mode == SImode
16178 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16179 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16181 switch (cond_code)
16183 case LT: case GT: case LTU: case GTU: case EQ:
16184 /* isel handles these directly. */
16185 break;
16187 default:
16188 /* We need to swap the sense of the comparison. */
16190 std::swap (false_cond, true_cond);
16191 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16193 break;
16196 false_cond = force_reg (mode, false_cond);
16197 if (true_cond != const0_rtx)
16198 true_cond = force_reg (mode, true_cond);
16200 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16202 return true;
16205 void
16206 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16208 machine_mode mode = GET_MODE (op0);
16209 enum rtx_code c;
16210 rtx target;
16212 /* VSX/altivec have direct min/max insns. */
16213 if ((code == SMAX || code == SMIN)
16214 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16215 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16216 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16218 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16219 return;
16222 if (code == SMAX || code == SMIN)
16223 c = GE;
16224 else
16225 c = GEU;
16227 if (code == SMAX || code == UMAX)
16228 target = emit_conditional_move (dest, { c, op0, op1, mode },
16229 op0, op1, mode, 0);
16230 else
16231 target = emit_conditional_move (dest, { c, op0, op1, mode },
16232 op1, op0, mode, 0);
16233 gcc_assert (target);
16234 if (target != dest)
16235 emit_move_insn (dest, target);
16238 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16239 COND is true. Mark the jump as unlikely to be taken. */
16241 static void
16242 emit_unlikely_jump (rtx cond, rtx label)
16244 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16245 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16246 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16249 /* A subroutine of the atomic operation splitters. Emit a load-locked
16250 instruction in MODE. For QI/HImode, possibly use a pattern than includes
16251 the zero_extend operation. */
16253 static void
16254 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16256 rtx (*fn) (rtx, rtx) = NULL;
16258 switch (mode)
16260 case E_QImode:
16261 fn = gen_load_lockedqi;
16262 break;
16263 case E_HImode:
16264 fn = gen_load_lockedhi;
16265 break;
16266 case E_SImode:
16267 if (GET_MODE (mem) == QImode)
16268 fn = gen_load_lockedqi_si;
16269 else if (GET_MODE (mem) == HImode)
16270 fn = gen_load_lockedhi_si;
16271 else
16272 fn = gen_load_lockedsi;
16273 break;
16274 case E_DImode:
16275 fn = gen_load_lockeddi;
16276 break;
16277 case E_TImode:
16278 fn = gen_load_lockedti;
16279 break;
16280 default:
16281 gcc_unreachable ();
16283 emit_insn (fn (reg, mem));
16286 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16287 instruction in MODE. */
16289 static void
16290 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16292 rtx (*fn) (rtx, rtx, rtx) = NULL;
16294 switch (mode)
16296 case E_QImode:
16297 fn = gen_store_conditionalqi;
16298 break;
16299 case E_HImode:
16300 fn = gen_store_conditionalhi;
16301 break;
16302 case E_SImode:
16303 fn = gen_store_conditionalsi;
16304 break;
16305 case E_DImode:
16306 fn = gen_store_conditionaldi;
16307 break;
16308 case E_TImode:
16309 fn = gen_store_conditionalti;
16310 break;
16311 default:
16312 gcc_unreachable ();
16315 /* Emit sync before stwcx. to address PPC405 Erratum. */
16316 if (PPC405_ERRATUM77)
16317 emit_insn (gen_hwsync ());
16319 emit_insn (fn (res, mem, val));
16322 /* Expand barriers before and after a load_locked/store_cond sequence. */
16324 static rtx
16325 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16327 rtx addr = XEXP (mem, 0);
16329 if (!legitimate_indirect_address_p (addr, reload_completed)
16330 && !legitimate_indexed_address_p (addr, reload_completed))
16332 addr = force_reg (Pmode, addr);
16333 mem = replace_equiv_address_nv (mem, addr);
16336 switch (model)
16338 case MEMMODEL_RELAXED:
16339 case MEMMODEL_CONSUME:
16340 case MEMMODEL_ACQUIRE:
16341 break;
16342 case MEMMODEL_RELEASE:
16343 case MEMMODEL_ACQ_REL:
16344 emit_insn (gen_lwsync ());
16345 break;
16346 case MEMMODEL_SEQ_CST:
16347 emit_insn (gen_hwsync ());
16348 break;
16349 default:
16350 gcc_unreachable ();
16352 return mem;
16355 static void
16356 rs6000_post_atomic_barrier (enum memmodel model)
16358 switch (model)
16360 case MEMMODEL_RELAXED:
16361 case MEMMODEL_CONSUME:
16362 case MEMMODEL_RELEASE:
16363 break;
16364 case MEMMODEL_ACQUIRE:
16365 case MEMMODEL_ACQ_REL:
16366 case MEMMODEL_SEQ_CST:
16367 emit_insn (gen_isync ());
16368 break;
16369 default:
16370 gcc_unreachable ();
16374 /* A subroutine of the various atomic expanders. For sub-word operations,
16375 we must adjust things to operate on SImode. Given the original MEM,
16376 return a new aligned memory. Also build and return the quantities by
16377 which to shift and mask. */
16379 static rtx
16380 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16382 rtx addr, align, shift, mask, mem;
16383 HOST_WIDE_INT shift_mask;
16384 machine_mode mode = GET_MODE (orig_mem);
16386 /* For smaller modes, we have to implement this via SImode. */
16387 shift_mask = (mode == QImode ? 0x18 : 0x10);
16389 addr = XEXP (orig_mem, 0);
16390 addr = force_reg (GET_MODE (addr), addr);
16392 /* Aligned memory containing subword. Generate a new memory. We
16393 do not want any of the existing MEM_ATTR data, as we're now
16394 accessing memory outside the original object. */
16395 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16396 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16397 mem = gen_rtx_MEM (SImode, align);
16398 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16399 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16400 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16402 /* Shift amount for subword relative to aligned word. */
16403 shift = gen_reg_rtx (SImode);
16404 addr = gen_lowpart (SImode, addr);
16405 rtx tmp = gen_reg_rtx (SImode);
16406 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16407 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16408 if (BYTES_BIG_ENDIAN)
16409 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16410 shift, 1, OPTAB_LIB_WIDEN);
16411 *pshift = shift;
16413 /* Mask for insertion. */
16414 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16415 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16416 *pmask = mask;
16418 return mem;
16421 /* A subroutine of the various atomic expanders. For sub-word operands,
16422 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
16424 static rtx
16425 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16427 rtx x;
16429 x = gen_reg_rtx (SImode);
16430 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16431 gen_rtx_NOT (SImode, mask),
16432 oldval)));
16434 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16436 return x;
16439 /* A subroutine of the various atomic expanders. For sub-word operands,
16440 extract WIDE to NARROW via SHIFT. */
16442 static void
16443 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16445 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16446 wide, 1, OPTAB_LIB_WIDEN);
16447 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16450 /* Expand an atomic compare and swap operation. */
16452 void
16453 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16455 rtx boolval, retval, mem, oldval, newval, cond;
16456 rtx label1, label2, x, mask, shift;
16457 machine_mode mode, orig_mode;
16458 enum memmodel mod_s, mod_f;
16459 bool is_weak;
16461 boolval = operands[0];
16462 retval = operands[1];
16463 mem = operands[2];
16464 oldval = operands[3];
16465 newval = operands[4];
16466 is_weak = (INTVAL (operands[5]) != 0);
16467 mod_s = memmodel_base (INTVAL (operands[6]));
16468 mod_f = memmodel_base (INTVAL (operands[7]));
16469 orig_mode = mode = GET_MODE (mem);
16471 mask = shift = NULL_RTX;
16472 if (mode == QImode || mode == HImode)
16474 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16475 lwarx and shift/mask operations. With power8, we need to do the
16476 comparison in SImode, but the store is still done in QI/HImode. */
16477 oldval = convert_modes (SImode, mode, oldval, 1);
16479 if (!TARGET_SYNC_HI_QI)
16481 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16483 /* Shift and mask OLDVAL into position with the word. */
16484 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16485 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16487 /* Shift and mask NEWVAL into position within the word. */
16488 newval = convert_modes (SImode, mode, newval, 1);
16489 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16490 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16493 /* Prepare to adjust the return value. */
16494 retval = gen_reg_rtx (SImode);
16495 mode = SImode;
16497 else if (reg_overlap_mentioned_p (retval, oldval))
16498 oldval = copy_to_reg (oldval);
16500 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16501 oldval = copy_to_mode_reg (mode, oldval);
16503 if (reg_overlap_mentioned_p (retval, newval))
16504 newval = copy_to_reg (newval);
16506 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16508 label1 = NULL_RTX;
16509 if (!is_weak)
16511 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16512 emit_label (XEXP (label1, 0));
16514 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16516 emit_load_locked (mode, retval, mem);
16518 x = retval;
16519 if (mask)
16520 x = expand_simple_binop (SImode, AND, retval, mask,
16521 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16523 cond = gen_reg_rtx (CCmode);
16524 /* If we have TImode, synthesize a comparison. */
16525 if (mode != TImode)
16526 x = gen_rtx_COMPARE (CCmode, x, oldval);
16527 else
16529 rtx xor1_result = gen_reg_rtx (DImode);
16530 rtx xor2_result = gen_reg_rtx (DImode);
16531 rtx or_result = gen_reg_rtx (DImode);
16532 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16533 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16534 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16535 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16537 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16538 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16539 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16540 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16543 emit_insn (gen_rtx_SET (cond, x));
16545 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16546 emit_unlikely_jump (x, label2);
16548 x = newval;
16549 if (mask)
16550 x = rs6000_mask_atomic_subword (retval, newval, mask);
16552 emit_store_conditional (orig_mode, cond, mem, x);
16554 if (!is_weak)
16556 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16557 emit_unlikely_jump (x, label1);
16560 if (!is_mm_relaxed (mod_f))
16561 emit_label (XEXP (label2, 0));
16563 rs6000_post_atomic_barrier (mod_s);
16565 if (is_mm_relaxed (mod_f))
16566 emit_label (XEXP (label2, 0));
16568 if (shift)
16569 rs6000_finish_atomic_subword (operands[1], retval, shift);
16570 else if (mode != GET_MODE (operands[1]))
16571 convert_move (operands[1], retval, 1);
16573 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16574 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16575 emit_insn (gen_rtx_SET (boolval, x));
16578 /* Expand an atomic exchange operation. */
16580 void
16581 rs6000_expand_atomic_exchange (rtx operands[])
16583 rtx retval, mem, val, cond;
16584 machine_mode mode;
16585 enum memmodel model;
16586 rtx label, x, mask, shift;
16588 retval = operands[0];
16589 mem = operands[1];
16590 val = operands[2];
16591 model = memmodel_base (INTVAL (operands[3]));
16592 mode = GET_MODE (mem);
16594 mask = shift = NULL_RTX;
16595 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16597 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16599 /* Shift and mask VAL into position with the word. */
16600 val = convert_modes (SImode, mode, val, 1);
16601 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16602 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16604 /* Prepare to adjust the return value. */
16605 retval = gen_reg_rtx (SImode);
16606 mode = SImode;
16609 mem = rs6000_pre_atomic_barrier (mem, model);
16611 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16612 emit_label (XEXP (label, 0));
16614 emit_load_locked (mode, retval, mem);
16616 x = val;
16617 if (mask)
16618 x = rs6000_mask_atomic_subword (retval, val, mask);
16620 cond = gen_reg_rtx (CCmode);
16621 emit_store_conditional (mode, cond, mem, x);
16623 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16624 emit_unlikely_jump (x, label);
16626 rs6000_post_atomic_barrier (model);
16628 if (shift)
16629 rs6000_finish_atomic_subword (operands[0], retval, shift);
16632 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16633 to perform. MEM is the memory on which to operate. VAL is the second
16634 operand of the binary operator. BEFORE and AFTER are optional locations to
16635 return the value of MEM either before of after the operation. MODEL_RTX
16636 is a CONST_INT containing the memory model to use. */
16638 void
16639 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16640 rtx orig_before, rtx orig_after, rtx model_rtx)
16642 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16643 machine_mode mode = GET_MODE (mem);
16644 machine_mode store_mode = mode;
16645 rtx label, x, cond, mask, shift;
16646 rtx before = orig_before, after = orig_after;
16648 mask = shift = NULL_RTX;
16649 /* On power8, we want to use SImode for the operation. On previous systems,
16650 use the operation in a subword and shift/mask to get the proper byte or
16651 halfword. */
16652 if (mode == QImode || mode == HImode)
16654 if (TARGET_SYNC_HI_QI)
16656 val = convert_modes (SImode, mode, val, 1);
16658 /* Prepare to adjust the return value. */
16659 before = gen_reg_rtx (SImode);
16660 if (after)
16661 after = gen_reg_rtx (SImode);
16662 mode = SImode;
16664 else
16666 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16668 /* Shift and mask VAL into position with the word. */
16669 val = convert_modes (SImode, mode, val, 1);
16670 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16673 switch (code)
16675 case IOR:
16676 case XOR:
16677 /* We've already zero-extended VAL. That is sufficient to
16678 make certain that it does not affect other bits. */
16679 mask = NULL;
16680 break;
16682 case AND:
16683 /* If we make certain that all of the other bits in VAL are
16684 set, that will be sufficient to not affect other bits. */
16685 x = gen_rtx_NOT (SImode, mask);
16686 x = gen_rtx_IOR (SImode, x, val);
16687 emit_insn (gen_rtx_SET (val, x));
16688 mask = NULL;
16689 break;
16691 case NOT:
16692 case PLUS:
16693 case MINUS:
16694 /* These will all affect bits outside the field and need
16695 adjustment via MASK within the loop. */
16696 break;
16698 default:
16699 gcc_unreachable ();
16702 /* Prepare to adjust the return value. */
16703 before = gen_reg_rtx (SImode);
16704 if (after)
16705 after = gen_reg_rtx (SImode);
16706 store_mode = mode = SImode;
16710 mem = rs6000_pre_atomic_barrier (mem, model);
16712 label = gen_label_rtx ();
16713 emit_label (label);
16714 label = gen_rtx_LABEL_REF (VOIDmode, label);
16716 if (before == NULL_RTX)
16717 before = gen_reg_rtx (mode);
16719 emit_load_locked (mode, before, mem);
16721 if (code == NOT)
16723 x = expand_simple_binop (mode, AND, before, val,
16724 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16725 after = expand_simple_unop (mode, NOT, x, after, 1);
16727 else
16729 after = expand_simple_binop (mode, code, before, val,
16730 after, 1, OPTAB_LIB_WIDEN);
16733 x = after;
16734 if (mask)
16736 x = expand_simple_binop (SImode, AND, after, mask,
16737 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16738 x = rs6000_mask_atomic_subword (before, x, mask);
16740 else if (store_mode != mode)
16741 x = convert_modes (store_mode, mode, x, 1);
16743 cond = gen_reg_rtx (CCmode);
16744 emit_store_conditional (store_mode, cond, mem, x);
16746 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16747 emit_unlikely_jump (x, label);
16749 rs6000_post_atomic_barrier (model);
16751 if (shift)
16753 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16754 then do the calcuations in a SImode register. */
16755 if (orig_before)
16756 rs6000_finish_atomic_subword (orig_before, before, shift);
16757 if (orig_after)
16758 rs6000_finish_atomic_subword (orig_after, after, shift);
16760 else if (store_mode != mode)
16762 /* QImode/HImode on machines with lbarx/lharx where we do the native
16763 operation and then do the calcuations in a SImode register. */
16764 if (orig_before)
16765 convert_move (orig_before, before, 1);
16766 if (orig_after)
16767 convert_move (orig_after, after, 1);
16769 else if (orig_after && after != orig_after)
16770 emit_move_insn (orig_after, after);
16773 static GTY(()) alias_set_type TOC_alias_set = -1;
16775 alias_set_type
16776 get_TOC_alias_set (void)
16778 if (TOC_alias_set == -1)
16779 TOC_alias_set = new_alias_set ();
16780 return TOC_alias_set;
16783 /* The mode the ABI uses for a word. This is not the same as word_mode
16784 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16786 static scalar_int_mode
16787 rs6000_abi_word_mode (void)
16789 return TARGET_32BIT ? SImode : DImode;
16792 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16793 static char *
16794 rs6000_offload_options (void)
16796 if (TARGET_64BIT)
16797 return xstrdup ("-foffload-abi=lp64");
16798 else
16799 return xstrdup ("-foffload-abi=ilp32");
16803 /* A quick summary of the various types of 'constant-pool tables'
16804 under PowerPC:
16806 Target Flags Name One table per
16807 AIX (none) AIX TOC object file
16808 AIX -mfull-toc AIX TOC object file
16809 AIX -mminimal-toc AIX minimal TOC translation unit
16810 SVR4/EABI (none) SVR4 SDATA object file
16811 SVR4/EABI -fpic SVR4 pic object file
16812 SVR4/EABI -fPIC SVR4 PIC translation unit
16813 SVR4/EABI -mrelocatable EABI TOC function
16814 SVR4/EABI -maix AIX TOC object file
16815 SVR4/EABI -maix -mminimal-toc
16816 AIX minimal TOC translation unit
16818 Name Reg. Set by entries contains:
16819 made by addrs? fp? sum?
16821 AIX TOC 2 crt0 as Y option option
16822 AIX minimal TOC 30 prolog gcc Y Y option
16823 SVR4 SDATA 13 crt0 gcc N Y N
16824 SVR4 pic 30 prolog ld Y not yet N
16825 SVR4 PIC 30 prolog gcc Y option option
16826 EABI TOC 30 prolog gcc Y option option
16830 /* Hash functions for the hash table. */
16832 static unsigned
16833 rs6000_hash_constant (rtx k)
16835 enum rtx_code code = GET_CODE (k);
16836 machine_mode mode = GET_MODE (k);
16837 unsigned result = (code << 3) ^ mode;
16838 const char *format;
16839 int flen, fidx;
16841 format = GET_RTX_FORMAT (code);
16842 flen = strlen (format);
16843 fidx = 0;
16845 switch (code)
16847 case LABEL_REF:
16848 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16850 case CONST_WIDE_INT:
16852 int i;
16853 flen = CONST_WIDE_INT_NUNITS (k);
16854 for (i = 0; i < flen; i++)
16855 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16856 return result;
16859 case CONST_DOUBLE:
16860 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16862 case CODE_LABEL:
16863 fidx = 3;
16864 break;
16866 default:
16867 break;
16870 for (; fidx < flen; fidx++)
16871 switch (format[fidx])
16873 case 's':
16875 unsigned i, len;
16876 const char *str = XSTR (k, fidx);
16877 len = strlen (str);
16878 result = result * 613 + len;
16879 for (i = 0; i < len; i++)
16880 result = result * 613 + (unsigned) str[i];
16881 break;
16883 case 'u':
16884 case 'e':
16885 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16886 break;
16887 case 'i':
16888 case 'n':
16889 result = result * 613 + (unsigned) XINT (k, fidx);
16890 break;
16891 case 'w':
16892 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16893 result = result * 613 + (unsigned) XWINT (k, fidx);
16894 else
16896 size_t i;
16897 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16898 result = result * 613 + (unsigned) (XWINT (k, fidx)
16899 >> CHAR_BIT * i);
16901 break;
16902 case '0':
16903 break;
16904 default:
16905 gcc_unreachable ();
16908 return result;
16911 hashval_t
16912 toc_hasher::hash (toc_hash_struct *thc)
16914 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16917 /* Compare H1 and H2 for equivalence. */
16919 bool
16920 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16922 rtx r1 = h1->key;
16923 rtx r2 = h2->key;
16925 if (h1->key_mode != h2->key_mode)
16926 return 0;
16928 return rtx_equal_p (r1, r2);
16931 /* These are the names given by the C++ front-end to vtables, and
16932 vtable-like objects. Ideally, this logic should not be here;
16933 instead, there should be some programmatic way of inquiring as
16934 to whether or not an object is a vtable. */
16936 #define VTABLE_NAME_P(NAME) \
16937 (startswith (name, "_vt.") \
16938 || startswith (name, "_ZTV") \
16939 || startswith (name, "_ZTT") \
16940 || startswith (name, "_ZTI") \
16941 || startswith (name, "_ZTC"))
16943 #ifdef NO_DOLLAR_IN_LABEL
16944 /* Return a GGC-allocated character string translating dollar signs in
16945 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16947 const char *
16948 rs6000_xcoff_strip_dollar (const char *name)
16950 char *strip, *p;
16951 const char *q;
16952 size_t len;
16954 q = (const char *) strchr (name, '$');
16956 if (q == 0 || q == name)
16957 return name;
16959 len = strlen (name);
16960 strip = XALLOCAVEC (char, len + 1);
16961 strcpy (strip, name);
16962 p = strip + (q - name);
16963 while (p)
16965 *p = '_';
16966 p = strchr (p + 1, '$');
16969 return ggc_alloc_string (strip, len);
16971 #endif
16973 void
16974 rs6000_output_symbol_ref (FILE *file, rtx x)
16976 const char *name = XSTR (x, 0);
16978 /* Currently C++ toc references to vtables can be emitted before it
16979 is decided whether the vtable is public or private. If this is
16980 the case, then the linker will eventually complain that there is
16981 a reference to an unknown section. Thus, for vtables only,
16982 we emit the TOC reference to reference the identifier and not the
16983 symbol. */
16984 if (VTABLE_NAME_P (name))
16986 RS6000_OUTPUT_BASENAME (file, name);
16988 else
16989 assemble_name (file, name);
16992 /* Output a TOC entry. We derive the entry name from what is being
16993 written. */
16995 void
16996 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16998 char buf[256];
16999 const char *name = buf;
17000 rtx base = x;
17001 HOST_WIDE_INT offset = 0;
17003 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17005 /* When the linker won't eliminate them, don't output duplicate
17006 TOC entries (this happens on AIX if there is any kind of TOC,
17007 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17008 CODE_LABELs. */
17009 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17011 struct toc_hash_struct *h;
17013 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17014 time because GGC is not initialized at that point. */
17015 if (toc_hash_table == NULL)
17016 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17018 h = ggc_alloc<toc_hash_struct> ();
17019 h->key = x;
17020 h->key_mode = mode;
17021 h->labelno = labelno;
17023 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17024 if (*found == NULL)
17025 *found = h;
17026 else /* This is indeed a duplicate.
17027 Set this label equal to that label. */
17029 fputs ("\t.set ", file);
17030 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17031 fprintf (file, "%d,", labelno);
17032 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17033 fprintf (file, "%d\n", ((*found)->labelno));
17035 #ifdef HAVE_AS_TLS
17036 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17037 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17038 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17040 fputs ("\t.set ", file);
17041 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17042 fprintf (file, "%d,", labelno);
17043 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17044 fprintf (file, "%d\n", ((*found)->labelno));
17046 #endif
17047 return;
17051 /* If we're going to put a double constant in the TOC, make sure it's
17052 aligned properly when strict alignment is on. */
17053 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17054 && STRICT_ALIGNMENT
17055 && GET_MODE_BITSIZE (mode) >= 64
17056 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
17057 ASM_OUTPUT_ALIGN (file, 3);
17060 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17062 /* Handle FP constants specially. Note that if we have a minimal
17063 TOC, things we put here aren't actually in the TOC, so we can allow
17064 FP constants. */
17065 if (CONST_DOUBLE_P (x)
17066 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17067 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17069 long k[4];
17071 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17072 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17073 else
17074 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17076 if (TARGET_64BIT)
17078 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17079 fputs (DOUBLE_INT_ASM_OP, file);
17080 else
17081 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17082 k[0] & 0xffffffff, k[1] & 0xffffffff,
17083 k[2] & 0xffffffff, k[3] & 0xffffffff);
17084 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17085 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17086 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17087 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17088 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17089 return;
17091 else
17093 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17094 fputs ("\t.long ", file);
17095 else
17096 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17097 k[0] & 0xffffffff, k[1] & 0xffffffff,
17098 k[2] & 0xffffffff, k[3] & 0xffffffff);
17099 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17100 k[0] & 0xffffffff, k[1] & 0xffffffff,
17101 k[2] & 0xffffffff, k[3] & 0xffffffff);
17102 return;
17105 else if (CONST_DOUBLE_P (x)
17106 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17108 long k[2];
17110 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17111 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17112 else
17113 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17115 if (TARGET_64BIT)
17117 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17118 fputs (DOUBLE_INT_ASM_OP, file);
17119 else
17120 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17121 k[0] & 0xffffffff, k[1] & 0xffffffff);
17122 fprintf (file, "0x%lx%08lx\n",
17123 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17124 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17125 return;
17127 else
17129 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17130 fputs ("\t.long ", file);
17131 else
17132 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17133 k[0] & 0xffffffff, k[1] & 0xffffffff);
17134 fprintf (file, "0x%lx,0x%lx\n",
17135 k[0] & 0xffffffff, k[1] & 0xffffffff);
17136 return;
17139 else if (CONST_DOUBLE_P (x)
17140 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17142 long l;
17144 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17145 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17146 else
17147 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17149 if (TARGET_64BIT)
17151 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17152 fputs (DOUBLE_INT_ASM_OP, file);
17153 else
17154 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17155 if (WORDS_BIG_ENDIAN)
17156 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17157 else
17158 fprintf (file, "0x%lx\n", l & 0xffffffff);
17159 return;
17161 else
17163 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17164 fputs ("\t.long ", file);
17165 else
17166 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17167 fprintf (file, "0x%lx\n", l & 0xffffffff);
17168 return;
17171 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17173 unsigned HOST_WIDE_INT low;
17174 HOST_WIDE_INT high;
17176 low = INTVAL (x) & 0xffffffff;
17177 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17179 /* TOC entries are always Pmode-sized, so when big-endian
17180 smaller integer constants in the TOC need to be padded.
17181 (This is still a win over putting the constants in
17182 a separate constant pool, because then we'd have
17183 to have both a TOC entry _and_ the actual constant.)
17185 For a 32-bit target, CONST_INT values are loaded and shifted
17186 entirely within `low' and can be stored in one TOC entry. */
17188 /* It would be easy to make this work, but it doesn't now. */
17189 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17191 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17193 low |= high << 32;
17194 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17195 high = (HOST_WIDE_INT) low >> 32;
17196 low &= 0xffffffff;
17199 if (TARGET_64BIT)
17201 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17202 fputs (DOUBLE_INT_ASM_OP, file);
17203 else
17204 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17205 (long) high & 0xffffffff, (long) low & 0xffffffff);
17206 fprintf (file, "0x%lx%08lx\n",
17207 (long) high & 0xffffffff, (long) low & 0xffffffff);
17208 return;
17210 else
17212 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17214 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17215 fputs ("\t.long ", file);
17216 else
17217 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17218 (long) high & 0xffffffff, (long) low & 0xffffffff);
17219 fprintf (file, "0x%lx,0x%lx\n",
17220 (long) high & 0xffffffff, (long) low & 0xffffffff);
17222 else
17224 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17225 fputs ("\t.long ", file);
17226 else
17227 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17228 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17230 return;
17234 if (GET_CODE (x) == CONST)
17236 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17237 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17239 base = XEXP (XEXP (x, 0), 0);
17240 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17243 switch (GET_CODE (base))
17245 case SYMBOL_REF:
17246 name = XSTR (base, 0);
17247 break;
17249 case LABEL_REF:
17250 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17251 CODE_LABEL_NUMBER (XEXP (base, 0)));
17252 break;
17254 case CODE_LABEL:
17255 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17256 break;
17258 default:
17259 gcc_unreachable ();
17262 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17263 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17264 else
17266 fputs ("\t.tc ", file);
17267 RS6000_OUTPUT_BASENAME (file, name);
17269 if (offset < 0)
17270 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17271 else if (offset)
17272 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17274 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17275 after other TOC symbols, reducing overflow of small TOC access
17276 to [TC] symbols. */
17277 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17278 ? "[TE]," : "[TC],", file);
17281 /* Currently C++ toc references to vtables can be emitted before it
17282 is decided whether the vtable is public or private. If this is
17283 the case, then the linker will eventually complain that there is
17284 a TOC reference to an unknown section. Thus, for vtables only,
17285 we emit the TOC reference to reference the symbol and not the
17286 section. */
17287 if (VTABLE_NAME_P (name))
17289 RS6000_OUTPUT_BASENAME (file, name);
17290 if (offset < 0)
17291 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17292 else if (offset > 0)
17293 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17295 else
17296 output_addr_const (file, x);
17298 #if HAVE_AS_TLS
17299 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17301 switch (SYMBOL_REF_TLS_MODEL (base))
17303 case 0:
17304 break;
17305 case TLS_MODEL_LOCAL_EXEC:
17306 fputs ("@le", file);
17307 break;
17308 case TLS_MODEL_INITIAL_EXEC:
17309 fputs ("@ie", file);
17310 break;
17311 /* Use global-dynamic for local-dynamic. */
17312 case TLS_MODEL_GLOBAL_DYNAMIC:
17313 case TLS_MODEL_LOCAL_DYNAMIC:
17314 putc ('\n', file);
17315 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17316 fputs ("\t.tc .", file);
17317 RS6000_OUTPUT_BASENAME (file, name);
17318 fputs ("[TC],", file);
17319 output_addr_const (file, x);
17320 fputs ("@m", file);
17321 break;
17322 default:
17323 gcc_unreachable ();
17326 #endif
17328 putc ('\n', file);
17331 /* Output an assembler pseudo-op to write an ASCII string of N characters
17332 starting at P to FILE.
17334 On the RS/6000, we have to do this using the .byte operation and
17335 write out special characters outside the quoted string.
17336 Also, the assembler is broken; very long strings are truncated,
17337 so we must artificially break them up early. */
17339 void
17340 output_ascii (FILE *file, const char *p, int n)
17342 char c;
17343 int i, count_string;
17344 const char *for_string = "\t.byte \"";
17345 const char *for_decimal = "\t.byte ";
17346 const char *to_close = NULL;
17348 count_string = 0;
17349 for (i = 0; i < n; i++)
17351 c = *p++;
17352 if (c >= ' ' && c < 0177)
17354 if (for_string)
17355 fputs (for_string, file);
17356 putc (c, file);
17358 /* Write two quotes to get one. */
17359 if (c == '"')
17361 putc (c, file);
17362 ++count_string;
17365 for_string = NULL;
17366 for_decimal = "\"\n\t.byte ";
17367 to_close = "\"\n";
17368 ++count_string;
17370 if (count_string >= 512)
17372 fputs (to_close, file);
17374 for_string = "\t.byte \"";
17375 for_decimal = "\t.byte ";
17376 to_close = NULL;
17377 count_string = 0;
17380 else
17382 if (for_decimal)
17383 fputs (for_decimal, file);
17384 fprintf (file, "%d", c);
17386 for_string = "\n\t.byte \"";
17387 for_decimal = ", ";
17388 to_close = "\n";
17389 count_string = 0;
17393 /* Now close the string if we have written one. Then end the line. */
17394 if (to_close)
17395 fputs (to_close, file);
17398 /* Generate a unique section name for FILENAME for a section type
17399 represented by SECTION_DESC. Output goes into BUF.
17401 SECTION_DESC can be any string, as long as it is different for each
17402 possible section type.
17404 We name the section in the same manner as xlc. The name begins with an
17405 underscore followed by the filename (after stripping any leading directory
17406 names) with the last period replaced by the string SECTION_DESC. If
17407 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17408 the name. */
17410 void
17411 rs6000_gen_section_name (char **buf, const char *filename,
17412 const char *section_desc)
17414 const char *q, *after_last_slash, *last_period = 0;
17415 char *p;
17416 int len;
17418 after_last_slash = filename;
17419 for (q = filename; *q; q++)
17421 if (*q == '/')
17422 after_last_slash = q + 1;
17423 else if (*q == '.')
17424 last_period = q;
17427 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17428 *buf = (char *) xmalloc (len);
17430 p = *buf;
17431 *p++ = '_';
17433 for (q = after_last_slash; *q; q++)
17435 if (q == last_period)
17437 strcpy (p, section_desc);
17438 p += strlen (section_desc);
17439 break;
17442 else if (ISALNUM (*q))
17443 *p++ = *q;
17446 if (last_period == 0)
17447 strcpy (p, section_desc);
17448 else
17449 *p = '\0';
17452 /* Emit profile function. */
17454 void
17455 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17457 /* Non-standard profiling for kernels, which just saves LR then calls
17458 _mcount without worrying about arg saves. The idea is to change
17459 the function prologue as little as possible as it isn't easy to
17460 account for arg save/restore code added just for _mcount. */
17461 if (TARGET_PROFILE_KERNEL)
17462 return;
17464 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17466 #ifndef NO_PROFILE_COUNTERS
17467 # define NO_PROFILE_COUNTERS 0
17468 #endif
17469 if (NO_PROFILE_COUNTERS)
17470 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17471 LCT_NORMAL, VOIDmode);
17472 else
17474 char buf[30];
17475 const char *label_name;
17476 rtx fun;
17478 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17479 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17480 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17482 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17483 LCT_NORMAL, VOIDmode, fun, Pmode);
17486 else if (DEFAULT_ABI == ABI_DARWIN)
17488 const char *mcount_name = RS6000_MCOUNT;
17489 int caller_addr_regno = LR_REGNO;
17491 /* Be conservative and always set this, at least for now. */
17492 crtl->uses_pic_offset_table = 1;
17494 #if TARGET_MACHO
17495 /* For PIC code, set up a stub and collect the caller's address
17496 from r0, which is where the prologue puts it. */
17497 if (MACHOPIC_INDIRECT
17498 && crtl->uses_pic_offset_table)
17499 caller_addr_regno = 0;
17500 #endif
17501 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17502 LCT_NORMAL, VOIDmode,
17503 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17507 /* Write function profiler code. */
17509 void
17510 output_function_profiler (FILE *file, int labelno)
17512 char buf[100];
17514 switch (DEFAULT_ABI)
17516 default:
17517 gcc_unreachable ();
17519 case ABI_V4:
17520 if (!TARGET_32BIT)
17522 warning (0, "no profiling of 64-bit code for this ABI");
17523 return;
17525 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17526 fprintf (file, "\tmflr %s\n", reg_names[0]);
17527 if (NO_PROFILE_COUNTERS)
17529 asm_fprintf (file, "\tstw %s,4(%s)\n",
17530 reg_names[0], reg_names[1]);
17532 else if (TARGET_SECURE_PLT && flag_pic)
17534 if (TARGET_LINK_STACK)
17536 char name[32];
17537 get_ppc476_thunk_name (name);
17538 asm_fprintf (file, "\tbl %s\n", name);
17540 else
17541 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17542 asm_fprintf (file, "\tstw %s,4(%s)\n",
17543 reg_names[0], reg_names[1]);
17544 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17545 asm_fprintf (file, "\taddis %s,%s,",
17546 reg_names[12], reg_names[12]);
17547 assemble_name (file, buf);
17548 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17549 assemble_name (file, buf);
17550 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17552 else if (flag_pic == 1)
17554 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17555 asm_fprintf (file, "\tstw %s,4(%s)\n",
17556 reg_names[0], reg_names[1]);
17557 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17558 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17559 assemble_name (file, buf);
17560 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17562 else if (flag_pic > 1)
17564 asm_fprintf (file, "\tstw %s,4(%s)\n",
17565 reg_names[0], reg_names[1]);
17566 /* Now, we need to get the address of the label. */
17567 if (TARGET_LINK_STACK)
17569 char name[32];
17570 get_ppc476_thunk_name (name);
17571 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17572 assemble_name (file, buf);
17573 fputs ("-.\n1:", file);
17574 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17575 asm_fprintf (file, "\taddi %s,%s,4\n",
17576 reg_names[11], reg_names[11]);
17578 else
17580 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17581 assemble_name (file, buf);
17582 fputs ("-.\n1:", file);
17583 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17585 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17586 reg_names[0], reg_names[11]);
17587 asm_fprintf (file, "\tadd %s,%s,%s\n",
17588 reg_names[0], reg_names[0], reg_names[11]);
17590 else
17592 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17593 assemble_name (file, buf);
17594 fputs ("@ha\n", file);
17595 asm_fprintf (file, "\tstw %s,4(%s)\n",
17596 reg_names[0], reg_names[1]);
17597 asm_fprintf (file, "\tla %s,", reg_names[0]);
17598 assemble_name (file, buf);
17599 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17602 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17603 fprintf (file, "\tbl %s%s\n",
17604 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17605 break;
17607 case ABI_AIX:
17608 case ABI_ELFv2:
17609 case ABI_DARWIN:
17610 /* Don't do anything, done in output_profile_hook (). */
17611 break;
17617 /* The following variable value is the last issued insn. */
17619 static rtx_insn *last_scheduled_insn;
17621 /* The following variable helps to balance issuing of load and
17622 store instructions */
17624 static int load_store_pendulum;
17626 /* The following variable helps pair divide insns during scheduling. */
17627 static int divide_cnt;
17628 /* The following variable helps pair and alternate vector and vector load
17629 insns during scheduling. */
17630 static int vec_pairing;
17633 /* Power4 load update and store update instructions are cracked into a
17634 load or store and an integer insn which are executed in the same cycle.
17635 Branches have their own dispatch slot which does not count against the
17636 GCC issue rate, but it changes the program flow so there are no other
17637 instructions to issue in this cycle. */
17639 static int
17640 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17642 last_scheduled_insn = insn;
17643 if (GET_CODE (PATTERN (insn)) == USE
17644 || GET_CODE (PATTERN (insn)) == CLOBBER)
17646 cached_can_issue_more = more;
17647 return cached_can_issue_more;
17650 if (insn_terminates_group_p (insn, current_group))
17652 cached_can_issue_more = 0;
17653 return cached_can_issue_more;
17656 /* If no reservation, but reach here */
17657 if (recog_memoized (insn) < 0)
17658 return more;
17660 if (rs6000_sched_groups)
17662 if (is_microcoded_insn (insn))
17663 cached_can_issue_more = 0;
17664 else if (is_cracked_insn (insn))
17665 cached_can_issue_more = more > 2 ? more - 2 : 0;
17666 else
17667 cached_can_issue_more = more - 1;
17669 return cached_can_issue_more;
17672 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17673 return 0;
17675 cached_can_issue_more = more - 1;
17676 return cached_can_issue_more;
17679 static int
17680 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17682 int r = rs6000_variable_issue_1 (insn, more);
17683 if (verbose)
17684 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17685 return r;
17688 /* Adjust the cost of a scheduling dependency. Return the new cost of
17689 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17691 static int
17692 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17693 unsigned int)
17695 enum attr_type attr_type;
17697 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17698 return cost;
17700 switch (dep_type)
17702 case REG_DEP_TRUE:
17704 /* Data dependency; DEP_INSN writes a register that INSN reads
17705 some cycles later. */
17707 /* Separate a load from a narrower, dependent store. */
17708 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17709 || rs6000_tune == PROCESSOR_POWER10)
17710 && GET_CODE (PATTERN (insn)) == SET
17711 && GET_CODE (PATTERN (dep_insn)) == SET
17712 && MEM_P (XEXP (PATTERN (insn), 1))
17713 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17714 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17715 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17716 return cost + 14;
17718 attr_type = get_attr_type (insn);
17720 switch (attr_type)
17722 case TYPE_JMPREG:
17723 /* Tell the first scheduling pass about the latency between
17724 a mtctr and bctr (and mtlr and br/blr). The first
17725 scheduling pass will not know about this latency since
17726 the mtctr instruction, which has the latency associated
17727 to it, will be generated by reload. */
17728 return 4;
17729 case TYPE_BRANCH:
17730 /* Leave some extra cycles between a compare and its
17731 dependent branch, to inhibit expensive mispredicts. */
17732 if ((rs6000_tune == PROCESSOR_PPC603
17733 || rs6000_tune == PROCESSOR_PPC604
17734 || rs6000_tune == PROCESSOR_PPC604e
17735 || rs6000_tune == PROCESSOR_PPC620
17736 || rs6000_tune == PROCESSOR_PPC630
17737 || rs6000_tune == PROCESSOR_PPC750
17738 || rs6000_tune == PROCESSOR_PPC7400
17739 || rs6000_tune == PROCESSOR_PPC7450
17740 || rs6000_tune == PROCESSOR_PPCE5500
17741 || rs6000_tune == PROCESSOR_PPCE6500
17742 || rs6000_tune == PROCESSOR_POWER4
17743 || rs6000_tune == PROCESSOR_POWER5
17744 || rs6000_tune == PROCESSOR_POWER7
17745 || rs6000_tune == PROCESSOR_POWER8
17746 || rs6000_tune == PROCESSOR_POWER9
17747 || rs6000_tune == PROCESSOR_POWER10
17748 || rs6000_tune == PROCESSOR_CELL)
17749 && recog_memoized (dep_insn)
17750 && (INSN_CODE (dep_insn) >= 0))
17752 switch (get_attr_type (dep_insn))
17754 case TYPE_CMP:
17755 case TYPE_FPCOMPARE:
17756 case TYPE_CR_LOGICAL:
17757 return cost + 2;
17758 case TYPE_EXTS:
17759 case TYPE_MUL:
17760 if (get_attr_dot (dep_insn) == DOT_YES)
17761 return cost + 2;
17762 else
17763 break;
17764 case TYPE_SHIFT:
17765 if (get_attr_dot (dep_insn) == DOT_YES
17766 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17767 return cost + 2;
17768 else
17769 break;
17770 default:
17771 break;
17773 break;
17775 case TYPE_STORE:
17776 case TYPE_FPSTORE:
17777 if ((rs6000_tune == PROCESSOR_POWER6)
17778 && recog_memoized (dep_insn)
17779 && (INSN_CODE (dep_insn) >= 0))
17782 if (GET_CODE (PATTERN (insn)) != SET)
17783 /* If this happens, we have to extend this to schedule
17784 optimally. Return default for now. */
17785 return cost;
17787 /* Adjust the cost for the case where the value written
17788 by a fixed point operation is used as the address
17789 gen value on a store. */
17790 switch (get_attr_type (dep_insn))
17792 case TYPE_LOAD:
17793 case TYPE_CNTLZ:
17795 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17796 return get_attr_sign_extend (dep_insn)
17797 == SIGN_EXTEND_YES ? 6 : 4;
17798 break;
17800 case TYPE_SHIFT:
17802 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17803 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17804 6 : 3;
17805 break;
17807 case TYPE_INTEGER:
17808 case TYPE_ADD:
17809 case TYPE_LOGICAL:
17810 case TYPE_EXTS:
17811 case TYPE_INSERT:
17813 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17814 return 3;
17815 break;
17817 case TYPE_STORE:
17818 case TYPE_FPLOAD:
17819 case TYPE_FPSTORE:
17821 if (get_attr_update (dep_insn) == UPDATE_YES
17822 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17823 return 3;
17824 break;
17826 case TYPE_MUL:
17828 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17829 return 17;
17830 break;
17832 case TYPE_DIV:
17834 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17835 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17836 break;
17838 default:
17839 break;
17842 break;
17844 case TYPE_LOAD:
17845 if ((rs6000_tune == PROCESSOR_POWER6)
17846 && recog_memoized (dep_insn)
17847 && (INSN_CODE (dep_insn) >= 0))
17850 /* Adjust the cost for the case where the value written
17851 by a fixed point instruction is used within the address
17852 gen portion of a subsequent load(u)(x) */
17853 switch (get_attr_type (dep_insn))
17855 case TYPE_LOAD:
17856 case TYPE_CNTLZ:
17858 if (set_to_load_agen (dep_insn, insn))
17859 return get_attr_sign_extend (dep_insn)
17860 == SIGN_EXTEND_YES ? 6 : 4;
17861 break;
17863 case TYPE_SHIFT:
17865 if (set_to_load_agen (dep_insn, insn))
17866 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17867 6 : 3;
17868 break;
17870 case TYPE_INTEGER:
17871 case TYPE_ADD:
17872 case TYPE_LOGICAL:
17873 case TYPE_EXTS:
17874 case TYPE_INSERT:
17876 if (set_to_load_agen (dep_insn, insn))
17877 return 3;
17878 break;
17880 case TYPE_STORE:
17881 case TYPE_FPLOAD:
17882 case TYPE_FPSTORE:
17884 if (get_attr_update (dep_insn) == UPDATE_YES
17885 && set_to_load_agen (dep_insn, insn))
17886 return 3;
17887 break;
17889 case TYPE_MUL:
17891 if (set_to_load_agen (dep_insn, insn))
17892 return 17;
17893 break;
17895 case TYPE_DIV:
17897 if (set_to_load_agen (dep_insn, insn))
17898 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17899 break;
17901 default:
17902 break;
17905 break;
17907 default:
17908 break;
17911 /* Fall out to return default cost. */
17913 break;
17915 case REG_DEP_OUTPUT:
17916 /* Output dependency; DEP_INSN writes a register that INSN writes some
17917 cycles later. */
17918 if ((rs6000_tune == PROCESSOR_POWER6)
17919 && recog_memoized (dep_insn)
17920 && (INSN_CODE (dep_insn) >= 0))
17922 attr_type = get_attr_type (insn);
17924 switch (attr_type)
17926 case TYPE_FP:
17927 case TYPE_FPSIMPLE:
17928 if (get_attr_type (dep_insn) == TYPE_FP
17929 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17930 return 1;
17931 break;
17932 default:
17933 break;
17936 /* Fall through, no cost for output dependency. */
17937 /* FALLTHRU */
17939 case REG_DEP_ANTI:
17940 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17941 cycles later. */
17942 return 0;
17944 default:
17945 gcc_unreachable ();
17948 return cost;
17951 /* Debug version of rs6000_adjust_cost. */
17953 static int
17954 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17955 int cost, unsigned int dw)
17957 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17959 if (ret != cost)
17961 const char *dep;
17963 switch (dep_type)
17965 default: dep = "unknown depencency"; break;
17966 case REG_DEP_TRUE: dep = "data dependency"; break;
17967 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17968 case REG_DEP_ANTI: dep = "anti depencency"; break;
17971 fprintf (stderr,
17972 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17973 "%s, insn:\n", ret, cost, dep);
17975 debug_rtx (insn);
17978 return ret;
17981 /* The function returns a true if INSN is microcoded.
17982 Return false otherwise. */
17984 static bool
17985 is_microcoded_insn (rtx_insn *insn)
17987 if (!insn || !NONDEBUG_INSN_P (insn)
17988 || GET_CODE (PATTERN (insn)) == USE
17989 || GET_CODE (PATTERN (insn)) == CLOBBER)
17990 return false;
17992 if (rs6000_tune == PROCESSOR_CELL)
17993 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17995 if (rs6000_sched_groups
17996 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17998 enum attr_type type = get_attr_type (insn);
17999 if ((type == TYPE_LOAD
18000 && get_attr_update (insn) == UPDATE_YES
18001 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18002 || ((type == TYPE_LOAD || type == TYPE_STORE)
18003 && get_attr_update (insn) == UPDATE_YES
18004 && get_attr_indexed (insn) == INDEXED_YES)
18005 || type == TYPE_MFCR)
18006 return true;
18009 return false;
18012 /* The function returns true if INSN is cracked into 2 instructions
18013 by the processor (and therefore occupies 2 issue slots). */
18015 static bool
18016 is_cracked_insn (rtx_insn *insn)
18018 if (!insn || !NONDEBUG_INSN_P (insn)
18019 || GET_CODE (PATTERN (insn)) == USE
18020 || GET_CODE (PATTERN (insn)) == CLOBBER)
18021 return false;
18023 if (rs6000_sched_groups
18024 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18026 enum attr_type type = get_attr_type (insn);
18027 if ((type == TYPE_LOAD
18028 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18029 && get_attr_update (insn) == UPDATE_NO)
18030 || (type == TYPE_LOAD
18031 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18032 && get_attr_update (insn) == UPDATE_YES
18033 && get_attr_indexed (insn) == INDEXED_NO)
18034 || (type == TYPE_STORE
18035 && get_attr_update (insn) == UPDATE_YES
18036 && get_attr_indexed (insn) == INDEXED_NO)
18037 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18038 && get_attr_update (insn) == UPDATE_YES)
18039 || (type == TYPE_CR_LOGICAL
18040 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18041 || (type == TYPE_EXTS
18042 && get_attr_dot (insn) == DOT_YES)
18043 || (type == TYPE_SHIFT
18044 && get_attr_dot (insn) == DOT_YES
18045 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18046 || (type == TYPE_MUL
18047 && get_attr_dot (insn) == DOT_YES)
18048 || type == TYPE_DIV
18049 || (type == TYPE_INSERT
18050 && get_attr_size (insn) == SIZE_32))
18051 return true;
18054 return false;
18057 /* The function returns true if INSN can be issued only from
18058 the branch slot. */
18060 static bool
18061 is_branch_slot_insn (rtx_insn *insn)
18063 if (!insn || !NONDEBUG_INSN_P (insn)
18064 || GET_CODE (PATTERN (insn)) == USE
18065 || GET_CODE (PATTERN (insn)) == CLOBBER)
18066 return false;
18068 if (rs6000_sched_groups)
18070 enum attr_type type = get_attr_type (insn);
18071 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18072 return true;
18073 return false;
18076 return false;
18079 /* The function returns true if out_inst sets a value that is
18080 used in the address generation computation of in_insn */
18081 static bool
18082 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18084 rtx out_set, in_set;
18086 /* For performance reasons, only handle the simple case where
18087 both loads are a single_set. */
18088 out_set = single_set (out_insn);
18089 if (out_set)
18091 in_set = single_set (in_insn);
18092 if (in_set)
18093 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18096 return false;
18099 /* Try to determine base/offset/size parts of the given MEM.
18100 Return true if successful, false if all the values couldn't
18101 be determined.
18103 This function only looks for REG or REG+CONST address forms.
18104 REG+REG address form will return false. */
18106 static bool
18107 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18108 HOST_WIDE_INT *size)
18110 rtx addr_rtx;
18111 if MEM_SIZE_KNOWN_P (mem)
18112 *size = MEM_SIZE (mem);
18113 else
18114 return false;
18116 addr_rtx = (XEXP (mem, 0));
18117 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18118 addr_rtx = XEXP (addr_rtx, 1);
18120 *offset = 0;
18121 while (GET_CODE (addr_rtx) == PLUS
18122 && CONST_INT_P (XEXP (addr_rtx, 1)))
18124 *offset += INTVAL (XEXP (addr_rtx, 1));
18125 addr_rtx = XEXP (addr_rtx, 0);
18127 if (!REG_P (addr_rtx))
18128 return false;
18130 *base = addr_rtx;
18131 return true;
18134 /* If the target storage locations of arguments MEM1 and MEM2 are
18135 adjacent, then return the argument that has the lower address.
18136 Otherwise, return NULL_RTX. */
18138 static rtx
18139 adjacent_mem_locations (rtx mem1, rtx mem2)
18141 rtx reg1, reg2;
18142 HOST_WIDE_INT off1, size1, off2, size2;
18144 if (MEM_P (mem1)
18145 && MEM_P (mem2)
18146 && get_memref_parts (mem1, &reg1, &off1, &size1)
18147 && get_memref_parts (mem2, &reg2, &off2, &size2)
18148 && REGNO (reg1) == REGNO (reg2))
18150 if (off1 + size1 == off2)
18151 return mem1;
18152 else if (off2 + size2 == off1)
18153 return mem2;
18156 return NULL_RTX;
18159 /* This function returns true if it can be determined that the two MEM
18160 locations overlap by at least 1 byte based on base reg/offset/size. */
18162 static bool
18163 mem_locations_overlap (rtx mem1, rtx mem2)
18165 rtx reg1, reg2;
18166 HOST_WIDE_INT off1, size1, off2, size2;
18168 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18169 && get_memref_parts (mem2, &reg2, &off2, &size2))
18170 return ((REGNO (reg1) == REGNO (reg2))
18171 && (((off1 <= off2) && (off1 + size1 > off2))
18172 || ((off2 <= off1) && (off2 + size2 > off1))));
18174 return false;
18177 /* A C statement (sans semicolon) to update the integer scheduling
18178 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18179 INSN earlier, reduce the priority to execute INSN later. Do not
18180 define this macro if you do not need to adjust the scheduling
18181 priorities of insns. */
18183 static int
18184 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18186 rtx load_mem, str_mem;
18187 /* On machines (like the 750) which have asymmetric integer units,
18188 where one integer unit can do multiply and divides and the other
18189 can't, reduce the priority of multiply/divide so it is scheduled
18190 before other integer operations. */
18192 #if 0
18193 if (! INSN_P (insn))
18194 return priority;
18196 if (GET_CODE (PATTERN (insn)) == USE)
18197 return priority;
18199 switch (rs6000_tune) {
18200 case PROCESSOR_PPC750:
18201 switch (get_attr_type (insn))
18203 default:
18204 break;
18206 case TYPE_MUL:
18207 case TYPE_DIV:
18208 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18209 priority, priority);
18210 if (priority >= 0 && priority < 0x01000000)
18211 priority >>= 3;
18212 break;
18215 #endif
18217 if (insn_must_be_first_in_group (insn)
18218 && reload_completed
18219 && current_sched_info->sched_max_insns_priority
18220 && rs6000_sched_restricted_insns_priority)
18223 /* Prioritize insns that can be dispatched only in the first
18224 dispatch slot. */
18225 if (rs6000_sched_restricted_insns_priority == 1)
18226 /* Attach highest priority to insn. This means that in
18227 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18228 precede 'priority' (critical path) considerations. */
18229 return current_sched_info->sched_max_insns_priority;
18230 else if (rs6000_sched_restricted_insns_priority == 2)
18231 /* Increase priority of insn by a minimal amount. This means that in
18232 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18233 considerations precede dispatch-slot restriction considerations. */
18234 return (priority + 1);
18237 if (rs6000_tune == PROCESSOR_POWER6
18238 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18239 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18240 /* Attach highest priority to insn if the scheduler has just issued two
18241 stores and this instruction is a load, or two loads and this instruction
18242 is a store. Power6 wants loads and stores scheduled alternately
18243 when possible */
18244 return current_sched_info->sched_max_insns_priority;
18246 return priority;
18249 /* Return true if the instruction is nonpipelined on the Cell. */
18250 static bool
18251 is_nonpipeline_insn (rtx_insn *insn)
18253 enum attr_type type;
18254 if (!insn || !NONDEBUG_INSN_P (insn)
18255 || GET_CODE (PATTERN (insn)) == USE
18256 || GET_CODE (PATTERN (insn)) == CLOBBER)
18257 return false;
18259 type = get_attr_type (insn);
18260 if (type == TYPE_MUL
18261 || type == TYPE_DIV
18262 || type == TYPE_SDIV
18263 || type == TYPE_DDIV
18264 || type == TYPE_SSQRT
18265 || type == TYPE_DSQRT
18266 || type == TYPE_MFCR
18267 || type == TYPE_MFCRF
18268 || type == TYPE_MFJMPR)
18270 return true;
18272 return false;
18276 /* Return how many instructions the machine can issue per cycle. */
18278 static int
18279 rs6000_issue_rate (void)
18281 /* Unless scheduling for register pressure, use issue rate of 1 for
18282 first scheduling pass to decrease degradation. */
18283 if (!reload_completed && !flag_sched_pressure)
18284 return 1;
18286 switch (rs6000_tune) {
18287 case PROCESSOR_RS64A:
18288 case PROCESSOR_PPC601: /* ? */
18289 case PROCESSOR_PPC7450:
18290 return 3;
18291 case PROCESSOR_PPC440:
18292 case PROCESSOR_PPC603:
18293 case PROCESSOR_PPC750:
18294 case PROCESSOR_PPC7400:
18295 case PROCESSOR_PPC8540:
18296 case PROCESSOR_PPC8548:
18297 case PROCESSOR_CELL:
18298 case PROCESSOR_PPCE300C2:
18299 case PROCESSOR_PPCE300C3:
18300 case PROCESSOR_PPCE500MC:
18301 case PROCESSOR_PPCE500MC64:
18302 case PROCESSOR_PPCE5500:
18303 case PROCESSOR_PPCE6500:
18304 case PROCESSOR_TITAN:
18305 return 2;
18306 case PROCESSOR_PPC476:
18307 case PROCESSOR_PPC604:
18308 case PROCESSOR_PPC604e:
18309 case PROCESSOR_PPC620:
18310 case PROCESSOR_PPC630:
18311 return 4;
18312 case PROCESSOR_POWER4:
18313 case PROCESSOR_POWER5:
18314 case PROCESSOR_POWER6:
18315 case PROCESSOR_POWER7:
18316 return 5;
18317 case PROCESSOR_POWER8:
18318 return 7;
18319 case PROCESSOR_POWER9:
18320 return 6;
18321 case PROCESSOR_POWER10:
18322 return 8;
18323 default:
18324 return 1;
18328 /* Return how many instructions to look ahead for better insn
18329 scheduling. */
18331 static int
18332 rs6000_use_sched_lookahead (void)
18334 switch (rs6000_tune)
18336 case PROCESSOR_PPC8540:
18337 case PROCESSOR_PPC8548:
18338 return 4;
18340 case PROCESSOR_CELL:
18341 return (reload_completed ? 8 : 0);
18343 default:
18344 return 0;
18348 /* We are choosing insn from the ready queue. Return zero if INSN can be
18349 chosen. */
18350 static int
18351 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18353 if (ready_index == 0)
18354 return 0;
18356 if (rs6000_tune != PROCESSOR_CELL)
18357 return 0;
18359 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18361 if (!reload_completed
18362 || is_nonpipeline_insn (insn)
18363 || is_microcoded_insn (insn))
18364 return 1;
18366 return 0;
18369 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18370 and return true. */
18372 static bool
18373 find_mem_ref (rtx pat, rtx *mem_ref)
18375 const char * fmt;
18376 int i, j;
18378 /* stack_tie does not produce any real memory traffic. */
18379 if (tie_operand (pat, VOIDmode))
18380 return false;
18382 if (MEM_P (pat))
18384 *mem_ref = pat;
18385 return true;
18388 /* Recursively process the pattern. */
18389 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18391 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18393 if (fmt[i] == 'e')
18395 if (find_mem_ref (XEXP (pat, i), mem_ref))
18396 return true;
18398 else if (fmt[i] == 'E')
18399 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18401 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18402 return true;
18406 return false;
18409 /* Determine if PAT is a PATTERN of a load insn. */
18411 static bool
18412 is_load_insn1 (rtx pat, rtx *load_mem)
18414 if (!pat || pat == NULL_RTX)
18415 return false;
18417 if (GET_CODE (pat) == SET)
18419 if (REG_P (SET_DEST (pat)))
18420 return find_mem_ref (SET_SRC (pat), load_mem);
18421 else
18422 return false;
18425 if (GET_CODE (pat) == PARALLEL)
18427 int i;
18429 for (i = 0; i < XVECLEN (pat, 0); i++)
18430 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18431 return true;
18434 return false;
18437 /* Determine if INSN loads from memory. */
18439 static bool
18440 is_load_insn (rtx insn, rtx *load_mem)
18442 if (!insn || !INSN_P (insn))
18443 return false;
18445 if (CALL_P (insn))
18446 return false;
18448 return is_load_insn1 (PATTERN (insn), load_mem);
18451 /* Determine if PAT is a PATTERN of a store insn. */
18453 static bool
18454 is_store_insn1 (rtx pat, rtx *str_mem)
18456 if (!pat || pat == NULL_RTX)
18457 return false;
18459 if (GET_CODE (pat) == SET)
18461 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18462 return find_mem_ref (SET_DEST (pat), str_mem);
18463 else
18464 return false;
18467 if (GET_CODE (pat) == PARALLEL)
18469 int i;
18471 for (i = 0; i < XVECLEN (pat, 0); i++)
18472 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18473 return true;
18476 return false;
18479 /* Determine if INSN stores to memory. */
18481 static bool
18482 is_store_insn (rtx insn, rtx *str_mem)
18484 if (!insn || !INSN_P (insn))
18485 return false;
18487 return is_store_insn1 (PATTERN (insn), str_mem);
18490 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18492 static bool
18493 is_power9_pairable_vec_type (enum attr_type type)
18495 switch (type)
18497 case TYPE_VECSIMPLE:
18498 case TYPE_VECCOMPLEX:
18499 case TYPE_VECDIV:
18500 case TYPE_VECCMP:
18501 case TYPE_VECPERM:
18502 case TYPE_VECFLOAT:
18503 case TYPE_VECFDIV:
18504 case TYPE_VECDOUBLE:
18505 return true;
18506 default:
18507 break;
18509 return false;
18512 /* Returns whether the dependence between INSN and NEXT is considered
18513 costly by the given target. */
18515 static bool
18516 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18518 rtx insn;
18519 rtx next;
18520 rtx load_mem, str_mem;
18522 /* If the flag is not enabled - no dependence is considered costly;
18523 allow all dependent insns in the same group.
18524 This is the most aggressive option. */
18525 if (rs6000_sched_costly_dep == no_dep_costly)
18526 return false;
18528 /* If the flag is set to 1 - a dependence is always considered costly;
18529 do not allow dependent instructions in the same group.
18530 This is the most conservative option. */
18531 if (rs6000_sched_costly_dep == all_deps_costly)
18532 return true;
18534 insn = DEP_PRO (dep);
18535 next = DEP_CON (dep);
18537 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18538 && is_load_insn (next, &load_mem)
18539 && is_store_insn (insn, &str_mem))
18540 /* Prevent load after store in the same group. */
18541 return true;
18543 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18544 && is_load_insn (next, &load_mem)
18545 && is_store_insn (insn, &str_mem)
18546 && DEP_TYPE (dep) == REG_DEP_TRUE
18547 && mem_locations_overlap(str_mem, load_mem))
18548 /* Prevent load after store in the same group if it is a true
18549 dependence. */
18550 return true;
18552 /* The flag is set to X; dependences with latency >= X are considered costly,
18553 and will not be scheduled in the same group. */
18554 if (rs6000_sched_costly_dep <= max_dep_latency
18555 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18556 return true;
18558 return false;
18561 /* Return the next insn after INSN that is found before TAIL is reached,
18562 skipping any "non-active" insns - insns that will not actually occupy
18563 an issue slot. Return NULL_RTX if such an insn is not found. */
18565 static rtx_insn *
18566 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18568 if (insn == NULL_RTX || insn == tail)
18569 return NULL;
18571 while (1)
18573 insn = NEXT_INSN (insn);
18574 if (insn == NULL_RTX || insn == tail)
18575 return NULL;
18577 if (CALL_P (insn)
18578 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18579 || (NONJUMP_INSN_P (insn)
18580 && GET_CODE (PATTERN (insn)) != USE
18581 && GET_CODE (PATTERN (insn)) != CLOBBER
18582 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18583 break;
18585 return insn;
18588 /* Move instruction at POS to the end of the READY list. */
18590 static void
18591 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18593 rtx_insn *tmp;
18594 int i;
18596 tmp = ready[pos];
18597 for (i = pos; i < lastpos; i++)
18598 ready[i] = ready[i + 1];
18599 ready[lastpos] = tmp;
18602 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18604 static int
18605 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18607 /* For Power6, we need to handle some special cases to try and keep the
18608 store queue from overflowing and triggering expensive flushes.
18610 This code monitors how load and store instructions are being issued
18611 and skews the ready list one way or the other to increase the likelihood
18612 that a desired instruction is issued at the proper time.
18614 A couple of things are done. First, we maintain a "load_store_pendulum"
18615 to track the current state of load/store issue.
18617 - If the pendulum is at zero, then no loads or stores have been
18618 issued in the current cycle so we do nothing.
18620 - If the pendulum is 1, then a single load has been issued in this
18621 cycle and we attempt to locate another load in the ready list to
18622 issue with it.
18624 - If the pendulum is -2, then two stores have already been
18625 issued in this cycle, so we increase the priority of the first load
18626 in the ready list to increase it's likelihood of being chosen first
18627 in the next cycle.
18629 - If the pendulum is -1, then a single store has been issued in this
18630 cycle and we attempt to locate another store in the ready list to
18631 issue with it, preferring a store to an adjacent memory location to
18632 facilitate store pairing in the store queue.
18634 - If the pendulum is 2, then two loads have already been
18635 issued in this cycle, so we increase the priority of the first store
18636 in the ready list to increase it's likelihood of being chosen first
18637 in the next cycle.
18639 - If the pendulum < -2 or > 2, then do nothing.
18641 Note: This code covers the most common scenarios. There exist non
18642 load/store instructions which make use of the LSU and which
18643 would need to be accounted for to strictly model the behavior
18644 of the machine. Those instructions are currently unaccounted
18645 for to help minimize compile time overhead of this code.
18647 int pos;
18648 rtx load_mem, str_mem;
18650 if (is_store_insn (last_scheduled_insn, &str_mem))
18651 /* Issuing a store, swing the load_store_pendulum to the left */
18652 load_store_pendulum--;
18653 else if (is_load_insn (last_scheduled_insn, &load_mem))
18654 /* Issuing a load, swing the load_store_pendulum to the right */
18655 load_store_pendulum++;
18656 else
18657 return cached_can_issue_more;
18659 /* If the pendulum is balanced, or there is only one instruction on
18660 the ready list, then all is well, so return. */
18661 if ((load_store_pendulum == 0) || (lastpos <= 0))
18662 return cached_can_issue_more;
18664 if (load_store_pendulum == 1)
18666 /* A load has been issued in this cycle. Scan the ready list
18667 for another load to issue with it */
18668 pos = lastpos;
18670 while (pos >= 0)
18672 if (is_load_insn (ready[pos], &load_mem))
18674 /* Found a load. Move it to the head of the ready list,
18675 and adjust it's priority so that it is more likely to
18676 stay there */
18677 move_to_end_of_ready (ready, pos, lastpos);
18679 if (!sel_sched_p ()
18680 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18681 INSN_PRIORITY (ready[lastpos])++;
18682 break;
18684 pos--;
18687 else if (load_store_pendulum == -2)
18689 /* Two stores have been issued in this cycle. Increase the
18690 priority of the first load in the ready list to favor it for
18691 issuing in the next cycle. */
18692 pos = lastpos;
18694 while (pos >= 0)
18696 if (is_load_insn (ready[pos], &load_mem)
18697 && !sel_sched_p ()
18698 && INSN_PRIORITY_KNOWN (ready[pos]))
18700 INSN_PRIORITY (ready[pos])++;
18702 /* Adjust the pendulum to account for the fact that a load
18703 was found and increased in priority. This is to prevent
18704 increasing the priority of multiple loads */
18705 load_store_pendulum--;
18707 break;
18709 pos--;
18712 else if (load_store_pendulum == -1)
18714 /* A store has been issued in this cycle. Scan the ready list for
18715 another store to issue with it, preferring a store to an adjacent
18716 memory location */
18717 int first_store_pos = -1;
18719 pos = lastpos;
18721 while (pos >= 0)
18723 if (is_store_insn (ready[pos], &str_mem))
18725 rtx str_mem2;
18726 /* Maintain the index of the first store found on the
18727 list */
18728 if (first_store_pos == -1)
18729 first_store_pos = pos;
18731 if (is_store_insn (last_scheduled_insn, &str_mem2)
18732 && adjacent_mem_locations (str_mem, str_mem2))
18734 /* Found an adjacent store. Move it to the head of the
18735 ready list, and adjust it's priority so that it is
18736 more likely to stay there */
18737 move_to_end_of_ready (ready, pos, lastpos);
18739 if (!sel_sched_p ()
18740 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18741 INSN_PRIORITY (ready[lastpos])++;
18743 first_store_pos = -1;
18745 break;
18748 pos--;
18751 if (first_store_pos >= 0)
18753 /* An adjacent store wasn't found, but a non-adjacent store was,
18754 so move the non-adjacent store to the front of the ready
18755 list, and adjust its priority so that it is more likely to
18756 stay there. */
18757 move_to_end_of_ready (ready, first_store_pos, lastpos);
18758 if (!sel_sched_p ()
18759 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18760 INSN_PRIORITY (ready[lastpos])++;
18763 else if (load_store_pendulum == 2)
18765 /* Two loads have been issued in this cycle. Increase the priority
18766 of the first store in the ready list to favor it for issuing in
18767 the next cycle. */
18768 pos = lastpos;
18770 while (pos >= 0)
18772 if (is_store_insn (ready[pos], &str_mem)
18773 && !sel_sched_p ()
18774 && INSN_PRIORITY_KNOWN (ready[pos]))
18776 INSN_PRIORITY (ready[pos])++;
18778 /* Adjust the pendulum to account for the fact that a store
18779 was found and increased in priority. This is to prevent
18780 increasing the priority of multiple stores */
18781 load_store_pendulum++;
18783 break;
18785 pos--;
18789 return cached_can_issue_more;
18792 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18794 static int
18795 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18797 int pos;
18798 enum attr_type type, type2;
18800 type = get_attr_type (last_scheduled_insn);
18802 /* Try to issue fixed point divides back-to-back in pairs so they will be
18803 routed to separate execution units and execute in parallel. */
18804 if (type == TYPE_DIV && divide_cnt == 0)
18806 /* First divide has been scheduled. */
18807 divide_cnt = 1;
18809 /* Scan the ready list looking for another divide, if found move it
18810 to the end of the list so it is chosen next. */
18811 pos = lastpos;
18812 while (pos >= 0)
18814 if (recog_memoized (ready[pos]) >= 0
18815 && get_attr_type (ready[pos]) == TYPE_DIV)
18817 move_to_end_of_ready (ready, pos, lastpos);
18818 break;
18820 pos--;
18823 else
18825 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18826 divide_cnt = 0;
18828 /* The best dispatch throughput for vector and vector load insns can be
18829 achieved by interleaving a vector and vector load such that they'll
18830 dispatch to the same superslice. If this pairing cannot be achieved
18831 then it is best to pair vector insns together and vector load insns
18832 together.
18834 To aid in this pairing, vec_pairing maintains the current state with
18835 the following values:
18837 0 : Initial state, no vecload/vector pairing has been started.
18839 1 : A vecload or vector insn has been issued and a candidate for
18840 pairing has been found and moved to the end of the ready
18841 list. */
18842 if (type == TYPE_VECLOAD)
18844 /* Issued a vecload. */
18845 if (vec_pairing == 0)
18847 int vecload_pos = -1;
18848 /* We issued a single vecload, look for a vector insn to pair it
18849 with. If one isn't found, try to pair another vecload. */
18850 pos = lastpos;
18851 while (pos >= 0)
18853 if (recog_memoized (ready[pos]) >= 0)
18855 type2 = get_attr_type (ready[pos]);
18856 if (is_power9_pairable_vec_type (type2))
18858 /* Found a vector insn to pair with, move it to the
18859 end of the ready list so it is scheduled next. */
18860 move_to_end_of_ready (ready, pos, lastpos);
18861 vec_pairing = 1;
18862 return cached_can_issue_more;
18864 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18865 /* Remember position of first vecload seen. */
18866 vecload_pos = pos;
18868 pos--;
18870 if (vecload_pos >= 0)
18872 /* Didn't find a vector to pair with but did find a vecload,
18873 move it to the end of the ready list. */
18874 move_to_end_of_ready (ready, vecload_pos, lastpos);
18875 vec_pairing = 1;
18876 return cached_can_issue_more;
18880 else if (is_power9_pairable_vec_type (type))
18882 /* Issued a vector operation. */
18883 if (vec_pairing == 0)
18885 int vec_pos = -1;
18886 /* We issued a single vector insn, look for a vecload to pair it
18887 with. If one isn't found, try to pair another vector. */
18888 pos = lastpos;
18889 while (pos >= 0)
18891 if (recog_memoized (ready[pos]) >= 0)
18893 type2 = get_attr_type (ready[pos]);
18894 if (type2 == TYPE_VECLOAD)
18896 /* Found a vecload insn to pair with, move it to the
18897 end of the ready list so it is scheduled next. */
18898 move_to_end_of_ready (ready, pos, lastpos);
18899 vec_pairing = 1;
18900 return cached_can_issue_more;
18902 else if (is_power9_pairable_vec_type (type2)
18903 && vec_pos == -1)
18904 /* Remember position of first vector insn seen. */
18905 vec_pos = pos;
18907 pos--;
18909 if (vec_pos >= 0)
18911 /* Didn't find a vecload to pair with but did find a vector
18912 insn, move it to the end of the ready list. */
18913 move_to_end_of_ready (ready, vec_pos, lastpos);
18914 vec_pairing = 1;
18915 return cached_can_issue_more;
18920 /* We've either finished a vec/vecload pair, couldn't find an insn to
18921 continue the current pair, or the last insn had nothing to do with
18922 with pairing. In any case, reset the state. */
18923 vec_pairing = 0;
18926 return cached_can_issue_more;
18929 /* Determine if INSN is a store to memory that can be fused with a similar
18930 adjacent store. */
18932 static bool
18933 is_fusable_store (rtx_insn *insn, rtx *str_mem)
18935 /* Insn must be a non-prefixed base+disp form store. */
18936 if (is_store_insn (insn, str_mem)
18937 && get_attr_prefixed (insn) == PREFIXED_NO
18938 && get_attr_update (insn) == UPDATE_NO
18939 && get_attr_indexed (insn) == INDEXED_NO)
18941 /* Further restrictions by mode and size. */
18942 if (!MEM_SIZE_KNOWN_P (*str_mem))
18943 return false;
18945 machine_mode mode = GET_MODE (*str_mem);
18946 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
18948 if (INTEGRAL_MODE_P (mode))
18949 /* Must be word or dword size. */
18950 return (size == 4 || size == 8);
18951 else if (FLOAT_MODE_P (mode))
18952 /* Must be dword size. */
18953 return (size == 8);
18956 return false;
18959 /* Do Power10 specific reordering of the ready list. */
18961 static int
18962 power10_sched_reorder (rtx_insn **ready, int lastpos)
18964 rtx mem1;
18966 /* Do store fusion during sched2 only. */
18967 if (!reload_completed)
18968 return cached_can_issue_more;
18970 /* If the prior insn finished off a store fusion pair then simply
18971 reset the counter and return, nothing more to do. */
18972 if (load_store_pendulum != 0)
18974 load_store_pendulum = 0;
18975 return cached_can_issue_more;
18978 /* Try to pair certain store insns to adjacent memory locations
18979 so that the hardware will fuse them to a single operation. */
18980 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
18983 /* A fusable store was just scheduled. Scan the ready list for another
18984 store that it can fuse with. */
18985 int pos = lastpos;
18986 while (pos >= 0)
18988 rtx mem2;
18989 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
18990 must be ascending only. */
18991 if (is_fusable_store (ready[pos], &mem2)
18992 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
18993 && adjacent_mem_locations (mem1, mem2))
18994 || (FLOAT_MODE_P (GET_MODE (mem1))
18995 && (adjacent_mem_locations (mem1, mem2) == mem1))))
18997 /* Found a fusable store. Move it to the end of the ready list
18998 so it is scheduled next. */
18999 move_to_end_of_ready (ready, pos, lastpos);
19001 load_store_pendulum = -1;
19002 break;
19004 pos--;
19008 return cached_can_issue_more;
19011 /* We are about to begin issuing insns for this clock cycle. */
19013 static int
19014 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19015 rtx_insn **ready ATTRIBUTE_UNUSED,
19016 int *pn_ready ATTRIBUTE_UNUSED,
19017 int clock_var ATTRIBUTE_UNUSED)
19019 int n_ready = *pn_ready;
19021 if (sched_verbose)
19022 fprintf (dump, "// rs6000_sched_reorder :\n");
19024 /* Reorder the ready list, if the second to last ready insn
19025 is a nonepipeline insn. */
19026 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19028 if (is_nonpipeline_insn (ready[n_ready - 1])
19029 && (recog_memoized (ready[n_ready - 2]) > 0))
19030 /* Simply swap first two insns. */
19031 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19034 if (rs6000_tune == PROCESSOR_POWER6)
19035 load_store_pendulum = 0;
19037 /* Do Power10 dependent reordering. */
19038 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19039 power10_sched_reorder (ready, n_ready - 1);
19041 return rs6000_issue_rate ();
19044 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19046 static int
19047 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19048 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19050 if (sched_verbose)
19051 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19053 /* Do Power6 dependent reordering if necessary. */
19054 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19055 return power6_sched_reorder2 (ready, *pn_ready - 1);
19057 /* Do Power9 dependent reordering if necessary. */
19058 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19059 && recog_memoized (last_scheduled_insn) >= 0)
19060 return power9_sched_reorder2 (ready, *pn_ready - 1);
19062 /* Do Power10 dependent reordering. */
19063 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19064 return power10_sched_reorder (ready, *pn_ready - 1);
19066 return cached_can_issue_more;
19069 /* Return whether the presence of INSN causes a dispatch group termination
19070 of group WHICH_GROUP.
19072 If WHICH_GROUP == current_group, this function will return true if INSN
19073 causes the termination of the current group (i.e, the dispatch group to
19074 which INSN belongs). This means that INSN will be the last insn in the
19075 group it belongs to.
19077 If WHICH_GROUP == previous_group, this function will return true if INSN
19078 causes the termination of the previous group (i.e, the dispatch group that
19079 precedes the group to which INSN belongs). This means that INSN will be
19080 the first insn in the group it belongs to). */
19082 static bool
19083 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19085 bool first, last;
19087 if (! insn)
19088 return false;
19090 first = insn_must_be_first_in_group (insn);
19091 last = insn_must_be_last_in_group (insn);
19093 if (first && last)
19094 return true;
19096 if (which_group == current_group)
19097 return last;
19098 else if (which_group == previous_group)
19099 return first;
19101 return false;
19105 static bool
19106 insn_must_be_first_in_group (rtx_insn *insn)
19108 enum attr_type type;
19110 if (!insn
19111 || NOTE_P (insn)
19112 || DEBUG_INSN_P (insn)
19113 || GET_CODE (PATTERN (insn)) == USE
19114 || GET_CODE (PATTERN (insn)) == CLOBBER)
19115 return false;
19117 switch (rs6000_tune)
19119 case PROCESSOR_POWER5:
19120 if (is_cracked_insn (insn))
19121 return true;
19122 /* FALLTHRU */
19123 case PROCESSOR_POWER4:
19124 if (is_microcoded_insn (insn))
19125 return true;
19127 if (!rs6000_sched_groups)
19128 return false;
19130 type = get_attr_type (insn);
19132 switch (type)
19134 case TYPE_MFCR:
19135 case TYPE_MFCRF:
19136 case TYPE_MTCR:
19137 case TYPE_CR_LOGICAL:
19138 case TYPE_MTJMPR:
19139 case TYPE_MFJMPR:
19140 case TYPE_DIV:
19141 case TYPE_LOAD_L:
19142 case TYPE_STORE_C:
19143 case TYPE_ISYNC:
19144 case TYPE_SYNC:
19145 return true;
19146 default:
19147 break;
19149 break;
19150 case PROCESSOR_POWER6:
19151 type = get_attr_type (insn);
19153 switch (type)
19155 case TYPE_EXTS:
19156 case TYPE_CNTLZ:
19157 case TYPE_TRAP:
19158 case TYPE_MUL:
19159 case TYPE_INSERT:
19160 case TYPE_FPCOMPARE:
19161 case TYPE_MFCR:
19162 case TYPE_MTCR:
19163 case TYPE_MFJMPR:
19164 case TYPE_MTJMPR:
19165 case TYPE_ISYNC:
19166 case TYPE_SYNC:
19167 case TYPE_LOAD_L:
19168 case TYPE_STORE_C:
19169 return true;
19170 case TYPE_SHIFT:
19171 if (get_attr_dot (insn) == DOT_NO
19172 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19173 return true;
19174 else
19175 break;
19176 case TYPE_DIV:
19177 if (get_attr_size (insn) == SIZE_32)
19178 return true;
19179 else
19180 break;
19181 case TYPE_LOAD:
19182 case TYPE_STORE:
19183 case TYPE_FPLOAD:
19184 case TYPE_FPSTORE:
19185 if (get_attr_update (insn) == UPDATE_YES)
19186 return true;
19187 else
19188 break;
19189 default:
19190 break;
19192 break;
19193 case PROCESSOR_POWER7:
19194 type = get_attr_type (insn);
19196 switch (type)
19198 case TYPE_CR_LOGICAL:
19199 case TYPE_MFCR:
19200 case TYPE_MFCRF:
19201 case TYPE_MTCR:
19202 case TYPE_DIV:
19203 case TYPE_ISYNC:
19204 case TYPE_LOAD_L:
19205 case TYPE_STORE_C:
19206 case TYPE_MFJMPR:
19207 case TYPE_MTJMPR:
19208 return true;
19209 case TYPE_MUL:
19210 case TYPE_SHIFT:
19211 case TYPE_EXTS:
19212 if (get_attr_dot (insn) == DOT_YES)
19213 return true;
19214 else
19215 break;
19216 case TYPE_LOAD:
19217 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19218 || get_attr_update (insn) == UPDATE_YES)
19219 return true;
19220 else
19221 break;
19222 case TYPE_STORE:
19223 case TYPE_FPLOAD:
19224 case TYPE_FPSTORE:
19225 if (get_attr_update (insn) == UPDATE_YES)
19226 return true;
19227 else
19228 break;
19229 default:
19230 break;
19232 break;
19233 case PROCESSOR_POWER8:
19234 type = get_attr_type (insn);
19236 switch (type)
19238 case TYPE_CR_LOGICAL:
19239 case TYPE_MFCR:
19240 case TYPE_MFCRF:
19241 case TYPE_MTCR:
19242 case TYPE_SYNC:
19243 case TYPE_ISYNC:
19244 case TYPE_LOAD_L:
19245 case TYPE_STORE_C:
19246 case TYPE_VECSTORE:
19247 case TYPE_MFJMPR:
19248 case TYPE_MTJMPR:
19249 return true;
19250 case TYPE_SHIFT:
19251 case TYPE_EXTS:
19252 case TYPE_MUL:
19253 if (get_attr_dot (insn) == DOT_YES)
19254 return true;
19255 else
19256 break;
19257 case TYPE_LOAD:
19258 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19259 || get_attr_update (insn) == UPDATE_YES)
19260 return true;
19261 else
19262 break;
19263 case TYPE_STORE:
19264 if (get_attr_update (insn) == UPDATE_YES
19265 && get_attr_indexed (insn) == INDEXED_YES)
19266 return true;
19267 else
19268 break;
19269 default:
19270 break;
19272 break;
19273 default:
19274 break;
19277 return false;
19280 static bool
19281 insn_must_be_last_in_group (rtx_insn *insn)
19283 enum attr_type type;
19285 if (!insn
19286 || NOTE_P (insn)
19287 || DEBUG_INSN_P (insn)
19288 || GET_CODE (PATTERN (insn)) == USE
19289 || GET_CODE (PATTERN (insn)) == CLOBBER)
19290 return false;
19292 switch (rs6000_tune) {
19293 case PROCESSOR_POWER4:
19294 case PROCESSOR_POWER5:
19295 if (is_microcoded_insn (insn))
19296 return true;
19298 if (is_branch_slot_insn (insn))
19299 return true;
19301 break;
19302 case PROCESSOR_POWER6:
19303 type = get_attr_type (insn);
19305 switch (type)
19307 case TYPE_EXTS:
19308 case TYPE_CNTLZ:
19309 case TYPE_TRAP:
19310 case TYPE_MUL:
19311 case TYPE_FPCOMPARE:
19312 case TYPE_MFCR:
19313 case TYPE_MTCR:
19314 case TYPE_MFJMPR:
19315 case TYPE_MTJMPR:
19316 case TYPE_ISYNC:
19317 case TYPE_SYNC:
19318 case TYPE_LOAD_L:
19319 case TYPE_STORE_C:
19320 return true;
19321 case TYPE_SHIFT:
19322 if (get_attr_dot (insn) == DOT_NO
19323 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19324 return true;
19325 else
19326 break;
19327 case TYPE_DIV:
19328 if (get_attr_size (insn) == SIZE_32)
19329 return true;
19330 else
19331 break;
19332 default:
19333 break;
19335 break;
19336 case PROCESSOR_POWER7:
19337 type = get_attr_type (insn);
19339 switch (type)
19341 case TYPE_ISYNC:
19342 case TYPE_SYNC:
19343 case TYPE_LOAD_L:
19344 case TYPE_STORE_C:
19345 return true;
19346 case TYPE_LOAD:
19347 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19348 && get_attr_update (insn) == UPDATE_YES)
19349 return true;
19350 else
19351 break;
19352 case TYPE_STORE:
19353 if (get_attr_update (insn) == UPDATE_YES
19354 && get_attr_indexed (insn) == INDEXED_YES)
19355 return true;
19356 else
19357 break;
19358 default:
19359 break;
19361 break;
19362 case PROCESSOR_POWER8:
19363 type = get_attr_type (insn);
19365 switch (type)
19367 case TYPE_MFCR:
19368 case TYPE_MTCR:
19369 case TYPE_ISYNC:
19370 case TYPE_SYNC:
19371 case TYPE_LOAD_L:
19372 case TYPE_STORE_C:
19373 return true;
19374 case TYPE_LOAD:
19375 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19376 && get_attr_update (insn) == UPDATE_YES)
19377 return true;
19378 else
19379 break;
19380 case TYPE_STORE:
19381 if (get_attr_update (insn) == UPDATE_YES
19382 && get_attr_indexed (insn) == INDEXED_YES)
19383 return true;
19384 else
19385 break;
19386 default:
19387 break;
19389 break;
19390 default:
19391 break;
19394 return false;
19397 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19398 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19400 static bool
19401 is_costly_group (rtx *group_insns, rtx next_insn)
19403 int i;
19404 int issue_rate = rs6000_issue_rate ();
19406 for (i = 0; i < issue_rate; i++)
19408 sd_iterator_def sd_it;
19409 dep_t dep;
19410 rtx insn = group_insns[i];
19412 if (!insn)
19413 continue;
19415 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19417 rtx next = DEP_CON (dep);
19419 if (next == next_insn
19420 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19421 return true;
19425 return false;
19428 /* Utility of the function redefine_groups.
19429 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19430 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19431 to keep it "far" (in a separate group) from GROUP_INSNS, following
19432 one of the following schemes, depending on the value of the flag
19433 -minsert_sched_nops = X:
19434 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19435 in order to force NEXT_INSN into a separate group.
19436 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19437 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19438 insertion (has a group just ended, how many vacant issue slots remain in the
19439 last group, and how many dispatch groups were encountered so far). */
19441 static int
19442 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19443 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19444 int *group_count)
19446 rtx nop;
19447 bool force;
19448 int issue_rate = rs6000_issue_rate ();
19449 bool end = *group_end;
19450 int i;
19452 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19453 return can_issue_more;
19455 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19456 return can_issue_more;
19458 force = is_costly_group (group_insns, next_insn);
19459 if (!force)
19460 return can_issue_more;
19462 if (sched_verbose > 6)
19463 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19464 *group_count ,can_issue_more);
19466 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19468 if (*group_end)
19469 can_issue_more = 0;
19471 /* Since only a branch can be issued in the last issue_slot, it is
19472 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19473 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19474 in this case the last nop will start a new group and the branch
19475 will be forced to the new group. */
19476 if (can_issue_more && !is_branch_slot_insn (next_insn))
19477 can_issue_more--;
19479 /* Do we have a special group ending nop? */
19480 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19481 || rs6000_tune == PROCESSOR_POWER8)
19483 nop = gen_group_ending_nop ();
19484 emit_insn_before (nop, next_insn);
19485 can_issue_more = 0;
19487 else
19488 while (can_issue_more > 0)
19490 nop = gen_nop ();
19491 emit_insn_before (nop, next_insn);
19492 can_issue_more--;
19495 *group_end = true;
19496 return 0;
19499 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19501 int n_nops = rs6000_sched_insert_nops;
19503 /* Nops can't be issued from the branch slot, so the effective
19504 issue_rate for nops is 'issue_rate - 1'. */
19505 if (can_issue_more == 0)
19506 can_issue_more = issue_rate;
19507 can_issue_more--;
19508 if (can_issue_more == 0)
19510 can_issue_more = issue_rate - 1;
19511 (*group_count)++;
19512 end = true;
19513 for (i = 0; i < issue_rate; i++)
19515 group_insns[i] = 0;
19519 while (n_nops > 0)
19521 nop = gen_nop ();
19522 emit_insn_before (nop, next_insn);
19523 if (can_issue_more == issue_rate - 1) /* new group begins */
19524 end = false;
19525 can_issue_more--;
19526 if (can_issue_more == 0)
19528 can_issue_more = issue_rate - 1;
19529 (*group_count)++;
19530 end = true;
19531 for (i = 0; i < issue_rate; i++)
19533 group_insns[i] = 0;
19536 n_nops--;
19539 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19540 can_issue_more++;
19542 /* Is next_insn going to start a new group? */
19543 *group_end
19544 = (end
19545 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19546 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19547 || (can_issue_more < issue_rate &&
19548 insn_terminates_group_p (next_insn, previous_group)));
19549 if (*group_end && end)
19550 (*group_count)--;
19552 if (sched_verbose > 6)
19553 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19554 *group_count, can_issue_more);
19555 return can_issue_more;
19558 return can_issue_more;
19561 /* This function tries to synch the dispatch groups that the compiler "sees"
19562 with the dispatch groups that the processor dispatcher is expected to
19563 form in practice. It tries to achieve this synchronization by forcing the
19564 estimated processor grouping on the compiler (as opposed to the function
19565 'pad_goups' which tries to force the scheduler's grouping on the processor).
19567 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19568 examines the (estimated) dispatch groups that will be formed by the processor
19569 dispatcher. It marks these group boundaries to reflect the estimated
19570 processor grouping, overriding the grouping that the scheduler had marked.
19571 Depending on the value of the flag '-minsert-sched-nops' this function can
19572 force certain insns into separate groups or force a certain distance between
19573 them by inserting nops, for example, if there exists a "costly dependence"
19574 between the insns.
19576 The function estimates the group boundaries that the processor will form as
19577 follows: It keeps track of how many vacant issue slots are available after
19578 each insn. A subsequent insn will start a new group if one of the following
19579 4 cases applies:
19580 - no more vacant issue slots remain in the current dispatch group.
19581 - only the last issue slot, which is the branch slot, is vacant, but the next
19582 insn is not a branch.
19583 - only the last 2 or less issue slots, including the branch slot, are vacant,
19584 which means that a cracked insn (which occupies two issue slots) can't be
19585 issued in this group.
19586 - less than 'issue_rate' slots are vacant, and the next insn always needs to
19587 start a new group. */
19589 static int
19590 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19591 rtx_insn *tail)
19593 rtx_insn *insn, *next_insn;
19594 int issue_rate;
19595 int can_issue_more;
19596 int slot, i;
19597 bool group_end;
19598 int group_count = 0;
19599 rtx *group_insns;
19601 /* Initialize. */
19602 issue_rate = rs6000_issue_rate ();
19603 group_insns = XALLOCAVEC (rtx, issue_rate);
19604 for (i = 0; i < issue_rate; i++)
19606 group_insns[i] = 0;
19608 can_issue_more = issue_rate;
19609 slot = 0;
19610 insn = get_next_active_insn (prev_head_insn, tail);
19611 group_end = false;
19613 while (insn != NULL_RTX)
19615 slot = (issue_rate - can_issue_more);
19616 group_insns[slot] = insn;
19617 can_issue_more =
19618 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19619 if (insn_terminates_group_p (insn, current_group))
19620 can_issue_more = 0;
19622 next_insn = get_next_active_insn (insn, tail);
19623 if (next_insn == NULL_RTX)
19624 return group_count + 1;
19626 /* Is next_insn going to start a new group? */
19627 group_end
19628 = (can_issue_more == 0
19629 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19630 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19631 || (can_issue_more < issue_rate &&
19632 insn_terminates_group_p (next_insn, previous_group)));
19634 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19635 next_insn, &group_end, can_issue_more,
19636 &group_count);
19638 if (group_end)
19640 group_count++;
19641 can_issue_more = 0;
19642 for (i = 0; i < issue_rate; i++)
19644 group_insns[i] = 0;
19648 if (GET_MODE (next_insn) == TImode && can_issue_more)
19649 PUT_MODE (next_insn, VOIDmode);
19650 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19651 PUT_MODE (next_insn, TImode);
19653 insn = next_insn;
19654 if (can_issue_more == 0)
19655 can_issue_more = issue_rate;
19656 } /* while */
19658 return group_count;
19661 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19662 dispatch group boundaries that the scheduler had marked. Pad with nops
19663 any dispatch groups which have vacant issue slots, in order to force the
19664 scheduler's grouping on the processor dispatcher. The function
19665 returns the number of dispatch groups found. */
19667 static int
19668 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19669 rtx_insn *tail)
19671 rtx_insn *insn, *next_insn;
19672 rtx nop;
19673 int issue_rate;
19674 int can_issue_more;
19675 int group_end;
19676 int group_count = 0;
19678 /* Initialize issue_rate. */
19679 issue_rate = rs6000_issue_rate ();
19680 can_issue_more = issue_rate;
19682 insn = get_next_active_insn (prev_head_insn, tail);
19683 next_insn = get_next_active_insn (insn, tail);
19685 while (insn != NULL_RTX)
19687 can_issue_more =
19688 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19690 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19692 if (next_insn == NULL_RTX)
19693 break;
19695 if (group_end)
19697 /* If the scheduler had marked group termination at this location
19698 (between insn and next_insn), and neither insn nor next_insn will
19699 force group termination, pad the group with nops to force group
19700 termination. */
19701 if (can_issue_more
19702 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19703 && !insn_terminates_group_p (insn, current_group)
19704 && !insn_terminates_group_p (next_insn, previous_group))
19706 if (!is_branch_slot_insn (next_insn))
19707 can_issue_more--;
19709 while (can_issue_more)
19711 nop = gen_nop ();
19712 emit_insn_before (nop, next_insn);
19713 can_issue_more--;
19717 can_issue_more = issue_rate;
19718 group_count++;
19721 insn = next_insn;
19722 next_insn = get_next_active_insn (insn, tail);
19725 return group_count;
19728 /* We're beginning a new block. Initialize data structures as necessary. */
19730 static void
19731 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19732 int sched_verbose ATTRIBUTE_UNUSED,
19733 int max_ready ATTRIBUTE_UNUSED)
19735 last_scheduled_insn = NULL;
19736 load_store_pendulum = 0;
19737 divide_cnt = 0;
19738 vec_pairing = 0;
19741 /* The following function is called at the end of scheduling BB.
19742 After reload, it inserts nops at insn group bundling. */
19744 static void
19745 rs6000_sched_finish (FILE *dump, int sched_verbose)
19747 int n_groups;
19749 if (sched_verbose)
19750 fprintf (dump, "=== Finishing schedule.\n");
19752 if (reload_completed && rs6000_sched_groups)
19754 /* Do not run sched_finish hook when selective scheduling enabled. */
19755 if (sel_sched_p ())
19756 return;
19758 if (rs6000_sched_insert_nops == sched_finish_none)
19759 return;
19761 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19762 n_groups = pad_groups (dump, sched_verbose,
19763 current_sched_info->prev_head,
19764 current_sched_info->next_tail);
19765 else
19766 n_groups = redefine_groups (dump, sched_verbose,
19767 current_sched_info->prev_head,
19768 current_sched_info->next_tail);
19770 if (sched_verbose >= 6)
19772 fprintf (dump, "ngroups = %d\n", n_groups);
19773 print_rtl (dump, current_sched_info->prev_head);
19774 fprintf (dump, "Done finish_sched\n");
19779 struct rs6000_sched_context
19781 short cached_can_issue_more;
19782 rtx_insn *last_scheduled_insn;
19783 int load_store_pendulum;
19784 int divide_cnt;
19785 int vec_pairing;
19788 typedef struct rs6000_sched_context rs6000_sched_context_def;
19789 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19791 /* Allocate store for new scheduling context. */
19792 static void *
19793 rs6000_alloc_sched_context (void)
19795 return xmalloc (sizeof (rs6000_sched_context_def));
19798 /* If CLEAN_P is true then initializes _SC with clean data,
19799 and from the global context otherwise. */
19800 static void
19801 rs6000_init_sched_context (void *_sc, bool clean_p)
19803 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19805 if (clean_p)
19807 sc->cached_can_issue_more = 0;
19808 sc->last_scheduled_insn = NULL;
19809 sc->load_store_pendulum = 0;
19810 sc->divide_cnt = 0;
19811 sc->vec_pairing = 0;
19813 else
19815 sc->cached_can_issue_more = cached_can_issue_more;
19816 sc->last_scheduled_insn = last_scheduled_insn;
19817 sc->load_store_pendulum = load_store_pendulum;
19818 sc->divide_cnt = divide_cnt;
19819 sc->vec_pairing = vec_pairing;
19823 /* Sets the global scheduling context to the one pointed to by _SC. */
19824 static void
19825 rs6000_set_sched_context (void *_sc)
19827 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19829 gcc_assert (sc != NULL);
19831 cached_can_issue_more = sc->cached_can_issue_more;
19832 last_scheduled_insn = sc->last_scheduled_insn;
19833 load_store_pendulum = sc->load_store_pendulum;
19834 divide_cnt = sc->divide_cnt;
19835 vec_pairing = sc->vec_pairing;
19838 /* Free _SC. */
19839 static void
19840 rs6000_free_sched_context (void *_sc)
19842 gcc_assert (_sc != NULL);
19844 free (_sc);
19847 static bool
19848 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19850 switch (get_attr_type (insn))
19852 case TYPE_DIV:
19853 case TYPE_SDIV:
19854 case TYPE_DDIV:
19855 case TYPE_VECDIV:
19856 case TYPE_SSQRT:
19857 case TYPE_DSQRT:
19858 return false;
19860 default:
19861 return true;
19865 /* Length in units of the trampoline for entering a nested function. */
19868 rs6000_trampoline_size (void)
19870 int ret = 0;
19872 switch (DEFAULT_ABI)
19874 default:
19875 gcc_unreachable ();
19877 case ABI_AIX:
19878 ret = (TARGET_32BIT) ? 12 : 24;
19879 break;
19881 case ABI_ELFv2:
19882 gcc_assert (!TARGET_32BIT);
19883 ret = 32;
19884 break;
19886 case ABI_DARWIN:
19887 case ABI_V4:
19888 ret = (TARGET_32BIT) ? 40 : 48;
19889 break;
19892 return ret;
19895 /* Emit RTL insns to initialize the variable parts of a trampoline.
19896 FNADDR is an RTX for the address of the function's pure code.
19897 CXT is an RTX for the static chain value for the function. */
19899 static void
19900 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19902 int regsize = (TARGET_32BIT) ? 4 : 8;
19903 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19904 rtx ctx_reg = force_reg (Pmode, cxt);
19905 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19907 switch (DEFAULT_ABI)
19909 default:
19910 gcc_unreachable ();
19912 /* Under AIX, just build the 3 word function descriptor */
19913 case ABI_AIX:
19915 rtx fnmem, fn_reg, toc_reg;
19917 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19918 error ("you cannot take the address of a nested function if you use "
19919 "the %qs option", "-mno-pointers-to-nested-functions");
19921 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19922 fn_reg = gen_reg_rtx (Pmode);
19923 toc_reg = gen_reg_rtx (Pmode);
19925 /* Macro to shorten the code expansions below. */
19926 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19928 m_tramp = replace_equiv_address (m_tramp, addr);
19930 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19931 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19932 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19933 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19934 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19936 # undef MEM_PLUS
19938 break;
19940 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19941 case ABI_ELFv2:
19942 case ABI_DARWIN:
19943 case ABI_V4:
19944 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19945 LCT_NORMAL, VOIDmode,
19946 addr, Pmode,
19947 GEN_INT (rs6000_trampoline_size ()), SImode,
19948 fnaddr, Pmode,
19949 ctx_reg, Pmode);
19950 break;
19955 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19956 identifier as an argument, so the front end shouldn't look it up. */
19958 static bool
19959 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19961 return is_attribute_p ("altivec", attr_id);
19964 /* Handle the "altivec" attribute. The attribute may have
19965 arguments as follows:
19967 __attribute__((altivec(vector__)))
19968 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19969 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19971 and may appear more than once (e.g., 'vector bool char') in a
19972 given declaration. */
19974 static tree
19975 rs6000_handle_altivec_attribute (tree *node,
19976 tree name ATTRIBUTE_UNUSED,
19977 tree args,
19978 int flags ATTRIBUTE_UNUSED,
19979 bool *no_add_attrs)
19981 tree type = *node, result = NULL_TREE;
19982 machine_mode mode;
19983 int unsigned_p;
19984 char altivec_type
19985 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19986 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19987 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19988 : '?');
19990 while (POINTER_TYPE_P (type)
19991 || TREE_CODE (type) == FUNCTION_TYPE
19992 || TREE_CODE (type) == METHOD_TYPE
19993 || TREE_CODE (type) == ARRAY_TYPE)
19994 type = TREE_TYPE (type);
19996 mode = TYPE_MODE (type);
19998 /* Check for invalid AltiVec type qualifiers. */
19999 if (type == long_double_type_node)
20000 error ("use of %<long double%> in AltiVec types is invalid");
20001 else if (type == boolean_type_node)
20002 error ("use of boolean types in AltiVec types is invalid");
20003 else if (TREE_CODE (type) == COMPLEX_TYPE)
20004 error ("use of %<complex%> in AltiVec types is invalid");
20005 else if (DECIMAL_FLOAT_MODE_P (mode))
20006 error ("use of decimal floating-point types in AltiVec types is invalid");
20007 else if (!TARGET_VSX)
20009 if (type == long_unsigned_type_node || type == long_integer_type_node)
20011 if (TARGET_64BIT)
20012 error ("use of %<long%> in AltiVec types is invalid for "
20013 "64-bit code without %qs", "-mvsx");
20014 else if (rs6000_warn_altivec_long)
20015 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20016 "use %<int%>");
20018 else if (type == long_long_unsigned_type_node
20019 || type == long_long_integer_type_node)
20020 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20021 "-mvsx");
20022 else if (type == double_type_node)
20023 error ("use of %<double%> in AltiVec types is invalid without %qs",
20024 "-mvsx");
20027 switch (altivec_type)
20029 case 'v':
20030 unsigned_p = TYPE_UNSIGNED (type);
20031 switch (mode)
20033 case E_TImode:
20034 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20035 break;
20036 case E_DImode:
20037 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20038 break;
20039 case E_SImode:
20040 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20041 break;
20042 case E_HImode:
20043 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20044 break;
20045 case E_QImode:
20046 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20047 break;
20048 case E_SFmode: result = V4SF_type_node; break;
20049 case E_DFmode: result = V2DF_type_node; break;
20050 /* If the user says 'vector int bool', we may be handed the 'bool'
20051 attribute _before_ the 'vector' attribute, and so select the
20052 proper type in the 'b' case below. */
20053 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20054 case E_V2DImode: case E_V2DFmode:
20055 result = type;
20056 default: break;
20058 break;
20059 case 'b':
20060 switch (mode)
20062 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20063 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20064 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20065 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20066 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20067 default: break;
20069 break;
20070 case 'p':
20071 switch (mode)
20073 case E_V8HImode: result = pixel_V8HI_type_node;
20074 default: break;
20076 default: break;
20079 /* Propagate qualifiers attached to the element type
20080 onto the vector type. */
20081 if (result && result != type && TYPE_QUALS (type))
20082 result = build_qualified_type (result, TYPE_QUALS (type));
20084 *no_add_attrs = true; /* No need to hang on to the attribute. */
20086 if (result)
20087 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20089 return NULL_TREE;
20092 /* AltiVec defines five built-in scalar types that serve as vector
20093 elements; we must teach the compiler how to mangle them. The 128-bit
20094 floating point mangling is target-specific as well. MMA defines
20095 two built-in types to be used as opaque vector types. */
20097 static const char *
20098 rs6000_mangle_type (const_tree type)
20100 type = TYPE_MAIN_VARIANT (type);
20102 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20103 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20104 && TREE_CODE (type) != OPAQUE_TYPE)
20105 return NULL;
20107 if (type == bool_char_type_node) return "U6__boolc";
20108 if (type == bool_short_type_node) return "U6__bools";
20109 if (type == pixel_type_node) return "u7__pixel";
20110 if (type == bool_int_type_node) return "U6__booli";
20111 if (type == bool_long_long_type_node) return "U6__boolx";
20113 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20114 return "g";
20115 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20116 return "u9__ieee128";
20118 if (type == vector_pair_type_node)
20119 return "u13__vector_pair";
20120 if (type == vector_quad_type_node)
20121 return "u13__vector_quad";
20123 /* For all other types, use the default mangling. */
20124 return NULL;
20127 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20128 struct attribute_spec.handler. */
20130 static tree
20131 rs6000_handle_longcall_attribute (tree *node, tree name,
20132 tree args ATTRIBUTE_UNUSED,
20133 int flags ATTRIBUTE_UNUSED,
20134 bool *no_add_attrs)
20136 if (TREE_CODE (*node) != FUNCTION_TYPE
20137 && TREE_CODE (*node) != FIELD_DECL
20138 && TREE_CODE (*node) != TYPE_DECL)
20140 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20141 name);
20142 *no_add_attrs = true;
20145 return NULL_TREE;
20148 /* Set longcall attributes on all functions declared when
20149 rs6000_default_long_calls is true. */
20150 static void
20151 rs6000_set_default_type_attributes (tree type)
20153 if (rs6000_default_long_calls
20154 && (TREE_CODE (type) == FUNCTION_TYPE
20155 || TREE_CODE (type) == METHOD_TYPE))
20156 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20157 NULL_TREE,
20158 TYPE_ATTRIBUTES (type));
20160 #if TARGET_MACHO
20161 darwin_set_default_type_attributes (type);
20162 #endif
20165 /* Return a reference suitable for calling a function with the
20166 longcall attribute. */
20168 static rtx
20169 rs6000_longcall_ref (rtx call_ref, rtx arg)
20171 /* System V adds '.' to the internal name, so skip them. */
20172 const char *call_name = XSTR (call_ref, 0);
20173 if (*call_name == '.')
20175 while (*call_name == '.')
20176 call_name++;
20178 tree node = get_identifier (call_name);
20179 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20182 if (TARGET_PLTSEQ)
20184 rtx base = const0_rtx;
20185 int regno = 12;
20186 if (rs6000_pcrel_p ())
20188 rtx reg = gen_rtx_REG (Pmode, regno);
20189 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20190 gen_rtvec (3, base, call_ref, arg),
20191 UNSPECV_PLT_PCREL);
20192 emit_insn (gen_rtx_SET (reg, u));
20193 return reg;
20196 if (DEFAULT_ABI == ABI_ELFv2)
20197 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20198 else
20200 if (flag_pic)
20201 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20202 regno = 11;
20204 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20205 may be used by a function global entry point. For SysV4, r11
20206 is used by __glink_PLTresolve lazy resolver entry. */
20207 rtx reg = gen_rtx_REG (Pmode, regno);
20208 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20209 UNSPEC_PLT16_HA);
20210 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20211 gen_rtvec (3, reg, call_ref, arg),
20212 UNSPECV_PLT16_LO);
20213 emit_insn (gen_rtx_SET (reg, hi));
20214 emit_insn (gen_rtx_SET (reg, lo));
20215 return reg;
20218 return force_reg (Pmode, call_ref);
20221 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20222 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20223 #endif
20225 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20226 struct attribute_spec.handler. */
20227 static tree
20228 rs6000_handle_struct_attribute (tree *node, tree name,
20229 tree args ATTRIBUTE_UNUSED,
20230 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20232 tree *type = NULL;
20233 if (DECL_P (*node))
20235 if (TREE_CODE (*node) == TYPE_DECL)
20236 type = &TREE_TYPE (*node);
20238 else
20239 type = node;
20241 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20242 || TREE_CODE (*type) == UNION_TYPE)))
20244 warning (OPT_Wattributes, "%qE attribute ignored", name);
20245 *no_add_attrs = true;
20248 else if ((is_attribute_p ("ms_struct", name)
20249 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20250 || ((is_attribute_p ("gcc_struct", name)
20251 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20253 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20254 name);
20255 *no_add_attrs = true;
20258 return NULL_TREE;
20261 static bool
20262 rs6000_ms_bitfield_layout_p (const_tree record_type)
20264 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
20265 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20266 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20269 #ifdef USING_ELFOS_H
20271 /* A get_unnamed_section callback, used for switching to toc_section. */
20273 static void
20274 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20276 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20277 && TARGET_MINIMAL_TOC)
20279 if (!toc_initialized)
20281 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20282 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20283 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20284 fprintf (asm_out_file, "\t.tc ");
20285 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20286 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20287 fprintf (asm_out_file, "\n");
20289 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20290 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20291 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20292 fprintf (asm_out_file, " = .+32768\n");
20293 toc_initialized = 1;
20295 else
20296 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20298 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20300 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20301 if (!toc_initialized)
20303 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20304 toc_initialized = 1;
20307 else
20309 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20310 if (!toc_initialized)
20312 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20313 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20314 fprintf (asm_out_file, " = .+32768\n");
20315 toc_initialized = 1;
20320 /* Implement TARGET_ASM_INIT_SECTIONS. */
20322 static void
20323 rs6000_elf_asm_init_sections (void)
20325 toc_section
20326 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20328 sdata2_section
20329 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20330 SDATA2_SECTION_ASM_OP);
20333 /* Implement TARGET_SELECT_RTX_SECTION. */
20335 static section *
20336 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20337 unsigned HOST_WIDE_INT align)
20339 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20340 return toc_section;
20341 else
20342 return default_elf_select_rtx_section (mode, x, align);
20345 /* For a SYMBOL_REF, set generic flags and then perform some
20346 target-specific processing.
20348 When the AIX ABI is requested on a non-AIX system, replace the
20349 function name with the real name (with a leading .) rather than the
20350 function descriptor name. This saves a lot of overriding code to
20351 read the prefixes. */
20353 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20354 static void
20355 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20357 default_encode_section_info (decl, rtl, first);
20359 if (first
20360 && TREE_CODE (decl) == FUNCTION_DECL
20361 && !TARGET_AIX
20362 && DEFAULT_ABI == ABI_AIX)
20364 rtx sym_ref = XEXP (rtl, 0);
20365 size_t len = strlen (XSTR (sym_ref, 0));
20366 char *str = XALLOCAVEC (char, len + 2);
20367 str[0] = '.';
20368 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20369 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20373 static inline bool
20374 compare_section_name (const char *section, const char *templ)
20376 int len;
20378 len = strlen (templ);
20379 return (strncmp (section, templ, len) == 0
20380 && (section[len] == 0 || section[len] == '.'));
20383 bool
20384 rs6000_elf_in_small_data_p (const_tree decl)
20386 if (rs6000_sdata == SDATA_NONE)
20387 return false;
20389 /* We want to merge strings, so we never consider them small data. */
20390 if (TREE_CODE (decl) == STRING_CST)
20391 return false;
20393 /* Functions are never in the small data area. */
20394 if (TREE_CODE (decl) == FUNCTION_DECL)
20395 return false;
20397 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20399 const char *section = DECL_SECTION_NAME (decl);
20400 if (compare_section_name (section, ".sdata")
20401 || compare_section_name (section, ".sdata2")
20402 || compare_section_name (section, ".gnu.linkonce.s")
20403 || compare_section_name (section, ".sbss")
20404 || compare_section_name (section, ".sbss2")
20405 || compare_section_name (section, ".gnu.linkonce.sb")
20406 || strcmp (section, ".PPC.EMB.sdata0") == 0
20407 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20408 return true;
20410 else
20412 /* If we are told not to put readonly data in sdata, then don't. */
20413 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20414 && !rs6000_readonly_in_sdata)
20415 return false;
20417 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20419 if (size > 0
20420 && size <= g_switch_value
20421 /* If it's not public, and we're not going to reference it there,
20422 there's no need to put it in the small data section. */
20423 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20424 return true;
20427 return false;
20430 #endif /* USING_ELFOS_H */
20432 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20434 static bool
20435 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20437 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20440 /* Do not place thread-local symbols refs in the object blocks. */
20442 static bool
20443 rs6000_use_blocks_for_decl_p (const_tree decl)
20445 return !DECL_THREAD_LOCAL_P (decl);
20448 /* Return a REG that occurs in ADDR with coefficient 1.
20449 ADDR can be effectively incremented by incrementing REG.
20451 r0 is special and we must not select it as an address
20452 register by this routine since our caller will try to
20453 increment the returned register via an "la" instruction. */
20456 find_addr_reg (rtx addr)
20458 while (GET_CODE (addr) == PLUS)
20460 if (REG_P (XEXP (addr, 0))
20461 && REGNO (XEXP (addr, 0)) != 0)
20462 addr = XEXP (addr, 0);
20463 else if (REG_P (XEXP (addr, 1))
20464 && REGNO (XEXP (addr, 1)) != 0)
20465 addr = XEXP (addr, 1);
20466 else if (CONSTANT_P (XEXP (addr, 0)))
20467 addr = XEXP (addr, 1);
20468 else if (CONSTANT_P (XEXP (addr, 1)))
20469 addr = XEXP (addr, 0);
20470 else
20471 gcc_unreachable ();
20473 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20474 return addr;
20477 void
20478 rs6000_fatal_bad_address (rtx op)
20480 fatal_insn ("bad address", op);
20483 #if TARGET_MACHO
20485 vec<branch_island, va_gc> *branch_islands;
20487 /* Remember to generate a branch island for far calls to the given
20488 function. */
20490 static void
20491 add_compiler_branch_island (tree label_name, tree function_name,
20492 int line_number)
20494 branch_island bi = {function_name, label_name, line_number};
20495 vec_safe_push (branch_islands, bi);
20498 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
20499 already there or not. */
20501 static int
20502 no_previous_def (tree function_name)
20504 branch_island *bi;
20505 unsigned ix;
20507 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20508 if (function_name == bi->function_name)
20509 return 0;
20510 return 1;
20513 /* GET_PREV_LABEL gets the label name from the previous definition of
20514 the function. */
20516 static tree
20517 get_prev_label (tree function_name)
20519 branch_island *bi;
20520 unsigned ix;
20522 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20523 if (function_name == bi->function_name)
20524 return bi->label_name;
20525 return NULL_TREE;
20528 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20530 void
20531 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20533 unsigned int length;
20534 char *symbol_name, *lazy_ptr_name;
20535 char *local_label_0;
20536 static unsigned label = 0;
20538 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20539 symb = (*targetm.strip_name_encoding) (symb);
20541 length = strlen (symb);
20542 symbol_name = XALLOCAVEC (char, length + 32);
20543 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20545 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20546 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20548 if (MACHOPIC_PURE)
20550 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20551 fprintf (file, "\t.align 5\n");
20553 fprintf (file, "%s:\n", stub);
20554 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20556 label++;
20557 local_label_0 = XALLOCAVEC (char, 16);
20558 sprintf (local_label_0, "L%u$spb", label);
20560 fprintf (file, "\tmflr r0\n");
20561 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20562 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20563 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20564 lazy_ptr_name, local_label_0);
20565 fprintf (file, "\tmtlr r0\n");
20566 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20567 (TARGET_64BIT ? "ldu" : "lwzu"),
20568 lazy_ptr_name, local_label_0);
20569 fprintf (file, "\tmtctr r12\n");
20570 fprintf (file, "\tbctr\n");
20572 else /* mdynamic-no-pic or mkernel. */
20574 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20575 fprintf (file, "\t.align 4\n");
20577 fprintf (file, "%s:\n", stub);
20578 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20580 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20581 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20582 (TARGET_64BIT ? "ldu" : "lwzu"),
20583 lazy_ptr_name);
20584 fprintf (file, "\tmtctr r12\n");
20585 fprintf (file, "\tbctr\n");
20588 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20589 fprintf (file, "%s:\n", lazy_ptr_name);
20590 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20591 fprintf (file, "%sdyld_stub_binding_helper\n",
20592 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20595 /* Legitimize PIC addresses. If the address is already
20596 position-independent, we return ORIG. Newly generated
20597 position-independent addresses go into a reg. This is REG if non
20598 zero, otherwise we allocate register(s) as necessary. */
20600 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20603 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20604 rtx reg)
20606 rtx base, offset;
20608 if (reg == NULL && !reload_completed)
20609 reg = gen_reg_rtx (Pmode);
20611 if (GET_CODE (orig) == CONST)
20613 rtx reg_temp;
20615 if (GET_CODE (XEXP (orig, 0)) == PLUS
20616 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20617 return orig;
20619 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20621 /* Use a different reg for the intermediate value, as
20622 it will be marked UNCHANGING. */
20623 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20624 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20625 Pmode, reg_temp);
20626 offset =
20627 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20628 Pmode, reg);
20630 if (CONST_INT_P (offset))
20632 if (SMALL_INT (offset))
20633 return plus_constant (Pmode, base, INTVAL (offset));
20634 else if (!reload_completed)
20635 offset = force_reg (Pmode, offset);
20636 else
20638 rtx mem = force_const_mem (Pmode, orig);
20639 return machopic_legitimize_pic_address (mem, Pmode, reg);
20642 return gen_rtx_PLUS (Pmode, base, offset);
20645 /* Fall back on generic machopic code. */
20646 return machopic_legitimize_pic_address (orig, mode, reg);
20649 /* Output a .machine directive for the Darwin assembler, and call
20650 the generic start_file routine. */
20652 static void
20653 rs6000_darwin_file_start (void)
20655 static const struct
20657 const char *arg;
20658 const char *name;
20659 HOST_WIDE_INT if_set;
20660 } mapping[] = {
20661 { "ppc64", "ppc64", MASK_64BIT },
20662 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
20663 | MASK_POWERPC64 },
20664 { "power4", "ppc970", 0 },
20665 { "G5", "ppc970", 0 },
20666 { "7450", "ppc7450", 0 },
20667 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
20668 { "G4", "ppc7400", 0 },
20669 { "750", "ppc750", 0 },
20670 { "740", "ppc750", 0 },
20671 { "G3", "ppc750", 0 },
20672 { "604e", "ppc604e", 0 },
20673 { "604", "ppc604", 0 },
20674 { "603e", "ppc603", 0 },
20675 { "603", "ppc603", 0 },
20676 { "601", "ppc601", 0 },
20677 { NULL, "ppc", 0 } };
20678 const char *cpu_id = "";
20679 size_t i;
20681 rs6000_file_start ();
20682 darwin_file_start ();
20684 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20686 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20687 cpu_id = rs6000_default_cpu;
20689 if (OPTION_SET_P (rs6000_cpu_index))
20690 cpu_id = processor_target_table[rs6000_cpu_index].name;
20692 /* Look through the mapping array. Pick the first name that either
20693 matches the argument, has a bit set in IF_SET that is also set
20694 in the target flags, or has a NULL name. */
20696 i = 0;
20697 while (mapping[i].arg != NULL
20698 && strcmp (mapping[i].arg, cpu_id) != 0
20699 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20700 i++;
20702 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20705 #endif /* TARGET_MACHO */
20707 #if TARGET_ELF
20708 static int
20709 rs6000_elf_reloc_rw_mask (void)
20711 if (flag_pic)
20712 return 3;
20713 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20714 return 2;
20715 else
20716 return 0;
20719 /* Record an element in the table of global constructors. SYMBOL is
20720 a SYMBOL_REF of the function to be called; PRIORITY is a number
20721 between 0 and MAX_INIT_PRIORITY.
20723 This differs from default_named_section_asm_out_constructor in
20724 that we have special handling for -mrelocatable. */
20726 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20727 static void
20728 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20730 const char *section = ".ctors";
20731 char buf[18];
20733 if (priority != DEFAULT_INIT_PRIORITY)
20735 sprintf (buf, ".ctors.%.5u",
20736 /* Invert the numbering so the linker puts us in the proper
20737 order; constructors are run from right to left, and the
20738 linker sorts in increasing order. */
20739 MAX_INIT_PRIORITY - priority);
20740 section = buf;
20743 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20744 assemble_align (POINTER_SIZE);
20746 if (DEFAULT_ABI == ABI_V4
20747 && (TARGET_RELOCATABLE || flag_pic > 1))
20749 fputs ("\t.long (", asm_out_file);
20750 output_addr_const (asm_out_file, symbol);
20751 fputs (")@fixup\n", asm_out_file);
20753 else
20754 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20757 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20758 static void
20759 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20761 const char *section = ".dtors";
20762 char buf[18];
20764 if (priority != DEFAULT_INIT_PRIORITY)
20766 sprintf (buf, ".dtors.%.5u",
20767 /* Invert the numbering so the linker puts us in the proper
20768 order; constructors are run from right to left, and the
20769 linker sorts in increasing order. */
20770 MAX_INIT_PRIORITY - priority);
20771 section = buf;
20774 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20775 assemble_align (POINTER_SIZE);
20777 if (DEFAULT_ABI == ABI_V4
20778 && (TARGET_RELOCATABLE || flag_pic > 1))
20780 fputs ("\t.long (", asm_out_file);
20781 output_addr_const (asm_out_file, symbol);
20782 fputs (")@fixup\n", asm_out_file);
20784 else
20785 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20788 void
20789 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20791 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20793 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20794 ASM_OUTPUT_LABEL (file, name);
20795 fputs (DOUBLE_INT_ASM_OP, file);
20796 rs6000_output_function_entry (file, name);
20797 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20798 if (DOT_SYMBOLS)
20800 fputs ("\t.size\t", file);
20801 assemble_name (file, name);
20802 fputs (",24\n\t.type\t.", file);
20803 assemble_name (file, name);
20804 fputs (",@function\n", file);
20805 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20807 fputs ("\t.globl\t.", file);
20808 assemble_name (file, name);
20809 putc ('\n', file);
20812 else
20813 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20814 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20815 rs6000_output_function_entry (file, name);
20816 fputs (":\n", file);
20817 return;
20820 int uses_toc;
20821 if (DEFAULT_ABI == ABI_V4
20822 && (TARGET_RELOCATABLE || flag_pic > 1)
20823 && !TARGET_SECURE_PLT
20824 && (!constant_pool_empty_p () || crtl->profile)
20825 && (uses_toc = uses_TOC ()))
20827 char buf[256];
20829 if (uses_toc == 2)
20830 switch_to_other_text_partition ();
20831 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20833 fprintf (file, "\t.long ");
20834 assemble_name (file, toc_label_name);
20835 need_toc_init = 1;
20836 putc ('-', file);
20837 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20838 assemble_name (file, buf);
20839 putc ('\n', file);
20840 if (uses_toc == 2)
20841 switch_to_other_text_partition ();
20844 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20845 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20847 if (TARGET_CMODEL == CMODEL_LARGE
20848 && rs6000_global_entry_point_prologue_needed_p ())
20850 char buf[256];
20852 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20854 fprintf (file, "\t.quad .TOC.-");
20855 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20856 assemble_name (file, buf);
20857 putc ('\n', file);
20860 if (DEFAULT_ABI == ABI_AIX)
20862 const char *desc_name, *orig_name;
20864 orig_name = (*targetm.strip_name_encoding) (name);
20865 desc_name = orig_name;
20866 while (*desc_name == '.')
20867 desc_name++;
20869 if (TREE_PUBLIC (decl))
20870 fprintf (file, "\t.globl %s\n", desc_name);
20872 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20873 fprintf (file, "%s:\n", desc_name);
20874 fprintf (file, "\t.long %s\n", orig_name);
20875 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20876 fputs ("\t.long 0\n", file);
20877 fprintf (file, "\t.previous\n");
20879 ASM_OUTPUT_LABEL (file, name);
20882 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20883 static void
20884 rs6000_elf_file_end (void)
20886 #ifdef HAVE_AS_GNU_ATTRIBUTE
20887 /* ??? The value emitted depends on options active at file end.
20888 Assume anyone using #pragma or attributes that might change
20889 options knows what they are doing. */
20890 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20891 && rs6000_passes_float)
20893 int fp;
20895 if (TARGET_HARD_FLOAT)
20896 fp = 1;
20897 else
20898 fp = 2;
20899 if (rs6000_passes_long_double)
20901 if (!TARGET_LONG_DOUBLE_128)
20902 fp |= 2 * 4;
20903 else if (TARGET_IEEEQUAD)
20904 fp |= 3 * 4;
20905 else
20906 fp |= 1 * 4;
20908 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20910 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20912 if (rs6000_passes_vector)
20913 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20914 (TARGET_ALTIVEC_ABI ? 2 : 1));
20915 if (rs6000_returns_struct)
20916 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20917 aix_struct_return ? 2 : 1);
20919 #endif
20920 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20921 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20922 file_end_indicate_exec_stack ();
20923 #endif
20925 if (flag_split_stack)
20926 file_end_indicate_split_stack ();
20928 if (cpu_builtin_p)
20930 /* We have expanded a CPU builtin, so we need to emit a reference to
20931 the special symbol that LIBC uses to declare it supports the
20932 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
20933 switch_to_section (data_section);
20934 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20935 fprintf (asm_out_file, "\t%s %s\n",
20936 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20939 #endif
20941 #if TARGET_XCOFF
20943 /* Names of bss and data sections. These should be unique names for each
20944 compilation unit. */
20946 char *xcoff_bss_section_name;
20947 char *xcoff_private_data_section_name;
20948 char *xcoff_private_rodata_section_name;
20949 char *xcoff_tls_data_section_name;
20950 char *xcoff_read_only_section_name;
20952 static enum unwind_info_type
20953 rs6000_xcoff_debug_unwind_info (void)
20955 return UI_NONE;
20958 static void
20959 rs6000_xcoff_asm_output_anchor (rtx symbol)
20961 char buffer[100];
20963 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20964 SYMBOL_REF_BLOCK_OFFSET (symbol));
20965 fprintf (asm_out_file, "%s", SET_ASM_OP);
20966 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20967 fprintf (asm_out_file, ",");
20968 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20969 fprintf (asm_out_file, "\n");
20972 static void
20973 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20975 fputs (GLOBAL_ASM_OP, stream);
20976 RS6000_OUTPUT_BASENAME (stream, name);
20977 putc ('\n', stream);
20980 /* A get_unnamed_decl callback, used for read-only sections. PTR
20981 points to the section string variable. */
20983 static void
20984 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
20986 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20987 directive
20988 ? xcoff_private_rodata_section_name
20989 : xcoff_read_only_section_name,
20990 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20993 /* Likewise for read-write sections. */
20995 static void
20996 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
20998 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20999 xcoff_private_data_section_name,
21000 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21003 static void
21004 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21006 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21007 directive
21008 ? xcoff_private_data_section_name
21009 : xcoff_tls_data_section_name,
21010 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21013 /* A get_unnamed_section callback, used for switching to toc_section. */
21015 static void
21016 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21018 if (TARGET_MINIMAL_TOC)
21020 /* toc_section is always selected at least once from
21021 rs6000_xcoff_file_start, so this is guaranteed to
21022 always be defined once and only once in each file. */
21023 if (!toc_initialized)
21025 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21026 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21027 toc_initialized = 1;
21029 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21030 (TARGET_32BIT ? "" : ",3"));
21032 else
21033 fputs ("\t.toc\n", asm_out_file);
21036 /* Implement TARGET_ASM_INIT_SECTIONS. */
21038 static void
21039 rs6000_xcoff_asm_init_sections (void)
21041 read_only_data_section
21042 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21043 NULL);
21045 private_data_section
21046 = get_unnamed_section (SECTION_WRITE,
21047 rs6000_xcoff_output_readwrite_section_asm_op,
21048 NULL);
21050 read_only_private_data_section
21051 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21052 "");
21054 tls_data_section
21055 = get_unnamed_section (SECTION_TLS,
21056 rs6000_xcoff_output_tls_section_asm_op,
21057 NULL);
21059 tls_private_data_section
21060 = get_unnamed_section (SECTION_TLS,
21061 rs6000_xcoff_output_tls_section_asm_op,
21062 "");
21064 toc_section
21065 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21067 readonly_data_section = read_only_data_section;
21070 static int
21071 rs6000_xcoff_reloc_rw_mask (void)
21073 return 3;
21076 static void
21077 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21078 tree decl ATTRIBUTE_UNUSED)
21080 int smclass;
21081 static const char * const suffix[7]
21082 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21084 if (flags & SECTION_EXCLUDE)
21085 smclass = 6;
21086 else if (flags & SECTION_DEBUG)
21088 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21089 return;
21091 else if (flags & SECTION_CODE)
21092 smclass = 0;
21093 else if (flags & SECTION_TLS)
21095 if (flags & SECTION_BSS)
21096 smclass = 5;
21097 else
21098 smclass = 4;
21100 else if (flags & SECTION_WRITE)
21102 if (flags & SECTION_BSS)
21103 smclass = 3;
21104 else
21105 smclass = 2;
21107 else
21108 smclass = 1;
21110 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21111 (flags & SECTION_CODE) ? "." : "",
21112 name, suffix[smclass], flags & SECTION_ENTSIZE);
21115 #define IN_NAMED_SECTION(DECL) \
21116 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21117 && DECL_SECTION_NAME (DECL) != NULL)
21119 static section *
21120 rs6000_xcoff_select_section (tree decl, int reloc,
21121 unsigned HOST_WIDE_INT align)
21123 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21124 named section. */
21125 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21127 resolve_unique_section (decl, reloc, true);
21128 if (IN_NAMED_SECTION (decl))
21129 return get_named_section (decl, NULL, reloc);
21132 if (decl_readonly_section (decl, reloc))
21134 if (TREE_PUBLIC (decl))
21135 return read_only_data_section;
21136 else
21137 return read_only_private_data_section;
21139 else
21141 #if HAVE_AS_TLS
21142 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21144 if (bss_initializer_p (decl))
21145 return tls_comm_section;
21146 else if (TREE_PUBLIC (decl))
21147 return tls_data_section;
21148 else
21149 return tls_private_data_section;
21151 else
21152 #endif
21153 if (TREE_PUBLIC (decl))
21154 return data_section;
21155 else
21156 return private_data_section;
21160 static void
21161 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21163 const char *name;
21165 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21166 name = (*targetm.strip_name_encoding) (name);
21167 set_decl_section_name (decl, name);
21170 /* Select section for constant in constant pool.
21172 On RS/6000, all constants are in the private read-only data area.
21173 However, if this is being placed in the TOC it must be output as a
21174 toc entry. */
21176 static section *
21177 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21178 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21180 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21181 return toc_section;
21182 else
21183 return read_only_private_data_section;
21186 /* Remove any trailing [DS] or the like from the symbol name. */
21188 static const char *
21189 rs6000_xcoff_strip_name_encoding (const char *name)
21191 size_t len;
21192 if (*name == '*')
21193 name++;
21194 len = strlen (name);
21195 if (name[len - 1] == ']')
21196 return ggc_alloc_string (name, len - 4);
21197 else
21198 return name;
21201 /* Section attributes. AIX is always PIC. */
21203 static unsigned int
21204 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21206 unsigned int align;
21207 unsigned int flags = default_section_type_flags (decl, name, reloc);
21209 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21210 flags |= SECTION_BSS;
21212 /* Align to at least UNIT size. */
21213 if (!decl || !DECL_P (decl))
21214 align = MIN_UNITS_PER_WORD;
21215 /* Align code CSECT to at least 32 bytes. */
21216 else if ((flags & SECTION_CODE) != 0)
21217 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21218 else
21219 /* Increase alignment of large objects if not already stricter. */
21220 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21221 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21222 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21224 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21227 /* Output at beginning of assembler file.
21229 Initialize the section names for the RS/6000 at this point.
21231 Specify filename, including full path, to assembler.
21233 We want to go into the TOC section so at least one .toc will be emitted.
21234 Also, in order to output proper .bs/.es pairs, we need at least one static
21235 [RW] section emitted.
21237 Finally, declare mcount when profiling to make the assembler happy. */
21239 static void
21240 rs6000_xcoff_file_start (void)
21242 rs6000_gen_section_name (&xcoff_bss_section_name,
21243 main_input_filename, ".bss_");
21244 rs6000_gen_section_name (&xcoff_private_data_section_name,
21245 main_input_filename, ".rw_");
21246 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21247 main_input_filename, ".rop_");
21248 rs6000_gen_section_name (&xcoff_read_only_section_name,
21249 main_input_filename, ".ro_");
21250 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21251 main_input_filename, ".tls_");
21253 fputs ("\t.file\t", asm_out_file);
21254 output_quoted_string (asm_out_file, main_input_filename);
21255 fputc ('\n', asm_out_file);
21256 if (write_symbols != NO_DEBUG)
21257 switch_to_section (private_data_section);
21258 switch_to_section (toc_section);
21259 switch_to_section (text_section);
21260 if (profile_flag)
21261 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21262 rs6000_file_start ();
21265 /* Output at end of assembler file.
21266 On the RS/6000, referencing data should automatically pull in text. */
21268 static void
21269 rs6000_xcoff_file_end (void)
21271 switch_to_section (text_section);
21272 if (xcoff_tls_exec_model_detected)
21274 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21275 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21277 fputs ("_section_.text:\n", asm_out_file);
21278 switch_to_section (data_section);
21279 fputs (TARGET_32BIT
21280 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21281 asm_out_file);
21285 struct declare_alias_data
21287 FILE *file;
21288 bool function_descriptor;
21291 /* Declare alias N. A helper function for for_node_and_aliases. */
21293 static bool
21294 rs6000_declare_alias (struct symtab_node *n, void *d)
21296 struct declare_alias_data *data = (struct declare_alias_data *)d;
21297 /* Main symbol is output specially, because varasm machinery does part of
21298 the job for us - we do not need to declare .globl/lglobs and such. */
21299 if (!n->alias || n->weakref)
21300 return false;
21302 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21303 return false;
21305 /* Prevent assemble_alias from trying to use .set pseudo operation
21306 that does not behave as expected by the middle-end. */
21307 TREE_ASM_WRITTEN (n->decl) = true;
21309 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21310 char *buffer = (char *) alloca (strlen (name) + 2);
21311 char *p;
21312 int dollar_inside = 0;
21314 strcpy (buffer, name);
21315 p = strchr (buffer, '$');
21316 while (p) {
21317 *p = '_';
21318 dollar_inside++;
21319 p = strchr (p + 1, '$');
21321 if (TREE_PUBLIC (n->decl))
21323 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21325 if (dollar_inside) {
21326 if (data->function_descriptor)
21327 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21328 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21330 if (data->function_descriptor)
21332 fputs ("\t.globl .", data->file);
21333 RS6000_OUTPUT_BASENAME (data->file, buffer);
21334 putc ('\n', data->file);
21336 fputs ("\t.globl ", data->file);
21337 assemble_name (data->file, buffer);
21338 putc ('\n', data->file);
21340 #ifdef ASM_WEAKEN_DECL
21341 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21342 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21343 #endif
21345 else
21347 if (dollar_inside)
21349 if (data->function_descriptor)
21350 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21351 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21353 if (data->function_descriptor)
21355 fputs ("\t.lglobl .", data->file);
21356 RS6000_OUTPUT_BASENAME (data->file, buffer);
21357 putc ('\n', data->file);
21359 fputs ("\t.lglobl ", data->file);
21360 assemble_name (data->file, buffer);
21361 putc ('\n', data->file);
21363 if (data->function_descriptor)
21364 putc ('.', data->file);
21365 ASM_OUTPUT_LABEL (data->file, buffer);
21366 return false;
21370 #ifdef HAVE_GAS_HIDDEN
21371 /* Helper function to calculate visibility of a DECL
21372 and return the value as a const string. */
21374 static const char *
21375 rs6000_xcoff_visibility (tree decl)
21377 static const char * const visibility_types[] = {
21378 "", ",protected", ",hidden", ",internal"
21381 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21382 return visibility_types[vis];
21384 #endif
21387 /* This macro produces the initial definition of a function name.
21388 On the RS/6000, we need to place an extra '.' in the function name and
21389 output the function descriptor.
21390 Dollar signs are converted to underscores.
21392 The csect for the function will have already been created when
21393 text_section was selected. We do have to go back to that csect, however.
21395 The third and fourth parameters to the .function pseudo-op (16 and 044)
21396 are placeholders which no longer have any use.
21398 Because AIX assembler's .set command has unexpected semantics, we output
21399 all aliases as alternative labels in front of the definition. */
21401 void
21402 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21404 char *buffer = (char *) alloca (strlen (name) + 1);
21405 char *p;
21406 int dollar_inside = 0;
21407 struct declare_alias_data data = {file, false};
21409 strcpy (buffer, name);
21410 p = strchr (buffer, '$');
21411 while (p) {
21412 *p = '_';
21413 dollar_inside++;
21414 p = strchr (p + 1, '$');
21416 if (TREE_PUBLIC (decl))
21418 if (!RS6000_WEAK || !DECL_WEAK (decl))
21420 if (dollar_inside) {
21421 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21422 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21424 fputs ("\t.globl .", file);
21425 RS6000_OUTPUT_BASENAME (file, buffer);
21426 #ifdef HAVE_GAS_HIDDEN
21427 fputs (rs6000_xcoff_visibility (decl), file);
21428 #endif
21429 putc ('\n', file);
21432 else
21434 if (dollar_inside) {
21435 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21436 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21438 fputs ("\t.lglobl .", file);
21439 RS6000_OUTPUT_BASENAME (file, buffer);
21440 putc ('\n', file);
21443 fputs ("\t.csect ", file);
21444 assemble_name (file, buffer);
21445 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21447 ASM_OUTPUT_LABEL (file, buffer);
21449 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21450 &data, true);
21451 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21452 RS6000_OUTPUT_BASENAME (file, buffer);
21453 fputs (", TOC[tc0], 0\n", file);
21455 in_section = NULL;
21456 switch_to_section (function_section (decl));
21457 putc ('.', file);
21458 ASM_OUTPUT_LABEL (file, buffer);
21460 data.function_descriptor = true;
21461 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21462 &data, true);
21463 if (!DECL_IGNORED_P (decl))
21465 if (dwarf_debuginfo_p ())
21467 name = (*targetm.strip_name_encoding) (name);
21468 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21471 return;
21475 /* Output assembly language to globalize a symbol from a DECL,
21476 possibly with visibility. */
21478 void
21479 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21481 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21482 fputs (GLOBAL_ASM_OP, stream);
21483 assemble_name (stream, name);
21484 #ifdef HAVE_GAS_HIDDEN
21485 fputs (rs6000_xcoff_visibility (decl), stream);
21486 #endif
21487 putc ('\n', stream);
21490 /* Output assembly language to define a symbol as COMMON from a DECL,
21491 possibly with visibility. */
21493 void
21494 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21495 tree decl ATTRIBUTE_UNUSED,
21496 const char *name,
21497 unsigned HOST_WIDE_INT size,
21498 unsigned int align)
21500 unsigned int align2 = 2;
21502 if (align == 0)
21503 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21505 if (align > 32)
21506 align2 = floor_log2 (align / BITS_PER_UNIT);
21507 else if (size > 4)
21508 align2 = 3;
21510 if (! DECL_COMMON (decl))
21512 /* Forget section. */
21513 in_section = NULL;
21515 /* Globalize TLS BSS. */
21516 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21518 fputs (GLOBAL_ASM_OP, stream);
21519 assemble_name (stream, name);
21520 fputc ('\n', stream);
21523 /* Switch to section and skip space. */
21524 fputs ("\t.csect ", stream);
21525 assemble_name (stream, name);
21526 fprintf (stream, ",%u\n", align2);
21527 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21528 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21529 return;
21532 if (TREE_PUBLIC (decl))
21534 fprintf (stream,
21535 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21536 name, size, align2);
21538 #ifdef HAVE_GAS_HIDDEN
21539 if (decl != NULL)
21540 fputs (rs6000_xcoff_visibility (decl), stream);
21541 #endif
21542 putc ('\n', stream);
21544 else
21545 fprintf (stream,
21546 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21547 (*targetm.strip_name_encoding) (name), size, name, align2);
21550 /* This macro produces the initial definition of a object (variable) name.
21551 Because AIX assembler's .set command has unexpected semantics, we output
21552 all aliases as alternative labels in front of the definition. */
21554 void
21555 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21557 struct declare_alias_data data = {file, false};
21558 ASM_OUTPUT_LABEL (file, name);
21559 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21560 &data, true);
21563 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21565 void
21566 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21568 fputs (integer_asm_op (size, FALSE), file);
21569 assemble_name (file, label);
21570 fputs ("-$", file);
21573 /* Output a symbol offset relative to the dbase for the current object.
21574 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21575 signed offsets.
21577 __gcc_unwind_dbase is embedded in all executables/libraries through
21578 libgcc/config/rs6000/crtdbase.S. */
21580 void
21581 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21583 fputs (integer_asm_op (size, FALSE), file);
21584 assemble_name (file, label);
21585 fputs("-__gcc_unwind_dbase", file);
21588 #ifdef HAVE_AS_TLS
21589 static void
21590 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21592 rtx symbol;
21593 int flags;
21594 const char *symname;
21596 default_encode_section_info (decl, rtl, first);
21598 /* Careful not to prod global register variables. */
21599 if (!MEM_P (rtl))
21600 return;
21601 symbol = XEXP (rtl, 0);
21602 if (!SYMBOL_REF_P (symbol))
21603 return;
21605 flags = SYMBOL_REF_FLAGS (symbol);
21607 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21608 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21610 SYMBOL_REF_FLAGS (symbol) = flags;
21612 symname = XSTR (symbol, 0);
21614 /* Append CSECT mapping class, unless the symbol already is qualified.
21615 Aliases are implemented as labels, so the symbol name should not add
21616 a mapping class. */
21617 if (decl
21618 && DECL_P (decl)
21619 && VAR_OR_FUNCTION_DECL_P (decl)
21620 && (symtab_node::get (decl) == NULL
21621 || symtab_node::get (decl)->alias == 0)
21622 && symname[strlen (symname) - 1] != ']')
21624 const char *smclass = NULL;
21626 if (TREE_CODE (decl) == FUNCTION_DECL)
21627 smclass = "[DS]";
21628 else if (DECL_THREAD_LOCAL_P (decl))
21630 if (bss_initializer_p (decl))
21631 smclass = "[UL]";
21632 else if (flag_data_sections)
21633 smclass = "[TL]";
21635 else if (DECL_EXTERNAL (decl))
21636 smclass = "[UA]";
21637 else if (bss_initializer_p (decl))
21638 smclass = "[BS]";
21639 else if (flag_data_sections)
21641 /* This must exactly match the logic of select section. */
21642 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21643 smclass = "[RO]";
21644 else
21645 smclass = "[RW]";
21648 if (smclass != NULL)
21650 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21652 strcpy (newname, symname);
21653 strcat (newname, smclass);
21654 XSTR (symbol, 0) = ggc_strdup (newname);
21658 #endif /* HAVE_AS_TLS */
21659 #endif /* TARGET_XCOFF */
21661 void
21662 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21663 const char *name, const char *val)
21665 fputs ("\t.weak\t", stream);
21666 assemble_name (stream, name);
21667 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21668 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21670 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21671 if (TARGET_XCOFF)
21672 fputs (rs6000_xcoff_visibility (decl), stream);
21673 #endif
21674 fputs ("\n\t.weak\t.", stream);
21675 RS6000_OUTPUT_BASENAME (stream, name);
21677 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21678 if (TARGET_XCOFF)
21679 fputs (rs6000_xcoff_visibility (decl), stream);
21680 #endif
21681 fputc ('\n', stream);
21683 if (val)
21685 #ifdef ASM_OUTPUT_DEF
21686 ASM_OUTPUT_DEF (stream, name, val);
21687 #endif
21688 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21689 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21691 fputs ("\t.set\t.", stream);
21692 RS6000_OUTPUT_BASENAME (stream, name);
21693 fputs (",.", stream);
21694 RS6000_OUTPUT_BASENAME (stream, val);
21695 fputc ('\n', stream);
21701 /* Return true if INSN should not be copied. */
21703 static bool
21704 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21706 return recog_memoized (insn) >= 0
21707 && get_attr_cannot_copy (insn);
21710 /* Compute a (partial) cost for rtx X. Return true if the complete
21711 cost has been computed, and false if subexpressions should be
21712 scanned. In either case, *TOTAL contains the cost result. */
21714 static bool
21715 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21716 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21718 int code = GET_CODE (x);
21720 switch (code)
21722 /* On the RS/6000, if it is valid in the insn, it is free. */
21723 case CONST_INT:
21724 if (((outer_code == SET
21725 || outer_code == PLUS
21726 || outer_code == MINUS)
21727 && (satisfies_constraint_I (x)
21728 || satisfies_constraint_L (x)))
21729 || (outer_code == AND
21730 && (satisfies_constraint_K (x)
21731 || (mode == SImode
21732 ? satisfies_constraint_L (x)
21733 : satisfies_constraint_J (x))))
21734 || ((outer_code == IOR || outer_code == XOR)
21735 && (satisfies_constraint_K (x)
21736 || (mode == SImode
21737 ? satisfies_constraint_L (x)
21738 : satisfies_constraint_J (x))))
21739 || outer_code == ASHIFT
21740 || outer_code == ASHIFTRT
21741 || outer_code == LSHIFTRT
21742 || outer_code == ROTATE
21743 || outer_code == ROTATERT
21744 || outer_code == ZERO_EXTRACT
21745 || (outer_code == MULT
21746 && satisfies_constraint_I (x))
21747 || ((outer_code == DIV || outer_code == UDIV
21748 || outer_code == MOD || outer_code == UMOD)
21749 && exact_log2 (INTVAL (x)) >= 0)
21750 || (outer_code == COMPARE
21751 && (satisfies_constraint_I (x)
21752 || satisfies_constraint_K (x)))
21753 || ((outer_code == EQ || outer_code == NE)
21754 && (satisfies_constraint_I (x)
21755 || satisfies_constraint_K (x)
21756 || (mode == SImode
21757 ? satisfies_constraint_L (x)
21758 : satisfies_constraint_J (x))))
21759 || (outer_code == GTU
21760 && satisfies_constraint_I (x))
21761 || (outer_code == LTU
21762 && satisfies_constraint_P (x)))
21764 *total = 0;
21765 return true;
21767 else if ((outer_code == PLUS
21768 && reg_or_add_cint_operand (x, mode))
21769 || (outer_code == MINUS
21770 && reg_or_sub_cint_operand (x, mode))
21771 || ((outer_code == SET
21772 || outer_code == IOR
21773 || outer_code == XOR)
21774 && (INTVAL (x)
21775 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21777 *total = COSTS_N_INSNS (1);
21778 return true;
21780 /* FALLTHRU */
21782 case CONST_DOUBLE:
21783 case CONST_WIDE_INT:
21784 case CONST:
21785 case HIGH:
21786 case SYMBOL_REF:
21787 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21788 return true;
21790 case MEM:
21791 /* When optimizing for size, MEM should be slightly more expensive
21792 than generating address, e.g., (plus (reg) (const)).
21793 L1 cache latency is about two instructions. */
21794 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21795 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21796 *total += COSTS_N_INSNS (100);
21797 return true;
21799 case LABEL_REF:
21800 *total = 0;
21801 return true;
21803 case PLUS:
21804 case MINUS:
21805 if (FLOAT_MODE_P (mode))
21806 *total = rs6000_cost->fp;
21807 else
21808 *total = COSTS_N_INSNS (1);
21809 return false;
21811 case MULT:
21812 if (CONST_INT_P (XEXP (x, 1))
21813 && satisfies_constraint_I (XEXP (x, 1)))
21815 if (INTVAL (XEXP (x, 1)) >= -256
21816 && INTVAL (XEXP (x, 1)) <= 255)
21817 *total = rs6000_cost->mulsi_const9;
21818 else
21819 *total = rs6000_cost->mulsi_const;
21821 else if (mode == SFmode)
21822 *total = rs6000_cost->fp;
21823 else if (FLOAT_MODE_P (mode))
21824 *total = rs6000_cost->dmul;
21825 else if (mode == DImode)
21826 *total = rs6000_cost->muldi;
21827 else
21828 *total = rs6000_cost->mulsi;
21829 return false;
21831 case FMA:
21832 if (mode == SFmode)
21833 *total = rs6000_cost->fp;
21834 else
21835 *total = rs6000_cost->dmul;
21836 break;
21838 case DIV:
21839 case MOD:
21840 if (FLOAT_MODE_P (mode))
21842 *total = mode == DFmode ? rs6000_cost->ddiv
21843 : rs6000_cost->sdiv;
21844 return false;
21846 /* FALLTHRU */
21848 case UDIV:
21849 case UMOD:
21850 if (CONST_INT_P (XEXP (x, 1))
21851 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21853 if (code == DIV || code == MOD)
21854 /* Shift, addze */
21855 *total = COSTS_N_INSNS (2);
21856 else
21857 /* Shift */
21858 *total = COSTS_N_INSNS (1);
21860 else
21862 if (GET_MODE (XEXP (x, 1)) == DImode)
21863 *total = rs6000_cost->divdi;
21864 else
21865 *total = rs6000_cost->divsi;
21867 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21868 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21869 *total += COSTS_N_INSNS (2);
21870 return false;
21872 case CTZ:
21873 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21874 return false;
21876 case FFS:
21877 *total = COSTS_N_INSNS (4);
21878 return false;
21880 case POPCOUNT:
21881 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21882 return false;
21884 case PARITY:
21885 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21886 return false;
21888 case NOT:
21889 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21890 *total = 0;
21891 else
21892 *total = COSTS_N_INSNS (1);
21893 return false;
21895 case AND:
21896 if (CONST_INT_P (XEXP (x, 1)))
21898 rtx left = XEXP (x, 0);
21899 rtx_code left_code = GET_CODE (left);
21901 /* rotate-and-mask: 1 insn. */
21902 if ((left_code == ROTATE
21903 || left_code == ASHIFT
21904 || left_code == LSHIFTRT)
21905 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21907 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21908 if (!CONST_INT_P (XEXP (left, 1)))
21909 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21910 *total += COSTS_N_INSNS (1);
21911 return true;
21914 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21915 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21916 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21917 || (val & 0xffff) == val
21918 || (val & 0xffff0000) == val
21919 || ((val & 0xffff) == 0 && mode == SImode))
21921 *total = rtx_cost (left, mode, AND, 0, speed);
21922 *total += COSTS_N_INSNS (1);
21923 return true;
21926 /* 2 insns. */
21927 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21929 *total = rtx_cost (left, mode, AND, 0, speed);
21930 *total += COSTS_N_INSNS (2);
21931 return true;
21935 *total = COSTS_N_INSNS (1);
21936 return false;
21938 case IOR:
21939 /* FIXME */
21940 *total = COSTS_N_INSNS (1);
21941 return true;
21943 case CLZ:
21944 case XOR:
21945 case ZERO_EXTRACT:
21946 *total = COSTS_N_INSNS (1);
21947 return false;
21949 case ASHIFT:
21950 /* The EXTSWSLI instruction is a combined instruction. Don't count both
21951 the sign extend and shift separately within the insn. */
21952 if (TARGET_EXTSWSLI && mode == DImode
21953 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21954 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21956 *total = 0;
21957 return false;
21959 /* fall through */
21961 case ASHIFTRT:
21962 case LSHIFTRT:
21963 case ROTATE:
21964 case ROTATERT:
21965 /* Handle mul_highpart. */
21966 if (outer_code == TRUNCATE
21967 && GET_CODE (XEXP (x, 0)) == MULT)
21969 if (mode == DImode)
21970 *total = rs6000_cost->muldi;
21971 else
21972 *total = rs6000_cost->mulsi;
21973 return true;
21975 else if (outer_code == AND)
21976 *total = 0;
21977 else
21978 *total = COSTS_N_INSNS (1);
21979 return false;
21981 case SIGN_EXTEND:
21982 case ZERO_EXTEND:
21983 if (MEM_P (XEXP (x, 0)))
21984 *total = 0;
21985 else
21986 *total = COSTS_N_INSNS (1);
21987 return false;
21989 case COMPARE:
21990 case NEG:
21991 case ABS:
21992 if (!FLOAT_MODE_P (mode))
21994 *total = COSTS_N_INSNS (1);
21995 return false;
21997 /* FALLTHRU */
21999 case FLOAT:
22000 case UNSIGNED_FLOAT:
22001 case FIX:
22002 case UNSIGNED_FIX:
22003 case FLOAT_TRUNCATE:
22004 *total = rs6000_cost->fp;
22005 return false;
22007 case FLOAT_EXTEND:
22008 if (mode == DFmode)
22009 *total = rs6000_cost->sfdf_convert;
22010 else
22011 *total = rs6000_cost->fp;
22012 return false;
22014 case CALL:
22015 case IF_THEN_ELSE:
22016 if (!speed)
22018 *total = COSTS_N_INSNS (1);
22019 return true;
22021 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22023 *total = rs6000_cost->fp;
22024 return false;
22026 break;
22028 case NE:
22029 case EQ:
22030 case GTU:
22031 case LTU:
22032 /* Carry bit requires mode == Pmode.
22033 NEG or PLUS already counted so only add one. */
22034 if (mode == Pmode
22035 && (outer_code == NEG || outer_code == PLUS))
22037 *total = COSTS_N_INSNS (1);
22038 return true;
22040 /* FALLTHRU */
22042 case GT:
22043 case LT:
22044 case UNORDERED:
22045 if (outer_code == SET)
22047 if (XEXP (x, 1) == const0_rtx)
22049 *total = COSTS_N_INSNS (2);
22050 return true;
22052 else
22054 *total = COSTS_N_INSNS (3);
22055 return false;
22058 /* CC COMPARE. */
22059 if (outer_code == COMPARE)
22061 *total = 0;
22062 return true;
22064 break;
22066 case UNSPEC:
22067 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22069 *total = 0;
22070 return true;
22072 break;
22074 default:
22075 break;
22078 return false;
22081 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
22083 static bool
22084 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22085 int opno, int *total, bool speed)
22087 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22089 fprintf (stderr,
22090 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22091 "opno = %d, total = %d, speed = %s, x:\n",
22092 ret ? "complete" : "scan inner",
22093 GET_MODE_NAME (mode),
22094 GET_RTX_NAME (outer_code),
22095 opno,
22096 *total,
22097 speed ? "true" : "false");
22099 debug_rtx (x);
22101 return ret;
22104 static int
22105 rs6000_insn_cost (rtx_insn *insn, bool speed)
22107 if (recog_memoized (insn) < 0)
22108 return 0;
22110 /* If we are optimizing for size, just use the length. */
22111 if (!speed)
22112 return get_attr_length (insn);
22114 /* Use the cost if provided. */
22115 int cost = get_attr_cost (insn);
22116 if (cost > 0)
22117 return cost;
22119 /* If the insn tells us how many insns there are, use that. Otherwise use
22120 the length/4. Adjust the insn length to remove the extra size that
22121 prefixed instructions take. */
22122 int n = get_attr_num_insns (insn);
22123 if (n == 0)
22125 int length = get_attr_length (insn);
22126 if (get_attr_prefixed (insn) == PREFIXED_YES)
22128 int adjust = 0;
22129 ADJUST_INSN_LENGTH (insn, adjust);
22130 length -= adjust;
22133 n = length / 4;
22136 enum attr_type type = get_attr_type (insn);
22138 switch (type)
22140 case TYPE_LOAD:
22141 case TYPE_FPLOAD:
22142 case TYPE_VECLOAD:
22143 cost = COSTS_N_INSNS (n + 1);
22144 break;
22146 case TYPE_MUL:
22147 switch (get_attr_size (insn))
22149 case SIZE_8:
22150 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22151 break;
22152 case SIZE_16:
22153 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22154 break;
22155 case SIZE_32:
22156 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22157 break;
22158 case SIZE_64:
22159 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22160 break;
22161 default:
22162 gcc_unreachable ();
22164 break;
22165 case TYPE_DIV:
22166 switch (get_attr_size (insn))
22168 case SIZE_32:
22169 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22170 break;
22171 case SIZE_64:
22172 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22173 break;
22174 default:
22175 gcc_unreachable ();
22177 break;
22179 case TYPE_FP:
22180 cost = n * rs6000_cost->fp;
22181 break;
22182 case TYPE_DMUL:
22183 cost = n * rs6000_cost->dmul;
22184 break;
22185 case TYPE_SDIV:
22186 cost = n * rs6000_cost->sdiv;
22187 break;
22188 case TYPE_DDIV:
22189 cost = n * rs6000_cost->ddiv;
22190 break;
22192 case TYPE_SYNC:
22193 case TYPE_LOAD_L:
22194 case TYPE_MFCR:
22195 case TYPE_MFCRF:
22196 cost = COSTS_N_INSNS (n + 2);
22197 break;
22199 default:
22200 cost = COSTS_N_INSNS (n);
22203 return cost;
22206 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22208 static int
22209 rs6000_debug_address_cost (rtx x, machine_mode mode,
22210 addr_space_t as, bool speed)
22212 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22214 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22215 ret, speed ? "true" : "false");
22216 debug_rtx (x);
22218 return ret;
22222 /* A C expression returning the cost of moving data from a register of class
22223 CLASS1 to one of CLASS2. */
22225 static int
22226 rs6000_register_move_cost (machine_mode mode,
22227 reg_class_t from, reg_class_t to)
22229 int ret;
22230 reg_class_t rclass;
22232 if (TARGET_DEBUG_COST)
22233 dbg_cost_ctrl++;
22235 /* If we have VSX, we can easily move between FPR or Altivec registers,
22236 otherwise we can only easily move within classes.
22237 Do this first so we give best-case answers for union classes
22238 containing both gprs and vsx regs. */
22239 HARD_REG_SET to_vsx, from_vsx;
22240 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22241 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22242 if (!hard_reg_set_empty_p (to_vsx)
22243 && !hard_reg_set_empty_p (from_vsx)
22244 && (TARGET_VSX
22245 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22247 int reg = FIRST_FPR_REGNO;
22248 if (TARGET_VSX
22249 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22250 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22251 reg = FIRST_ALTIVEC_REGNO;
22252 ret = 2 * hard_regno_nregs (reg, mode);
22255 /* Moves from/to GENERAL_REGS. */
22256 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22257 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22259 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22261 if (TARGET_DIRECT_MOVE)
22263 /* Keep the cost for direct moves above that for within
22264 a register class even if the actual processor cost is
22265 comparable. We do this because a direct move insn
22266 can't be a nop, whereas with ideal register
22267 allocation a move within the same class might turn
22268 out to be a nop. */
22269 if (rs6000_tune == PROCESSOR_POWER9
22270 || rs6000_tune == PROCESSOR_POWER10)
22271 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22272 else
22273 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22274 /* SFmode requires a conversion when moving between gprs
22275 and vsx. */
22276 if (mode == SFmode)
22277 ret += 2;
22279 else
22280 ret = (rs6000_memory_move_cost (mode, rclass, false)
22281 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22284 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22285 shift. */
22286 else if (rclass == CR_REGS)
22287 ret = 4;
22289 /* For those processors that have slow LR/CTR moves, make them more
22290 expensive than memory in order to bias spills to memory .*/
22291 else if ((rs6000_tune == PROCESSOR_POWER6
22292 || rs6000_tune == PROCESSOR_POWER7
22293 || rs6000_tune == PROCESSOR_POWER8
22294 || rs6000_tune == PROCESSOR_POWER9)
22295 && reg_class_subset_p (rclass, SPECIAL_REGS))
22296 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22298 else
22299 /* A move will cost one instruction per GPR moved. */
22300 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22303 /* Everything else has to go through GENERAL_REGS. */
22304 else
22305 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22306 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22308 if (TARGET_DEBUG_COST)
22310 if (dbg_cost_ctrl == 1)
22311 fprintf (stderr,
22312 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22313 ret, GET_MODE_NAME (mode), reg_class_names[from],
22314 reg_class_names[to]);
22315 dbg_cost_ctrl--;
22318 return ret;
22321 /* A C expressions returning the cost of moving data of MODE from a register to
22322 or from memory. */
22324 static int
22325 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22326 bool in ATTRIBUTE_UNUSED)
22328 int ret;
22330 if (TARGET_DEBUG_COST)
22331 dbg_cost_ctrl++;
22333 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22334 ret = 4 * hard_regno_nregs (0, mode);
22335 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22336 || reg_classes_intersect_p (rclass, VSX_REGS)))
22337 ret = 4 * hard_regno_nregs (32, mode);
22338 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22339 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22340 else
22341 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22343 if (TARGET_DEBUG_COST)
22345 if (dbg_cost_ctrl == 1)
22346 fprintf (stderr,
22347 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22348 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22349 dbg_cost_ctrl--;
22352 return ret;
22355 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22357 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22358 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22359 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22360 move cost between GENERAL_REGS and VSX_REGS low.
22362 It might seem reasonable to use a union class. After all, if usage
22363 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22364 rather than memory. However, in cases where register pressure of
22365 both is high, like the cactus_adm spec test, allowing
22366 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22367 the first scheduling pass. This is partly due to an allocno of
22368 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22369 class, which gives too high a pressure for GENERAL_REGS and too low
22370 for VSX_REGS. So, force a choice of the subclass here.
22372 The best class is also the union if GENERAL_REGS and VSX_REGS have
22373 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22374 allocno class, since trying to narrow down the class by regno mode
22375 is prone to error. For example, SImode is allowed in VSX regs and
22376 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22377 it would be wrong to choose an allocno of GENERAL_REGS based on
22378 SImode. */
22380 static reg_class_t
22381 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22382 reg_class_t allocno_class,
22383 reg_class_t best_class)
22385 switch (allocno_class)
22387 case GEN_OR_VSX_REGS:
22388 /* best_class must be a subset of allocno_class. */
22389 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22390 || best_class == GEN_OR_FLOAT_REGS
22391 || best_class == VSX_REGS
22392 || best_class == ALTIVEC_REGS
22393 || best_class == FLOAT_REGS
22394 || best_class == GENERAL_REGS
22395 || best_class == BASE_REGS);
22396 /* Use best_class but choose wider classes when copying from the
22397 wider class to best_class is cheap. This mimics IRA choice
22398 of allocno class. */
22399 if (best_class == BASE_REGS)
22400 return GENERAL_REGS;
22401 if (TARGET_VSX && best_class == FLOAT_REGS)
22402 return VSX_REGS;
22403 return best_class;
22405 case VSX_REGS:
22406 if (best_class == ALTIVEC_REGS)
22407 return ALTIVEC_REGS;
22409 default:
22410 break;
22413 return allocno_class;
22416 /* Load up a constant. If the mode is a vector mode, splat the value across
22417 all of the vector elements. */
22419 static rtx
22420 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22422 rtx reg;
22424 if (mode == SFmode || mode == DFmode)
22426 rtx d = const_double_from_real_value (dconst, mode);
22427 reg = force_reg (mode, d);
22429 else if (mode == V4SFmode)
22431 rtx d = const_double_from_real_value (dconst, SFmode);
22432 rtvec v = gen_rtvec (4, d, d, d, d);
22433 reg = gen_reg_rtx (mode);
22434 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22436 else if (mode == V2DFmode)
22438 rtx d = const_double_from_real_value (dconst, DFmode);
22439 rtvec v = gen_rtvec (2, d, d);
22440 reg = gen_reg_rtx (mode);
22441 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22443 else
22444 gcc_unreachable ();
22446 return reg;
22449 /* Generate an FMA instruction. */
22451 static void
22452 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22454 machine_mode mode = GET_MODE (target);
22455 rtx dst;
22457 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22458 gcc_assert (dst != NULL);
22460 if (dst != target)
22461 emit_move_insn (target, dst);
22464 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22466 static void
22467 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22469 machine_mode mode = GET_MODE (dst);
22470 rtx r;
22472 /* This is a tad more complicated, since the fnma_optab is for
22473 a different expression: fma(-m1, m2, a), which is the same
22474 thing except in the case of signed zeros.
22476 Fortunately we know that if FMA is supported that FNMSUB is
22477 also supported in the ISA. Just expand it directly. */
22479 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22481 r = gen_rtx_NEG (mode, a);
22482 r = gen_rtx_FMA (mode, m1, m2, r);
22483 r = gen_rtx_NEG (mode, r);
22484 emit_insn (gen_rtx_SET (dst, r));
22487 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22488 add a reg_note saying that this was a division. Support both scalar and
22489 vector divide. Assumes no trapping math and finite arguments. */
22491 void
22492 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22494 machine_mode mode = GET_MODE (dst);
22495 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22496 int i;
22498 /* Low precision estimates guarantee 5 bits of accuracy. High
22499 precision estimates guarantee 14 bits of accuracy. SFmode
22500 requires 23 bits of accuracy. DFmode requires 52 bits of
22501 accuracy. Each pass at least doubles the accuracy, leading
22502 to the following. */
22503 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22504 if (mode == DFmode || mode == V2DFmode)
22505 passes++;
22507 enum insn_code code = optab_handler (smul_optab, mode);
22508 insn_gen_fn gen_mul = GEN_FCN (code);
22510 gcc_assert (code != CODE_FOR_nothing);
22512 one = rs6000_load_constant_and_splat (mode, dconst1);
22514 /* x0 = 1./d estimate */
22515 x0 = gen_reg_rtx (mode);
22516 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22517 UNSPEC_FRES)));
22519 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22520 if (passes > 1) {
22522 /* e0 = 1. - d * x0 */
22523 e0 = gen_reg_rtx (mode);
22524 rs6000_emit_nmsub (e0, d, x0, one);
22526 /* x1 = x0 + e0 * x0 */
22527 x1 = gen_reg_rtx (mode);
22528 rs6000_emit_madd (x1, e0, x0, x0);
22530 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22531 ++i, xprev = xnext, eprev = enext) {
22533 /* enext = eprev * eprev */
22534 enext = gen_reg_rtx (mode);
22535 emit_insn (gen_mul (enext, eprev, eprev));
22537 /* xnext = xprev + enext * xprev */
22538 xnext = gen_reg_rtx (mode);
22539 rs6000_emit_madd (xnext, enext, xprev, xprev);
22542 } else
22543 xprev = x0;
22545 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22547 /* u = n * xprev */
22548 u = gen_reg_rtx (mode);
22549 emit_insn (gen_mul (u, n, xprev));
22551 /* v = n - (d * u) */
22552 v = gen_reg_rtx (mode);
22553 rs6000_emit_nmsub (v, d, u, n);
22555 /* dst = (v * xprev) + u */
22556 rs6000_emit_madd (dst, v, xprev, u);
22558 if (note_p)
22559 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22562 /* Goldschmidt's Algorithm for single/double-precision floating point
22563 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22565 void
22566 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22568 machine_mode mode = GET_MODE (src);
22569 rtx e = gen_reg_rtx (mode);
22570 rtx g = gen_reg_rtx (mode);
22571 rtx h = gen_reg_rtx (mode);
22573 /* Low precision estimates guarantee 5 bits of accuracy. High
22574 precision estimates guarantee 14 bits of accuracy. SFmode
22575 requires 23 bits of accuracy. DFmode requires 52 bits of
22576 accuracy. Each pass at least doubles the accuracy, leading
22577 to the following. */
22578 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22579 if (mode == DFmode || mode == V2DFmode)
22580 passes++;
22582 int i;
22583 rtx mhalf;
22584 enum insn_code code = optab_handler (smul_optab, mode);
22585 insn_gen_fn gen_mul = GEN_FCN (code);
22587 gcc_assert (code != CODE_FOR_nothing);
22589 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22591 /* e = rsqrt estimate */
22592 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22593 UNSPEC_RSQRT)));
22595 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22596 if (!recip)
22598 rtx zero = force_reg (mode, CONST0_RTX (mode));
22600 if (mode == SFmode)
22602 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22603 e, zero, mode, 0);
22604 if (target != e)
22605 emit_move_insn (e, target);
22607 else
22609 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22610 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22614 /* g = sqrt estimate. */
22615 emit_insn (gen_mul (g, e, src));
22616 /* h = 1/(2*sqrt) estimate. */
22617 emit_insn (gen_mul (h, e, mhalf));
22619 if (recip)
22621 if (passes == 1)
22623 rtx t = gen_reg_rtx (mode);
22624 rs6000_emit_nmsub (t, g, h, mhalf);
22625 /* Apply correction directly to 1/rsqrt estimate. */
22626 rs6000_emit_madd (dst, e, t, e);
22628 else
22630 for (i = 0; i < passes; i++)
22632 rtx t1 = gen_reg_rtx (mode);
22633 rtx g1 = gen_reg_rtx (mode);
22634 rtx h1 = gen_reg_rtx (mode);
22636 rs6000_emit_nmsub (t1, g, h, mhalf);
22637 rs6000_emit_madd (g1, g, t1, g);
22638 rs6000_emit_madd (h1, h, t1, h);
22640 g = g1;
22641 h = h1;
22643 /* Multiply by 2 for 1/rsqrt. */
22644 emit_insn (gen_add3_insn (dst, h, h));
22647 else
22649 rtx t = gen_reg_rtx (mode);
22650 rs6000_emit_nmsub (t, g, h, mhalf);
22651 rs6000_emit_madd (dst, g, t, g);
22654 return;
22657 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22658 (Power7) targets. DST is the target, and SRC is the argument operand. */
22660 void
22661 rs6000_emit_popcount (rtx dst, rtx src)
22663 machine_mode mode = GET_MODE (dst);
22664 rtx tmp1, tmp2;
22666 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22667 if (TARGET_POPCNTD)
22669 if (mode == SImode)
22670 emit_insn (gen_popcntdsi2 (dst, src));
22671 else
22672 emit_insn (gen_popcntddi2 (dst, src));
22673 return;
22676 tmp1 = gen_reg_rtx (mode);
22678 if (mode == SImode)
22680 emit_insn (gen_popcntbsi2 (tmp1, src));
22681 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22682 NULL_RTX, 0);
22683 tmp2 = force_reg (SImode, tmp2);
22684 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22686 else
22688 emit_insn (gen_popcntbdi2 (tmp1, src));
22689 tmp2 = expand_mult (DImode, tmp1,
22690 GEN_INT ((HOST_WIDE_INT)
22691 0x01010101 << 32 | 0x01010101),
22692 NULL_RTX, 0);
22693 tmp2 = force_reg (DImode, tmp2);
22694 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22699 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22700 target, and SRC is the argument operand. */
22702 void
22703 rs6000_emit_parity (rtx dst, rtx src)
22705 machine_mode mode = GET_MODE (dst);
22706 rtx tmp;
22708 tmp = gen_reg_rtx (mode);
22710 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22711 if (TARGET_CMPB)
22713 if (mode == SImode)
22715 emit_insn (gen_popcntbsi2 (tmp, src));
22716 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22718 else
22720 emit_insn (gen_popcntbdi2 (tmp, src));
22721 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22723 return;
22726 if (mode == SImode)
22728 /* Is mult+shift >= shift+xor+shift+xor? */
22729 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22731 rtx tmp1, tmp2, tmp3, tmp4;
22733 tmp1 = gen_reg_rtx (SImode);
22734 emit_insn (gen_popcntbsi2 (tmp1, src));
22736 tmp2 = gen_reg_rtx (SImode);
22737 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22738 tmp3 = gen_reg_rtx (SImode);
22739 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22741 tmp4 = gen_reg_rtx (SImode);
22742 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22743 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22745 else
22746 rs6000_emit_popcount (tmp, src);
22747 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22749 else
22751 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22752 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22754 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22756 tmp1 = gen_reg_rtx (DImode);
22757 emit_insn (gen_popcntbdi2 (tmp1, src));
22759 tmp2 = gen_reg_rtx (DImode);
22760 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22761 tmp3 = gen_reg_rtx (DImode);
22762 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22764 tmp4 = gen_reg_rtx (DImode);
22765 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22766 tmp5 = gen_reg_rtx (DImode);
22767 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22769 tmp6 = gen_reg_rtx (DImode);
22770 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22771 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22773 else
22774 rs6000_emit_popcount (tmp, src);
22775 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22779 /* Expand an Altivec constant permutation for little endian mode.
22780 OP0 and OP1 are the input vectors and TARGET is the output vector.
22781 SEL specifies the constant permutation vector.
22783 There are two issues: First, the two input operands must be
22784 swapped so that together they form a double-wide array in LE
22785 order. Second, the vperm instruction has surprising behavior
22786 in LE mode: it interprets the elements of the source vectors
22787 in BE mode ("left to right") and interprets the elements of
22788 the destination vector in LE mode ("right to left"). To
22789 correct for this, we must subtract each element of the permute
22790 control vector from 31.
22792 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22793 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22794 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22795 serve as the permute control vector. Then, in BE mode,
22797 vperm 9,10,11,12
22799 places the desired result in vr9. However, in LE mode the
22800 vector contents will be
22802 vr10 = 00000003 00000002 00000001 00000000
22803 vr11 = 00000007 00000006 00000005 00000004
22805 The result of the vperm using the same permute control vector is
22807 vr9 = 05000000 07000000 01000000 03000000
22809 That is, the leftmost 4 bytes of vr10 are interpreted as the
22810 source for the rightmost 4 bytes of vr9, and so on.
22812 If we change the permute control vector to
22814 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22816 and issue
22818 vperm 9,11,10,12
22820 we get the desired
22822 vr9 = 00000006 00000004 00000002 00000000. */
22824 static void
22825 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22826 const vec_perm_indices &sel)
22828 unsigned int i;
22829 rtx perm[16];
22830 rtx constv, unspec;
22832 /* Unpack and adjust the constant selector. */
22833 for (i = 0; i < 16; ++i)
22835 unsigned int elt = 31 - (sel[i] & 31);
22836 perm[i] = GEN_INT (elt);
22839 /* Expand to a permute, swapping the inputs and using the
22840 adjusted selector. */
22841 if (!REG_P (op0))
22842 op0 = force_reg (V16QImode, op0);
22843 if (!REG_P (op1))
22844 op1 = force_reg (V16QImode, op1);
22846 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22847 constv = force_reg (V16QImode, constv);
22848 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22849 UNSPEC_VPERM);
22850 if (!REG_P (target))
22852 rtx tmp = gen_reg_rtx (V16QImode);
22853 emit_move_insn (tmp, unspec);
22854 unspec = tmp;
22857 emit_move_insn (target, unspec);
22860 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22861 permute control vector. But here it's not a constant, so we must
22862 generate a vector NAND or NOR to do the adjustment. */
22864 void
22865 altivec_expand_vec_perm_le (rtx operands[4])
22867 rtx notx, iorx, unspec;
22868 rtx target = operands[0];
22869 rtx op0 = operands[1];
22870 rtx op1 = operands[2];
22871 rtx sel = operands[3];
22872 rtx tmp = target;
22873 rtx norreg = gen_reg_rtx (V16QImode);
22874 machine_mode mode = GET_MODE (target);
22876 /* Get everything in regs so the pattern matches. */
22877 if (!REG_P (op0))
22878 op0 = force_reg (mode, op0);
22879 if (!REG_P (op1))
22880 op1 = force_reg (mode, op1);
22881 if (!REG_P (sel))
22882 sel = force_reg (V16QImode, sel);
22883 if (!REG_P (target))
22884 tmp = gen_reg_rtx (mode);
22886 if (TARGET_P9_VECTOR)
22888 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22889 UNSPEC_VPERMR);
22891 else
22893 /* Invert the selector with a VNAND if available, else a VNOR.
22894 The VNAND is preferred for future fusion opportunities. */
22895 notx = gen_rtx_NOT (V16QImode, sel);
22896 iorx = (TARGET_P8_VECTOR
22897 ? gen_rtx_IOR (V16QImode, notx, notx)
22898 : gen_rtx_AND (V16QImode, notx, notx));
22899 emit_insn (gen_rtx_SET (norreg, iorx));
22901 /* Permute with operands reversed and adjusted selector. */
22902 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22903 UNSPEC_VPERM);
22906 /* Copy into target, possibly by way of a register. */
22907 if (!REG_P (target))
22909 emit_move_insn (tmp, unspec);
22910 unspec = tmp;
22913 emit_move_insn (target, unspec);
22916 /* Expand an Altivec constant permutation. Return true if we match
22917 an efficient implementation; false to fall back to VPERM.
22919 OP0 and OP1 are the input vectors and TARGET is the output vector.
22920 SEL specifies the constant permutation vector. */
22922 static bool
22923 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22924 const vec_perm_indices &sel)
22926 struct altivec_perm_insn {
22927 HOST_WIDE_INT mask;
22928 enum insn_code impl;
22929 unsigned char perm[16];
22931 static const struct altivec_perm_insn patterns[] = {
22932 {OPTION_MASK_ALTIVEC,
22933 CODE_FOR_altivec_vpkuhum_direct,
22934 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
22935 {OPTION_MASK_ALTIVEC,
22936 CODE_FOR_altivec_vpkuwum_direct,
22937 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
22938 {OPTION_MASK_ALTIVEC,
22939 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22940 : CODE_FOR_altivec_vmrglb_direct,
22941 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
22942 {OPTION_MASK_ALTIVEC,
22943 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22944 : CODE_FOR_altivec_vmrglh_direct,
22945 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
22946 {OPTION_MASK_ALTIVEC,
22947 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
22948 : CODE_FOR_altivec_vmrglw_direct_v4si,
22949 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
22950 {OPTION_MASK_ALTIVEC,
22951 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22952 : CODE_FOR_altivec_vmrghb_direct,
22953 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
22954 {OPTION_MASK_ALTIVEC,
22955 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22956 : CODE_FOR_altivec_vmrghh_direct,
22957 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
22958 {OPTION_MASK_ALTIVEC,
22959 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
22960 : CODE_FOR_altivec_vmrghw_direct_v4si,
22961 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
22962 {OPTION_MASK_P8_VECTOR,
22963 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22964 : CODE_FOR_p8_vmrgow_v4sf_direct,
22965 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
22966 {OPTION_MASK_P8_VECTOR,
22967 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22968 : CODE_FOR_p8_vmrgew_v4sf_direct,
22969 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
22970 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22971 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
22972 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22973 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
22974 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22975 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
22976 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
22977 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
22979 unsigned int i, j, elt, which;
22980 unsigned char perm[16];
22981 rtx x;
22982 bool one_vec;
22984 /* Unpack the constant selector. */
22985 for (i = which = 0; i < 16; ++i)
22987 elt = sel[i] & 31;
22988 which |= (elt < 16 ? 1 : 2);
22989 perm[i] = elt;
22992 /* Simplify the constant selector based on operands. */
22993 switch (which)
22995 default:
22996 gcc_unreachable ();
22998 case 3:
22999 one_vec = false;
23000 if (!rtx_equal_p (op0, op1))
23001 break;
23002 /* FALLTHRU */
23004 case 2:
23005 for (i = 0; i < 16; ++i)
23006 perm[i] &= 15;
23007 op0 = op1;
23008 one_vec = true;
23009 break;
23011 case 1:
23012 op1 = op0;
23013 one_vec = true;
23014 break;
23017 /* Look for splat patterns. */
23018 if (one_vec)
23020 elt = perm[0];
23022 for (i = 0; i < 16; ++i)
23023 if (perm[i] != elt)
23024 break;
23025 if (i == 16)
23027 if (!BYTES_BIG_ENDIAN)
23028 elt = 15 - elt;
23029 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23030 return true;
23033 if (elt % 2 == 0)
23035 for (i = 0; i < 16; i += 2)
23036 if (perm[i] != elt || perm[i + 1] != elt + 1)
23037 break;
23038 if (i == 16)
23040 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23041 x = gen_reg_rtx (V8HImode);
23042 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23043 GEN_INT (field)));
23044 emit_move_insn (target, gen_lowpart (V16QImode, x));
23045 return true;
23049 if (elt % 4 == 0)
23051 for (i = 0; i < 16; i += 4)
23052 if (perm[i] != elt
23053 || perm[i + 1] != elt + 1
23054 || perm[i + 2] != elt + 2
23055 || perm[i + 3] != elt + 3)
23056 break;
23057 if (i == 16)
23059 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23060 x = gen_reg_rtx (V4SImode);
23061 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23062 GEN_INT (field)));
23063 emit_move_insn (target, gen_lowpart (V16QImode, x));
23064 return true;
23069 /* Look for merge and pack patterns. */
23070 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23072 bool swapped;
23074 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23075 continue;
23077 elt = patterns[j].perm[0];
23078 if (perm[0] == elt)
23079 swapped = false;
23080 else if (perm[0] == elt + 16)
23081 swapped = true;
23082 else
23083 continue;
23084 for (i = 1; i < 16; ++i)
23086 elt = patterns[j].perm[i];
23087 if (swapped)
23088 elt = (elt >= 16 ? elt - 16 : elt + 16);
23089 else if (one_vec && elt >= 16)
23090 elt -= 16;
23091 if (perm[i] != elt)
23092 break;
23094 if (i == 16)
23096 enum insn_code icode = patterns[j].impl;
23097 machine_mode omode = insn_data[icode].operand[0].mode;
23098 machine_mode imode = insn_data[icode].operand[1].mode;
23100 rtx perm_idx = GEN_INT (0);
23101 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23103 int perm_val = 0;
23104 if (one_vec)
23106 if (perm[0] == 8)
23107 perm_val |= 2;
23108 if (perm[8] == 8)
23109 perm_val |= 1;
23111 else
23113 if (perm[0] != 0)
23114 perm_val |= 2;
23115 if (perm[8] != 16)
23116 perm_val |= 1;
23118 perm_idx = GEN_INT (perm_val);
23121 /* For little-endian, don't use vpkuwum and vpkuhum if the
23122 underlying vector type is not V4SI and V8HI, respectively.
23123 For example, using vpkuwum with a V8HI picks up the even
23124 halfwords (BE numbering) when the even halfwords (LE
23125 numbering) are what we need. */
23126 if (!BYTES_BIG_ENDIAN
23127 && icode == CODE_FOR_altivec_vpkuwum_direct
23128 && ((REG_P (op0)
23129 && GET_MODE (op0) != V4SImode)
23130 || (SUBREG_P (op0)
23131 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23132 continue;
23133 if (!BYTES_BIG_ENDIAN
23134 && icode == CODE_FOR_altivec_vpkuhum_direct
23135 && ((REG_P (op0)
23136 && GET_MODE (op0) != V8HImode)
23137 || (SUBREG_P (op0)
23138 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23139 continue;
23141 /* For little-endian, the two input operands must be swapped
23142 (or swapped back) to ensure proper right-to-left numbering
23143 from 0 to 2N-1. */
23144 if (swapped ^ !BYTES_BIG_ENDIAN
23145 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23146 std::swap (op0, op1);
23147 if (imode != V16QImode)
23149 op0 = gen_lowpart (imode, op0);
23150 op1 = gen_lowpart (imode, op1);
23152 if (omode == V16QImode)
23153 x = target;
23154 else
23155 x = gen_reg_rtx (omode);
23156 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23157 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23158 else
23159 emit_insn (GEN_FCN (icode) (x, op0, op1));
23160 if (omode != V16QImode)
23161 emit_move_insn (target, gen_lowpart (V16QImode, x));
23162 return true;
23166 if (!BYTES_BIG_ENDIAN)
23168 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23169 return true;
23172 return false;
23175 /* Expand a VSX Permute Doubleword constant permutation.
23176 Return true if we match an efficient implementation. */
23178 static bool
23179 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23180 unsigned char perm0, unsigned char perm1)
23182 rtx x;
23184 /* If both selectors come from the same operand, fold to single op. */
23185 if ((perm0 & 2) == (perm1 & 2))
23187 if (perm0 & 2)
23188 op0 = op1;
23189 else
23190 op1 = op0;
23192 /* If both operands are equal, fold to simpler permutation. */
23193 if (rtx_equal_p (op0, op1))
23195 perm0 = perm0 & 1;
23196 perm1 = (perm1 & 1) + 2;
23198 /* If the first selector comes from the second operand, swap. */
23199 else if (perm0 & 2)
23201 if (perm1 & 2)
23202 return false;
23203 perm0 -= 2;
23204 perm1 += 2;
23205 std::swap (op0, op1);
23207 /* If the second selector does not come from the second operand, fail. */
23208 else if ((perm1 & 2) == 0)
23209 return false;
23211 /* Success! */
23212 if (target != NULL)
23214 machine_mode vmode, dmode;
23215 rtvec v;
23217 vmode = GET_MODE (target);
23218 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23219 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23220 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23221 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23222 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23223 emit_insn (gen_rtx_SET (target, x));
23225 return true;
23228 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23230 static bool
23231 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23232 rtx target, rtx op0, rtx op1,
23233 const vec_perm_indices &sel)
23235 if (vmode != op_mode)
23236 return false;
23238 bool testing_p = !target;
23240 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23241 if (TARGET_ALTIVEC && testing_p)
23242 return true;
23244 if (op0)
23246 rtx nop0 = force_reg (vmode, op0);
23247 if (op0 == op1)
23248 op1 = nop0;
23249 op0 = nop0;
23251 if (op1)
23252 op1 = force_reg (vmode, op1);
23254 /* Check for ps_merge* or xxpermdi insns. */
23255 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23257 if (testing_p)
23259 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23260 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23262 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23263 return true;
23266 if (TARGET_ALTIVEC)
23268 /* Force the target-independent code to lower to V16QImode. */
23269 if (vmode != V16QImode)
23270 return false;
23271 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23272 return true;
23275 return false;
23278 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23279 OP0 and OP1 are the input vectors and TARGET is the output vector.
23280 PERM specifies the constant permutation vector. */
23282 static void
23283 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23284 machine_mode vmode, const vec_perm_builder &perm)
23286 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23287 if (x != target)
23288 emit_move_insn (target, x);
23291 /* Expand an extract even operation. */
23293 void
23294 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23296 machine_mode vmode = GET_MODE (target);
23297 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23298 vec_perm_builder perm (nelt, nelt, 1);
23300 for (i = 0; i < nelt; i++)
23301 perm.quick_push (i * 2);
23303 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23306 /* Expand a vector interleave operation. */
23308 void
23309 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23311 machine_mode vmode = GET_MODE (target);
23312 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23313 vec_perm_builder perm (nelt, nelt, 1);
23315 high = (highp ? 0 : nelt / 2);
23316 for (i = 0; i < nelt / 2; i++)
23318 perm.quick_push (i + high);
23319 perm.quick_push (i + nelt + high);
23322 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23325 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23326 void
23327 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23329 HOST_WIDE_INT hwi_scale (scale);
23330 REAL_VALUE_TYPE r_pow;
23331 rtvec v = rtvec_alloc (2);
23332 rtx elt;
23333 rtx scale_vec = gen_reg_rtx (V2DFmode);
23334 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23335 elt = const_double_from_real_value (r_pow, DFmode);
23336 RTVEC_ELT (v, 0) = elt;
23337 RTVEC_ELT (v, 1) = elt;
23338 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23339 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23342 /* Return an RTX representing where to find the function value of a
23343 function returning MODE. */
23344 static rtx
23345 rs6000_complex_function_value (machine_mode mode)
23347 unsigned int regno;
23348 rtx r1, r2;
23349 machine_mode inner = GET_MODE_INNER (mode);
23350 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23352 if (TARGET_FLOAT128_TYPE
23353 && (mode == KCmode
23354 || (mode == TCmode && TARGET_IEEEQUAD)))
23355 regno = ALTIVEC_ARG_RETURN;
23357 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23358 regno = FP_ARG_RETURN;
23360 else
23362 regno = GP_ARG_RETURN;
23364 /* 32-bit is OK since it'll go in r3/r4. */
23365 if (TARGET_32BIT && inner_bytes >= 4)
23366 return gen_rtx_REG (mode, regno);
23369 if (inner_bytes >= 8)
23370 return gen_rtx_REG (mode, regno);
23372 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23373 const0_rtx);
23374 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23375 GEN_INT (inner_bytes));
23376 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23379 /* Return an rtx describing a return value of MODE as a PARALLEL
23380 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23381 stride REG_STRIDE. */
23383 static rtx
23384 rs6000_parallel_return (machine_mode mode,
23385 int n_elts, machine_mode elt_mode,
23386 unsigned int regno, unsigned int reg_stride)
23388 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23390 int i;
23391 for (i = 0; i < n_elts; i++)
23393 rtx r = gen_rtx_REG (elt_mode, regno);
23394 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23395 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23396 regno += reg_stride;
23399 return par;
23402 /* Target hook for TARGET_FUNCTION_VALUE.
23404 An integer value is in r3 and a floating-point value is in fp1,
23405 unless -msoft-float. */
23407 static rtx
23408 rs6000_function_value (const_tree valtype,
23409 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23410 bool outgoing ATTRIBUTE_UNUSED)
23412 machine_mode mode;
23413 unsigned int regno;
23414 machine_mode elt_mode;
23415 int n_elts;
23417 /* Special handling for structs in darwin64. */
23418 if (TARGET_MACHO
23419 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23421 CUMULATIVE_ARGS valcum;
23422 rtx valret;
23424 valcum.words = 0;
23425 valcum.fregno = FP_ARG_MIN_REG;
23426 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23427 /* Do a trial code generation as if this were going to be passed as
23428 an argument; if any part goes in memory, we return NULL. */
23429 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23430 if (valret)
23431 return valret;
23432 /* Otherwise fall through to standard ABI rules. */
23435 mode = TYPE_MODE (valtype);
23437 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23438 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23440 int first_reg, n_regs;
23442 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23444 /* _Decimal128 must use even/odd register pairs. */
23445 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23446 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23448 else
23450 first_reg = ALTIVEC_ARG_RETURN;
23451 n_regs = 1;
23454 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23457 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
23458 if (TARGET_32BIT && TARGET_POWERPC64)
23459 switch (mode)
23461 default:
23462 break;
23463 case E_DImode:
23464 case E_SCmode:
23465 case E_DCmode:
23466 case E_TCmode:
23467 int count = GET_MODE_SIZE (mode) / 4;
23468 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23471 if ((INTEGRAL_TYPE_P (valtype)
23472 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23473 || POINTER_TYPE_P (valtype))
23474 mode = TARGET_32BIT ? SImode : DImode;
23476 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23477 /* _Decimal128 must use an even/odd register pair. */
23478 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23479 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23480 && !FLOAT128_VECTOR_P (mode))
23481 regno = FP_ARG_RETURN;
23482 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23483 && targetm.calls.split_complex_arg)
23484 return rs6000_complex_function_value (mode);
23485 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23486 return register is used in both cases, and we won't see V2DImode/V2DFmode
23487 for pure altivec, combine the two cases. */
23488 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23489 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23490 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23491 regno = ALTIVEC_ARG_RETURN;
23492 else
23493 regno = GP_ARG_RETURN;
23495 return gen_rtx_REG (mode, regno);
23498 /* Define how to find the value returned by a library function
23499 assuming the value has mode MODE. */
23501 rs6000_libcall_value (machine_mode mode)
23503 unsigned int regno;
23505 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
23506 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23507 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23509 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23510 /* _Decimal128 must use an even/odd register pair. */
23511 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23512 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23513 regno = FP_ARG_RETURN;
23514 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23515 return register is used in both cases, and we won't see V2DImode/V2DFmode
23516 for pure altivec, combine the two cases. */
23517 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23518 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23519 regno = ALTIVEC_ARG_RETURN;
23520 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23521 return rs6000_complex_function_value (mode);
23522 else
23523 regno = GP_ARG_RETURN;
23525 return gen_rtx_REG (mode, regno);
23528 /* Compute register pressure classes. We implement the target hook to avoid
23529 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23530 lead to incorrect estimates of number of available registers and therefor
23531 increased register pressure/spill. */
23532 static int
23533 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23535 int n;
23537 n = 0;
23538 pressure_classes[n++] = GENERAL_REGS;
23539 if (TARGET_ALTIVEC)
23540 pressure_classes[n++] = ALTIVEC_REGS;
23541 if (TARGET_VSX)
23542 pressure_classes[n++] = VSX_REGS;
23543 else
23545 if (TARGET_HARD_FLOAT)
23546 pressure_classes[n++] = FLOAT_REGS;
23548 pressure_classes[n++] = CR_REGS;
23549 pressure_classes[n++] = SPECIAL_REGS;
23551 return n;
23554 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23555 Frame pointer elimination is automatically handled.
23557 For the RS/6000, if frame pointer elimination is being done, we would like
23558 to convert ap into fp, not sp.
23560 We need r30 if -mminimal-toc was specified, and there are constant pool
23561 references. */
23563 static bool
23564 rs6000_can_eliminate (const int from, const int to)
23566 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23567 ? ! frame_pointer_needed
23568 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23569 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23570 || constant_pool_empty_p ()
23571 : true);
23574 /* Define the offset between two registers, FROM to be eliminated and its
23575 replacement TO, at the start of a routine. */
23576 HOST_WIDE_INT
23577 rs6000_initial_elimination_offset (int from, int to)
23579 rs6000_stack_t *info = rs6000_stack_info ();
23580 HOST_WIDE_INT offset;
23582 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23583 offset = info->push_p ? 0 : -info->total_size;
23584 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23586 offset = info->push_p ? 0 : -info->total_size;
23587 if (FRAME_GROWS_DOWNWARD)
23588 offset += info->fixed_size + info->vars_size + info->parm_size;
23590 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23591 offset = FRAME_GROWS_DOWNWARD
23592 ? info->fixed_size + info->vars_size + info->parm_size
23593 : 0;
23594 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23595 offset = info->total_size;
23596 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23597 offset = info->push_p ? info->total_size : 0;
23598 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23599 offset = 0;
23600 else
23601 gcc_unreachable ();
23603 return offset;
23606 /* Fill in sizes of registers used by unwinder. */
23608 static void
23609 rs6000_init_dwarf_reg_sizes_extra (tree address)
23611 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23613 int i;
23614 machine_mode mode = TYPE_MODE (char_type_node);
23615 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23616 rtx mem = gen_rtx_MEM (BLKmode, addr);
23617 rtx value = gen_int_mode (16, mode);
23619 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23620 The unwinder still needs to know the size of Altivec registers. */
23622 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23624 int column = DWARF_REG_TO_UNWIND_COLUMN
23625 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23626 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23628 emit_move_insn (adjust_address (mem, mode, offset), value);
23633 /* Map internal gcc register numbers to debug format register numbers.
23634 FORMAT specifies the type of debug register number to use:
23635 0 -- debug information, except for frame-related sections
23636 1 -- DWARF .debug_frame section
23637 2 -- DWARF .eh_frame section */
23639 unsigned int
23640 rs6000_debugger_regno (unsigned int regno, unsigned int format)
23642 /* On some platforms, we use the standard DWARF register
23643 numbering for .debug_info and .debug_frame. */
23644 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23646 #ifdef RS6000_USE_DWARF_NUMBERING
23647 if (regno <= 31)
23648 return regno;
23649 if (FP_REGNO_P (regno))
23650 return regno - FIRST_FPR_REGNO + 32;
23651 if (ALTIVEC_REGNO_P (regno))
23652 return regno - FIRST_ALTIVEC_REGNO + 1124;
23653 if (regno == LR_REGNO)
23654 return 108;
23655 if (regno == CTR_REGNO)
23656 return 109;
23657 if (regno == CA_REGNO)
23658 return 101; /* XER */
23659 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23660 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23661 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23662 to the DWARF reg for CR. */
23663 if (format == 1 && regno == CR2_REGNO)
23664 return 64;
23665 if (CR_REGNO_P (regno))
23666 return regno - CR0_REGNO + 86;
23667 if (regno == VRSAVE_REGNO)
23668 return 356;
23669 if (regno == VSCR_REGNO)
23670 return 67;
23672 /* These do not make much sense. */
23673 if (regno == FRAME_POINTER_REGNUM)
23674 return 111;
23675 if (regno == ARG_POINTER_REGNUM)
23676 return 67;
23677 if (regno == 64)
23678 return 100;
23680 gcc_unreachable ();
23681 #endif
23684 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23685 information, and also for .eh_frame. */
23686 /* Translate the regnos to their numbers in GCC 7 (and before). */
23687 if (regno <= 31)
23688 return regno;
23689 if (FP_REGNO_P (regno))
23690 return regno - FIRST_FPR_REGNO + 32;
23691 if (ALTIVEC_REGNO_P (regno))
23692 return regno - FIRST_ALTIVEC_REGNO + 77;
23693 if (regno == LR_REGNO)
23694 return 65;
23695 if (regno == CTR_REGNO)
23696 return 66;
23697 if (regno == CA_REGNO)
23698 return 76; /* XER */
23699 if (CR_REGNO_P (regno))
23700 return regno - CR0_REGNO + 68;
23701 if (regno == VRSAVE_REGNO)
23702 return 109;
23703 if (regno == VSCR_REGNO)
23704 return 110;
23706 if (regno == FRAME_POINTER_REGNUM)
23707 return 111;
23708 if (regno == ARG_POINTER_REGNUM)
23709 return 67;
23710 if (regno == 64)
23711 return 64;
23713 gcc_unreachable ();
23716 /* target hook eh_return_filter_mode */
23717 static scalar_int_mode
23718 rs6000_eh_return_filter_mode (void)
23720 return TARGET_32BIT ? SImode : word_mode;
23723 /* Target hook for translate_mode_attribute. */
23724 static machine_mode
23725 rs6000_translate_mode_attribute (machine_mode mode)
23727 if ((FLOAT128_IEEE_P (mode)
23728 && ieee128_float_type_node == long_double_type_node)
23729 || (FLOAT128_IBM_P (mode)
23730 && ibm128_float_type_node == long_double_type_node))
23731 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23732 return mode;
23735 /* Target hook for scalar_mode_supported_p. */
23736 static bool
23737 rs6000_scalar_mode_supported_p (scalar_mode mode)
23739 /* -m32 does not support TImode. This is the default, from
23740 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23741 same ABI as for -m32. But default_scalar_mode_supported_p allows
23742 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23743 for -mpowerpc64. */
23744 if (TARGET_32BIT && mode == TImode)
23745 return false;
23747 if (DECIMAL_FLOAT_MODE_P (mode))
23748 return default_decimal_float_supported_p ();
23749 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23750 return true;
23751 else
23752 return default_scalar_mode_supported_p (mode);
23755 /* Target hook for libgcc_floating_mode_supported_p. */
23757 static bool
23758 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23760 switch (mode)
23762 case E_SFmode:
23763 case E_DFmode:
23764 case E_TFmode:
23765 return true;
23767 /* We only return true for KFmode if IEEE 128-bit types are supported, and
23768 if long double does not use the IEEE 128-bit format. If long double
23769 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
23770 Because the code will not use KFmode in that case, there will be aborts
23771 because it can't find KFmode in the Floatn types. */
23772 case E_KFmode:
23773 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
23775 default:
23776 return false;
23780 /* Target hook for vector_mode_supported_p. */
23781 static bool
23782 rs6000_vector_mode_supported_p (machine_mode mode)
23784 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23785 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23786 double-double. */
23787 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23788 return true;
23790 else
23791 return false;
23794 /* Target hook for floatn_mode. */
23795 static opt_scalar_float_mode
23796 rs6000_floatn_mode (int n, bool extended)
23798 if (extended)
23800 switch (n)
23802 case 32:
23803 return DFmode;
23805 case 64:
23806 if (TARGET_FLOAT128_TYPE)
23807 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23808 else
23809 return opt_scalar_float_mode ();
23811 case 128:
23812 return opt_scalar_float_mode ();
23814 default:
23815 /* Those are the only valid _FloatNx types. */
23816 gcc_unreachable ();
23819 else
23821 switch (n)
23823 case 32:
23824 return SFmode;
23826 case 64:
23827 return DFmode;
23829 case 128:
23830 if (TARGET_FLOAT128_TYPE)
23831 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23832 else
23833 return opt_scalar_float_mode ();
23835 default:
23836 return opt_scalar_float_mode ();
23842 /* Target hook for c_mode_for_suffix. */
23843 static machine_mode
23844 rs6000_c_mode_for_suffix (char suffix)
23846 if (TARGET_FLOAT128_TYPE)
23848 if (suffix == 'q' || suffix == 'Q')
23849 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23851 /* At the moment, we are not defining a suffix for IBM extended double.
23852 If/when the default for -mabi=ieeelongdouble is changed, and we want
23853 to support __ibm128 constants in legacy library code, we may need to
23854 re-evalaute this decision. Currently, c-lex.cc only supports 'w' and
23855 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23856 __float80 constants. */
23859 return VOIDmode;
23862 /* Target hook for invalid_arg_for_unprototyped_fn. */
23863 static const char *
23864 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23866 return (!rs6000_darwin64_abi
23867 && typelist == 0
23868 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23869 && (funcdecl == NULL_TREE
23870 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23871 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23872 ? N_("AltiVec argument passed to unprototyped function")
23873 : NULL;
23876 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
23877 setup by using __stack_chk_fail_local hidden function instead of
23878 calling __stack_chk_fail directly. Otherwise it is better to call
23879 __stack_chk_fail directly. */
23881 static tree ATTRIBUTE_UNUSED
23882 rs6000_stack_protect_fail (void)
23884 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23885 ? default_hidden_stack_protect_fail ()
23886 : default_external_stack_protect_fail ();
23889 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23891 #if TARGET_ELF
23892 static unsigned HOST_WIDE_INT
23893 rs6000_asan_shadow_offset (void)
23895 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23897 #endif
23899 /* Mask options that we want to support inside of attribute((target)) and
23900 #pragma GCC target operations. Note, we do not include things like
23901 64/32-bit, endianness, hard/soft floating point, etc. that would have
23902 different calling sequences. */
23904 struct rs6000_opt_mask {
23905 const char *name; /* option name */
23906 HOST_WIDE_INT mask; /* mask to set */
23907 bool invert; /* invert sense of mask */
23908 bool valid_target; /* option is a target option */
23911 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23913 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23914 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23915 false, true },
23916 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23917 false, true },
23918 { "cmpb", OPTION_MASK_CMPB, false, true },
23919 { "crypto", OPTION_MASK_CRYPTO, false, true },
23920 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23921 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23922 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23923 false, true },
23924 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23925 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
23926 { "fprnd", OPTION_MASK_FPRND, false, true },
23927 { "power10", OPTION_MASK_POWER10, false, true },
23928 { "hard-dfp", OPTION_MASK_DFP, false, true },
23929 { "htm", OPTION_MASK_HTM, false, true },
23930 { "isel", OPTION_MASK_ISEL, false, true },
23931 { "mfcrf", OPTION_MASK_MFCRF, false, true },
23932 { "mfpgpr", 0, false, true },
23933 { "mma", OPTION_MASK_MMA, false, true },
23934 { "modulo", OPTION_MASK_MODULO, false, true },
23935 { "mulhw", OPTION_MASK_MULHW, false, true },
23936 { "multiple", OPTION_MASK_MULTIPLE, false, true },
23937 { "pcrel", OPTION_MASK_PCREL, false, true },
23938 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
23939 { "popcntb", OPTION_MASK_POPCNTB, false, true },
23940 { "popcntd", OPTION_MASK_POPCNTD, false, true },
23941 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23942 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23943 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23944 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
23945 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
23946 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
23947 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
23948 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23949 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23950 { "prefixed", OPTION_MASK_PREFIXED, false, true },
23951 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23952 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23953 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23954 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
23955 { "string", 0, false, true },
23956 { "update", OPTION_MASK_NO_UPDATE, true , true },
23957 { "vsx", OPTION_MASK_VSX, false, true },
23958 #ifdef OPTION_MASK_64BIT
23959 #if TARGET_AIX_OS
23960 { "aix64", OPTION_MASK_64BIT, false, false },
23961 { "aix32", OPTION_MASK_64BIT, true, false },
23962 #else
23963 { "64", OPTION_MASK_64BIT, false, false },
23964 { "32", OPTION_MASK_64BIT, true, false },
23965 #endif
23966 #endif
23967 #ifdef OPTION_MASK_EABI
23968 { "eabi", OPTION_MASK_EABI, false, false },
23969 #endif
23970 #ifdef OPTION_MASK_LITTLE_ENDIAN
23971 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
23972 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
23973 #endif
23974 #ifdef OPTION_MASK_RELOCATABLE
23975 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
23976 #endif
23977 #ifdef OPTION_MASK_STRICT_ALIGN
23978 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
23979 #endif
23980 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
23981 { "string", 0, false, false },
23984 /* Option variables that we want to support inside attribute((target)) and
23985 #pragma GCC target operations. */
23987 struct rs6000_opt_var {
23988 const char *name; /* option name */
23989 size_t global_offset; /* offset of the option in global_options. */
23990 size_t target_offset; /* offset of the option in target options. */
23993 static struct rs6000_opt_var const rs6000_opt_vars[] =
23995 { "friz",
23996 offsetof (struct gcc_options, x_TARGET_FRIZ),
23997 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23998 { "avoid-indexed-addresses",
23999 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24000 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24001 { "longcall",
24002 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24003 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24004 { "optimize-swaps",
24005 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24006 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24007 { "allow-movmisalign",
24008 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24009 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24010 { "sched-groups",
24011 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24012 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24013 { "always-hint",
24014 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24015 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24016 { "align-branch-targets",
24017 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24018 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24019 { "sched-prolog",
24020 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24021 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24022 { "sched-epilog",
24023 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24024 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24025 { "speculate-indirect-jumps",
24026 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24027 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24030 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24031 parsing. Return true if there were no errors. */
24033 static bool
24034 rs6000_inner_target_options (tree args, bool attr_p)
24036 bool ret = true;
24038 if (args == NULL_TREE)
24041 else if (TREE_CODE (args) == STRING_CST)
24043 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24044 char *q;
24046 while ((q = strtok (p, ",")) != NULL)
24048 bool error_p = false;
24049 bool not_valid_p = false;
24050 const char *cpu_opt = NULL;
24052 p = NULL;
24053 if (startswith (q, "cpu="))
24055 int cpu_index = rs6000_cpu_name_lookup (q+4);
24056 if (cpu_index >= 0)
24057 rs6000_cpu_index = cpu_index;
24058 else
24060 error_p = true;
24061 cpu_opt = q+4;
24064 else if (startswith (q, "tune="))
24066 int tune_index = rs6000_cpu_name_lookup (q+5);
24067 if (tune_index >= 0)
24068 rs6000_tune_index = tune_index;
24069 else
24071 error_p = true;
24072 cpu_opt = q+5;
24075 else
24077 size_t i;
24078 bool invert = false;
24079 char *r = q;
24081 error_p = true;
24082 if (startswith (r, "no-"))
24084 invert = true;
24085 r += 3;
24088 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24089 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24091 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24093 if (!rs6000_opt_masks[i].valid_target)
24094 not_valid_p = true;
24095 else
24097 error_p = false;
24098 rs6000_isa_flags_explicit |= mask;
24100 /* VSX needs altivec, so -mvsx automagically sets
24101 altivec and disables -mavoid-indexed-addresses. */
24102 if (!invert)
24104 if (mask == OPTION_MASK_VSX)
24106 mask |= OPTION_MASK_ALTIVEC;
24107 TARGET_AVOID_XFORM = 0;
24111 if (rs6000_opt_masks[i].invert)
24112 invert = !invert;
24114 if (invert)
24115 rs6000_isa_flags &= ~mask;
24116 else
24117 rs6000_isa_flags |= mask;
24119 break;
24122 if (error_p && !not_valid_p)
24124 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24125 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24127 size_t j = rs6000_opt_vars[i].global_offset;
24128 *((int *) ((char *)&global_options + j)) = !invert;
24129 error_p = false;
24130 not_valid_p = false;
24131 break;
24136 if (error_p)
24138 const char *eprefix, *esuffix;
24140 ret = false;
24141 if (attr_p)
24143 eprefix = "__attribute__((__target__(";
24144 esuffix = ")))";
24146 else
24148 eprefix = "#pragma GCC target ";
24149 esuffix = "";
24152 if (cpu_opt)
24153 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24154 q, esuffix);
24155 else if (not_valid_p)
24156 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24157 else
24158 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24163 else if (TREE_CODE (args) == TREE_LIST)
24167 tree value = TREE_VALUE (args);
24168 if (value)
24170 bool ret2 = rs6000_inner_target_options (value, attr_p);
24171 if (!ret2)
24172 ret = false;
24174 args = TREE_CHAIN (args);
24176 while (args != NULL_TREE);
24179 else
24181 error ("attribute %<target%> argument not a string");
24182 return false;
24185 return ret;
24188 /* Print out the target options as a list for -mdebug=target. */
24190 static void
24191 rs6000_debug_target_options (tree args, const char *prefix)
24193 if (args == NULL_TREE)
24194 fprintf (stderr, "%s<NULL>", prefix);
24196 else if (TREE_CODE (args) == STRING_CST)
24198 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24199 char *q;
24201 while ((q = strtok (p, ",")) != NULL)
24203 p = NULL;
24204 fprintf (stderr, "%s\"%s\"", prefix, q);
24205 prefix = ", ";
24209 else if (TREE_CODE (args) == TREE_LIST)
24213 tree value = TREE_VALUE (args);
24214 if (value)
24216 rs6000_debug_target_options (value, prefix);
24217 prefix = ", ";
24219 args = TREE_CHAIN (args);
24221 while (args != NULL_TREE);
24224 else
24225 gcc_unreachable ();
24227 return;
24231 /* Hook to validate attribute((target("..."))). */
24233 static bool
24234 rs6000_valid_attribute_p (tree fndecl,
24235 tree ARG_UNUSED (name),
24236 tree args,
24237 int flags)
24239 struct cl_target_option cur_target;
24240 bool ret;
24241 tree old_optimize;
24242 tree new_target, new_optimize;
24243 tree func_optimize;
24245 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24247 if (TARGET_DEBUG_TARGET)
24249 tree tname = DECL_NAME (fndecl);
24250 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24251 if (tname)
24252 fprintf (stderr, "function: %.*s\n",
24253 (int) IDENTIFIER_LENGTH (tname),
24254 IDENTIFIER_POINTER (tname));
24255 else
24256 fprintf (stderr, "function: unknown\n");
24258 fprintf (stderr, "args:");
24259 rs6000_debug_target_options (args, " ");
24260 fprintf (stderr, "\n");
24262 if (flags)
24263 fprintf (stderr, "flags: 0x%x\n", flags);
24265 fprintf (stderr, "--------------------\n");
24268 /* attribute((target("default"))) does nothing, beyond
24269 affecting multi-versioning. */
24270 if (TREE_VALUE (args)
24271 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24272 && TREE_CHAIN (args) == NULL_TREE
24273 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24274 return true;
24276 old_optimize = build_optimization_node (&global_options,
24277 &global_options_set);
24278 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24280 /* If the function changed the optimization levels as well as setting target
24281 options, start with the optimizations specified. */
24282 if (func_optimize && func_optimize != old_optimize)
24283 cl_optimization_restore (&global_options, &global_options_set,
24284 TREE_OPTIMIZATION (func_optimize));
24286 /* The target attributes may also change some optimization flags, so update
24287 the optimization options if necessary. */
24288 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24289 rs6000_cpu_index = rs6000_tune_index = -1;
24290 ret = rs6000_inner_target_options (args, true);
24292 /* Set up any additional state. */
24293 if (ret)
24295 ret = rs6000_option_override_internal (false);
24296 new_target = build_target_option_node (&global_options,
24297 &global_options_set);
24299 else
24300 new_target = NULL;
24302 new_optimize = build_optimization_node (&global_options,
24303 &global_options_set);
24305 if (!new_target)
24306 ret = false;
24308 else if (fndecl)
24310 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24312 if (old_optimize != new_optimize)
24313 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24316 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24318 if (old_optimize != new_optimize)
24319 cl_optimization_restore (&global_options, &global_options_set,
24320 TREE_OPTIMIZATION (old_optimize));
24322 return ret;
24326 /* Hook to validate the current #pragma GCC target and set the state, and
24327 update the macros based on what was changed. If ARGS is NULL, then
24328 POP_TARGET is used to reset the options. */
24330 bool
24331 rs6000_pragma_target_parse (tree args, tree pop_target)
24333 tree prev_tree = build_target_option_node (&global_options,
24334 &global_options_set);
24335 tree cur_tree;
24336 struct cl_target_option *prev_opt, *cur_opt;
24337 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24339 if (TARGET_DEBUG_TARGET)
24341 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24342 fprintf (stderr, "args:");
24343 rs6000_debug_target_options (args, " ");
24344 fprintf (stderr, "\n");
24346 if (pop_target)
24348 fprintf (stderr, "pop_target:\n");
24349 debug_tree (pop_target);
24351 else
24352 fprintf (stderr, "pop_target: <NULL>\n");
24354 fprintf (stderr, "--------------------\n");
24357 if (! args)
24359 cur_tree = ((pop_target)
24360 ? pop_target
24361 : target_option_default_node);
24362 cl_target_option_restore (&global_options, &global_options_set,
24363 TREE_TARGET_OPTION (cur_tree));
24365 else
24367 rs6000_cpu_index = rs6000_tune_index = -1;
24368 if (!rs6000_inner_target_options (args, false)
24369 || !rs6000_option_override_internal (false)
24370 || (cur_tree = build_target_option_node (&global_options,
24371 &global_options_set))
24372 == NULL_TREE)
24374 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24375 fprintf (stderr, "invalid pragma\n");
24377 return false;
24381 target_option_current_node = cur_tree;
24382 rs6000_activate_target_options (target_option_current_node);
24384 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24385 change the macros that are defined. */
24386 if (rs6000_target_modify_macros_ptr)
24388 prev_opt = TREE_TARGET_OPTION (prev_tree);
24389 prev_flags = prev_opt->x_rs6000_isa_flags;
24391 cur_opt = TREE_TARGET_OPTION (cur_tree);
24392 cur_flags = cur_opt->x_rs6000_isa_flags;
24394 diff_flags = (prev_flags ^ cur_flags);
24396 if (diff_flags != 0)
24398 /* Delete old macros. */
24399 rs6000_target_modify_macros_ptr (false,
24400 prev_flags & diff_flags);
24402 /* Define new macros. */
24403 rs6000_target_modify_macros_ptr (true,
24404 cur_flags & diff_flags);
24408 return true;
24412 /* Remember the last target of rs6000_set_current_function. */
24413 static GTY(()) tree rs6000_previous_fndecl;
24415 /* Restore target's globals from NEW_TREE and invalidate the
24416 rs6000_previous_fndecl cache. */
24418 void
24419 rs6000_activate_target_options (tree new_tree)
24421 cl_target_option_restore (&global_options, &global_options_set,
24422 TREE_TARGET_OPTION (new_tree));
24423 if (TREE_TARGET_GLOBALS (new_tree))
24424 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24425 else if (new_tree == target_option_default_node)
24426 restore_target_globals (&default_target_globals);
24427 else
24428 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24429 rs6000_previous_fndecl = NULL_TREE;
24432 /* Establish appropriate back-end context for processing the function
24433 FNDECL. The argument might be NULL to indicate processing at top
24434 level, outside of any function scope. */
24435 static void
24436 rs6000_set_current_function (tree fndecl)
24438 if (TARGET_DEBUG_TARGET)
24440 fprintf (stderr, "\n==================== rs6000_set_current_function");
24442 if (fndecl)
24443 fprintf (stderr, ", fndecl %s (%p)",
24444 (DECL_NAME (fndecl)
24445 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24446 : "<unknown>"), (void *)fndecl);
24448 if (rs6000_previous_fndecl)
24449 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24451 fprintf (stderr, "\n");
24454 /* Only change the context if the function changes. This hook is called
24455 several times in the course of compiling a function, and we don't want to
24456 slow things down too much or call target_reinit when it isn't safe. */
24457 if (fndecl == rs6000_previous_fndecl)
24458 return;
24460 tree old_tree;
24461 if (rs6000_previous_fndecl == NULL_TREE)
24462 old_tree = target_option_current_node;
24463 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24464 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24465 else
24466 old_tree = target_option_default_node;
24468 tree new_tree;
24469 if (fndecl == NULL_TREE)
24471 if (old_tree != target_option_current_node)
24472 new_tree = target_option_current_node;
24473 else
24474 new_tree = NULL_TREE;
24476 else
24478 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24479 if (new_tree == NULL_TREE)
24480 new_tree = target_option_default_node;
24483 if (TARGET_DEBUG_TARGET)
24485 if (new_tree)
24487 fprintf (stderr, "\nnew fndecl target specific options:\n");
24488 debug_tree (new_tree);
24491 if (old_tree)
24493 fprintf (stderr, "\nold fndecl target specific options:\n");
24494 debug_tree (old_tree);
24497 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24498 fprintf (stderr, "--------------------\n");
24501 if (new_tree && old_tree != new_tree)
24502 rs6000_activate_target_options (new_tree);
24504 if (fndecl)
24505 rs6000_previous_fndecl = fndecl;
24509 /* Save the current options */
24511 static void
24512 rs6000_function_specific_save (struct cl_target_option *ptr,
24513 struct gcc_options *opts,
24514 struct gcc_options */* opts_set */)
24516 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24517 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24520 /* Restore the current options */
24522 static void
24523 rs6000_function_specific_restore (struct gcc_options *opts,
24524 struct gcc_options */* opts_set */,
24525 struct cl_target_option *ptr)
24528 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24529 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24530 (void) rs6000_option_override_internal (false);
24533 /* Print the current options */
24535 static void
24536 rs6000_function_specific_print (FILE *file, int indent,
24537 struct cl_target_option *ptr)
24539 rs6000_print_isa_options (file, indent, "Isa options set",
24540 ptr->x_rs6000_isa_flags);
24542 rs6000_print_isa_options (file, indent, "Isa options explicit",
24543 ptr->x_rs6000_isa_flags_explicit);
24546 /* Helper function to print the current isa or misc options on a line. */
24548 static void
24549 rs6000_print_options_internal (FILE *file,
24550 int indent,
24551 const char *string,
24552 HOST_WIDE_INT flags,
24553 const char *prefix,
24554 const struct rs6000_opt_mask *opts,
24555 size_t num_elements)
24557 size_t i;
24558 size_t start_column = 0;
24559 size_t cur_column;
24560 size_t max_column = 120;
24561 size_t prefix_len = strlen (prefix);
24562 size_t comma_len = 0;
24563 const char *comma = "";
24565 if (indent)
24566 start_column += fprintf (file, "%*s", indent, "");
24568 if (!flags)
24570 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24571 return;
24574 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24576 /* Print the various mask options. */
24577 cur_column = start_column;
24578 for (i = 0; i < num_elements; i++)
24580 bool invert = opts[i].invert;
24581 const char *name = opts[i].name;
24582 const char *no_str = "";
24583 HOST_WIDE_INT mask = opts[i].mask;
24584 size_t len = comma_len + prefix_len + strlen (name);
24586 if (!invert)
24588 if ((flags & mask) == 0)
24590 no_str = "no-";
24591 len += strlen ("no-");
24594 flags &= ~mask;
24597 else
24599 if ((flags & mask) != 0)
24601 no_str = "no-";
24602 len += strlen ("no-");
24605 flags |= mask;
24608 cur_column += len;
24609 if (cur_column > max_column)
24611 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24612 cur_column = start_column + len;
24613 comma = "";
24616 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24617 comma = ", ";
24618 comma_len = strlen (", ");
24621 fputs ("\n", file);
24624 /* Helper function to print the current isa options on a line. */
24626 static void
24627 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24628 HOST_WIDE_INT flags)
24630 rs6000_print_options_internal (file, indent, string, flags, "-m",
24631 &rs6000_opt_masks[0],
24632 ARRAY_SIZE (rs6000_opt_masks));
24635 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24636 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24637 -mupper-regs-df, etc.).
24639 If the user used -mno-power8-vector, we need to turn off all of the implicit
24640 ISA 2.07 and 3.0 options that relate to the vector unit.
24642 If the user used -mno-power9-vector, we need to turn off all of the implicit
24643 ISA 3.0 options that relate to the vector unit.
24645 This function does not handle explicit options such as the user specifying
24646 -mdirect-move. These are handled in rs6000_option_override_internal, and
24647 the appropriate error is given if needed.
24649 We return a mask of all of the implicit options that should not be enabled
24650 by default. */
24652 static HOST_WIDE_INT
24653 rs6000_disable_incompatible_switches (void)
24655 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24656 size_t i, j;
24658 static const struct {
24659 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24660 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24661 const char *const name; /* name of the switch. */
24662 } flags[] = {
24663 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24664 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24665 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24666 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24669 for (i = 0; i < ARRAY_SIZE (flags); i++)
24671 HOST_WIDE_INT no_flag = flags[i].no_flag;
24673 if ((rs6000_isa_flags & no_flag) == 0
24674 && (rs6000_isa_flags_explicit & no_flag) != 0)
24676 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24677 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24678 & rs6000_isa_flags
24679 & dep_flags);
24681 if (set_flags)
24683 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24684 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24686 set_flags &= ~rs6000_opt_masks[j].mask;
24687 error ("%<-mno-%s%> turns off %<-m%s%>",
24688 flags[i].name,
24689 rs6000_opt_masks[j].name);
24692 gcc_assert (!set_flags);
24695 rs6000_isa_flags &= ~dep_flags;
24696 ignore_masks |= no_flag | dep_flags;
24700 return ignore_masks;
24704 /* Helper function for printing the function name when debugging. */
24706 static const char *
24707 get_decl_name (tree fn)
24709 tree name;
24711 if (!fn)
24712 return "<null>";
24714 name = DECL_NAME (fn);
24715 if (!name)
24716 return "<no-name>";
24718 return IDENTIFIER_POINTER (name);
24721 /* Return the clone id of the target we are compiling code for in a target
24722 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24723 the priority list for the target clones (ordered from lowest to
24724 highest). */
24726 static int
24727 rs6000_clone_priority (tree fndecl)
24729 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24730 HOST_WIDE_INT isa_masks;
24731 int ret = CLONE_DEFAULT;
24732 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24733 const char *attrs_str = NULL;
24735 attrs = TREE_VALUE (TREE_VALUE (attrs));
24736 attrs_str = TREE_STRING_POINTER (attrs);
24738 /* Return priority zero for default function. Return the ISA needed for the
24739 function if it is not the default. */
24740 if (strcmp (attrs_str, "default") != 0)
24742 if (fn_opts == NULL_TREE)
24743 fn_opts = target_option_default_node;
24745 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24746 isa_masks = rs6000_isa_flags;
24747 else
24748 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24750 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24751 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24752 break;
24755 if (TARGET_DEBUG_TARGET)
24756 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24757 get_decl_name (fndecl), ret);
24759 return ret;
24762 /* This compares the priority of target features in function DECL1 and DECL2.
24763 It returns positive value if DECL1 is higher priority, negative value if
24764 DECL2 is higher priority and 0 if they are the same. Note, priorities are
24765 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24767 static int
24768 rs6000_compare_version_priority (tree decl1, tree decl2)
24770 int priority1 = rs6000_clone_priority (decl1);
24771 int priority2 = rs6000_clone_priority (decl2);
24772 int ret = priority1 - priority2;
24774 if (TARGET_DEBUG_TARGET)
24775 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24776 get_decl_name (decl1), get_decl_name (decl2), ret);
24778 return ret;
24781 /* Make a dispatcher declaration for the multi-versioned function DECL.
24782 Calls to DECL function will be replaced with calls to the dispatcher
24783 by the front-end. Returns the decl of the dispatcher function. */
24785 static tree
24786 rs6000_get_function_versions_dispatcher (void *decl)
24788 tree fn = (tree) decl;
24789 struct cgraph_node *node = NULL;
24790 struct cgraph_node *default_node = NULL;
24791 struct cgraph_function_version_info *node_v = NULL;
24792 struct cgraph_function_version_info *first_v = NULL;
24794 tree dispatch_decl = NULL;
24796 struct cgraph_function_version_info *default_version_info = NULL;
24797 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24799 if (TARGET_DEBUG_TARGET)
24800 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24801 get_decl_name (fn));
24803 node = cgraph_node::get (fn);
24804 gcc_assert (node != NULL);
24806 node_v = node->function_version ();
24807 gcc_assert (node_v != NULL);
24809 if (node_v->dispatcher_resolver != NULL)
24810 return node_v->dispatcher_resolver;
24812 /* Find the default version and make it the first node. */
24813 first_v = node_v;
24814 /* Go to the beginning of the chain. */
24815 while (first_v->prev != NULL)
24816 first_v = first_v->prev;
24818 default_version_info = first_v;
24819 while (default_version_info != NULL)
24821 const tree decl2 = default_version_info->this_node->decl;
24822 if (is_function_default_version (decl2))
24823 break;
24824 default_version_info = default_version_info->next;
24827 /* If there is no default node, just return NULL. */
24828 if (default_version_info == NULL)
24829 return NULL;
24831 /* Make default info the first node. */
24832 if (first_v != default_version_info)
24834 default_version_info->prev->next = default_version_info->next;
24835 if (default_version_info->next)
24836 default_version_info->next->prev = default_version_info->prev;
24837 first_v->prev = default_version_info;
24838 default_version_info->next = first_v;
24839 default_version_info->prev = NULL;
24842 default_node = default_version_info->this_node;
24844 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24845 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24846 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24847 "exports hardware capability bits");
24848 #else
24850 if (targetm.has_ifunc_p ())
24852 struct cgraph_function_version_info *it_v = NULL;
24853 struct cgraph_node *dispatcher_node = NULL;
24854 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24856 /* Right now, the dispatching is done via ifunc. */
24857 dispatch_decl = make_dispatcher_decl (default_node->decl);
24858 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
24860 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24861 gcc_assert (dispatcher_node != NULL);
24862 dispatcher_node->dispatcher_function = 1;
24863 dispatcher_version_info
24864 = dispatcher_node->insert_new_function_version ();
24865 dispatcher_version_info->next = default_version_info;
24866 dispatcher_node->definition = 1;
24868 /* Set the dispatcher for all the versions. */
24869 it_v = default_version_info;
24870 while (it_v != NULL)
24872 it_v->dispatcher_resolver = dispatch_decl;
24873 it_v = it_v->next;
24876 else
24878 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24879 "multiversioning needs %<ifunc%> which is not supported "
24880 "on this target");
24882 #endif
24884 return dispatch_decl;
24887 /* Make the resolver function decl to dispatch the versions of a multi-
24888 versioned function, DEFAULT_DECL. Create an empty basic block in the
24889 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24890 function. */
24892 static tree
24893 make_resolver_func (const tree default_decl,
24894 const tree dispatch_decl,
24895 basic_block *empty_bb)
24897 /* Make the resolver function static. The resolver function returns
24898 void *. */
24899 tree decl_name = clone_function_name (default_decl, "resolver");
24900 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24901 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24902 tree decl = build_fn_decl (resolver_name, type);
24903 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24905 DECL_NAME (decl) = decl_name;
24906 TREE_USED (decl) = 1;
24907 DECL_ARTIFICIAL (decl) = 1;
24908 DECL_IGNORED_P (decl) = 0;
24909 TREE_PUBLIC (decl) = 0;
24910 DECL_UNINLINABLE (decl) = 1;
24912 /* Resolver is not external, body is generated. */
24913 DECL_EXTERNAL (decl) = 0;
24914 DECL_EXTERNAL (dispatch_decl) = 0;
24916 DECL_CONTEXT (decl) = NULL_TREE;
24917 DECL_INITIAL (decl) = make_node (BLOCK);
24918 DECL_STATIC_CONSTRUCTOR (decl) = 0;
24920 if (DECL_COMDAT_GROUP (default_decl)
24921 || TREE_PUBLIC (default_decl))
24923 /* In this case, each translation unit with a call to this
24924 versioned function will put out a resolver. Ensure it
24925 is comdat to keep just one copy. */
24926 DECL_COMDAT (decl) = 1;
24927 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
24929 else
24930 TREE_PUBLIC (dispatch_decl) = 0;
24932 /* Build result decl and add to function_decl. */
24933 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
24934 DECL_CONTEXT (t) = decl;
24935 DECL_ARTIFICIAL (t) = 1;
24936 DECL_IGNORED_P (t) = 1;
24937 DECL_RESULT (decl) = t;
24939 gimplify_function_tree (decl);
24940 push_cfun (DECL_STRUCT_FUNCTION (decl));
24941 *empty_bb = init_lowered_empty_function (decl, false,
24942 profile_count::uninitialized ());
24944 cgraph_node::add_new_function (decl, true);
24945 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
24947 pop_cfun ();
24949 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24950 DECL_ATTRIBUTES (dispatch_decl)
24951 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
24953 cgraph_node::create_same_body_alias (dispatch_decl, decl);
24955 return decl;
24958 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24959 return a pointer to VERSION_DECL if we are running on a machine that
24960 supports the index CLONE_ISA hardware architecture bits. This function will
24961 be called during version dispatch to decide which function version to
24962 execute. It returns the basic block at the end, to which more conditions
24963 can be added. */
24965 static basic_block
24966 add_condition_to_bb (tree function_decl, tree version_decl,
24967 int clone_isa, basic_block new_bb)
24969 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
24971 gcc_assert (new_bb != NULL);
24972 gimple_seq gseq = bb_seq (new_bb);
24975 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
24976 build_fold_addr_expr (version_decl));
24977 tree result_var = create_tmp_var (ptr_type_node);
24978 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
24979 gimple *return_stmt = gimple_build_return (result_var);
24981 if (clone_isa == CLONE_DEFAULT)
24983 gimple_seq_add_stmt (&gseq, convert_stmt);
24984 gimple_seq_add_stmt (&gseq, return_stmt);
24985 set_bb_seq (new_bb, gseq);
24986 gimple_set_bb (convert_stmt, new_bb);
24987 gimple_set_bb (return_stmt, new_bb);
24988 pop_cfun ();
24989 return new_bb;
24992 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24993 tree cond_var = create_tmp_var (bool_int_type_node);
24994 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
24995 const char *arg_str = rs6000_clone_map[clone_isa].name;
24996 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24997 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24998 gimple_call_set_lhs (call_cond_stmt, cond_var);
25000 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25001 gimple_set_bb (call_cond_stmt, new_bb);
25002 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25004 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25005 NULL_TREE, NULL_TREE);
25006 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25007 gimple_set_bb (if_else_stmt, new_bb);
25008 gimple_seq_add_stmt (&gseq, if_else_stmt);
25010 gimple_seq_add_stmt (&gseq, convert_stmt);
25011 gimple_seq_add_stmt (&gseq, return_stmt);
25012 set_bb_seq (new_bb, gseq);
25014 basic_block bb1 = new_bb;
25015 edge e12 = split_block (bb1, if_else_stmt);
25016 basic_block bb2 = e12->dest;
25017 e12->flags &= ~EDGE_FALLTHRU;
25018 e12->flags |= EDGE_TRUE_VALUE;
25020 edge e23 = split_block (bb2, return_stmt);
25021 gimple_set_bb (convert_stmt, bb2);
25022 gimple_set_bb (return_stmt, bb2);
25024 basic_block bb3 = e23->dest;
25025 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25027 remove_edge (e23);
25028 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25030 pop_cfun ();
25031 return bb3;
25034 /* This function generates the dispatch function for multi-versioned functions.
25035 DISPATCH_DECL is the function which will contain the dispatch logic.
25036 FNDECLS are the function choices for dispatch, and is a tree chain.
25037 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25038 code is generated. */
25040 static int
25041 dispatch_function_versions (tree dispatch_decl,
25042 void *fndecls_p,
25043 basic_block *empty_bb)
25045 int ix;
25046 tree ele;
25047 vec<tree> *fndecls;
25048 tree clones[CLONE_MAX];
25050 if (TARGET_DEBUG_TARGET)
25051 fputs ("dispatch_function_versions, top\n", stderr);
25053 gcc_assert (dispatch_decl != NULL
25054 && fndecls_p != NULL
25055 && empty_bb != NULL);
25057 /* fndecls_p is actually a vector. */
25058 fndecls = static_cast<vec<tree> *> (fndecls_p);
25060 /* At least one more version other than the default. */
25061 gcc_assert (fndecls->length () >= 2);
25063 /* The first version in the vector is the default decl. */
25064 memset ((void *) clones, '\0', sizeof (clones));
25065 clones[CLONE_DEFAULT] = (*fndecls)[0];
25067 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25068 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25069 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25070 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25071 to insert the code here to do the call. */
25073 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25075 int priority = rs6000_clone_priority (ele);
25076 if (!clones[priority])
25077 clones[priority] = ele;
25080 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25081 if (clones[ix])
25083 if (TARGET_DEBUG_TARGET)
25084 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25085 ix, get_decl_name (clones[ix]));
25087 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25088 *empty_bb);
25091 return 0;
25094 /* Generate the dispatching code body to dispatch multi-versioned function
25095 DECL. The target hook is called to process the "target" attributes and
25096 provide the code to dispatch the right function at run-time. NODE points
25097 to the dispatcher decl whose body will be created. */
25099 static tree
25100 rs6000_generate_version_dispatcher_body (void *node_p)
25102 tree resolver;
25103 basic_block empty_bb;
25104 struct cgraph_node *node = (cgraph_node *) node_p;
25105 struct cgraph_function_version_info *ninfo = node->function_version ();
25107 if (ninfo->dispatcher_resolver)
25108 return ninfo->dispatcher_resolver;
25110 /* node is going to be an alias, so remove the finalized bit. */
25111 node->definition = false;
25113 /* The first version in the chain corresponds to the default version. */
25114 ninfo->dispatcher_resolver = resolver
25115 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25117 if (TARGET_DEBUG_TARGET)
25118 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
25119 get_decl_name (resolver));
25121 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25122 auto_vec<tree, 2> fn_ver_vec;
25124 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25125 vinfo;
25126 vinfo = vinfo->next)
25128 struct cgraph_node *version = vinfo->this_node;
25129 /* Check for virtual functions here again, as by this time it should
25130 have been determined if this function needs a vtable index or
25131 not. This happens for methods in derived classes that override
25132 virtual methods in base classes but are not explicitly marked as
25133 virtual. */
25134 if (DECL_VINDEX (version->decl))
25135 sorry ("Virtual function multiversioning not supported");
25137 fn_ver_vec.safe_push (version->decl);
25140 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25141 cgraph_edge::rebuild_edges ();
25142 pop_cfun ();
25143 return resolver;
25146 /* Hook to decide if we need to scan function gimple statements to
25147 collect target specific information for inlining, and update the
25148 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25149 to predict which ISA feature is used at this time. Return true
25150 if we need to scan, otherwise return false. */
25152 static bool
25153 rs6000_need_ipa_fn_target_info (const_tree decl,
25154 unsigned int &info ATTRIBUTE_UNUSED)
25156 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25157 if (!target)
25158 target = target_option_default_node;
25159 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25161 /* See PR102059, we only handle HTM for now, so will only do
25162 the consequent scannings when HTM feature enabled. */
25163 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25164 return true;
25166 return false;
25169 /* Hook to update target specific information INFO for inlining by
25170 checking the given STMT. Return false if we don't need to scan
25171 any more, otherwise return true. */
25173 static bool
25174 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25176 /* Assume inline asm can use any instruction features. */
25177 if (gimple_code (stmt) == GIMPLE_ASM)
25179 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25180 the only bit we care about. */
25181 info |= RS6000_FN_TARGET_INFO_HTM;
25182 return false;
25184 else if (gimple_code (stmt) == GIMPLE_CALL)
25186 tree fndecl = gimple_call_fndecl (stmt);
25187 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25189 enum rs6000_gen_builtins fcode
25190 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25191 /* HTM bifs definitely exploit HTM insns. */
25192 if (bif_is_htm (rs6000_builtin_info[fcode]))
25194 info |= RS6000_FN_TARGET_INFO_HTM;
25195 return false;
25200 return true;
25203 /* Hook to determine if one function can safely inline another. */
25205 static bool
25206 rs6000_can_inline_p (tree caller, tree callee)
25208 bool ret = false;
25209 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25210 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25212 /* If the callee has no option attributes, then it is ok to inline. */
25213 if (!callee_tree)
25214 ret = true;
25216 else
25218 HOST_WIDE_INT caller_isa;
25219 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25220 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25221 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25223 /* If the caller has option attributes, then use them.
25224 Otherwise, use the command line options. */
25225 if (caller_tree)
25226 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25227 else
25228 caller_isa = rs6000_isa_flags;
25230 cgraph_node *callee_node = cgraph_node::get (callee);
25231 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25233 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25234 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25236 callee_isa &= ~OPTION_MASK_HTM;
25237 explicit_isa &= ~OPTION_MASK_HTM;
25241 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25242 purposes. */
25243 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25244 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25246 /* The callee's options must be a subset of the caller's options, i.e.
25247 a vsx function may inline an altivec function, but a no-vsx function
25248 must not inline a vsx function. However, for those options that the
25249 callee has explicitly enabled or disabled, then we must enforce that
25250 the callee's and caller's options match exactly; see PR70010. */
25251 if (((caller_isa & callee_isa) == callee_isa)
25252 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25253 ret = true;
25256 if (TARGET_DEBUG_TARGET)
25257 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25258 get_decl_name (caller), get_decl_name (callee),
25259 (ret ? "can" : "cannot"));
25261 return ret;
25264 /* Allocate a stack temp and fixup the address so it meets the particular
25265 memory requirements (either offetable or REG+REG addressing). */
25268 rs6000_allocate_stack_temp (machine_mode mode,
25269 bool offsettable_p,
25270 bool reg_reg_p)
25272 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25273 rtx addr = XEXP (stack, 0);
25274 int strict_p = reload_completed;
25276 if (!legitimate_indirect_address_p (addr, strict_p))
25278 if (offsettable_p
25279 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25280 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25282 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25283 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25286 return stack;
25289 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25290 convert to such a form to deal with memory reference instructions
25291 like STFIWX and LDBRX that only take reg+reg addressing. */
25294 rs6000_force_indexed_or_indirect_mem (rtx x)
25296 machine_mode mode = GET_MODE (x);
25298 gcc_assert (MEM_P (x));
25299 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25301 rtx addr = XEXP (x, 0);
25302 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25304 rtx reg = XEXP (addr, 0);
25305 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25306 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25307 gcc_assert (REG_P (reg));
25308 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25309 addr = reg;
25311 else if (GET_CODE (addr) == PRE_MODIFY)
25313 rtx reg = XEXP (addr, 0);
25314 rtx expr = XEXP (addr, 1);
25315 gcc_assert (REG_P (reg));
25316 gcc_assert (GET_CODE (expr) == PLUS);
25317 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25318 addr = reg;
25321 if (GET_CODE (addr) == PLUS)
25323 rtx op0 = XEXP (addr, 0);
25324 rtx op1 = XEXP (addr, 1);
25325 op0 = force_reg (Pmode, op0);
25326 op1 = force_reg (Pmode, op1);
25327 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25329 else
25330 x = replace_equiv_address (x, force_reg (Pmode, addr));
25333 return x;
25336 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25338 On the RS/6000, all integer constants are acceptable, most won't be valid
25339 for particular insns, though. Only easy FP constants are acceptable. */
25341 static bool
25342 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25344 if (TARGET_ELF && tls_referenced_p (x))
25345 return false;
25347 if (CONST_DOUBLE_P (x))
25348 return easy_fp_constant (x, mode);
25350 if (GET_CODE (x) == CONST_VECTOR)
25351 return easy_vector_constant (x, mode);
25353 return true;
25356 #if TARGET_AIX_OS
25357 /* Implement TARGET_PRECOMPUTE_TLS_P.
25359 On the AIX, TLS symbols are in the TOC, which is maintained in the
25360 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25361 must be considered legitimate constants. */
25363 static bool
25364 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25366 return tls_referenced_p (x);
25368 #endif
25371 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25373 static bool
25374 chain_already_loaded (rtx_insn *last)
25376 for (; last != NULL; last = PREV_INSN (last))
25378 if (NONJUMP_INSN_P (last))
25380 rtx patt = PATTERN (last);
25382 if (GET_CODE (patt) == SET)
25384 rtx lhs = XEXP (patt, 0);
25386 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25387 return true;
25391 return false;
25394 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25396 void
25397 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25399 rtx func = func_desc;
25400 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25401 rtx toc_load = NULL_RTX;
25402 rtx toc_restore = NULL_RTX;
25403 rtx func_addr;
25404 rtx abi_reg = NULL_RTX;
25405 rtx call[5];
25406 int n_call;
25407 rtx insn;
25408 bool is_pltseq_longcall;
25410 if (global_tlsarg)
25411 tlsarg = global_tlsarg;
25413 /* Handle longcall attributes. */
25414 is_pltseq_longcall = false;
25415 if ((INTVAL (cookie) & CALL_LONG) != 0
25416 && GET_CODE (func_desc) == SYMBOL_REF)
25418 func = rs6000_longcall_ref (func_desc, tlsarg);
25419 if (TARGET_PLTSEQ)
25420 is_pltseq_longcall = true;
25423 /* Handle indirect calls. */
25424 if (!SYMBOL_REF_P (func)
25425 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25427 if (!rs6000_pcrel_p ())
25429 /* Save the TOC into its reserved slot before the call,
25430 and prepare to restore it after the call. */
25431 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25432 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25433 gen_rtvec (1, stack_toc_offset),
25434 UNSPEC_TOCSLOT);
25435 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25437 /* Can we optimize saving the TOC in the prologue or
25438 do we need to do it at every call? */
25439 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25440 cfun->machine->save_toc_in_prologue = true;
25441 else
25443 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25444 rtx stack_toc_mem = gen_frame_mem (Pmode,
25445 gen_rtx_PLUS (Pmode, stack_ptr,
25446 stack_toc_offset));
25447 MEM_VOLATILE_P (stack_toc_mem) = 1;
25448 if (is_pltseq_longcall)
25450 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25451 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25452 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25454 else
25455 emit_move_insn (stack_toc_mem, toc_reg);
25459 if (DEFAULT_ABI == ABI_ELFv2)
25461 /* A function pointer in the ELFv2 ABI is just a plain address, but
25462 the ABI requires it to be loaded into r12 before the call. */
25463 func_addr = gen_rtx_REG (Pmode, 12);
25464 emit_move_insn (func_addr, func);
25465 abi_reg = func_addr;
25466 /* Indirect calls via CTR are strongly preferred over indirect
25467 calls via LR, so move the address there. Needed to mark
25468 this insn for linker plt sequence editing too. */
25469 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25470 if (is_pltseq_longcall)
25472 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25473 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25474 emit_insn (gen_rtx_SET (func_addr, mark_func));
25475 v = gen_rtvec (2, func_addr, func_desc);
25476 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25478 else
25479 emit_move_insn (func_addr, abi_reg);
25481 else
25483 /* A function pointer under AIX is a pointer to a data area whose
25484 first word contains the actual address of the function, whose
25485 second word contains a pointer to its TOC, and whose third word
25486 contains a value to place in the static chain register (r11).
25487 Note that if we load the static chain, our "trampoline" need
25488 not have any executable code. */
25490 /* Load up address of the actual function. */
25491 func = force_reg (Pmode, func);
25492 func_addr = gen_reg_rtx (Pmode);
25493 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25495 /* Indirect calls via CTR are strongly preferred over indirect
25496 calls via LR, so move the address there. */
25497 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25498 emit_move_insn (ctr_reg, func_addr);
25499 func_addr = ctr_reg;
25501 /* Prepare to load the TOC of the called function. Note that the
25502 TOC load must happen immediately before the actual call so
25503 that unwinding the TOC registers works correctly. See the
25504 comment in frob_update_context. */
25505 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25506 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25507 gen_rtx_PLUS (Pmode, func,
25508 func_toc_offset));
25509 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25511 /* If we have a static chain, load it up. But, if the call was
25512 originally direct, the 3rd word has not been written since no
25513 trampoline has been built, so we ought not to load it, lest we
25514 override a static chain value. */
25515 if (!(GET_CODE (func_desc) == SYMBOL_REF
25516 && SYMBOL_REF_FUNCTION_P (func_desc))
25517 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25518 && !chain_already_loaded (get_current_sequence ()->next->last))
25520 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25521 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25522 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25523 gen_rtx_PLUS (Pmode, func,
25524 func_sc_offset));
25525 emit_move_insn (sc_reg, func_sc_mem);
25526 abi_reg = sc_reg;
25530 else
25532 /* No TOC register needed for calls from PC-relative callers. */
25533 if (!rs6000_pcrel_p ())
25534 /* Direct calls use the TOC: for local calls, the callee will
25535 assume the TOC register is set; for non-local calls, the
25536 PLT stub needs the TOC register. */
25537 abi_reg = toc_reg;
25538 func_addr = func;
25541 /* Create the call. */
25542 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25543 if (value != NULL_RTX)
25544 call[0] = gen_rtx_SET (value, call[0]);
25545 call[1] = gen_rtx_USE (VOIDmode, cookie);
25546 n_call = 2;
25548 if (toc_load)
25549 call[n_call++] = toc_load;
25550 if (toc_restore)
25551 call[n_call++] = toc_restore;
25553 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25555 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25556 insn = emit_call_insn (insn);
25558 /* Mention all registers defined by the ABI to hold information
25559 as uses in CALL_INSN_FUNCTION_USAGE. */
25560 if (abi_reg)
25561 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25564 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25566 void
25567 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25569 rtx call[2];
25570 rtx insn;
25571 rtx r12 = NULL_RTX;
25572 rtx func_addr = func_desc;
25574 if (global_tlsarg)
25575 tlsarg = global_tlsarg;
25577 /* Handle longcall attributes. */
25578 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
25580 /* PCREL can do a sibling call to a longcall function
25581 because we don't need to restore the TOC register. */
25582 gcc_assert (rs6000_pcrel_p ());
25583 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
25585 else
25586 gcc_assert (INTVAL (cookie) == 0);
25588 /* For ELFv2, r12 and CTR need to hold the function address
25589 for an indirect call. */
25590 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25592 r12 = gen_rtx_REG (Pmode, 12);
25593 emit_move_insn (r12, func_desc);
25594 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25595 emit_move_insn (func_addr, r12);
25598 /* Create the call. */
25599 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25600 if (value != NULL_RTX)
25601 call[0] = gen_rtx_SET (value, call[0]);
25603 call[1] = simple_return_rtx;
25605 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25606 insn = emit_call_insn (insn);
25608 /* Note use of the TOC register. */
25609 if (!rs6000_pcrel_p ())
25610 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25611 gen_rtx_REG (Pmode, TOC_REGNUM));
25613 /* Note use of r12. */
25614 if (r12)
25615 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25618 /* Expand code to perform a call under the SYSV4 ABI. */
25620 void
25621 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25623 rtx func = func_desc;
25624 rtx func_addr;
25625 rtx call[4];
25626 rtx insn;
25627 rtx abi_reg = NULL_RTX;
25628 int n;
25630 if (global_tlsarg)
25631 tlsarg = global_tlsarg;
25633 /* Handle longcall attributes. */
25634 if ((INTVAL (cookie) & CALL_LONG) != 0
25635 && GET_CODE (func_desc) == SYMBOL_REF)
25637 func = rs6000_longcall_ref (func_desc, tlsarg);
25638 /* If the longcall was implemented as an inline PLT call using
25639 PLT unspecs then func will be REG:r11. If not, func will be
25640 a pseudo reg. The inline PLT call sequence supports lazy
25641 linking (and longcalls to functions in dlopen'd libraries).
25642 The other style of longcalls don't. The lazy linking entry
25643 to the dynamic symbol resolver requires r11 be the function
25644 address (as it is for linker generated PLT stubs). Ensure
25645 r11 stays valid to the bctrl by marking r11 used by the call. */
25646 if (TARGET_PLTSEQ)
25647 abi_reg = func;
25650 /* Handle indirect calls. */
25651 if (GET_CODE (func) != SYMBOL_REF)
25653 func = force_reg (Pmode, func);
25655 /* Indirect calls via CTR are strongly preferred over indirect
25656 calls via LR, so move the address there. That can't be left
25657 to reload because we want to mark every instruction in an
25658 inline PLT call sequence with a reloc, enabling the linker to
25659 edit the sequence back to a direct call when that makes sense. */
25660 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25661 if (abi_reg)
25663 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25664 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25665 emit_insn (gen_rtx_SET (func_addr, mark_func));
25666 v = gen_rtvec (2, func_addr, func_desc);
25667 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25669 else
25670 emit_move_insn (func_addr, func);
25672 else
25673 func_addr = func;
25675 /* Create the call. */
25676 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25677 if (value != NULL_RTX)
25678 call[0] = gen_rtx_SET (value, call[0]);
25680 call[1] = gen_rtx_USE (VOIDmode, cookie);
25681 n = 2;
25682 if (TARGET_SECURE_PLT
25683 && flag_pic
25684 && GET_CODE (func_addr) == SYMBOL_REF
25685 && !SYMBOL_REF_LOCAL_P (func_addr))
25686 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25688 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25690 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25691 insn = emit_call_insn (insn);
25692 if (abi_reg)
25693 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25696 /* Expand code to perform a sibling call under the SysV4 ABI. */
25698 void
25699 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25701 rtx func = func_desc;
25702 rtx func_addr;
25703 rtx call[3];
25704 rtx insn;
25705 rtx abi_reg = NULL_RTX;
25707 if (global_tlsarg)
25708 tlsarg = global_tlsarg;
25710 /* Handle longcall attributes. */
25711 if ((INTVAL (cookie) & CALL_LONG) != 0
25712 && GET_CODE (func_desc) == SYMBOL_REF)
25714 func = rs6000_longcall_ref (func_desc, tlsarg);
25715 /* If the longcall was implemented as an inline PLT call using
25716 PLT unspecs then func will be REG:r11. If not, func will be
25717 a pseudo reg. The inline PLT call sequence supports lazy
25718 linking (and longcalls to functions in dlopen'd libraries).
25719 The other style of longcalls don't. The lazy linking entry
25720 to the dynamic symbol resolver requires r11 be the function
25721 address (as it is for linker generated PLT stubs). Ensure
25722 r11 stays valid to the bctr by marking r11 used by the call. */
25723 if (TARGET_PLTSEQ)
25724 abi_reg = func;
25727 /* Handle indirect calls. */
25728 if (GET_CODE (func) != SYMBOL_REF)
25730 func = force_reg (Pmode, func);
25732 /* Indirect sibcalls must go via CTR. That can't be left to
25733 reload because we want to mark every instruction in an inline
25734 PLT call sequence with a reloc, enabling the linker to edit
25735 the sequence back to a direct call when that makes sense. */
25736 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25737 if (abi_reg)
25739 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25740 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25741 emit_insn (gen_rtx_SET (func_addr, mark_func));
25742 v = gen_rtvec (2, func_addr, func_desc);
25743 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25745 else
25746 emit_move_insn (func_addr, func);
25748 else
25749 func_addr = func;
25751 /* Create the call. */
25752 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25753 if (value != NULL_RTX)
25754 call[0] = gen_rtx_SET (value, call[0]);
25756 call[1] = gen_rtx_USE (VOIDmode, cookie);
25757 call[2] = simple_return_rtx;
25759 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25760 insn = emit_call_insn (insn);
25761 if (abi_reg)
25762 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25765 #if TARGET_MACHO
25767 /* Expand code to perform a call under the Darwin ABI.
25768 Modulo handling of mlongcall, this is much the same as sysv.
25769 if/when the longcall optimisation is removed, we could drop this
25770 code and use the sysv case (taking care to avoid the tls stuff).
25772 We can use this for sibcalls too, if needed. */
25774 void
25775 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25776 rtx cookie, bool sibcall)
25778 rtx func = func_desc;
25779 rtx func_addr;
25780 rtx call[3];
25781 rtx insn;
25782 int cookie_val = INTVAL (cookie);
25783 bool make_island = false;
25785 /* Handle longcall attributes, there are two cases for Darwin:
25786 1) Newer linkers are capable of synthesising any branch islands needed.
25787 2) We need a helper branch island synthesised by the compiler.
25788 The second case has mostly been retired and we don't use it for m64.
25789 In fact, it's is an optimisation, we could just indirect as sysv does..
25790 ... however, backwards compatibility for now.
25791 If we're going to use this, then we need to keep the CALL_LONG bit set,
25792 so that we can pick up the special insn form later. */
25793 if ((cookie_val & CALL_LONG) != 0
25794 && GET_CODE (func_desc) == SYMBOL_REF)
25796 /* FIXME: the longcall opt should not hang off this flag, it is most
25797 likely incorrect for kernel-mode code-generation. */
25798 if (darwin_symbol_stubs && TARGET_32BIT)
25799 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25800 else
25802 /* The linker is capable of doing this, but the user explicitly
25803 asked for -mlongcall, so we'll do the 'normal' version. */
25804 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25805 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25809 /* Handle indirect calls. */
25810 if (GET_CODE (func) != SYMBOL_REF)
25812 func = force_reg (Pmode, func);
25814 /* Indirect calls via CTR are strongly preferred over indirect
25815 calls via LR, and are required for indirect sibcalls, so move
25816 the address there. */
25817 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25818 emit_move_insn (func_addr, func);
25820 else
25821 func_addr = func;
25823 /* Create the call. */
25824 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25825 if (value != NULL_RTX)
25826 call[0] = gen_rtx_SET (value, call[0]);
25828 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25830 if (sibcall)
25831 call[2] = simple_return_rtx;
25832 else
25833 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25835 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25836 insn = emit_call_insn (insn);
25837 /* Now we have the debug info in the insn, we can set up the branch island
25838 if we're using one. */
25839 if (make_island)
25841 tree funname = get_identifier (XSTR (func_desc, 0));
25843 if (no_previous_def (funname))
25845 rtx label_rtx = gen_label_rtx ();
25846 char *label_buf, temp_buf[256];
25847 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25848 CODE_LABEL_NUMBER (label_rtx));
25849 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25850 tree labelname = get_identifier (label_buf);
25851 add_compiler_branch_island (labelname, funname,
25852 insn_line ((const rtx_insn*)insn));
25856 #endif
25858 void
25859 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25860 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25862 #if TARGET_MACHO
25863 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25864 #else
25865 gcc_unreachable();
25866 #endif
25870 void
25871 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25872 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25874 #if TARGET_MACHO
25875 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25876 #else
25877 gcc_unreachable();
25878 #endif
25881 /* Return whether we should generate PC-relative code for FNDECL. */
25882 bool
25883 rs6000_fndecl_pcrel_p (const_tree fndecl)
25885 if (DEFAULT_ABI != ABI_ELFv2)
25886 return false;
25888 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25890 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25891 && TARGET_CMODEL == CMODEL_MEDIUM);
25894 /* Return whether we should generate PC-relative code for *FN. */
25895 bool
25896 rs6000_function_pcrel_p (struct function *fn)
25898 if (DEFAULT_ABI != ABI_ELFv2)
25899 return false;
25901 /* Optimize usual case. */
25902 if (fn == cfun)
25903 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25904 && TARGET_CMODEL == CMODEL_MEDIUM);
25906 return rs6000_fndecl_pcrel_p (fn->decl);
25909 /* Return whether we should generate PC-relative code for the current
25910 function. */
25911 bool
25912 rs6000_pcrel_p ()
25914 return (DEFAULT_ABI == ABI_ELFv2
25915 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25916 && TARGET_CMODEL == CMODEL_MEDIUM);
25920 /* Given an address (ADDR), a mode (MODE), and what the format of the
25921 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25922 for the address. */
25924 enum insn_form
25925 address_to_insn_form (rtx addr,
25926 machine_mode mode,
25927 enum non_prefixed_form non_prefixed_format)
25929 /* Single register is easy. */
25930 if (REG_P (addr) || SUBREG_P (addr))
25931 return INSN_FORM_BASE_REG;
25933 /* If the non prefixed instruction format doesn't support offset addressing,
25934 make sure only indexed addressing is allowed.
25936 We special case SDmode so that the register allocator does not try to move
25937 SDmode through GPR registers, but instead uses the 32-bit integer load and
25938 store instructions for the floating point registers. */
25939 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25941 if (GET_CODE (addr) != PLUS)
25942 return INSN_FORM_BAD;
25944 rtx op0 = XEXP (addr, 0);
25945 rtx op1 = XEXP (addr, 1);
25946 if (!REG_P (op0) && !SUBREG_P (op0))
25947 return INSN_FORM_BAD;
25949 if (!REG_P (op1) && !SUBREG_P (op1))
25950 return INSN_FORM_BAD;
25952 return INSN_FORM_X;
25955 /* Deal with update forms. */
25956 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25957 return INSN_FORM_UPDATE;
25959 /* Handle PC-relative symbols and labels. Check for both local and
25960 external symbols. Assume labels are always local. TLS symbols
25961 are not PC-relative for rs6000. */
25962 if (TARGET_PCREL)
25964 if (LABEL_REF_P (addr))
25965 return INSN_FORM_PCREL_LOCAL;
25967 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25969 if (!SYMBOL_REF_LOCAL_P (addr))
25970 return INSN_FORM_PCREL_EXTERNAL;
25971 else
25972 return INSN_FORM_PCREL_LOCAL;
25976 if (GET_CODE (addr) == CONST)
25977 addr = XEXP (addr, 0);
25979 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25980 if (GET_CODE (addr) == LO_SUM)
25981 return INSN_FORM_LO_SUM;
25983 /* Everything below must be an offset address of some form. */
25984 if (GET_CODE (addr) != PLUS)
25985 return INSN_FORM_BAD;
25987 rtx op0 = XEXP (addr, 0);
25988 rtx op1 = XEXP (addr, 1);
25990 /* Check for indexed addresses. */
25991 if (REG_P (op1) || SUBREG_P (op1))
25993 if (REG_P (op0) || SUBREG_P (op0))
25994 return INSN_FORM_X;
25996 return INSN_FORM_BAD;
25999 if (!CONST_INT_P (op1))
26000 return INSN_FORM_BAD;
26002 HOST_WIDE_INT offset = INTVAL (op1);
26003 if (!SIGNED_INTEGER_34BIT_P (offset))
26004 return INSN_FORM_BAD;
26006 /* Check for local and external PC-relative addresses. Labels are always
26007 local. TLS symbols are not PC-relative for rs6000. */
26008 if (TARGET_PCREL)
26010 if (LABEL_REF_P (op0))
26011 return INSN_FORM_PCREL_LOCAL;
26013 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26015 if (!SYMBOL_REF_LOCAL_P (op0))
26016 return INSN_FORM_PCREL_EXTERNAL;
26017 else
26018 return INSN_FORM_PCREL_LOCAL;
26022 /* If it isn't PC-relative, the address must use a base register. */
26023 if (!REG_P (op0) && !SUBREG_P (op0))
26024 return INSN_FORM_BAD;
26026 /* Large offsets must be prefixed. */
26027 if (!SIGNED_INTEGER_16BIT_P (offset))
26029 if (TARGET_PREFIXED)
26030 return INSN_FORM_PREFIXED_NUMERIC;
26032 return INSN_FORM_BAD;
26035 /* We have a 16-bit offset, see what default instruction format to use. */
26036 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26038 unsigned size = GET_MODE_SIZE (mode);
26040 /* On 64-bit systems, assume 64-bit integers need to use DS form
26041 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26042 (for LXV and STXV). TImode is problematical in that its normal usage
26043 is expected to be GPRs where it wants a DS instruction format, but if
26044 it goes into the vector registers, it wants a DQ instruction
26045 format. */
26046 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26047 non_prefixed_format = NON_PREFIXED_DS;
26049 else if (TARGET_VSX && size >= 16
26050 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26051 non_prefixed_format = NON_PREFIXED_DQ;
26053 else
26054 non_prefixed_format = NON_PREFIXED_D;
26057 /* Classify the D/DS/DQ-form addresses. */
26058 switch (non_prefixed_format)
26060 /* Instruction format D, all 16 bits are valid. */
26061 case NON_PREFIXED_D:
26062 return INSN_FORM_D;
26064 /* Instruction format DS, bottom 2 bits must be 0. */
26065 case NON_PREFIXED_DS:
26066 if ((offset & 3) == 0)
26067 return INSN_FORM_DS;
26069 else if (TARGET_PREFIXED)
26070 return INSN_FORM_PREFIXED_NUMERIC;
26072 else
26073 return INSN_FORM_BAD;
26075 /* Instruction format DQ, bottom 4 bits must be 0. */
26076 case NON_PREFIXED_DQ:
26077 if ((offset & 15) == 0)
26078 return INSN_FORM_DQ;
26080 else if (TARGET_PREFIXED)
26081 return INSN_FORM_PREFIXED_NUMERIC;
26083 else
26084 return INSN_FORM_BAD;
26086 default:
26087 break;
26090 return INSN_FORM_BAD;
26093 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26094 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26095 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26096 a D-form or DS-form instruction. X-form and base_reg are always
26097 allowed. */
26098 bool
26099 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26100 enum non_prefixed_form non_prefixed_format)
26102 enum insn_form result_form;
26104 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26106 switch (non_prefixed_format)
26108 case NON_PREFIXED_D:
26109 switch (result_form)
26111 case INSN_FORM_X:
26112 case INSN_FORM_D:
26113 case INSN_FORM_DS:
26114 case INSN_FORM_BASE_REG:
26115 return true;
26116 default:
26117 return false;
26119 break;
26120 case NON_PREFIXED_DS:
26121 switch (result_form)
26123 case INSN_FORM_X:
26124 case INSN_FORM_DS:
26125 case INSN_FORM_BASE_REG:
26126 return true;
26127 default:
26128 return false;
26130 break;
26131 default:
26132 break;
26134 return false;
26137 /* Return true if an REG with a given MODE is loaded from or stored into a MEM
26138 location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26139 the load or store with the PCREL_OPT optimization to make sure it is an
26140 instruction that can be optimized.
26142 We need to specify the MODE separately from the REG to allow for loads that
26143 include zero/sign/float extension. */
26145 bool
26146 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26148 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26149 PCREL_OPT optimization. */
26150 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26151 if (non_prefixed == NON_PREFIXED_X)
26152 return false;
26154 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26155 rtx addr = XEXP (mem, 0);
26156 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26157 return (iform == INSN_FORM_BASE_REG
26158 || iform == INSN_FORM_D
26159 || iform == INSN_FORM_DS
26160 || iform == INSN_FORM_DQ);
26163 /* Helper function to see if we're potentially looking at lfs/stfs.
26164 - PARALLEL containing a SET and a CLOBBER
26165 - stfs:
26166 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26167 - CLOBBER is a V4SF
26168 - lfs:
26169 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26170 - CLOBBER is a DI
26173 static bool
26174 is_lfs_stfs_insn (rtx_insn *insn)
26176 rtx pattern = PATTERN (insn);
26177 if (GET_CODE (pattern) != PARALLEL)
26178 return false;
26180 /* This should be a parallel with exactly one set and one clobber. */
26181 if (XVECLEN (pattern, 0) != 2)
26182 return false;
26184 rtx set = XVECEXP (pattern, 0, 0);
26185 if (GET_CODE (set) != SET)
26186 return false;
26188 rtx clobber = XVECEXP (pattern, 0, 1);
26189 if (GET_CODE (clobber) != CLOBBER)
26190 return false;
26192 /* All we care is that the destination of the SET is a mem:SI,
26193 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26194 should be a scratch:V4SF. */
26196 rtx dest = SET_DEST (set);
26197 rtx src = SET_SRC (set);
26198 rtx scratch = SET_DEST (clobber);
26200 if (GET_CODE (src) != UNSPEC)
26201 return false;
26203 /* stfs case. */
26204 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26205 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26206 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26207 return true;
26209 /* lfs case. */
26210 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26211 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26212 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26213 return true;
26215 return false;
26218 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26219 instruction format (D/DS/DQ) used for offset memory. */
26221 enum non_prefixed_form
26222 reg_to_non_prefixed (rtx reg, machine_mode mode)
26224 /* If it isn't a register, use the defaults. */
26225 if (!REG_P (reg) && !SUBREG_P (reg))
26226 return NON_PREFIXED_DEFAULT;
26228 unsigned int r = reg_or_subregno (reg);
26230 /* If we have a pseudo, use the default instruction format. */
26231 if (!HARD_REGISTER_NUM_P (r))
26232 return NON_PREFIXED_DEFAULT;
26234 unsigned size = GET_MODE_SIZE (mode);
26236 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26237 128-bit floating point, and 128-bit integers. Before power9, only indexed
26238 addressing was available for vectors. */
26239 if (FP_REGNO_P (r))
26241 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26242 return NON_PREFIXED_D;
26244 else if (size < 8)
26245 return NON_PREFIXED_X;
26247 else if (TARGET_VSX && size >= 16
26248 && (VECTOR_MODE_P (mode)
26249 || VECTOR_ALIGNMENT_P (mode)
26250 || mode == TImode || mode == CTImode))
26251 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26253 else
26254 return NON_PREFIXED_DEFAULT;
26257 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26258 128-bit floating point, and 128-bit integers. Before power9, only indexed
26259 addressing was available. */
26260 else if (ALTIVEC_REGNO_P (r))
26262 if (!TARGET_P9_VECTOR)
26263 return NON_PREFIXED_X;
26265 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26266 return NON_PREFIXED_DS;
26268 else if (size < 8)
26269 return NON_PREFIXED_X;
26271 else if (TARGET_VSX && size >= 16
26272 && (VECTOR_MODE_P (mode)
26273 || VECTOR_ALIGNMENT_P (mode)
26274 || mode == TImode || mode == CTImode))
26275 return NON_PREFIXED_DQ;
26277 else
26278 return NON_PREFIXED_DEFAULT;
26281 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26282 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26283 through the GPR registers for memory operations. */
26284 else if (TARGET_POWERPC64 && size >= 8)
26285 return NON_PREFIXED_DS;
26287 return NON_PREFIXED_D;
26291 /* Whether a load instruction is a prefixed instruction. This is called from
26292 the prefixed attribute processing. */
26294 bool
26295 prefixed_load_p (rtx_insn *insn)
26297 /* Validate the insn to make sure it is a normal load insn. */
26298 extract_insn_cached (insn);
26299 if (recog_data.n_operands < 2)
26300 return false;
26302 rtx reg = recog_data.operand[0];
26303 rtx mem = recog_data.operand[1];
26305 if (!REG_P (reg) && !SUBREG_P (reg))
26306 return false;
26308 if (!MEM_P (mem))
26309 return false;
26311 /* Prefixed load instructions do not support update or indexed forms. */
26312 if (get_attr_indexed (insn) == INDEXED_YES
26313 || get_attr_update (insn) == UPDATE_YES)
26314 return false;
26316 /* LWA uses the DS format instead of the D format that LWZ uses. */
26317 enum non_prefixed_form non_prefixed;
26318 machine_mode reg_mode = GET_MODE (reg);
26319 machine_mode mem_mode = GET_MODE (mem);
26321 if (mem_mode == SImode && reg_mode == DImode
26322 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26323 non_prefixed = NON_PREFIXED_DS;
26325 else
26326 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26328 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26329 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26330 else
26331 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26334 /* Whether a store instruction is a prefixed instruction. This is called from
26335 the prefixed attribute processing. */
26337 bool
26338 prefixed_store_p (rtx_insn *insn)
26340 /* Validate the insn to make sure it is a normal store insn. */
26341 extract_insn_cached (insn);
26342 if (recog_data.n_operands < 2)
26343 return false;
26345 rtx mem = recog_data.operand[0];
26346 rtx reg = recog_data.operand[1];
26348 if (!REG_P (reg) && !SUBREG_P (reg))
26349 return false;
26351 if (!MEM_P (mem))
26352 return false;
26354 /* Prefixed store instructions do not support update or indexed forms. */
26355 if (get_attr_indexed (insn) == INDEXED_YES
26356 || get_attr_update (insn) == UPDATE_YES)
26357 return false;
26359 machine_mode mem_mode = GET_MODE (mem);
26360 rtx addr = XEXP (mem, 0);
26361 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26363 /* Need to make sure we aren't looking at a stfs which doesn't look
26364 like the other things reg_to_non_prefixed/address_is_prefixed
26365 looks for. */
26366 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26367 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26368 else
26369 return address_is_prefixed (addr, mem_mode, non_prefixed);
26372 /* Whether a load immediate or add instruction is a prefixed instruction. This
26373 is called from the prefixed attribute processing. */
26375 bool
26376 prefixed_paddi_p (rtx_insn *insn)
26378 rtx set = single_set (insn);
26379 if (!set)
26380 return false;
26382 rtx dest = SET_DEST (set);
26383 rtx src = SET_SRC (set);
26385 if (!REG_P (dest) && !SUBREG_P (dest))
26386 return false;
26388 /* Is this a load immediate that can't be done with a simple ADDI or
26389 ADDIS? */
26390 if (CONST_INT_P (src))
26391 return (satisfies_constraint_eI (src)
26392 && !satisfies_constraint_I (src)
26393 && !satisfies_constraint_L (src));
26395 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26396 ADDIS? */
26397 if (GET_CODE (src) == PLUS)
26399 rtx op1 = XEXP (src, 1);
26401 return (CONST_INT_P (op1)
26402 && satisfies_constraint_eI (op1)
26403 && !satisfies_constraint_I (op1)
26404 && !satisfies_constraint_L (op1));
26407 /* If not, is it a load of a PC-relative address? */
26408 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26409 return false;
26411 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26412 return false;
26414 enum insn_form iform = address_to_insn_form (src, Pmode,
26415 NON_PREFIXED_DEFAULT);
26417 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26420 /* Whether the next instruction needs a 'p' prefix issued before the
26421 instruction is printed out. */
26422 static bool prepend_p_to_next_insn;
26424 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26425 outputting the assembler code. On the PowerPC, we remember if the current
26426 insn is a prefixed insn where we need to emit a 'p' before the insn.
26428 In addition, if the insn is part of a PC-relative reference to an external
26429 label optimization, this is recorded also. */
26430 void
26431 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26433 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26434 == MAYBE_PREFIXED_YES
26435 && get_attr_prefixed (insn) == PREFIXED_YES);
26436 return;
26439 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26440 We use it to emit a 'p' for prefixed insns that is set in
26441 FINAL_PRESCAN_INSN. */
26442 void
26443 rs6000_asm_output_opcode (FILE *stream)
26445 if (prepend_p_to_next_insn)
26447 fprintf (stream, "p");
26449 /* Reset the flag in the case where there are separate insn lines in the
26450 sequence, so the 'p' is only emitted for the first line. This shows up
26451 when we are doing the PCREL_OPT optimization, in that the label created
26452 with %r<n> would have a leading 'p' printed. */
26453 prepend_p_to_next_insn = false;
26456 return;
26459 /* Emit the relocation to tie the next instruction to a previous instruction
26460 that loads up an external address. This is used to do the PCREL_OPT
26461 optimization. Note, the label is generated after the PLD of the got
26462 pc-relative address to allow for the assembler to insert NOPs before the PLD
26463 instruction. The operand is a constant integer that is the label
26464 number. */
26466 void
26467 output_pcrel_opt_reloc (rtx label_num)
26469 rtx operands[1] = { label_num };
26470 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26471 operands);
26474 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26475 should be adjusted to reflect any required changes. This macro is used when
26476 there is some systematic length adjustment required that would be difficult
26477 to express in the length attribute.
26479 In the PowerPC, we use this to adjust the length of an instruction if one or
26480 more prefixed instructions are generated, using the attribute
26481 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26482 hardware requires that a prefied instruciton does not cross a 64-byte
26483 boundary. This means the compiler has to assume the length of the first
26484 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26485 already set for the non-prefixed instruction, we just need to udpate for the
26486 difference. */
26489 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26491 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26493 rtx pattern = PATTERN (insn);
26494 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26495 && get_attr_prefixed (insn) == PREFIXED_YES)
26497 int num_prefixed = get_attr_max_prefixed_insns (insn);
26498 length += 4 * (num_prefixed + 1);
26502 return length;
26506 #ifdef HAVE_GAS_HIDDEN
26507 # define USE_HIDDEN_LINKONCE 1
26508 #else
26509 # define USE_HIDDEN_LINKONCE 0
26510 #endif
26512 /* Fills in the label name that should be used for a 476 link stack thunk. */
26514 void
26515 get_ppc476_thunk_name (char name[32])
26517 gcc_assert (TARGET_LINK_STACK);
26519 if (USE_HIDDEN_LINKONCE)
26520 sprintf (name, "__ppc476.get_thunk");
26521 else
26522 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26525 /* This function emits the simple thunk routine that is used to preserve
26526 the link stack on the 476 cpu. */
26528 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26529 static void
26530 rs6000_code_end (void)
26532 char name[32];
26533 tree decl;
26535 if (!TARGET_LINK_STACK)
26536 return;
26538 get_ppc476_thunk_name (name);
26540 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26541 build_function_type_list (void_type_node, NULL_TREE));
26542 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26543 NULL_TREE, void_type_node);
26544 TREE_PUBLIC (decl) = 1;
26545 TREE_STATIC (decl) = 1;
26547 #if RS6000_WEAK
26548 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26550 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26551 targetm.asm_out.unique_section (decl, 0);
26552 switch_to_section (get_named_section (decl, NULL, 0));
26553 DECL_WEAK (decl) = 1;
26554 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26555 targetm.asm_out.globalize_label (asm_out_file, name);
26556 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26557 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26559 else
26560 #endif
26562 switch_to_section (text_section);
26563 ASM_OUTPUT_LABEL (asm_out_file, name);
26566 DECL_INITIAL (decl) = make_node (BLOCK);
26567 current_function_decl = decl;
26568 allocate_struct_function (decl, false);
26569 init_function_start (decl);
26570 first_function_block_is_cold = false;
26571 /* Make sure unwind info is emitted for the thunk if needed. */
26572 final_start_function (emit_barrier (), asm_out_file, 1);
26574 fputs ("\tblr\n", asm_out_file);
26576 final_end_function ();
26577 init_insn_lengths ();
26578 free_after_compilation (cfun);
26579 set_cfun (NULL);
26580 current_function_decl = NULL;
26583 /* Add r30 to hard reg set if the prologue sets it up and it is not
26584 pic_offset_table_rtx. */
26586 static void
26587 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26589 if (!TARGET_SINGLE_PIC_BASE
26590 && TARGET_TOC
26591 && TARGET_MINIMAL_TOC
26592 && !constant_pool_empty_p ())
26593 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26594 if (cfun->machine->split_stack_argp_used)
26595 add_to_hard_reg_set (&set->set, Pmode, 12);
26597 /* Make sure the hard reg set doesn't include r2, which was possibly added
26598 via PIC_OFFSET_TABLE_REGNUM. */
26599 if (TARGET_TOC)
26600 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26604 /* Helper function for rs6000_split_logical to emit a logical instruction after
26605 spliting the operation to single GPR registers.
26607 DEST is the destination register.
26608 OP1 and OP2 are the input source registers.
26609 CODE is the base operation (AND, IOR, XOR, NOT).
26610 MODE is the machine mode.
26611 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26612 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26613 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26615 static void
26616 rs6000_split_logical_inner (rtx dest,
26617 rtx op1,
26618 rtx op2,
26619 enum rtx_code code,
26620 machine_mode mode,
26621 bool complement_final_p,
26622 bool complement_op1_p,
26623 bool complement_op2_p)
26625 rtx bool_rtx;
26627 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26628 if (op2 && CONST_INT_P (op2)
26629 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26630 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26632 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26633 HOST_WIDE_INT value = INTVAL (op2) & mask;
26635 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26636 if (code == AND)
26638 if (value == 0)
26640 emit_insn (gen_rtx_SET (dest, const0_rtx));
26641 return;
26644 else if (value == mask)
26646 if (!rtx_equal_p (dest, op1))
26647 emit_insn (gen_rtx_SET (dest, op1));
26648 return;
26652 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26653 into separate ORI/ORIS or XORI/XORIS instrucitons. */
26654 else if (code == IOR || code == XOR)
26656 if (value == 0)
26658 if (!rtx_equal_p (dest, op1))
26659 emit_insn (gen_rtx_SET (dest, op1));
26660 return;
26665 if (code == AND && mode == SImode
26666 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26668 emit_insn (gen_andsi3 (dest, op1, op2));
26669 return;
26672 if (complement_op1_p)
26673 op1 = gen_rtx_NOT (mode, op1);
26675 if (complement_op2_p)
26676 op2 = gen_rtx_NOT (mode, op2);
26678 /* For canonical RTL, if only one arm is inverted it is the first. */
26679 if (!complement_op1_p && complement_op2_p)
26680 std::swap (op1, op2);
26682 bool_rtx = ((code == NOT)
26683 ? gen_rtx_NOT (mode, op1)
26684 : gen_rtx_fmt_ee (code, mode, op1, op2));
26686 if (complement_final_p)
26687 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26689 emit_insn (gen_rtx_SET (dest, bool_rtx));
26692 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26693 operations are split immediately during RTL generation to allow for more
26694 optimizations of the AND/IOR/XOR.
26696 OPERANDS is an array containing the destination and two input operands.
26697 CODE is the base operation (AND, IOR, XOR, NOT).
26698 MODE is the machine mode.
26699 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26700 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26701 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26702 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26703 formation of the AND instructions. */
26705 static void
26706 rs6000_split_logical_di (rtx operands[3],
26707 enum rtx_code code,
26708 bool complement_final_p,
26709 bool complement_op1_p,
26710 bool complement_op2_p)
26712 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26713 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26714 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26715 enum hi_lo { hi = 0, lo = 1 };
26716 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26717 size_t i;
26719 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26720 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26721 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26722 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26724 if (code == NOT)
26725 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26726 else
26728 if (!CONST_INT_P (operands[2]))
26730 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26731 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26733 else
26735 HOST_WIDE_INT value = INTVAL (operands[2]);
26736 HOST_WIDE_INT value_hi_lo[2];
26738 gcc_assert (!complement_final_p);
26739 gcc_assert (!complement_op1_p);
26740 gcc_assert (!complement_op2_p);
26742 value_hi_lo[hi] = value >> 32;
26743 value_hi_lo[lo] = value & lower_32bits;
26745 for (i = 0; i < 2; i++)
26747 HOST_WIDE_INT sub_value = value_hi_lo[i];
26749 if (sub_value & sign_bit)
26750 sub_value |= upper_32bits;
26752 op2_hi_lo[i] = GEN_INT (sub_value);
26754 /* If this is an AND instruction, check to see if we need to load
26755 the value in a register. */
26756 if (code == AND && sub_value != -1 && sub_value != 0
26757 && !and_operand (op2_hi_lo[i], SImode))
26758 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26763 for (i = 0; i < 2; i++)
26765 /* Split large IOR/XOR operations. */
26766 if ((code == IOR || code == XOR)
26767 && CONST_INT_P (op2_hi_lo[i])
26768 && !complement_final_p
26769 && !complement_op1_p
26770 && !complement_op2_p
26771 && !logical_const_operand (op2_hi_lo[i], SImode))
26773 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26774 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26775 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26776 rtx tmp = gen_reg_rtx (SImode);
26778 /* Make sure the constant is sign extended. */
26779 if ((hi_16bits & sign_bit) != 0)
26780 hi_16bits |= upper_32bits;
26782 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26783 code, SImode, false, false, false);
26785 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26786 code, SImode, false, false, false);
26788 else
26789 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26790 code, SImode, complement_final_p,
26791 complement_op1_p, complement_op2_p);
26794 return;
26797 /* Split the insns that make up boolean operations operating on multiple GPR
26798 registers. The boolean MD patterns ensure that the inputs either are
26799 exactly the same as the output registers, or there is no overlap.
26801 OPERANDS is an array containing the destination and two input operands.
26802 CODE is the base operation (AND, IOR, XOR, NOT).
26803 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26804 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26805 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26807 void
26808 rs6000_split_logical (rtx operands[3],
26809 enum rtx_code code,
26810 bool complement_final_p,
26811 bool complement_op1_p,
26812 bool complement_op2_p)
26814 machine_mode mode = GET_MODE (operands[0]);
26815 machine_mode sub_mode;
26816 rtx op0, op1, op2;
26817 int sub_size, regno0, regno1, nregs, i;
26819 /* If this is DImode, use the specialized version that can run before
26820 register allocation. */
26821 if (mode == DImode && !TARGET_POWERPC64)
26823 rs6000_split_logical_di (operands, code, complement_final_p,
26824 complement_op1_p, complement_op2_p);
26825 return;
26828 op0 = operands[0];
26829 op1 = operands[1];
26830 op2 = (code == NOT) ? NULL_RTX : operands[2];
26831 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26832 sub_size = GET_MODE_SIZE (sub_mode);
26833 regno0 = REGNO (op0);
26834 regno1 = REGNO (op1);
26836 gcc_assert (reload_completed);
26837 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26838 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26840 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26841 gcc_assert (nregs > 1);
26843 if (op2 && REG_P (op2))
26844 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26846 for (i = 0; i < nregs; i++)
26848 int offset = i * sub_size;
26849 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26850 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26851 rtx sub_op2 = ((code == NOT)
26852 ? NULL_RTX
26853 : simplify_subreg (sub_mode, op2, mode, offset));
26855 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26856 complement_final_p, complement_op1_p,
26857 complement_op2_p);
26860 return;
26863 /* Emit instructions to move SRC to DST. Called by splitters for
26864 multi-register moves. It will emit at most one instruction for
26865 each register that is accessed; that is, it won't emit li/lis pairs
26866 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26867 register. */
26869 void
26870 rs6000_split_multireg_move (rtx dst, rtx src)
26872 /* The register number of the first register being moved. */
26873 int reg;
26874 /* The mode that is to be moved. */
26875 machine_mode mode;
26876 /* The mode that the move is being done in, and its size. */
26877 machine_mode reg_mode;
26878 int reg_mode_size;
26879 /* The number of registers that will be moved. */
26880 int nregs;
26882 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26883 mode = GET_MODE (dst);
26884 nregs = hard_regno_nregs (reg, mode);
26886 /* If we have a vector quad register for MMA, and this is a load or store,
26887 see if we can use vector paired load/stores. */
26888 if (mode == XOmode && TARGET_MMA
26889 && (MEM_P (dst) || MEM_P (src)))
26891 reg_mode = OOmode;
26892 nregs /= 2;
26894 /* If we have a vector pair/quad mode, split it into two/four separate
26895 vectors. */
26896 else if (mode == OOmode || mode == XOmode)
26897 reg_mode = V1TImode;
26898 else if (FP_REGNO_P (reg))
26899 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26900 (TARGET_HARD_FLOAT ? DFmode : SFmode);
26901 else if (ALTIVEC_REGNO_P (reg))
26902 reg_mode = V16QImode;
26903 else
26904 reg_mode = word_mode;
26905 reg_mode_size = GET_MODE_SIZE (reg_mode);
26907 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26909 /* TDmode residing in FP registers is special, since the ISA requires that
26910 the lower-numbered word of a register pair is always the most significant
26911 word, even in little-endian mode. This does not match the usual subreg
26912 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
26913 the appropriate constituent registers "by hand" in little-endian mode.
26915 Note we do not need to check for destructive overlap here since TDmode
26916 can only reside in even/odd register pairs. */
26917 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26919 rtx p_src, p_dst;
26920 int i;
26922 for (i = 0; i < nregs; i++)
26924 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26925 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26926 else
26927 p_src = simplify_gen_subreg (reg_mode, src, mode,
26928 i * reg_mode_size);
26930 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26931 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26932 else
26933 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26934 i * reg_mode_size);
26936 emit_insn (gen_rtx_SET (p_dst, p_src));
26939 return;
26942 /* The __vector_pair and __vector_quad modes are multi-register
26943 modes, so if we have to load or store the registers, we have to be
26944 careful to properly swap them if we're in little endian mode
26945 below. This means the last register gets the first memory
26946 location. We also need to be careful of using the right register
26947 numbers if we are splitting XO to OO. */
26948 if (mode == OOmode || mode == XOmode)
26950 nregs = hard_regno_nregs (reg, mode);
26951 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
26952 if (MEM_P (dst))
26954 unsigned offset = 0;
26955 unsigned size = GET_MODE_SIZE (reg_mode);
26957 /* If we are reading an accumulator register, we have to
26958 deprime it before we can access it. */
26959 if (TARGET_MMA
26960 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
26961 emit_insn (gen_mma_xxmfacc (src, src));
26963 for (int i = 0; i < nregs; i += reg_mode_nregs)
26965 unsigned subreg
26966 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
26967 rtx dst2 = adjust_address (dst, reg_mode, offset);
26968 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
26969 offset += size;
26970 emit_insn (gen_rtx_SET (dst2, src2));
26973 return;
26976 if (MEM_P (src))
26978 unsigned offset = 0;
26979 unsigned size = GET_MODE_SIZE (reg_mode);
26981 for (int i = 0; i < nregs; i += reg_mode_nregs)
26983 unsigned subreg
26984 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
26985 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
26986 rtx src2 = adjust_address (src, reg_mode, offset);
26987 offset += size;
26988 emit_insn (gen_rtx_SET (dst2, src2));
26991 /* If we are writing an accumulator register, we have to
26992 prime it after we've written it. */
26993 if (TARGET_MMA
26994 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
26995 emit_insn (gen_mma_xxmtacc (dst, dst));
26997 return;
27000 if (GET_CODE (src) == UNSPEC
27001 || GET_CODE (src) == UNSPEC_VOLATILE)
27003 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27004 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27005 gcc_assert (REG_P (dst));
27006 if (GET_MODE (src) == XOmode)
27007 gcc_assert (FP_REGNO_P (REGNO (dst)));
27008 if (GET_MODE (src) == OOmode)
27009 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27011 int nvecs = XVECLEN (src, 0);
27012 for (int i = 0; i < nvecs; i++)
27014 rtx op;
27015 int regno = reg + i;
27017 if (WORDS_BIG_ENDIAN)
27019 op = XVECEXP (src, 0, i);
27021 /* If we are loading an even VSX register and the memory location
27022 is adjacent to the next register's memory location (if any),
27023 then we can load them both with one LXVP instruction. */
27024 if ((regno & 1) == 0)
27026 rtx op2 = XVECEXP (src, 0, i + 1);
27027 if (adjacent_mem_locations (op, op2) == op)
27029 op = adjust_address (op, OOmode, 0);
27030 /* Skip the next register, since we're going to
27031 load it together with this register. */
27032 i++;
27036 else
27038 op = XVECEXP (src, 0, nvecs - i - 1);
27040 /* If we are loading an even VSX register and the memory location
27041 is adjacent to the next register's memory location (if any),
27042 then we can load them both with one LXVP instruction. */
27043 if ((regno & 1) == 0)
27045 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27046 if (adjacent_mem_locations (op2, op) == op2)
27048 op = adjust_address (op2, OOmode, 0);
27049 /* Skip the next register, since we're going to
27050 load it together with this register. */
27051 i++;
27056 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27057 emit_insn (gen_rtx_SET (dst_i, op));
27060 /* We are writing an accumulator register, so we have to
27061 prime it after we've written it. */
27062 if (GET_MODE (src) == XOmode)
27063 emit_insn (gen_mma_xxmtacc (dst, dst));
27065 return;
27068 /* Register -> register moves can use common code. */
27071 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27073 /* If we are reading an accumulator register, we have to
27074 deprime it before we can access it. */
27075 if (TARGET_MMA
27076 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27077 emit_insn (gen_mma_xxmfacc (src, src));
27079 /* Move register range backwards, if we might have destructive
27080 overlap. */
27081 int i;
27082 /* XO/OO are opaque so cannot use subregs. */
27083 if (mode == OOmode || mode == XOmode )
27085 for (i = nregs - 1; i >= 0; i--)
27087 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27088 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27089 emit_insn (gen_rtx_SET (dst_i, src_i));
27092 else
27094 for (i = nregs - 1; i >= 0; i--)
27095 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27096 i * reg_mode_size),
27097 simplify_gen_subreg (reg_mode, src, mode,
27098 i * reg_mode_size)));
27101 /* If we are writing an accumulator register, we have to
27102 prime it after we've written it. */
27103 if (TARGET_MMA
27104 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27105 emit_insn (gen_mma_xxmtacc (dst, dst));
27107 else
27109 int i;
27110 int j = -1;
27111 bool used_update = false;
27112 rtx restore_basereg = NULL_RTX;
27114 if (MEM_P (src) && INT_REGNO_P (reg))
27116 rtx breg;
27118 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27119 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27121 rtx delta_rtx;
27122 breg = XEXP (XEXP (src, 0), 0);
27123 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27124 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27125 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27126 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27127 src = replace_equiv_address (src, breg);
27129 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27131 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27133 rtx basereg = XEXP (XEXP (src, 0), 0);
27134 if (TARGET_UPDATE)
27136 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27137 emit_insn (gen_rtx_SET (ndst,
27138 gen_rtx_MEM (reg_mode,
27139 XEXP (src, 0))));
27140 used_update = true;
27142 else
27143 emit_insn (gen_rtx_SET (basereg,
27144 XEXP (XEXP (src, 0), 1)));
27145 src = replace_equiv_address (src, basereg);
27147 else
27149 rtx basereg = gen_rtx_REG (Pmode, reg);
27150 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27151 src = replace_equiv_address (src, basereg);
27155 breg = XEXP (src, 0);
27156 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27157 breg = XEXP (breg, 0);
27159 /* If the base register we are using to address memory is
27160 also a destination reg, then change that register last. */
27161 if (REG_P (breg)
27162 && REGNO (breg) >= REGNO (dst)
27163 && REGNO (breg) < REGNO (dst) + nregs)
27164 j = REGNO (breg) - REGNO (dst);
27166 else if (MEM_P (dst) && INT_REGNO_P (reg))
27168 rtx breg;
27170 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27171 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27173 rtx delta_rtx;
27174 breg = XEXP (XEXP (dst, 0), 0);
27175 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27176 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27177 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27179 /* We have to update the breg before doing the store.
27180 Use store with update, if available. */
27182 if (TARGET_UPDATE)
27184 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27185 emit_insn (TARGET_32BIT
27186 ? (TARGET_POWERPC64
27187 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27188 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27189 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27190 used_update = true;
27192 else
27193 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27194 dst = replace_equiv_address (dst, breg);
27196 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27197 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27199 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27201 rtx basereg = XEXP (XEXP (dst, 0), 0);
27202 if (TARGET_UPDATE)
27204 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27205 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27206 XEXP (dst, 0)),
27207 nsrc));
27208 used_update = true;
27210 else
27211 emit_insn (gen_rtx_SET (basereg,
27212 XEXP (XEXP (dst, 0), 1)));
27213 dst = replace_equiv_address (dst, basereg);
27215 else
27217 rtx basereg = XEXP (XEXP (dst, 0), 0);
27218 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27219 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27220 && REG_P (basereg)
27221 && REG_P (offsetreg)
27222 && REGNO (basereg) != REGNO (offsetreg));
27223 if (REGNO (basereg) == 0)
27225 rtx tmp = offsetreg;
27226 offsetreg = basereg;
27227 basereg = tmp;
27229 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27230 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27231 dst = replace_equiv_address (dst, basereg);
27234 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27235 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27238 /* If we are reading an accumulator register, we have to
27239 deprime it before we can access it. */
27240 if (TARGET_MMA && REG_P (src)
27241 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27242 emit_insn (gen_mma_xxmfacc (src, src));
27244 for (i = 0; i < nregs; i++)
27246 /* Calculate index to next subword. */
27247 ++j;
27248 if (j == nregs)
27249 j = 0;
27251 /* If compiler already emitted move of first word by
27252 store with update, no need to do anything. */
27253 if (j == 0 && used_update)
27254 continue;
27256 /* XO/OO are opaque so cannot use subregs. */
27257 if (mode == OOmode || mode == XOmode )
27259 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27260 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27261 emit_insn (gen_rtx_SET (dst_i, src_i));
27263 else
27264 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27265 j * reg_mode_size),
27266 simplify_gen_subreg (reg_mode, src, mode,
27267 j * reg_mode_size)));
27270 /* If we are writing an accumulator register, we have to
27271 prime it after we've written it. */
27272 if (TARGET_MMA && REG_P (dst)
27273 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27274 emit_insn (gen_mma_xxmtacc (dst, dst));
27276 if (restore_basereg != NULL_RTX)
27277 emit_insn (restore_basereg);
27281 /* Return true if the peephole2 can combine a load involving a combination of
27282 an addis instruction and a load with an offset that can be fused together on
27283 a power8. */
27285 bool
27286 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27287 rtx addis_value, /* addis value. */
27288 rtx target, /* target register that is loaded. */
27289 rtx mem) /* bottom part of the memory addr. */
27291 rtx addr;
27292 rtx base_reg;
27294 /* Validate arguments. */
27295 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27296 return false;
27298 if (!base_reg_operand (target, GET_MODE (target)))
27299 return false;
27301 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27302 return false;
27304 /* Allow sign/zero extension. */
27305 if (GET_CODE (mem) == ZERO_EXTEND
27306 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27307 mem = XEXP (mem, 0);
27309 if (!MEM_P (mem))
27310 return false;
27312 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27313 return false;
27315 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27316 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27317 return false;
27319 /* Validate that the register used to load the high value is either the
27320 register being loaded, or we can safely replace its use.
27322 This function is only called from the peephole2 pass and we assume that
27323 there are 2 instructions in the peephole (addis and load), so we want to
27324 check if the target register was not used in the memory address and the
27325 register to hold the addis result is dead after the peephole. */
27326 if (REGNO (addis_reg) != REGNO (target))
27328 if (reg_mentioned_p (target, mem))
27329 return false;
27331 if (!peep2_reg_dead_p (2, addis_reg))
27332 return false;
27334 /* If the target register being loaded is the stack pointer, we must
27335 avoid loading any other value into it, even temporarily. */
27336 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27337 return false;
27340 base_reg = XEXP (addr, 0);
27341 return REGNO (addis_reg) == REGNO (base_reg);
27344 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27345 sequence. We adjust the addis register to use the target register. If the
27346 load sign extends, we adjust the code to do the zero extending load, and an
27347 explicit sign extension later since the fusion only covers zero extending
27348 loads.
27350 The operands are:
27351 operands[0] register set with addis (to be replaced with target)
27352 operands[1] value set via addis
27353 operands[2] target register being loaded
27354 operands[3] D-form memory reference using operands[0]. */
27356 void
27357 expand_fusion_gpr_load (rtx *operands)
27359 rtx addis_value = operands[1];
27360 rtx target = operands[2];
27361 rtx orig_mem = operands[3];
27362 rtx new_addr, new_mem, orig_addr, offset;
27363 enum rtx_code plus_or_lo_sum;
27364 machine_mode target_mode = GET_MODE (target);
27365 machine_mode extend_mode = target_mode;
27366 machine_mode ptr_mode = Pmode;
27367 enum rtx_code extend = UNKNOWN;
27369 if (GET_CODE (orig_mem) == ZERO_EXTEND
27370 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27372 extend = GET_CODE (orig_mem);
27373 orig_mem = XEXP (orig_mem, 0);
27374 target_mode = GET_MODE (orig_mem);
27377 gcc_assert (MEM_P (orig_mem));
27379 orig_addr = XEXP (orig_mem, 0);
27380 plus_or_lo_sum = GET_CODE (orig_addr);
27381 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27383 offset = XEXP (orig_addr, 1);
27384 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27385 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27387 if (extend != UNKNOWN)
27388 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27390 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27391 UNSPEC_FUSION_GPR);
27392 emit_insn (gen_rtx_SET (target, new_mem));
27394 if (extend == SIGN_EXTEND)
27396 int sub_off = ((BYTES_BIG_ENDIAN)
27397 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27398 : 0);
27399 rtx sign_reg
27400 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27402 emit_insn (gen_rtx_SET (target,
27403 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27406 return;
27409 /* Emit the addis instruction that will be part of a fused instruction
27410 sequence. */
27412 void
27413 emit_fusion_addis (rtx target, rtx addis_value)
27415 rtx fuse_ops[10];
27416 const char *addis_str = NULL;
27418 /* Emit the addis instruction. */
27419 fuse_ops[0] = target;
27420 if (satisfies_constraint_L (addis_value))
27422 fuse_ops[1] = addis_value;
27423 addis_str = "lis %0,%v1";
27426 else if (GET_CODE (addis_value) == PLUS)
27428 rtx op0 = XEXP (addis_value, 0);
27429 rtx op1 = XEXP (addis_value, 1);
27431 if (REG_P (op0) && CONST_INT_P (op1)
27432 && satisfies_constraint_L (op1))
27434 fuse_ops[1] = op0;
27435 fuse_ops[2] = op1;
27436 addis_str = "addis %0,%1,%v2";
27440 else if (GET_CODE (addis_value) == HIGH)
27442 rtx value = XEXP (addis_value, 0);
27443 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27445 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27446 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27447 if (TARGET_ELF)
27448 addis_str = "addis %0,%2,%1@toc@ha";
27450 else if (TARGET_XCOFF)
27451 addis_str = "addis %0,%1@u(%2)";
27453 else
27454 gcc_unreachable ();
27457 else if (GET_CODE (value) == PLUS)
27459 rtx op0 = XEXP (value, 0);
27460 rtx op1 = XEXP (value, 1);
27462 if (GET_CODE (op0) == UNSPEC
27463 && XINT (op0, 1) == UNSPEC_TOCREL
27464 && CONST_INT_P (op1))
27466 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27467 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27468 fuse_ops[3] = op1;
27469 if (TARGET_ELF)
27470 addis_str = "addis %0,%2,%1+%3@toc@ha";
27472 else if (TARGET_XCOFF)
27473 addis_str = "addis %0,%1+%3@u(%2)";
27475 else
27476 gcc_unreachable ();
27480 else if (satisfies_constraint_L (value))
27482 fuse_ops[1] = value;
27483 addis_str = "lis %0,%v1";
27486 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27488 fuse_ops[1] = value;
27489 addis_str = "lis %0,%1@ha";
27493 if (!addis_str)
27494 fatal_insn ("Could not generate addis value for fusion", addis_value);
27496 output_asm_insn (addis_str, fuse_ops);
27499 /* Emit a D-form load or store instruction that is the second instruction
27500 of a fusion sequence. */
27502 static void
27503 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27505 rtx fuse_ops[10];
27506 char insn_template[80];
27508 fuse_ops[0] = load_reg;
27509 fuse_ops[1] = addis_reg;
27511 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27513 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27514 fuse_ops[2] = offset;
27515 output_asm_insn (insn_template, fuse_ops);
27518 else if (GET_CODE (offset) == UNSPEC
27519 && XINT (offset, 1) == UNSPEC_TOCREL)
27521 if (TARGET_ELF)
27522 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27524 else if (TARGET_XCOFF)
27525 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27527 else
27528 gcc_unreachable ();
27530 fuse_ops[2] = XVECEXP (offset, 0, 0);
27531 output_asm_insn (insn_template, fuse_ops);
27534 else if (GET_CODE (offset) == PLUS
27535 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27536 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27537 && CONST_INT_P (XEXP (offset, 1)))
27539 rtx tocrel_unspec = XEXP (offset, 0);
27540 if (TARGET_ELF)
27541 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27543 else if (TARGET_XCOFF)
27544 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27546 else
27547 gcc_unreachable ();
27549 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27550 fuse_ops[3] = XEXP (offset, 1);
27551 output_asm_insn (insn_template, fuse_ops);
27554 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27556 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27558 fuse_ops[2] = offset;
27559 output_asm_insn (insn_template, fuse_ops);
27562 else
27563 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27565 return;
27568 /* Given an address, convert it into the addis and load offset parts. Addresses
27569 created during the peephole2 process look like:
27570 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27571 (unspec [(...)] UNSPEC_TOCREL)) */
27573 static void
27574 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27576 rtx hi, lo;
27578 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27580 hi = XEXP (addr, 0);
27581 lo = XEXP (addr, 1);
27583 else
27584 gcc_unreachable ();
27586 *p_hi = hi;
27587 *p_lo = lo;
27590 /* Return a string to fuse an addis instruction with a gpr load to the same
27591 register that we loaded up the addis instruction. The address that is used
27592 is the logical address that was formed during peephole2:
27593 (lo_sum (high) (low-part))
27595 The code is complicated, so we call output_asm_insn directly, and just
27596 return "". */
27598 const char *
27599 emit_fusion_gpr_load (rtx target, rtx mem)
27601 rtx addis_value;
27602 rtx addr;
27603 rtx load_offset;
27604 const char *load_str = NULL;
27605 machine_mode mode;
27607 if (GET_CODE (mem) == ZERO_EXTEND)
27608 mem = XEXP (mem, 0);
27610 gcc_assert (REG_P (target) && MEM_P (mem));
27612 addr = XEXP (mem, 0);
27613 fusion_split_address (addr, &addis_value, &load_offset);
27615 /* Now emit the load instruction to the same register. */
27616 mode = GET_MODE (mem);
27617 switch (mode)
27619 case E_QImode:
27620 load_str = "lbz";
27621 break;
27623 case E_HImode:
27624 load_str = "lhz";
27625 break;
27627 case E_SImode:
27628 case E_SFmode:
27629 load_str = "lwz";
27630 break;
27632 case E_DImode:
27633 case E_DFmode:
27634 gcc_assert (TARGET_POWERPC64);
27635 load_str = "ld";
27636 break;
27638 default:
27639 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27642 /* Emit the addis instruction. */
27643 emit_fusion_addis (target, addis_value);
27645 /* Emit the D-form load instruction. */
27646 emit_fusion_load (target, target, load_offset, load_str);
27648 return "";
27651 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
27652 ignores it then. */
27653 static GTY(()) tree atomic_hold_decl;
27654 static GTY(()) tree atomic_clear_decl;
27655 static GTY(()) tree atomic_update_decl;
27657 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27658 static void
27659 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27661 if (!TARGET_HARD_FLOAT)
27663 #ifdef RS6000_GLIBC_ATOMIC_FENV
27664 if (atomic_hold_decl == NULL_TREE)
27666 atomic_hold_decl
27667 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27668 get_identifier ("__atomic_feholdexcept"),
27669 build_function_type_list (void_type_node,
27670 double_ptr_type_node,
27671 NULL_TREE));
27672 TREE_PUBLIC (atomic_hold_decl) = 1;
27673 DECL_EXTERNAL (atomic_hold_decl) = 1;
27676 if (atomic_clear_decl == NULL_TREE)
27678 atomic_clear_decl
27679 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27680 get_identifier ("__atomic_feclearexcept"),
27681 build_function_type_list (void_type_node,
27682 NULL_TREE));
27683 TREE_PUBLIC (atomic_clear_decl) = 1;
27684 DECL_EXTERNAL (atomic_clear_decl) = 1;
27687 tree const_double = build_qualified_type (double_type_node,
27688 TYPE_QUAL_CONST);
27689 tree const_double_ptr = build_pointer_type (const_double);
27690 if (atomic_update_decl == NULL_TREE)
27692 atomic_update_decl
27693 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27694 get_identifier ("__atomic_feupdateenv"),
27695 build_function_type_list (void_type_node,
27696 const_double_ptr,
27697 NULL_TREE));
27698 TREE_PUBLIC (atomic_update_decl) = 1;
27699 DECL_EXTERNAL (atomic_update_decl) = 1;
27702 tree fenv_var = create_tmp_var_raw (double_type_node);
27703 TREE_ADDRESSABLE (fenv_var) = 1;
27704 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27705 build4 (TARGET_EXPR, double_type_node, fenv_var,
27706 void_node, NULL_TREE, NULL_TREE));
27708 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27709 *clear = build_call_expr (atomic_clear_decl, 0);
27710 *update = build_call_expr (atomic_update_decl, 1,
27711 fold_convert (const_double_ptr, fenv_addr));
27712 #endif
27713 return;
27716 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27717 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27718 tree call_mffs = build_call_expr (mffs, 0);
27720 /* Generates the equivalent of feholdexcept (&fenv_var)
27722 *fenv_var = __builtin_mffs ();
27723 double fenv_hold;
27724 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27725 __builtin_mtfsf (0xff, fenv_hold); */
27727 /* Mask to clear everything except for the rounding modes and non-IEEE
27728 arithmetic flag. */
27729 const unsigned HOST_WIDE_INT hold_exception_mask
27730 = HOST_WIDE_INT_C (0xffffffff00000007);
27732 tree fenv_var = create_tmp_var_raw (double_type_node);
27734 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27735 NULL_TREE, NULL_TREE);
27737 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27738 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27739 build_int_cst (uint64_type_node,
27740 hold_exception_mask));
27742 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27743 fenv_llu_and);
27745 tree hold_mtfsf = build_call_expr (mtfsf, 2,
27746 build_int_cst (unsigned_type_node, 0xff),
27747 fenv_hold_mtfsf);
27749 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27751 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
27753 double fenv_clear = __builtin_mffs ();
27754 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
27755 __builtin_mtfsf (0xff, fenv_clear); */
27757 /* Mask to clear everything except for the rounding modes and non-IEEE
27758 arithmetic flag. */
27759 const unsigned HOST_WIDE_INT clear_exception_mask
27760 = HOST_WIDE_INT_C (0xffffffff00000000);
27762 tree fenv_clear = create_tmp_var_raw (double_type_node);
27764 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
27765 call_mffs, NULL_TREE, NULL_TREE);
27767 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
27768 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
27769 fenv_clean_llu,
27770 build_int_cst (uint64_type_node,
27771 clear_exception_mask));
27773 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27774 fenv_clear_llu_and);
27776 tree clear_mtfsf = build_call_expr (mtfsf, 2,
27777 build_int_cst (unsigned_type_node, 0xff),
27778 fenv_clear_mtfsf);
27780 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
27782 /* Generates the equivalent of feupdateenv (&fenv_var)
27784 double old_fenv = __builtin_mffs ();
27785 double fenv_update;
27786 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
27787 (*(uint64_t*)fenv_var 0x1ff80fff);
27788 __builtin_mtfsf (0xff, fenv_update); */
27790 const unsigned HOST_WIDE_INT update_exception_mask
27791 = HOST_WIDE_INT_C (0xffffffff1fffff00);
27792 const unsigned HOST_WIDE_INT new_exception_mask
27793 = HOST_WIDE_INT_C (0x1ff80fff);
27795 tree old_fenv = create_tmp_var_raw (double_type_node);
27796 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
27797 call_mffs, NULL_TREE, NULL_TREE);
27799 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
27800 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
27801 build_int_cst (uint64_type_node,
27802 update_exception_mask));
27804 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27805 build_int_cst (uint64_type_node,
27806 new_exception_mask));
27808 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
27809 old_llu_and, new_llu_and);
27811 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27812 new_llu_mask);
27814 tree update_mtfsf = build_call_expr (mtfsf, 2,
27815 build_int_cst (unsigned_type_node, 0xff),
27816 fenv_update_mtfsf);
27818 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
27821 void
27822 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
27824 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27826 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27827 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27829 /* The destination of the vmrgew instruction layout is:
27830 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
27831 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27832 vmrgew instruction will be correct. */
27833 if (BYTES_BIG_ENDIAN)
27835 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
27836 GEN_INT (0)));
27837 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
27838 GEN_INT (3)));
27840 else
27842 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
27843 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
27846 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27847 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27849 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
27850 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
27852 if (BYTES_BIG_ENDIAN)
27853 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27854 else
27855 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27858 void
27859 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
27861 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27863 rtx_tmp0 = gen_reg_rtx (V2DImode);
27864 rtx_tmp1 = gen_reg_rtx (V2DImode);
27866 /* The destination of the vmrgew instruction layout is:
27867 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
27868 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27869 vmrgew instruction will be correct. */
27870 if (BYTES_BIG_ENDIAN)
27872 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
27873 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
27875 else
27877 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
27878 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
27881 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27882 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27884 if (signed_convert)
27886 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
27887 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
27889 else
27891 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
27892 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
27895 if (BYTES_BIG_ENDIAN)
27896 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27897 else
27898 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27901 void
27902 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
27903 rtx src2)
27905 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27907 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27908 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27910 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
27911 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
27913 rtx_tmp2 = gen_reg_rtx (V4SImode);
27914 rtx_tmp3 = gen_reg_rtx (V4SImode);
27916 if (signed_convert)
27918 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
27919 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
27921 else
27923 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
27924 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
27927 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
27930 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27932 static bool
27933 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
27934 optimization_type opt_type)
27936 switch (op)
27938 case rsqrt_optab:
27939 return (opt_type == OPTIMIZE_FOR_SPEED
27940 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
27942 default:
27943 return true;
27947 /* Implement TARGET_CONSTANT_ALIGNMENT. */
27949 static HOST_WIDE_INT
27950 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
27952 if (TREE_CODE (exp) == STRING_CST
27953 && (STRICT_ALIGNMENT || !optimize_size))
27954 return MAX (align, BITS_PER_WORD);
27955 return align;
27958 /* Implement TARGET_STARTING_FRAME_OFFSET. */
27960 static HOST_WIDE_INT
27961 rs6000_starting_frame_offset (void)
27963 if (FRAME_GROWS_DOWNWARD)
27964 return 0;
27965 return RS6000_STARTING_FRAME_OFFSET;
27969 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
27970 function names from <foo>l to <foo>f128 if the default long double type is
27971 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
27972 include file switches the names on systems that support long double as IEEE
27973 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
27974 In the future, glibc will export names like __ieee128_sinf128 and we can
27975 switch to using those instead of using sinf128, which pollutes the user's
27976 namespace.
27978 This will switch the names for Fortran math functions as well (which doesn't
27979 use math.h). However, Fortran needs other changes to the compiler and
27980 library before you can switch the real*16 type at compile time.
27982 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
27983 only do this transformation if the __float128 type is enabled. This
27984 prevents us from doing the transformation on older 32-bit ports that might
27985 have enabled using IEEE 128-bit floating point as the default long double
27986 type. */
27988 static tree
27989 rs6000_mangle_decl_assembler_name (tree decl, tree id)
27991 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
27992 && TREE_CODE (decl) == FUNCTION_DECL
27993 && DECL_IS_UNDECLARED_BUILTIN (decl)
27994 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
27996 size_t len = IDENTIFIER_LENGTH (id);
27997 const char *name = IDENTIFIER_POINTER (id);
27998 char *newname = NULL;
28000 /* See if it is one of the built-in functions with an unusual name. */
28001 switch (DECL_FUNCTION_CODE (decl))
28003 case BUILT_IN_DREML:
28004 newname = xstrdup ("__remainderieee128");
28005 break;
28007 case BUILT_IN_GAMMAL:
28008 newname = xstrdup ("__lgammaieee128");
28009 break;
28011 case BUILT_IN_GAMMAL_R:
28012 case BUILT_IN_LGAMMAL_R:
28013 newname = xstrdup ("__lgammaieee128_r");
28014 break;
28016 case BUILT_IN_NEXTTOWARD:
28017 newname = xstrdup ("__nexttoward_to_ieee128");
28018 break;
28020 case BUILT_IN_NEXTTOWARDF:
28021 newname = xstrdup ("__nexttowardf_to_ieee128");
28022 break;
28024 case BUILT_IN_NEXTTOWARDL:
28025 newname = xstrdup ("__nexttowardieee128");
28026 break;
28028 case BUILT_IN_POW10L:
28029 newname = xstrdup ("__exp10ieee128");
28030 break;
28032 case BUILT_IN_SCALBL:
28033 newname = xstrdup ("__scalbieee128");
28034 break;
28036 case BUILT_IN_SIGNIFICANDL:
28037 newname = xstrdup ("__significandieee128");
28038 break;
28040 case BUILT_IN_SINCOSL:
28041 newname = xstrdup ("__sincosieee128");
28042 break;
28044 default:
28045 break;
28048 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28049 if (!newname)
28051 size_t printf_len = strlen ("printf");
28052 size_t scanf_len = strlen ("scanf");
28053 size_t printf_chk_len = strlen ("printf_chk");
28055 if (len >= printf_len
28056 && strcmp (name + len - printf_len, "printf") == 0)
28057 newname = xasprintf ("__%sieee128", name);
28059 else if (len >= scanf_len
28060 && strcmp (name + len - scanf_len, "scanf") == 0)
28061 newname = xasprintf ("__isoc99_%sieee128", name);
28063 else if (len >= printf_chk_len
28064 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28065 newname = xasprintf ("%sieee128", name);
28067 else if (name[len - 1] == 'l')
28069 bool uses_ieee128_p = false;
28070 tree type = TREE_TYPE (decl);
28071 machine_mode ret_mode = TYPE_MODE (type);
28073 /* See if the function returns a IEEE 128-bit floating point type or
28074 complex type. */
28075 if (ret_mode == TFmode || ret_mode == TCmode)
28076 uses_ieee128_p = true;
28077 else
28079 function_args_iterator args_iter;
28080 tree arg;
28082 /* See if the function passes a IEEE 128-bit floating point type
28083 or complex type. */
28084 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28086 machine_mode arg_mode = TYPE_MODE (arg);
28087 if (arg_mode == TFmode || arg_mode == TCmode)
28089 uses_ieee128_p = true;
28090 break;
28095 /* If we passed or returned an IEEE 128-bit floating point type,
28096 change the name. Use __<name>ieee128, instead of <name>l. */
28097 if (uses_ieee128_p)
28098 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28102 if (newname)
28104 if (TARGET_DEBUG_BUILTIN)
28105 fprintf (stderr, "Map %s => %s\n", name, newname);
28107 id = get_identifier (newname);
28108 free (newname);
28112 return id;
28115 /* Predict whether the given loop in gimple will be transformed in the RTL
28116 doloop_optimize pass. */
28118 static bool
28119 rs6000_predict_doloop_p (struct loop *loop)
28121 gcc_assert (loop);
28123 /* On rs6000, targetm.can_use_doloop_p is actually
28124 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28125 if (loop->inner != NULL)
28127 if (dump_file && (dump_flags & TDF_DETAILS))
28128 fprintf (dump_file, "Predict doloop failure due to"
28129 " loop nesting.\n");
28130 return false;
28133 return true;
28136 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28138 static machine_mode
28139 rs6000_preferred_doloop_mode (machine_mode)
28141 return word_mode;
28144 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28146 static bool
28147 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28149 gcc_assert (MEM_P (mem));
28151 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28152 type addresses, so don't allow MEMs with those address types to be
28153 substituted as an equivalent expression. See PR93974 for details. */
28154 if (GET_CODE (XEXP (mem, 0)) == AND)
28155 return true;
28157 return false;
28160 /* Implement TARGET_INVALID_CONVERSION. */
28162 static const char *
28163 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28165 /* Make sure we're working with the canonical types. */
28166 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28167 fromtype = TYPE_CANONICAL (fromtype);
28168 if (TYPE_CANONICAL (totype) != NULL_TREE)
28169 totype = TYPE_CANONICAL (totype);
28171 machine_mode frommode = TYPE_MODE (fromtype);
28172 machine_mode tomode = TYPE_MODE (totype);
28174 if (frommode != tomode)
28176 /* Do not allow conversions to/from XOmode and OOmode types. */
28177 if (frommode == XOmode)
28178 return N_("invalid conversion from type %<__vector_quad%>");
28179 if (tomode == XOmode)
28180 return N_("invalid conversion to type %<__vector_quad%>");
28181 if (frommode == OOmode)
28182 return N_("invalid conversion from type %<__vector_pair%>");
28183 if (tomode == OOmode)
28184 return N_("invalid conversion to type %<__vector_pair%>");
28187 /* Conversion allowed. */
28188 return NULL;
28191 /* Convert a SFmode constant to the integer bit pattern. */
28193 long
28194 rs6000_const_f32_to_i32 (rtx operand)
28196 long value;
28197 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28199 gcc_assert (GET_MODE (operand) == SFmode);
28200 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28201 return value;
28204 void
28205 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28207 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28208 inform (input_location,
28209 "the result for the xxspltidp instruction "
28210 "is undefined for subnormal input values");
28211 emit_insn( gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28214 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28216 static bool
28217 rs6000_gen_pic_addr_diff_vec (void)
28219 return rs6000_relative_jumptables;
28222 void
28223 rs6000_output_addr_vec_elt (FILE *file, int value)
28225 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28226 char buf[100];
28228 fprintf (file, "%s", directive);
28229 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28230 assemble_name (file, buf);
28231 fprintf (file, "\n");
28235 /* Copy an integer constant to the vector constant structure. */
28237 static void
28238 constant_int_to_128bit_vector (rtx op,
28239 machine_mode mode,
28240 size_t byte_num,
28241 vec_const_128bit_type *info)
28243 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28244 unsigned bitsize = GET_MODE_BITSIZE (mode);
28246 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28247 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28250 /* Copy a floating point constant to the vector constant structure. */
28252 static void
28253 constant_fp_to_128bit_vector (rtx op,
28254 machine_mode mode,
28255 size_t byte_num,
28256 vec_const_128bit_type *info)
28258 unsigned bitsize = GET_MODE_BITSIZE (mode);
28259 unsigned num_words = bitsize / 32;
28260 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28261 long real_words[VECTOR_128BIT_WORDS];
28263 /* Make sure we don't overflow the real_words array and that it is
28264 filled completely. */
28265 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28267 real_to_target (real_words, rtype, mode);
28269 /* Iterate over each 32-bit word in the floating point constant. The
28270 real_to_target function puts out words in target endian fashion. We need
28271 to arrange the order so that the bytes are written in big endian order. */
28272 for (unsigned num = 0; num < num_words; num++)
28274 unsigned endian_num = (BYTES_BIG_ENDIAN
28275 ? num
28276 : num_words - 1 - num);
28278 unsigned uvalue = real_words[endian_num];
28279 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28280 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28283 /* Mark that this constant involves floating point. */
28284 info->fp_constant_p = true;
28287 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28288 structure INFO.
28290 Break out the constant out to bytes, half words, words, and double words.
28291 Return true if we have successfully converted the constant.
28293 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28294 constants. Integer and floating point scalar constants are splatted to fill
28295 out the vector. */
28297 bool
28298 vec_const_128bit_to_bytes (rtx op,
28299 machine_mode mode,
28300 vec_const_128bit_type *info)
28302 /* Initialize the constant structure. */
28303 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28305 /* Assume CONST_INTs are DImode. */
28306 if (mode == VOIDmode)
28307 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28309 if (mode == VOIDmode)
28310 return false;
28312 unsigned size = GET_MODE_SIZE (mode);
28313 bool splat_p = false;
28315 if (size > VECTOR_128BIT_BYTES)
28316 return false;
28318 /* Set up the bits. */
28319 switch (GET_CODE (op))
28321 /* Integer constants, default to double word. */
28322 case CONST_INT:
28324 constant_int_to_128bit_vector (op, mode, 0, info);
28325 splat_p = true;
28326 break;
28329 /* Floating point constants. */
28330 case CONST_DOUBLE:
28332 /* Fail if the floating point constant is the wrong mode. */
28333 if (GET_MODE (op) != mode)
28334 return false;
28336 /* SFmode stored as scalars are stored in DFmode format. */
28337 if (mode == SFmode)
28339 mode = DFmode;
28340 size = GET_MODE_SIZE (DFmode);
28343 constant_fp_to_128bit_vector (op, mode, 0, info);
28344 splat_p = true;
28345 break;
28348 /* Vector constants, iterate over each element. On little endian
28349 systems, we have to reverse the element numbers. */
28350 case CONST_VECTOR:
28352 /* Fail if the vector constant is the wrong mode or size. */
28353 if (GET_MODE (op) != mode
28354 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28355 return false;
28357 machine_mode ele_mode = GET_MODE_INNER (mode);
28358 size_t ele_size = GET_MODE_SIZE (ele_mode);
28359 size_t nunits = GET_MODE_NUNITS (mode);
28361 for (size_t num = 0; num < nunits; num++)
28363 rtx ele = CONST_VECTOR_ELT (op, num);
28364 size_t byte_num = (BYTES_BIG_ENDIAN
28365 ? num
28366 : nunits - 1 - num) * ele_size;
28368 if (CONST_INT_P (ele))
28369 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28370 else if (CONST_DOUBLE_P (ele))
28371 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28372 else
28373 return false;
28376 break;
28379 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28380 Since we are duplicating the element, we don't have to worry about
28381 endian issues. */
28382 case VEC_DUPLICATE:
28384 /* Fail if the vector duplicate is the wrong mode or size. */
28385 if (GET_MODE (op) != mode
28386 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28387 return false;
28389 machine_mode ele_mode = GET_MODE_INNER (mode);
28390 size_t ele_size = GET_MODE_SIZE (ele_mode);
28391 rtx ele = XEXP (op, 0);
28392 size_t nunits = GET_MODE_NUNITS (mode);
28394 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28395 return false;
28397 for (size_t num = 0; num < nunits; num++)
28399 size_t byte_num = num * ele_size;
28401 if (CONST_INT_P (ele))
28402 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28403 else
28404 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28407 break;
28410 /* Any thing else, just return failure. */
28411 default:
28412 return false;
28415 /* Splat the constant to fill 128 bits if desired. */
28416 if (splat_p && size < VECTOR_128BIT_BYTES)
28418 if ((VECTOR_128BIT_BYTES % size) != 0)
28419 return false;
28421 for (size_t offset = size;
28422 offset < VECTOR_128BIT_BYTES;
28423 offset += size)
28424 memcpy ((void *) &info->bytes[offset],
28425 (void *) &info->bytes[0],
28426 size);
28429 /* Remember original size. */
28430 info->original_size = size;
28432 /* Determine if the bytes are all the same. */
28433 unsigned char first_byte = info->bytes[0];
28434 info->all_bytes_same = true;
28435 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
28436 if (first_byte != info->bytes[i])
28438 info->all_bytes_same = false;
28439 break;
28442 /* Pack half words together & determine if all of the half words are the
28443 same. */
28444 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
28445 info->half_words[i] = ((info->bytes[i * 2] << 8)
28446 | info->bytes[(i * 2) + 1]);
28448 unsigned short first_hword = info->half_words[0];
28449 info->all_half_words_same = true;
28450 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
28451 if (first_hword != info->half_words[i])
28453 info->all_half_words_same = false;
28454 break;
28457 /* Pack words together & determine if all of the words are the same. */
28458 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
28459 info->words[i] = ((info->bytes[i * 4] << 24)
28460 | (info->bytes[(i * 4) + 1] << 16)
28461 | (info->bytes[(i * 4) + 2] << 8)
28462 | info->bytes[(i * 4) + 3]);
28464 info->all_words_same
28465 = (info->words[0] == info->words[1]
28466 && info->words[0] == info->words[1]
28467 && info->words[0] == info->words[2]
28468 && info->words[0] == info->words[3]);
28470 /* Pack double words together & determine if all of the double words are the
28471 same. */
28472 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
28474 unsigned HOST_WIDE_INT d_word = 0;
28475 for (size_t j = 0; j < 8; j++)
28476 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
28478 info->double_words[i] = d_word;
28481 info->all_double_words_same
28482 = (info->double_words[0] == info->double_words[1]);
28484 return true;
28487 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28488 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28489 value to be used with the LXVKQ instruction. */
28491 unsigned
28492 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
28494 /* Is the instruction supported with power10 code generation, IEEE 128-bit
28495 floating point hardware and VSX registers are available. */
28496 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
28497 || !TARGET_VSX)
28498 return 0;
28500 /* All of the constants that are generated by LXVKQ have the bottom 3 words
28501 that are 0. */
28502 if (vsx_const->words[1] != 0
28503 || vsx_const->words[2] != 0
28504 || vsx_const->words[3] != 0)
28505 return 0;
28507 /* See if we have a match for the first word. */
28508 switch (vsx_const->words[0])
28510 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
28511 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
28512 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
28513 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
28514 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
28515 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
28516 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
28517 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
28518 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
28519 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
28520 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
28521 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
28522 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
28523 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
28524 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
28525 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
28526 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
28527 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
28529 /* anything else cannot be loaded. */
28530 default:
28531 break;
28534 return 0;
28537 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
28538 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
28539 value to be used with the XXSPLTIW instruction. */
28541 unsigned
28542 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
28544 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28545 return 0;
28547 if (!vsx_const->all_words_same)
28548 return 0;
28550 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
28551 if (vsx_const->all_bytes_same)
28552 return 0;
28554 /* See if we can use VSPLTISH or VSPLTISW. */
28555 if (vsx_const->all_half_words_same)
28557 unsigned short h_word = vsx_const->half_words[0];
28558 short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000;
28559 if (EASY_VECTOR_15 (sign_h_word))
28560 return 0;
28563 unsigned int word = vsx_const->words[0];
28564 int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000;
28565 if (EASY_VECTOR_15 (sign_word))
28566 return 0;
28568 return vsx_const->words[0];
28571 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
28572 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
28573 value to be used with the XXSPLTIDP instruction. */
28575 unsigned
28576 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
28578 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
28579 return 0;
28581 /* Reject if the two 64-bit segments are not the same. */
28582 if (!vsx_const->all_double_words_same)
28583 return 0;
28585 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
28586 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
28587 if (vsx_const->all_bytes_same
28588 || vsx_const->all_half_words_same
28589 || vsx_const->all_words_same)
28590 return 0;
28592 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
28594 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
28595 pattern and the signalling NaN bit pattern. Recognize infinity and
28596 negative infinity. */
28598 /* Bit representation of DFmode normal quiet NaN. */
28599 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
28601 /* Bit representation of DFmode normal signaling NaN. */
28602 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
28604 /* Bit representation of DFmode positive infinity. */
28605 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
28607 /* Bit representation of DFmode negative infinity. */
28608 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
28610 if (value != RS6000_CONST_DF_NAN
28611 && value != RS6000_CONST_DF_NANS
28612 && value != RS6000_CONST_DF_INF
28613 && value != RS6000_CONST_DF_NEG_INF)
28615 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
28616 the exponent, and 52 bits for the mantissa (not counting the hidden
28617 bit used for normal numbers). NaN values have the exponent set to all
28618 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
28620 int df_exponent = (value >> 52) & 0x7ff;
28621 unsigned HOST_WIDE_INT
28622 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
28624 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
28625 return 0;
28627 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
28628 the exponent all 0 bits, and the mantissa non-zero. If the value is
28629 subnormal, then the hidden bit in the mantissa is not set. */
28630 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
28631 return 0;
28634 /* Change the representation to DFmode constant. */
28635 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
28637 /* real_from_target takes the target words in target order. */
28638 if (!BYTES_BIG_ENDIAN)
28639 std::swap (df_words[0], df_words[1]);
28641 REAL_VALUE_TYPE rv_type;
28642 real_from_target (&rv_type, df_words, DFmode);
28644 const REAL_VALUE_TYPE *rv = &rv_type;
28646 /* Validate that the number can be stored as a SFmode value. */
28647 if (!exact_real_truncate (SFmode, rv))
28648 return 0;
28650 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
28651 mantissa field is non-zero) which is undefined for the XXSPLTIDP
28652 instruction. */
28653 long sf_value;
28654 real_to_target (&sf_value, rv, SFmode);
28656 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
28657 and 23 bits for the mantissa. Subnormal numbers have the exponent all
28658 0 bits, and the mantissa non-zero. */
28659 long sf_exponent = (sf_value >> 23) & 0xFF;
28660 long sf_mantissa = sf_value & 0x7FFFFF;
28662 if (sf_exponent == 0 && sf_mantissa != 0)
28663 return 0;
28665 /* Return the immediate to be used. */
28666 return sf_value;
28670 struct gcc_target targetm = TARGET_INITIALIZER;
28672 #include "gt-rs6000.h"