// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2024 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not, see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
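
/* For illustration (not in the upstream source): TARGET_IEEEQUAD_DEFAULT only
   chooses the format used when neither long-double ABI flag is given on the
   command line; either ABI can always be selected explicitly, e.g.

       gcc -mabi=ieeelongdouble ...   (long double is IEEE binary128)
       gcc -mabi=ibmlongdouble ...    (long double is IBM double-double)  */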
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64-bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,		/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },					/* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },		/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },		/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10, "arch_3_1" },		/* ISA 3.1 (power10).  */
};
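
/* For illustration (not in the upstream source): this table underlies
   function multi-versioning; the generated resolver tests each HWCAP name
   via __builtin_cpu_supports ("arch_3_00") and friends to pick a clone at
   load time, e.g.

       __attribute__ ((target_clones ("cpu=power9", "default")))
       long sum (long *a, long n);  */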
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Masks of the valid addressing modes, and the reload insns to use, for each
   register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
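
/* For illustration (not in the upstream source): the DQ-form restriction
   corresponds to ISA 3.0 instructions such as "lxv vs0,16(r3)", whose
   displacement field must be a multiple of 16; an unaligned offset instead
   has to be materialized in a register and use an indexed form such as
   lxvx.  */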
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on Power10/Power11 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
					       code_helper = ERROR_MARK);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const attribute_spec rs6000_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",	1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",	0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
};

static const scoped_attribute_specs rs6000_gnu_attribute_table =
{
  "gnu", { rs6000_gnu_attributes }
};

static const scoped_attribute_specs *const rs6000_attribute_table[] =
{
  &rs6000_gnu_attribute_table
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_PREFERRED_DOLOOP_MODE
#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE rs6000_c_mode_for_floating_type

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1726 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1727 rs6000_generate_version_dispatcher_body
1729 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1730 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1731 rs6000_get_function_versions_dispatcher
1733 #undef TARGET_OPTION_FUNCTION_VERSIONS
1734 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1736 #undef TARGET_HARD_REGNO_NREGS
1737 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1738 #undef TARGET_HARD_REGNO_MODE_OK
1739 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1741 #undef TARGET_MODES_TIEABLE_P
1742 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1744 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1745 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1746 rs6000_hard_regno_call_part_clobbered
1748 #undef TARGET_SLOW_UNALIGNED_ACCESS
1749 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1751 #undef TARGET_CAN_CHANGE_MODE_CLASS
1752 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1754 #undef TARGET_CONSTANT_ALIGNMENT
1755 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1757 #undef TARGET_STARTING_FRAME_OFFSET
1758 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1760 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1761 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1763 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1764 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1766 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1767 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1768 rs6000_cannot_substitute_mem_equiv_p
1770 #undef TARGET_INVALID_CONVERSION
1771 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1773 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1774 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1776 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1777 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1779 #undef TARGET_CONST_ANCHOR
1780 #define TARGET_CONST_ANCHOR 0x8000
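/* As an illustrative sketch (not code from this file): with a 0x8000 anchor,
   once a constant such as 0x11113f00 is live in a register, CSE can derive
   the nearby constant 0x11113f04 with a single add instead of a fresh
   lis/ori pair, e.g.

	lis   9,0x1111	      # r9 = 0x11110000
	ori   9,9,0x3f00      # r9 = 0x11113f00
	addi  10,9,4	      # r10 = 0x11113f04, reusing the anchor  */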
1782 #undef TARGET_OVERLAP_OP_BY_PIECES_P
1783 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
1787 /* Processor table. */
1788 struct rs6000_ptt
1790 const char *const name; /* Canonical processor name. */
1791 const enum processor_type processor; /* Processor type enum value. */
1792 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1795 static struct rs6000_ptt const processor_target_table[] =
1797 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1798 #include "rs6000-cpus.def"
1799 #undef RS6000_CPU
1802 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1803 name is invalid. */
1805 static int
1806 rs6000_cpu_name_lookup (const char *name)
1808 size_t i;
1810 if (name != NULL)
1812 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1813 if (! strcmp (name, processor_target_table[i].name))
1814 return (int)i;
1817 return -1;
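/* Usage sketch (hypothetical call sites): given -mcpu=power9,
   rs6000_cpu_name_lookup ("power9") returns the index of the "power9" row
   generated from rs6000-cpus.def, while a misspelling such as
   rs6000_cpu_name_lookup ("power99") returns -1 so the caller can emit an
   invalid-cpu diagnostic.  */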
1821 /* Return number of consecutive hard regs needed starting at reg REGNO
1822 to hold something of mode MODE.
1823 This is ordinarily the length in words of a value of mode MODE
1824 but can be less for certain modes in special long registers.
1826 POWER and PowerPC GPRs hold 32 bits worth;
1827    PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1829 static int
1830 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1832 unsigned HOST_WIDE_INT reg_size;
1834 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1835 128-bit floating point that can go in vector registers, which has VSX
1836 memory addressing. */
1837 if (FP_REGNO_P (regno))
1838 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1839 ? UNITS_PER_VSX_WORD
1840 : UNITS_PER_FP_WORD);
1842 else if (ALTIVEC_REGNO_P (regno))
1843 reg_size = UNITS_PER_ALTIVEC_WORD;
1845 else
1846 reg_size = UNITS_PER_WORD;
1848 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
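/* Worked examples of the ceiling division above (values assume the usual
   register sizes, so treat this as a sketch): DFmode (8 bytes) in a 32-bit
   GPR needs (8 + 4 - 1) / 4 = 2 registers; V2DFmode (16 bytes) in a VSX
   FPR needs (16 + 16 - 1) / 16 = 1; XOmode (64 bytes) under MMA needs
   64 / 16 = 4 consecutive FPRs.  */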
1851 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1852 MODE. */
1853 static int
1854 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1856 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1858 if (COMPLEX_MODE_P (mode))
1859 mode = GET_MODE_INNER (mode);
1861 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1862 registers. */
1863 if (mode == OOmode)
1864 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1866 /* MMA accumulator modes need FPR registers divisible by 4. */
1867 if (mode == XOmode)
1868 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1870 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1871 register combinations, and use PTImode where we need to deal with quad
1872 word memory operations. Don't allow quad words in the argument or frame
1873 pointer registers, just registers 0..31. */
1874 if (mode == PTImode)
1875 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1876 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1877 && ((regno & 1) == 0));
1879 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1880 implementations. Don't allow an item to be split between a FP register
1881 and an Altivec register. Allow TImode in all VSX registers if the user
1882 asked for it. */
1883 if (TARGET_VSX && VSX_REGNO_P (regno)
1884 && (VECTOR_MEM_VSX_P (mode)
1885 || VECTOR_ALIGNMENT_P (mode)
1886 || reg_addr[mode].scalar_in_vmx_p
1887 || mode == TImode
1888 || (TARGET_VADDUQM && mode == V1TImode)))
1890 if (FP_REGNO_P (regno))
1891 return FP_REGNO_P (last_regno);
1893 if (ALTIVEC_REGNO_P (regno))
1895 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1896 return 0;
1898 return ALTIVEC_REGNO_P (last_regno);
1902 /* The GPRs can hold any mode, but values bigger than one register
1903 cannot go past R31. */
1904 if (INT_REGNO_P (regno))
1905 return INT_REGNO_P (last_regno);
1907 /* The float registers (except for VSX vector modes) can only hold floating
1908 modes and DImode. */
1909 if (FP_REGNO_P (regno))
1911 if (VECTOR_ALIGNMENT_P (mode))
1912 return false;
1914 if (SCALAR_FLOAT_MODE_P (mode)
1915 && (mode != TDmode || (regno % 2) == 0)
1916 && FP_REGNO_P (last_regno))
1917 return 1;
1919 if (GET_MODE_CLASS (mode) == MODE_INT)
1921      if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1922 return 1;
1924 if (TARGET_POPCNTD && mode == SImode)
1925 return 1;
1927 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1928 return 1;
1931 return 0;
1934 /* The CR register can only hold CC modes. */
1935 if (CR_REGNO_P (regno))
1936 return GET_MODE_CLASS (mode) == MODE_CC;
1938 if (CA_REGNO_P (regno))
1939 return mode == Pmode || mode == SImode;
1942  /* AltiVec vector modes can go only in AltiVec registers.  */
1942 if (ALTIVEC_REGNO_P (regno))
1943 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1944 || mode == V1TImode);
1946  /* We cannot put non-VSX TImode or PTImode anywhere except the general
1947     registers, and the value must fit within the register set.  */
1949 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
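/* For example (illustrative, assuming a power9-style configuration):
   V2DFmode is accepted in both FPRs and Altivec registers (the combined
   VSX set), TDmode only in even-numbered FPRs, and PTImode only in an
   even/odd GPR pair that fits entirely within r0..r31.  */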
1952 /* Implement TARGET_HARD_REGNO_NREGS. */
1954 static unsigned int
1955 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1957 return rs6000_hard_regno_nregs[mode][regno];
1960 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1962 static bool
1963 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1965 return rs6000_hard_regno_mode_ok_p[mode][regno];
1968 /* Implement TARGET_MODES_TIEABLE_P.
1970 PTImode cannot tie with other modes because PTImode is restricted to even
1971 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1972 57744).
1974 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1975 registers) or XOmode (vector quad, restricted to FPR registers divisible
1976 by 4) to tie with other modes.
1978 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1979 128-bit floating point on VSX systems ties with other vectors. */
1981 static bool
1982 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1984 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1985 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1986 return mode1 == mode2;
1988 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1989 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1990 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1991 return false;
1993 if (SCALAR_FLOAT_MODE_P (mode1))
1994 return SCALAR_FLOAT_MODE_P (mode2);
1995 if (SCALAR_FLOAT_MODE_P (mode2))
1996 return false;
1998 if (GET_MODE_CLASS (mode1) == MODE_CC)
1999 return GET_MODE_CLASS (mode2) == MODE_CC;
2000 if (GET_MODE_CLASS (mode2) == MODE_CC)
2001 return false;
2003 return true;
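/* A few concrete results as a sketch: rs6000_modes_tieable_p (V4SImode,
   V2DFmode) and rs6000_modes_tieable_p (DFmode, SFmode) are true, while
   rs6000_modes_tieable_p (TImode, PTImode) and rs6000_modes_tieable_p
   (CCmode, SImode) are false, per the ordering of the tests above.  */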
2006 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2008 static bool
2009 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2010 machine_mode mode)
2012 if (TARGET_32BIT
2013 && TARGET_POWERPC64
2014 && GET_MODE_SIZE (mode) > 4
2015 && INT_REGNO_P (regno))
2016 return true;
2018 if (TARGET_VSX
2019 && FP_REGNO_P (regno)
2020 && GET_MODE_SIZE (mode) > 8
2021 && !FLOAT128_2REG_P (mode))
2022 return true;
2024 return false;
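/* Example (sketch): under -m32 -mpowerpc64, a DImode value in a call-saved
   GPR occupies one 64-bit register, but the 32-bit ABI only preserves the
   low 32 bits across calls, so the register counts as partially clobbered;
   likewise a 128-bit value in an FPR under VSX only has its 64-bit FP half
   saved.  */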
2027 /* Print interesting facts about registers. */
2028 static void
2029 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2031 int r, m;
2033 for (r = first_regno; r <= last_regno; ++r)
2035 const char *comma = "";
2036 int len;
2038 if (first_regno == last_regno)
2039 fprintf (stderr, "%s:\t", reg_name);
2040 else
2041 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2043 len = 8;
2044 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2045 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2047 if (len > 70)
2049 fprintf (stderr, ",\n\t");
2050 len = 8;
2051 comma = "";
2054 if (rs6000_hard_regno_nregs[m][r] > 1)
2055 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2056 rs6000_hard_regno_nregs[m][r]);
2057 else
2058 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2060 comma = ", ";
2063 if (call_used_or_fixed_reg_p (r))
2065 if (len > 70)
2067 fprintf (stderr, ",\n\t");
2068 len = 8;
2069 comma = "";
2072 len += fprintf (stderr, "%s%s", comma, "call-used");
2073 comma = ", ";
2076 if (fixed_regs[r])
2078 if (len > 70)
2080 fprintf (stderr, ",\n\t");
2081 len = 8;
2082 comma = "";
2085 len += fprintf (stderr, "%s%s", comma, "fixed");
2086 comma = ", ";
2089 if (len > 70)
2091 fprintf (stderr, ",\n\t");
2092 comma = "";
2095 len += fprintf (stderr, "%sreg-class = %s", comma,
2096 reg_class_names[(int)rs6000_regno_regclass[r]]);
2097 comma = ", ";
2099 if (len > 70)
2101 fprintf (stderr, ",\n\t");
2102 comma = "";
2105 fprintf (stderr, "%sregno = %d\n", comma, r);
2109 static const char *
2110 rs6000_debug_vector_unit (enum rs6000_vector v)
2112 const char *ret;
2114 switch (v)
2116 case VECTOR_NONE: ret = "none"; break;
2117 case VECTOR_ALTIVEC: ret = "altivec"; break;
2118 case VECTOR_VSX: ret = "vsx"; break;
2119 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2120 default: ret = "unknown"; break;
2123 return ret;
2126 /* Inner function printing just the address mask for a particular reload
2127 register class. */
2128 DEBUG_FUNCTION char *
2129 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2131 static char ret[8];
2132 char *p = ret;
2134 if ((mask & RELOAD_REG_VALID) != 0)
2135 *p++ = 'v';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2139 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2140 *p++ = 'm';
2141 else if (keep_spaces)
2142 *p++ = ' ';
2144 if ((mask & RELOAD_REG_INDEXED) != 0)
2145 *p++ = 'i';
2146 else if (keep_spaces)
2147 *p++ = ' ';
2149 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2150 *p++ = 'O';
2151 else if ((mask & RELOAD_REG_OFFSET) != 0)
2152 *p++ = 'o';
2153 else if (keep_spaces)
2154 *p++ = ' ';
2156 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2157 *p++ = '+';
2158 else if (keep_spaces)
2159 *p++ = ' ';
2161 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2162 *p++ = '+';
2163 else if (keep_spaces)
2164 *p++ = ' ';
2166 if ((mask & RELOAD_REG_AND_M16) != 0)
2167 *p++ = '&';
2168 else if (keep_spaces)
2169 *p++ = ' ';
2171 *p = '\0';
2173 return ret;
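/* Output sketch: a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC prints as "vio+", or as
   "v io+  " with KEEP_SPACES so the columns line up across classes.  */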
2176 /* Print the address masks in a human readable fashion.  */
2177 DEBUG_FUNCTION void
2178 rs6000_debug_print_mode (ssize_t m)
2180 ssize_t rc;
2181 int spaces = 0;
2183 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2184 for (rc = 0; rc < N_RELOAD_REG; rc++)
2185 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2186 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2188 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2189 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2191 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2192 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2193 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2194 spaces = 0;
2196 else
2197 spaces += strlen (" Reload=sl");
2199 if (reg_addr[m].scalar_in_vmx_p)
2201 fprintf (stderr, "%*s Upper=y", spaces, "");
2202 spaces = 0;
2204 else
2205 spaces += strlen (" Upper=y");
2207 if (rs6000_vector_unit[m] != VECTOR_NONE
2208 || rs6000_vector_mem[m] != VECTOR_NONE)
2210 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2211 spaces, "",
2212 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2213 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2216 fputs ("\n", stderr);
2219 #define DEBUG_FMT_ID "%-32s= "
2220 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2221 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2222 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
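/* For instance, fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints the
   tag left-justified in a 32-column field followed by "= ELFv2".  */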
2224 /* Print various interesting information with -mdebug=reg. */
2225 static void
2226 rs6000_debug_reg_global (void)
2228 static const char *const tf[2] = { "false", "true" };
2229 const char *nl = (const char *)0;
2230 int m;
2231 size_t m1, m2, v;
2232 char costly_num[20];
2233 char nop_num[20];
2234 char flags_buffer[40];
2235 const char *costly_str;
2236 const char *nop_str;
2237 const char *trace_str;
2238 const char *abi_str;
2239 const char *cmodel_str;
2240 struct cl_target_option cl_opts;
2242 /* Modes we want tieable information on. */
2243 static const machine_mode print_tieable_modes[] = {
2244 QImode,
2245 HImode,
2246 SImode,
2247 DImode,
2248 TImode,
2249 PTImode,
2250 SFmode,
2251 DFmode,
2252 TFmode,
2253 IFmode,
2254 KFmode,
2255 SDmode,
2256 DDmode,
2257 TDmode,
2258 V2SImode,
2259 V2SFmode,
2260 V16QImode,
2261 V8HImode,
2262 V4SImode,
2263 V2DImode,
2264 V1TImode,
2265 V32QImode,
2266 V16HImode,
2267 V8SImode,
2268 V4DImode,
2269 V2TImode,
2270 V4SFmode,
2271 V2DFmode,
2272 V8SFmode,
2273 V4DFmode,
2274 OOmode,
2275 XOmode,
2276 CCmode,
2277 CCUNSmode,
2278 CCEQmode,
2279 CCFPmode,
2282 /* Virtual regs we are interested in. */
2283 const static struct {
2284 int regno; /* register number. */
2285 const char *name; /* register name. */
2286 } virtual_regs[] = {
2287 { STACK_POINTER_REGNUM, "stack pointer:" },
2288 { TOC_REGNUM, "toc: " },
2289 { STATIC_CHAIN_REGNUM, "static chain: " },
2290 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2291 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2292 { ARG_POINTER_REGNUM, "arg pointer: " },
2293 { FRAME_POINTER_REGNUM, "frame pointer:" },
2294 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2295 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2296 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2297 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2298 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2299 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2300 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2301     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2302 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2305 fputs ("\nHard register information:\n", stderr);
2306 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2307 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2308 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2309 LAST_ALTIVEC_REGNO,
2310 "vs");
2311 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2312 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2313 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2314 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2315 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2316 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2318 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2319 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2320 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2322 fprintf (stderr,
2323 "\n"
2324 "d reg_class = %s\n"
2325 "v reg_class = %s\n"
2326 "wa reg_class = %s\n"
2327 "we reg_class = %s\n"
2328 "wr reg_class = %s\n"
2329 "wx reg_class = %s\n"
2330 "wA reg_class = %s\n"
2331 "\n",
2332 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2333 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2334 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2335 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2336 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2337 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2338 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2340 nl = "\n";
2341 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2342 rs6000_debug_print_mode (m);
2344 fputs ("\n", stderr);
2346 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2348 machine_mode mode1 = print_tieable_modes[m1];
2349 bool first_time = true;
2351 nl = (const char *)0;
2352 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2354 machine_mode mode2 = print_tieable_modes[m2];
2355 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2357 if (first_time)
2359 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2360 nl = "\n";
2361 first_time = false;
2364 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2368 if (!first_time)
2369 fputs ("\n", stderr);
2372 if (nl)
2373 fputs (nl, stderr);
2375 if (rs6000_recip_control)
2377 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2379 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2380 if (rs6000_recip_bits[m])
2382 fprintf (stderr,
2383 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2384 GET_MODE_NAME (m),
2385 (RS6000_RECIP_AUTO_RE_P (m)
2386 ? "auto"
2387 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2388 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2389 ? "auto"
2390 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2393 fputs ("\n", stderr);
2396 if (rs6000_cpu_index >= 0)
2398 const char *name = processor_target_table[rs6000_cpu_index].name;
2399 HOST_WIDE_INT flags
2400 = processor_target_table[rs6000_cpu_index].target_enable;
2402 sprintf (flags_buffer, "-mcpu=%s flags", name);
2403 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2405 else
2406 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2408 if (rs6000_tune_index >= 0)
2410 const char *name = processor_target_table[rs6000_tune_index].name;
2411 HOST_WIDE_INT flags
2412 = processor_target_table[rs6000_tune_index].target_enable;
2414 sprintf (flags_buffer, "-mtune=%s flags", name);
2415 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2417 else
2418 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2420 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2421 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2422 rs6000_isa_flags);
2424 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2425 rs6000_isa_flags_explicit);
2427 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2429 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2430 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2432 switch (rs6000_sched_costly_dep)
2434 case max_dep_latency:
2435 costly_str = "max_dep_latency";
2436 break;
2438 case no_dep_costly:
2439 costly_str = "no_dep_costly";
2440 break;
2442 case all_deps_costly:
2443 costly_str = "all_deps_costly";
2444 break;
2446 case true_store_to_load_dep_costly:
2447 costly_str = "true_store_to_load_dep_costly";
2448 break;
2450 case store_to_load_dep_costly:
2451 costly_str = "store_to_load_dep_costly";
2452 break;
2454 default:
2455 costly_str = costly_num;
2456 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2457 break;
2460 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2462 switch (rs6000_sched_insert_nops)
2464 case sched_finish_regroup_exact:
2465 nop_str = "sched_finish_regroup_exact";
2466 break;
2468 case sched_finish_pad_groups:
2469 nop_str = "sched_finish_pad_groups";
2470 break;
2472 case sched_finish_none:
2473 nop_str = "sched_finish_none";
2474 break;
2476 default:
2477 nop_str = nop_num;
2478 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2479 break;
2482 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2484 switch (rs6000_sdata)
2486 default:
2487 case SDATA_NONE:
2488 break;
2490 case SDATA_DATA:
2491 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2492 break;
2494 case SDATA_SYSV:
2495 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2496 break;
2498 case SDATA_EABI:
2499 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2500 break;
2504 switch (rs6000_traceback)
2506 case traceback_default: trace_str = "default"; break;
2507 case traceback_none: trace_str = "none"; break;
2508 case traceback_part: trace_str = "part"; break;
2509 case traceback_full: trace_str = "full"; break;
2510 default: trace_str = "unknown"; break;
2513 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2515 switch (rs6000_current_cmodel)
2517 case CMODEL_SMALL: cmodel_str = "small"; break;
2518 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2519 case CMODEL_LARGE: cmodel_str = "large"; break;
2520 default: cmodel_str = "unknown"; break;
2523 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2525 switch (rs6000_current_abi)
2527 case ABI_NONE: abi_str = "none"; break;
2528 case ABI_AIX: abi_str = "aix"; break;
2529 case ABI_ELFv2: abi_str = "ELFv2"; break;
2530 case ABI_V4: abi_str = "V4"; break;
2531 case ABI_DARWIN: abi_str = "darwin"; break;
2532 default: abi_str = "unknown"; break;
2535 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2537 if (rs6000_altivec_abi)
2538 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2540 if (rs6000_aix_extabi)
2541 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2543 if (rs6000_darwin64_abi)
2544 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2546 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2547 (TARGET_SOFT_FLOAT ? "true" : "false"));
2549 if (TARGET_LINK_STACK)
2550 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2552 if (TARGET_P8_FUSION)
2554 char options[80];
2556 strcpy (options, "power8");
2557 if (TARGET_P8_FUSION_SIGN)
2558 strcat (options, ", sign");
2560 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2563 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2564 TARGET_SECURE_PLT ? "secure" : "bss");
2565 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2566 aix_struct_return ? "aix" : "sysv");
2567 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2568 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2569 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2570 tf[!!rs6000_align_branch_targets]);
2571 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2572 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2573 rs6000_long_double_type_size);
2574 if (rs6000_long_double_type_size > 64)
2576 fprintf (stderr, DEBUG_FMT_S, "long double type",
2577 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2578 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2579 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2581 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2582 (int)rs6000_sched_restricted_insns_priority);
2583 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2584 (int)END_BUILTINS);
2586 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2587 (int)TARGET_FLOAT128_ENABLE_TYPE);
2589 if (TARGET_VSX)
2590 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2591 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2593 if (TARGET_DIRECT_MOVE_128)
2594 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2595 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2599 /* Update the addr mask bits in reg_addr to help secondary reload and the
2600    legitimate address support figure out the appropriate addressing to
2601    use.  */
2603 static void
2604 rs6000_setup_reg_addr_masks (void)
2606 ssize_t rc, reg, m, nregs;
2607 addr_mask_type any_addr_mask, addr_mask;
2609 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2611 machine_mode m2 = (machine_mode) m;
2612 bool complex_p = false;
2613 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2614 size_t msize;
2616 if (COMPLEX_MODE_P (m2))
2618 complex_p = true;
2619 m2 = GET_MODE_INNER (m2);
2622 msize = GET_MODE_SIZE (m2);
2624 /* SDmode is special in that we want to access it only via REG+REG
2625 addressing on power7 and above, since we want to use the LFIWZX and
2626 STFIWZX instructions to load it. */
2627 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2629 any_addr_mask = 0;
2630 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2632 addr_mask = 0;
2633 reg = reload_reg_map[rc].reg;
2635 /* Can mode values go in the GPR/FPR/Altivec registers? */
2636 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2638 bool small_int_vsx_p = (small_int_p
2639 && (rc == RELOAD_REG_FPR
2640 || rc == RELOAD_REG_VMX));
2642 nregs = rs6000_hard_regno_nregs[m][reg];
2643 addr_mask |= RELOAD_REG_VALID;
2645 /* Indicate if the mode takes more than 1 physical register. If
2646 it takes a single register, indicate it can do REG+REG
2647 addressing. Small integers in VSX registers can only do
2648 REG+REG addressing. */
2649 if (small_int_vsx_p)
2650 addr_mask |= RELOAD_REG_INDEXED;
2651 else if (nregs > 1 || m == BLKmode || complex_p)
2652 addr_mask |= RELOAD_REG_MULTIPLE;
2653 else
2654 addr_mask |= RELOAD_REG_INDEXED;
2656 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2657 addressing. If we allow scalars into Altivec registers,
2658 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2660 For VSX systems, we don't allow update addressing for
2661 DFmode/SFmode if those registers can go in both the
2662 traditional floating point registers and Altivec registers.
2663 The load/store instructions for the Altivec registers do not
2664 have update forms. If we allowed update addressing, it seems
2665 to break IV-OPT code using floating point if the index type is
2666 int instead of long (PR target/81550 and target/84042). */
2668 if (TARGET_UPDATE
2669 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2670 && msize <= 8
2671 && !VECTOR_MODE_P (m2)
2672 && !VECTOR_ALIGNMENT_P (m2)
2673 && !complex_p
2674 && (m != E_DFmode || !TARGET_VSX)
2675 && (m != E_SFmode || !TARGET_P8_VECTOR)
2676 && !small_int_vsx_p)
2678 addr_mask |= RELOAD_REG_PRE_INCDEC;
2680 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2681 we don't allow PRE_MODIFY for some multi-register
2682 operations. */
2683 switch (m)
2685 default:
2686 addr_mask |= RELOAD_REG_PRE_MODIFY;
2687 break;
2689 case E_DImode:
2690 if (TARGET_POWERPC64)
2691 addr_mask |= RELOAD_REG_PRE_MODIFY;
2692 break;
2694 case E_DFmode:
2695 case E_DDmode:
2696 if (TARGET_HARD_FLOAT)
2697 addr_mask |= RELOAD_REG_PRE_MODIFY;
2698 break;
2703 /* GPR and FPR registers can do REG+OFFSET addressing, except
2704 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2705 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2706 if ((addr_mask != 0) && !indexed_only_p
2707 && msize <= 8
2708 && (rc == RELOAD_REG_GPR
2709 || ((msize == 8 || m2 == SFmode)
2710 && (rc == RELOAD_REG_FPR
2711 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2712 addr_mask |= RELOAD_REG_OFFSET;
2714	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2715	     instructions are enabled.  The offset for 128-bit VSX registers is
2716	     only 12 bits.  While GPRs can handle the full offset range, VSX
2717	     registers can only handle the restricted range.  */
2718 else if ((addr_mask != 0) && !indexed_only_p
2719 && msize == 16 && TARGET_P9_VECTOR
2720 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2721 || (m2 == TImode && TARGET_VSX)))
2723 addr_mask |= RELOAD_REG_OFFSET;
2724 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2725 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2728 /* Vector pairs can do both indexed and offset loads if the
2729 instructions are enabled, otherwise they can only do offset loads
2730	     since the access will be broken into two vector moves.  Vector quads can
2731 only do offset loads. */
2732 else if ((addr_mask != 0) && TARGET_MMA
2733 && (m2 == OOmode || m2 == XOmode))
2735 addr_mask |= RELOAD_REG_OFFSET;
2736 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2738 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2739 if (m2 == OOmode)
2740 addr_mask |= RELOAD_REG_INDEXED;
2744 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2745 addressing on 128-bit types. */
2746 if (rc == RELOAD_REG_VMX && msize == 16
2747 && (addr_mask & RELOAD_REG_VALID) != 0)
2748 addr_mask |= RELOAD_REG_AND_M16;
2750 reg_addr[m].addr_mask[rc] = addr_mask;
2751 any_addr_mask |= addr_mask;
2754 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
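/* A sketch of the net effect on a typical power9 configuration: DFmode
   loses the update (pre-increment/pre-modify) forms everywhere once VSX is
   on, per the m != E_DFmode || !TARGET_VSX test above, while 128-bit
   vector modes gain the quad-offset 'O' bit in the FPR and VMX classes.  */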
2759 /* Initialize the various global tables that are based on register size. */
2760 static void
2761 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2763 ssize_t r, m, c;
2764 int align64;
2765 int align32;
2767 /* Precalculate REGNO_REG_CLASS. */
2768 rs6000_regno_regclass[0] = GENERAL_REGS;
2769 for (r = 1; r < 32; ++r)
2770 rs6000_regno_regclass[r] = BASE_REGS;
2772 for (r = 32; r < 64; ++r)
2773 rs6000_regno_regclass[r] = FLOAT_REGS;
2775 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2776 rs6000_regno_regclass[r] = NO_REGS;
2778 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2779 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2781 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2782 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2783 rs6000_regno_regclass[r] = CR_REGS;
2785 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2786 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2787 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2788 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2789 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2790 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2791 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2793 /* Precalculate register class to simpler reload register class. We don't
2794 need all of the register classes that are combinations of different
2795 classes, just the simple ones that have constraint letters. */
2796 for (c = 0; c < N_REG_CLASSES; c++)
2797 reg_class_to_reg_type[c] = NO_REG_TYPE;
2799 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2800 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2801 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2802 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2803 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2804 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2805 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2806 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2807 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2808 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2810 if (TARGET_VSX)
2812 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2813 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2815 else
2817 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2818 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2821  /* Precalculate the valid memory formats as well as the vector information;
2822 this must be set up before the rs6000_hard_regno_nregs_internal calls
2823 below. */
2824 gcc_assert ((int)VECTOR_NONE == 0);
2825 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2826 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2828 gcc_assert ((int)CODE_FOR_nothing == 0);
2829 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2831 gcc_assert ((int)NO_REGS == 0);
2832 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2834  /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2835     controls whether the compiler uses native alignment or still assumes 128-bit alignment.  */
2836 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2838 align64 = 64;
2839 align32 = 32;
2841 else
2843 align64 = 128;
2844 align32 = 128;
2847 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2848 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2849 if (TARGET_FLOAT128_TYPE)
2851 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2852 rs6000_vector_align[KFmode] = 128;
2854 if (FLOAT128_IEEE_P (TFmode))
2856 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2857 rs6000_vector_align[TFmode] = 128;
2861 /* V2DF mode, VSX only. */
2862 if (TARGET_VSX)
2864 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2865 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2866 rs6000_vector_align[V2DFmode] = align64;
2869 /* V4SF mode, either VSX or Altivec. */
2870 if (TARGET_VSX)
2872 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2873 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2874 rs6000_vector_align[V4SFmode] = align32;
2876 else if (TARGET_ALTIVEC)
2878 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2879 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2880 rs6000_vector_align[V4SFmode] = align32;
2883 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2884 and stores. */
2885 if (TARGET_ALTIVEC)
2887 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2888 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2889 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2890 rs6000_vector_align[V4SImode] = align32;
2891 rs6000_vector_align[V8HImode] = align32;
2892 rs6000_vector_align[V16QImode] = align32;
2894 if (TARGET_VSX)
2896 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2897 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2898 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2900 else
2902 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2903 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2904 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2908 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2909 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2910 if (TARGET_VSX)
2912 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2913 rs6000_vector_unit[V2DImode]
2914 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2915 rs6000_vector_align[V2DImode] = align64;
2917 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2918 rs6000_vector_unit[V1TImode]
2919 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2920 rs6000_vector_align[V1TImode] = 128;
2923 /* DFmode, see if we want to use the VSX unit. Memory is handled
2924 differently, so don't set rs6000_vector_mem. */
2925 if (TARGET_VSX)
2927 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2928 rs6000_vector_align[DFmode] = 64;
2931 /* SFmode, see if we want to use the VSX unit. */
2932 if (TARGET_P8_VECTOR)
2934 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2935 rs6000_vector_align[SFmode] = 32;
2938  /* Allow TImode in VSX registers and set the VSX memory macros.  */
2939 if (TARGET_VSX)
2941 rs6000_vector_mem[TImode] = VECTOR_VSX;
2942 rs6000_vector_align[TImode] = align64;
2945 /* Add support for vector pairs and vector quad registers. */
2946 if (TARGET_MMA)
2948 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2949 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2950 rs6000_vector_align[OOmode] = 256;
2952 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2953 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2954 rs6000_vector_align[XOmode] = 512;
2957 /* Register class constraints for the constraints that depend on compile
2958 switches. When the VSX code was added, different constraints were added
2959 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2960 of the VSX registers are used. The register classes for scalar floating
2961     point types are set, based on whether we allow that type into the upper
2962 (Altivec) registers. GCC has register classes to target the Altivec
2963 registers for load/store operations, to select using a VSX memory
2964 operation instead of the traditional floating point operation. The
2965 constraints are:
2967 d - Register class to use with traditional DFmode instructions.
2968 v - Altivec register.
2969 wa - Any VSX register.
2970 wc - Reserved to represent individual CR bits (used in LLVM).
2971 wn - always NO_REGS.
2972 wr - GPR if 64-bit mode is permitted.
2973 wx - Float register if we can do 32-bit int stores. */
2975 if (TARGET_HARD_FLOAT)
2976 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2977 if (TARGET_ALTIVEC)
2978 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2979 if (TARGET_VSX)
2980 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2982 if (TARGET_POWERPC64)
2984 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2985 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2988 if (TARGET_STFIWX)
2989 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2991  /* Support for new direct moves (ISA 3.0 + 64-bit).  */
2992 if (TARGET_DIRECT_MOVE_128)
2993 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2995 /* Set up the reload helper and direct move functions. */
2996 if (TARGET_VSX || TARGET_ALTIVEC)
2998 if (TARGET_64BIT)
3000 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3001 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3002 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3003 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3004 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3005 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3006 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3007 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3008 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3009 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3010 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3011 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3012 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3013 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3014 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3015 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3016 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3017 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3018 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3019 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3021 if (FLOAT128_VECTOR_P (KFmode))
3023 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3024 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3027 if (FLOAT128_VECTOR_P (TFmode))
3029 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3030 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3033 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3034 available. */
3035 if (TARGET_NO_SDMODE_STACK)
3037 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3038 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3041 if (TARGET_VSX)
3043 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3044 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3047 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3049 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3050 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3051 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3052 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3053 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3054 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3055 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3056 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3057 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3059 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3060 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3061 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3062 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3063 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3064 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3065 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3066 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3067 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3069 if (FLOAT128_VECTOR_P (KFmode))
3071 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3072 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3075 if (FLOAT128_VECTOR_P (TFmode))
3077 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3078 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3081 if (TARGET_MMA)
3083 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3084 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3085 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3086 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3090 else
3092 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3093 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3094 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3095 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3096 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3097 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3098 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3099 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3100 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3101 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3102 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3103 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3104 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3105 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3106 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3107 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3108 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3109 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3110 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3111 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3113 if (FLOAT128_VECTOR_P (KFmode))
3115 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3116 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3119 if (FLOAT128_IEEE_P (TFmode))
3121 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3122 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3125 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3126 available. */
3127 if (TARGET_NO_SDMODE_STACK)
3129 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3130 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3133 if (TARGET_VSX)
3135 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3136 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3139 if (TARGET_DIRECT_MOVE)
3141 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3142 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3143 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3147 reg_addr[DFmode].scalar_in_vmx_p = true;
3148 reg_addr[DImode].scalar_in_vmx_p = true;
3150 if (TARGET_P8_VECTOR)
3152 reg_addr[SFmode].scalar_in_vmx_p = true;
3153 reg_addr[SImode].scalar_in_vmx_p = true;
3155 if (TARGET_P9_VECTOR)
3157 reg_addr[HImode].scalar_in_vmx_p = true;
3158 reg_addr[QImode].scalar_in_vmx_p = true;
3163 /* Precalculate HARD_REGNO_NREGS. */
3164 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3165 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3166 rs6000_hard_regno_nregs[m][r]
3167 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3169 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3170 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3171 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3172 rs6000_hard_regno_mode_ok_p[m][r]
3173 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3175 /* Precalculate CLASS_MAX_NREGS sizes. */
3176 for (c = 0; c < LIM_REG_CLASSES; ++c)
3178 int reg_size;
3180 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3181 reg_size = UNITS_PER_VSX_WORD;
3183 else if (c == ALTIVEC_REGS)
3184 reg_size = UNITS_PER_ALTIVEC_WORD;
3186 else if (c == FLOAT_REGS)
3187 reg_size = UNITS_PER_FP_WORD;
3189 else
3190 reg_size = UNITS_PER_WORD;
3192 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3194 machine_mode m2 = (machine_mode)m;
3195 int reg_size2 = reg_size;
3197 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3198 in VSX. */
3199 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3200 reg_size2 = UNITS_PER_FP_WORD;
3202 rs6000_class_max_nregs[m][c]
3203 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
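/* e.g. (assuming VSX): rs6000_class_max_nregs[V2DFmode][VSX_REGS]
   = (16 + 16 - 1) / 16 = 1, while rs6000_class_max_nregs[IFmode][VSX_REGS]
   = (16 + 8 - 1) / 8 = 2, since IBM extended double always spans two
   64-bit FPRs even when VSX is available.  */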
3207  /* Calculate the modes for which to automatically generate code using the
3208     reciprocal divide and square root instructions.  In the future, possibly
3209     automatically generate the instructions even if the user did not specify
3210     -mrecip.  The double-precision reciprocal sqrt estimate on older machines
3211     is not accurate enough.  */
3212 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3213 if (TARGET_FRES)
3214 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3215 if (TARGET_FRE)
3216 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3217 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3218 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3219 if (VECTOR_UNIT_VSX_P (V2DFmode))
3220 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3222 if (TARGET_FRSQRTES)
3223 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3224 if (TARGET_FRSQRTE)
3225 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3226 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3227 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3228 if (VECTOR_UNIT_VSX_P (V2DFmode))
3229 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3231 if (rs6000_recip_control)
3233 if (!flag_finite_math_only)
3234	warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3235 "-ffast-math");
3236 if (flag_trapping_math)
3237 warning (0, "%qs requires %qs or %qs", "-mrecip",
3238 "-fno-trapping-math", "-ffast-math");
3239 if (!flag_reciprocal_math)
3240 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3241 "-ffast-math");
3242 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3244 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3245 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3246 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3248 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3249 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3250 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3252 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3253 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3254 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3256 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3257 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3258 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3261 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3262 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3265 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3266 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3268 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3269 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3270 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3272 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3273 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3274 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3278  /* Update the addr mask bits in reg_addr to help secondary reload and the
3279     legitimate address support figure out the appropriate addressing to
3280     use.  */
3281 rs6000_setup_reg_addr_masks ();
3283 if (global_init_p || TARGET_DEBUG_TARGET)
3285 if (TARGET_DEBUG_REG)
3286 rs6000_debug_reg_global ();
3288 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3289 fprintf (stderr,
3290 "SImode variable mult cost = %d\n"
3291 "SImode constant mult cost = %d\n"
3292 "SImode short constant mult cost = %d\n"
3293 "DImode multipliciation cost = %d\n"
3294 "SImode division cost = %d\n"
3295 "DImode division cost = %d\n"
3296 "Simple fp operation cost = %d\n"
3297 "DFmode multiplication cost = %d\n"
3298 "SFmode division cost = %d\n"
3299 "DFmode division cost = %d\n"
3300 "cache line size = %d\n"
3301 "l1 cache size = %d\n"
3302 "l2 cache size = %d\n"
3303 "simultaneous prefetches = %d\n"
3304 "\n",
3305 rs6000_cost->mulsi,
3306 rs6000_cost->mulsi_const,
3307 rs6000_cost->mulsi_const9,
3308 rs6000_cost->muldi,
3309 rs6000_cost->divsi,
3310 rs6000_cost->divdi,
3311 rs6000_cost->fp,
3312 rs6000_cost->dmul,
3313 rs6000_cost->sdiv,
3314 rs6000_cost->ddiv,
3315 rs6000_cost->cache_line_size,
3316 rs6000_cost->l1_cache_size,
3317 rs6000_cost->l2_cache_size,
3318 rs6000_cost->simultaneous_prefetches);
3322 #if TARGET_MACHO
3323 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3325 static void
3326 darwin_rs6000_override_options (void)
3328  /* The Darwin ABI always includes AltiVec, so it cannot be (validly) turned
3329     off.  */
3330 rs6000_altivec_abi = 1;
3331 TARGET_ALTIVEC_VRSAVE = 1;
3332 rs6000_current_abi = ABI_DARWIN;
3334 if (DEFAULT_ABI == ABI_DARWIN
3335 && TARGET_64BIT)
3336 darwin_one_byte_bool = 1;
3338 if (TARGET_64BIT && ! TARGET_POWERPC64)
3340 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3341 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3344  /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3345     optimisation, and it will not work with the most generic case (where the
3346     symbol is an undefined external, but there is no symbol stub).  */
3347 if (TARGET_64BIT)
3348 rs6000_default_long_calls = 0;
3350 /* ld_classic is (so far) still used for kernel (static) code, and supports
3351 the JBSR longcall / branch islands. */
3352 if (flag_mkernel)
3354 rs6000_default_long_calls = 1;
3356 /* Allow a kext author to do -mkernel -mhard-float. */
3357 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3358 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3361 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3362 Altivec. */
3363 if (!flag_mkernel && !flag_apple_kext
3364 && TARGET_64BIT
3365 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3366 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3368 /* Unless the user (not the configurer) has explicitly overridden
3369     it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3370 G4 unless targeting the kernel. */
3371 if (!flag_mkernel
3372 && !flag_apple_kext
3373 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3374 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3375 && ! OPTION_SET_P (rs6000_cpu_index))
3377 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3380 #endif
3382 /* If not otherwise specified by a target, make 'long double' equivalent to
3383 'double'. */
3385 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3386 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3387 #endif
3389 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3390 to clobber the XER[CA] bit because clobbering that bit without telling
3391 the compiler worked just fine with versions of GCC before GCC 5, and
3392 breaking a lot of older code in ways that are hard to track down is
3393 not such a great idea. */
3395 static rtx_insn *
3396 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3397 vec<machine_mode> & /*input_modes*/,
3398 vec<const char *> & /*constraints*/,
3399 vec<rtx> &/*uses*/, vec<rtx> &clobbers,
3400 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3402 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3403 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3404 return NULL;
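/* A sketch of the user code this protects: an asm statement such as

     asm ("addic %0,%1,-1\n\taddze %0,%0" : "=r" (x) : "r" (y));

   silently modifies XER[CA]; adding CA_REGNO to the clobbers here keeps
   the compiler from assuming the carry bit survives any asm.  */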
3407 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3408 but is called when the optimize level is changed via an attribute or
3409 pragma or when it is reset at the end of the code affected by the
3410 attribute or pragma. It is not called at the beginning of compilation
3411   when TARGET_OPTION_OVERRIDE is called, so if you want to perform these
3412 actions then, you should have TARGET_OPTION_OVERRIDE call
3413 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3415 static void
3416 rs6000_override_options_after_change (void)
3418 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3419 turns -frename-registers on. */
3420 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3421 || (OPTION_SET_P (flag_unroll_all_loops)
3422 && flag_unroll_all_loops))
3424 if (!OPTION_SET_P (unroll_only_small_loops))
3425 unroll_only_small_loops = 0;
3426 if (!OPTION_SET_P (flag_rename_registers))
3427 flag_rename_registers = 1;
3428 if (!OPTION_SET_P (flag_cunroll_grow_size))
3429 flag_cunroll_grow_size = 1;
3431 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3432 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3434 /* One of the late-combine passes runs after register allocation
3435 and can match define_insn_and_splits that were previously used
3436 only before register allocation. Some of those define_insn_and_splits
3437 use gen_reg_rtx unconditionally. Disable late-combine by default
3438 until the define_insn_and_splits are fixed. */
3439 if (!OPTION_SET_P (flag_late_combine_instructions))
3440 flag_late_combine_instructions = 0;
3443 #ifdef TARGET_USES_LINUX64_OPT
3444 static void
3445 rs6000_linux64_override_options ()
3447 if (!OPTION_SET_P (rs6000_alignment_flags))
3448 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3449 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3451 if (DEFAULT_ABI != ABI_AIX)
3453 rs6000_current_abi = ABI_AIX;
3454 error (INVALID_64BIT, "call");
3456 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3457 if (ELFv2_ABI_CHECK)
3459 rs6000_current_abi = ABI_ELFv2;
3460 if (dot_symbols)
3461 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3463 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3465 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3466 error (INVALID_64BIT, "relocatable");
3468 if (rs6000_isa_flags & OPTION_MASK_EABI)
3470 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3471 error (INVALID_64BIT, "eabi");
3473 if (TARGET_PROTOTYPE)
3475 target_prototype = 0;
3476 error (INVALID_64BIT, "prototype");
3478 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3480 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3481 error ("%<-m64%> requires a PowerPC64 cpu");
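/* Default to the medium code model, which allows static data to be
   addressed with 32-bit offsets from the TOC pointer.  */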
3483 if (!OPTION_SET_P (rs6000_current_cmodel))
3484 SET_CMODEL (CMODEL_MEDIUM);
3485 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3487 if (OPTION_SET_P (rs6000_current_cmodel)
3488 && rs6000_current_cmodel != CMODEL_SMALL)
3489 error ("%<-mcmodel%> incompatible with other toc options");
3490 if (TARGET_MINIMAL_TOC)
3491 SET_CMODEL (CMODEL_SMALL);
3492 else if (TARGET_PCREL
3493 || (PCREL_SUPPORTED_BY_OS
3494 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3495 /* Ignore -mno-minimal-toc. */
3497 else
3498 SET_CMODEL (CMODEL_SMALL);
3500 if (rs6000_current_cmodel != CMODEL_SMALL)
3502 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3503 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3504 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3505 TARGET_NO_SUM_IN_TOC = 0;
3507 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3509 if (OPTION_SET_P (rs6000_pltseq))
3510 warning (0, "%qs unsupported for this ABI",
3511 "-mpltseq");
3512 rs6000_pltseq = false;
3515 else if (TARGET_64BIT)
3516 error (INVALID_32BIT, "32");
3517 else
3519 if (TARGET_PROFILE_KERNEL)
3521 profile_kernel = 0;
3522 error (INVALID_32BIT, "profile-kernel");
3524 if (OPTION_SET_P (rs6000_current_cmodel))
3526 SET_CMODEL (CMODEL_SMALL);
3527 error (INVALID_32BIT, "cmodel");
3531 #endif
3533 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3534 This support is only in little endian GLIBC 2.32 or newer. */
3535 static bool
3536 glibc_supports_ieee_128bit (void)
3538 #ifdef OPTION_GLIBC
3539 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3540 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
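/* E.g. glibc 2.32 encodes as 2 * 1000 + 32 == 2032.  */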
3541 return true;
3542 #endif /* OPTION_GLIBC. */
3544 return false;
3547 /* Override command line options.
3549 Combine build-specific configuration information with options
3550 specified on the command line to set various state variables which
3551 influence code generation, optimization, and expansion of built-in
3552 functions. Assure that command-line configuration preferences are
3553 compatible with each other and with the build configuration; issue
3554 warnings while adjusting configuration or error messages while
3555 rejecting configuration.
3557 Upon entry to this function:
3559 This function is called once at the beginning of
3560 compilation, and then again at the start and end of compiling
3561 each section of code that has a different configuration, as
3562 indicated, for example, by adding the
3564 __attribute__((__target__("cpu=power9")))
3566 qualifier to a function definition or, for example, by bracketing
3567 code between
3569 #pragma GCC target("altivec")
3573 #pragma GCC reset_options
3575 directives. Parameter global_init_p is true for the initial
3576 invocation, which initializes global variables, and false for all
3577 subsequent invocations.
3580 Various global state information is assumed to be valid. This
3581 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3582 default CPU specified at build configure time, TARGET_DEFAULT,
3583 representing the default set of option flags for the default
3584 target, and OPTION_SET_P (rs6000_isa_flags), representing
3585 which options were requested on the command line.
3587 Upon return from this function:
3589 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3590 was set by name on the command line. Additionally, if certain
3591 attributes are automatically enabled or disabled by this function
3592 in order to assure compatibility between options and
3593 configuration, the flags associated with those attributes are
3594 also set. By setting these "explicit bits", we avoid the risk
3595 that other code might accidentally overwrite these particular
3596 attributes with "default values".
3598 The various bits of rs6000_isa_flags are set to indicate the
3599 target options that have been selected for the most current
3600 compilation efforts. This has the effect of also turning on the
3601 associated TARGET_XXX values since these are macros which are
3602 generally defined to test the corresponding bit of the
3603 rs6000_isa_flags variable.
3605 Various other global variables and fields of global structures
3606 (over 50 in all) are initialized to reflect the desired options
3607 for the most current compilation efforts. */
3609 static bool
3610 rs6000_option_override_internal (bool global_init_p)
3612 bool ret = true;
3614 HOST_WIDE_INT set_masks;
3615 HOST_WIDE_INT ignore_masks;
3616 int cpu_index = -1;
3617 int tune_index;
3618 struct cl_target_option *main_target_opt
3619 = ((global_init_p || target_option_default_node == NULL)
3620 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3622 /* Print defaults. */
3623 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3624 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3626 /* Remember the explicit arguments. */
3627 if (global_init_p)
3628 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3630 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3631 library functions, so warn about it. The flag may be useful for
3632 performance studies from time to time though, so don't disable it
3633 entirely. */
3634 if (OPTION_SET_P (rs6000_alignment_flags)
3635 && rs6000_alignment_flags == MASK_ALIGN_POWER
3636 && DEFAULT_ABI == ABI_DARWIN
3637 && TARGET_64BIT)
3638 warning (0, "%qs is not supported for 64-bit Darwin;"
3639 " it is incompatible with the installed C and C++ libraries",
3640 "-malign-power");
3642 /* Numerous experiments show that IRA-based loop pressure
3643 calculation works better for RTL loop invariant motion on targets
3644 with enough (>= 32) registers. It is an expensive optimization.
3645 So it is on only for peak performance. */
3646 if (optimize >= 3 && global_init_p
3647 && !OPTION_SET_P (flag_ira_loop_pressure))
3648 flag_ira_loop_pressure = 1;
3650 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3651 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3652 options were already specified. */
3653 if (flag_sanitize & SANITIZE_USER_ADDRESS
3654 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3655 flag_asynchronous_unwind_tables = 1;
3657 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3658 loop unroller is active. It is only checked during unrolling, so
3659 we can just set it on by default. */
3660 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3661 flag_variable_expansion_in_unroller = 1;
3663 /* Set the pointer size. */
3664 if (TARGET_64BIT)
3666 rs6000_pmode = DImode;
3667 rs6000_pointer_size = 64;
3669 else
3671 rs6000_pmode = SImode;
3672 rs6000_pointer_size = 32;
3675 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3676 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3677 must explicitly specify it and we won't interfere with the user's
3678 specification. */
3680 set_masks = POWERPC_MASKS;
3681 #ifdef OS_MISSING_ALTIVEC
3682 if (OS_MISSING_ALTIVEC)
3683 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3684 | OTHER_VSX_VECTOR_MASKS);
3685 #endif
3687 /* Don't override by the processor default if given explicitly. */
3688 set_masks &= ~rs6000_isa_flags_explicit;
3690 /* Without option powerpc64 specified explicitly, we need to ensure that
3691 powerpc64 is always enabled for 64-bit here, otherwise some following
3692 checks can use an unexpected TARGET_POWERPC64 value. */
3693 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3694 && TARGET_64BIT)
3696 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3697 /* Need to stop powerpc64 from being unset in later processing,
3698 so clear it in set_masks. But as PR108240 shows, to keep it
3699 consistent with before, we only want to do this if 64-bit
3700 is enabled explicitly. This is a hack, revisit this later. */
3701 if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
3702 set_masks &= ~OPTION_MASK_POWERPC64;
3705 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3706 the cpu in a target attribute or pragma, but did not specify a tuning
3707 option, use the cpu for the tuning option rather than the option specified
3708 with -mtune on the command line. Process a '--with-cpu' configuration
3709 request as an implicit --cpu. */
3710 if (rs6000_cpu_index >= 0)
3711 cpu_index = rs6000_cpu_index;
3712 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3713 cpu_index = main_target_opt->x_rs6000_cpu_index;
3714 else if (OPTION_TARGET_CPU_DEFAULT)
3715 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3717 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3718 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3719 with those from the cpu, except for options that were explicitly set. If
3720 we don't have a cpu, do not override the target bits set in
3721 TARGET_DEFAULT. */
3722 if (cpu_index >= 0)
3724 rs6000_cpu_index = cpu_index;
3725 rs6000_isa_flags &= ~set_masks;
3726 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3727 & set_masks);
3729 else
3731 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3732 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3733 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3734 to using rs6000_isa_flags, we need to do the initialization here.
3736 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3737 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3738 HOST_WIDE_INT flags;
3739 if (TARGET_DEFAULT)
3740 flags = TARGET_DEFAULT;
3741 else
3743 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3744 const char *default_cpu = (!TARGET_POWERPC64
3745 ? "powerpc"
3746 : (BYTES_BIG_ENDIAN
3747 ? "powerpc64"
3748 : "powerpc64le"));
3749 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3750 flags = processor_target_table[default_cpu_index].target_enable;
3752 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3755 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3756 since they do not save and restore the high half of the GPRs correctly
3757 in all cases. If the user explicitly specifies it, we won't interfere
3758 with the user's specification. */
3759 #ifdef OS_MISSING_POWERPC64
3760 if (OS_MISSING_POWERPC64
3761 && TARGET_32BIT
3762 && TARGET_POWERPC64
3763 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3764 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3765 #endif
3767 if (rs6000_tune_index >= 0)
3768 tune_index = rs6000_tune_index;
3769 else if (cpu_index >= 0)
3770 rs6000_tune_index = tune_index = cpu_index;
3771 else
3773 size_t i;
3774 enum processor_type tune_proc
3775 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3777 tune_index = -1;
3778 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3779 if (processor_target_table[i].processor == tune_proc)
3781 tune_index = i;
3782 break;
3786 if (cpu_index >= 0)
3787 rs6000_cpu = processor_target_table[cpu_index].processor;
3788 else
3789 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3791 gcc_assert (tune_index >= 0);
3792 rs6000_tune = processor_target_table[tune_index].processor;
3794 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3795 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3796 || rs6000_cpu == PROCESSOR_PPCE5500)
3798 if (TARGET_ALTIVEC)
3799 error ("AltiVec not supported in this target");
3802 /* If we are optimizing big endian systems for space, use the load/store
3803 multiple instructions. */
3804 if (BYTES_BIG_ENDIAN && optimize_size)
3805 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
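/* The "|= ~explicit & mask" idiom above enables the flag only when the
   user did not set -m[no-]multiple explicitly on the command line.  */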
3807 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3808 because the hardware doesn't support the instructions used in little
3809 endian mode, and they cause an alignment trap. The 750 does not cause an
3810 alignment trap (except when the target is unaligned). */
3812 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3814 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3815 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3816 warning (0, "%qs is not supported on little endian systems",
3817 "-mmultiple");
3820 /* If little-endian, default to -mstrict-align on older processors. */
3821 if (!BYTES_BIG_ENDIAN
3822 && !(processor_target_table[tune_index].target_enable
3823 & OPTION_MASK_POWER8))
3824 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3826 /* Add some warnings for VSX. */
3827 if (TARGET_VSX)
3829 bool explicit_vsx_p = rs6000_isa_flags_explicit & OPTION_MASK_VSX;
3830 if (!TARGET_HARD_FLOAT)
3832 if (explicit_vsx_p)
3834 if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)
3835 error ("%<-mvsx%> and %<-msoft-float%> are incompatible");
3836 else
3837 warning (0, N_("%<-mvsx%> requires hardware floating-point"));
3839 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3840 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3842 else if (TARGET_AVOID_XFORM > 0)
3844 if (explicit_vsx_p && OPTION_SET_P (TARGET_AVOID_XFORM))
3845 error ("%<-mvsx%> and %<-mavoid-indexed-addresses%>"
3846 " are incompatible");
3847 else
3848 warning (0, N_("%<-mvsx%> needs indexed addressing"));
3849 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3850 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3852 else if (!TARGET_ALTIVEC
3853 && (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3855 if (explicit_vsx_p)
3856 error ("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3857 else
3858 warning (0, N_("%<-mno-altivec%> disables vsx"));
3859 rs6000_isa_flags &= ~OPTION_MASK_VSX;
3860 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3864 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3865 the -mcpu setting to enable options that conflict. */
3866 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3867 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3868 | OPTION_MASK_ALTIVEC
3869 | OPTION_MASK_VSX)) != 0)
3870 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO)
3871 & ~rs6000_isa_flags_explicit);
3873 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3874 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3876 #ifdef XCOFF_DEBUGGING_INFO
3877 /* For AIX default to 64-bit DWARF. */
3878 if (!OPTION_SET_P (dwarf_offset_size))
3879 dwarf_offset_size = POINTER_SIZE_UNITS;
3880 #endif
3882 /* Handle explicit -mno-{altivec,vsx} and turn off all of
3883 the options that depend on those flags. */
3884 ignore_masks = rs6000_disable_incompatible_switches ();
3886 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3887 unless the user explicitly used the -mno-<option> to disable the code. */
3888 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3889 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3890 else if (TARGET_P9_MINMAX)
3892 if (cpu_index >= 0)
3894 if (cpu_index == PROCESSOR_POWER9)
3896 /* legacy behavior: allow -mcpu=power9 with certain
3897 capabilities explicitly disabled. */
3898 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3900 else
3901 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3902 "for <xxx> less than power9", "-mcpu");
3904 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3905 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3906 & rs6000_isa_flags_explicit))
3907 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3908 were explicitly cleared. */
3909 error ("%qs incompatible with explicitly disabled options",
3910 "-mpower9-minmax");
3911 else
3912 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3914 else if (TARGET_P8_VECTOR || TARGET_POWER8 || TARGET_CRYPTO)
3915 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3916 else if (TARGET_VSX)
3917 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3918 else if (TARGET_POPCNTD)
3919 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3920 else if (TARGET_DFP)
3921 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3922 else if (TARGET_CMPB)
3923 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3924 else if (TARGET_FPRND)
3925 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3926 else if (TARGET_POPCNTB)
3927 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3928 else if (TARGET_ALTIVEC)
3929 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3931 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3932 target attribute or pragma which automatically enables both options,
3933 unless the altivec ABI was set. This is set by default for 64-bit, but
3934 not for 32-bit. Don't move this before the above code using ignore_masks,
3935 since it can reset the cleared VSX/ALTIVEC flag again. */
3936 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3938 rs6000_isa_flags &= ~(OPTION_MASK_VSX & ~rs6000_isa_flags_explicit);
3939 /* Don't mask off ALTIVEC if it is enabled by an explicit VSX. */
3940 if (!TARGET_VSX)
3941 rs6000_isa_flags &= ~(OPTION_MASK_ALTIVEC & ~rs6000_isa_flags_explicit);
3944 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3946 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3947 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3948 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3951 if (!TARGET_FPRND && TARGET_VSX)
3953 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3954 /* TARGET_VSX = 1 implies Power7 and newer. */
3955 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3956 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3959 /* Assert !TARGET_VSX if !TARGET_ALTIVEC; this keeps the adjustments
3960 below that are based on either !TARGET_VSX or !TARGET_ALTIVEC concise. */
3961 gcc_assert (TARGET_ALTIVEC || !TARGET_VSX);
3963 if (TARGET_P8_VECTOR && !TARGET_VSX)
3964 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3966 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3968 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3969 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3970 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3973 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3974 silently turn off quad memory mode. */
3975 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3977 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3978 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3980 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3981 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3983 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3984 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3987 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3988 the words are reversed, but atomic operations can still be done by
3989 swapping the words. */
3990 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3992 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3993 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3994 "mode"));
3996 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3999 /* Assume if the user asked for normal quad memory instructions, they want
4000 the atomic versions as well, unless they explicitly told us not to use quad
4001 word atomic instructions. */
4002 if (TARGET_QUAD_MEMORY
4003 && !TARGET_QUAD_MEMORY_ATOMIC
4004 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4005 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4007 /* If we can shrink-wrap the TOC register save separately, then use
4008 -msave-toc-indirect unless explicitly disabled. */
4009 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4010 && flag_shrink_wrap_separate
4011 && optimize_function_for_speed_p (cfun))
4012 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4014 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4015 generating power8 instructions. Power9 does not optimize power8 fusion
4016 cases. */
4017 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4019 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4020 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4021 else
4022 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4025 /* Setting additional fusion flags turns on base fusion. */
4026 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4028 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4030 if (TARGET_P8_FUSION_SIGN)
4031 error ("%qs requires %qs", "-mpower8-fusion-sign",
4032 "-mpower8-fusion");
4034 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4036 else
4037 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4040 /* Power8 does not fuse sign extended loads with the addis. If we are
4041 optimizing at high levels for speed, convert a sign extended load into a
4042 zero extending load, and an explicit sign extension. */
4043 if (TARGET_P8_FUSION
4044 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4045 && optimize_function_for_speed_p (cfun)
4046 && optimize >= 3)
4047 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4049 /* ISA 3.0 vector instructions include ISA 2.07. */
4050 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4051 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4053 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4054 support. If we only have ISA 2.06 support, and the user did not specify
4055 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4056 but we don't enable the full vectorization support. */
4057 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4058 TARGET_ALLOW_MOVMISALIGN = 1;
4060 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4062 if (TARGET_ALLOW_MOVMISALIGN > 0
4063 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4064 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4066 TARGET_ALLOW_MOVMISALIGN = 0;
4069 /* Determine when unaligned vector accesses are permitted, and when
4070 they are preferred over masked Altivec loads. Note that if
4071 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4072 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4073 not true. */
4074 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4076 if (!TARGET_VSX)
4078 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4079 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4081 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4084 else if (!TARGET_ALLOW_MOVMISALIGN)
4086 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4087 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4088 "-mallow-movmisalign");
4090 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4094 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4096 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4097 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4098 else
4099 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4102 /* Use long double size to select the appropriate long double. We use
4103 TYPE_PRECISION to differentiate the 3 different long double types. We map
4104 128 into the precision used for TFmode. */
4105 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4106 ? 64
4107 : 128);
4109 /* Set long double size before the IEEE 128-bit tests. */
4110 if (!OPTION_SET_P (rs6000_long_double_type_size))
4112 if (main_target_opt != NULL
4113 && (main_target_opt->x_rs6000_long_double_type_size
4114 != default_long_double_size))
4115 error ("target attribute or pragma changes %<long double%> size");
4116 else
4117 rs6000_long_double_type_size = default_long_double_size;
4120 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4121 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4122 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4123 those systems will not pick up this default. Warn if the user changes the
4124 default unless -Wno-psabi. */
4125 if (!OPTION_SET_P (rs6000_ieeequad))
4126 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4128 else if (TARGET_LONG_DOUBLE_128)
4130 if (global_options.x_rs6000_ieeequad
4131 && (!TARGET_POPCNTD || !TARGET_VSX))
4132 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4134 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4136 /* Determine if the user can change the default long double type at
4137 compilation time. You need GLIBC 2.32 or newer to be able to
4138 change the long double type. Only issue one warning. */
4139 static bool warned_change_long_double;
4141 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4143 warned_change_long_double = true;
4144 if (TARGET_IEEEQUAD)
4145 warning (OPT_Wpsabi, "Using IEEE extended precision "
4146 "%<long double%>");
4147 else
4148 warning (OPT_Wpsabi, "Using IBM extended precision "
4149 "%<long double%>");
4154 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4155 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4156 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4157 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4158 the keyword as well as the type. */
4159 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4161 /* IEEE 128-bit floating point requires VSX support. */
4162 if (TARGET_FLOAT128_KEYWORD)
4164 if (!TARGET_VSX)
4166 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4167 error ("%qs requires VSX support", "-mfloat128");
4169 TARGET_FLOAT128_TYPE = 0;
4170 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4171 | OPTION_MASK_FLOAT128_HW);
4173 else if (!TARGET_FLOAT128_TYPE)
4175 TARGET_FLOAT128_TYPE = 1;
4176 warning (0, "The %<-mfloat128%> option may not be fully supported");
4180 /* Enable the __float128 keyword under Linux by default. */
4181 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4182 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4183 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4185 /* If we are supporting the float128 type and full ISA 3.0 support,
4186 enable -mfloat128-hardware by default. However, don't enable the
4187 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4188 because sometimes the compiler wants to put things in an integer
4189 container, and if we don't have __int128 support, it is impossible. */
4190 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4191 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4192 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4193 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4195 if (TARGET_FLOAT128_HW
4196 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4198 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4199 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4201 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4204 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4206 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4207 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4209 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4212 /* Enable -mprefixed by default on power10 systems. */
4213 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4214 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4216 /* -mprefixed requires -mcpu=power10 (or later). */
4217 else if (TARGET_PREFIXED && !TARGET_POWER10)
4219 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4220 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4222 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4225 /* -mpcrel requires prefixed load/store addressing. */
4226 if (TARGET_PCREL && !TARGET_PREFIXED)
4228 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4229 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4231 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4234 /* Print the options after updating the defaults. */
4235 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4236 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4238 /* E500mc does "better" if we inline more aggressively. Respect the
4239 user's opinion, though. */
4240 if (rs6000_block_move_inline_limit == 0
4241 && (rs6000_tune == PROCESSOR_PPCE500MC
4242 || rs6000_tune == PROCESSOR_PPCE500MC64
4243 || rs6000_tune == PROCESSOR_PPCE5500
4244 || rs6000_tune == PROCESSOR_PPCE6500))
4245 rs6000_block_move_inline_limit = 128;
4247 /* store_one_arg depends on expand_block_move to handle at least the
4248 size of reg_parm_stack_space. */
4249 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4250 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4252 if (global_init_p)
4254 /* If the appropriate debug option is enabled, replace the target hooks
4255 with debug versions that call the real version and then prints
4256 debugging information. */
4257 if (TARGET_DEBUG_COST)
4259 targetm.rtx_costs = rs6000_debug_rtx_costs;
4260 targetm.address_cost = rs6000_debug_address_cost;
4261 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4264 if (TARGET_DEBUG_ADDR)
4266 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4267 targetm.legitimize_address = rs6000_debug_legitimize_address;
4268 rs6000_secondary_reload_class_ptr
4269 = rs6000_debug_secondary_reload_class;
4270 targetm.secondary_memory_needed
4271 = rs6000_debug_secondary_memory_needed;
4272 targetm.can_change_mode_class
4273 = rs6000_debug_can_change_mode_class;
4274 rs6000_preferred_reload_class_ptr
4275 = rs6000_debug_preferred_reload_class;
4276 rs6000_mode_dependent_address_ptr
4277 = rs6000_debug_mode_dependent_address;
4280 if (rs6000_veclibabi_name)
4282 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4283 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4284 else
4286 error ("unknown vectorization library ABI type in "
4287 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4288 ret = false;
4293 /* Enable Altivec ABI for AIX -maltivec. */
4294 if (TARGET_XCOFF
4295 && (TARGET_ALTIVEC || TARGET_VSX)
4296 && !OPTION_SET_P (rs6000_altivec_abi))
4298 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4299 error ("target attribute or pragma changes AltiVec ABI");
4300 else
4301 rs6000_altivec_abi = 1;
4304 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4305 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4306 be explicitly overridden in either case. */
4307 if (TARGET_ELF)
4309 if (!OPTION_SET_P (rs6000_altivec_abi)
4310 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4312 if (main_target_opt != NULL &&
4313 !main_target_opt->x_rs6000_altivec_abi)
4314 error ("target attribute or pragma changes AltiVec ABI");
4315 else
4316 rs6000_altivec_abi = 1;
4320 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4321 So far, the only darwin64 targets are also MACH-O. */
4322 if (TARGET_MACHO
4323 && DEFAULT_ABI == ABI_DARWIN
4324 && TARGET_64BIT)
4326 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4327 error ("target attribute or pragma changes darwin64 ABI");
4328 else
4330 rs6000_darwin64_abi = 1;
4331 /* Default to natural alignment, for better performance. */
4332 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4336 /* Place FP constants in the constant pool instead of TOC
4337 if section anchors enabled. */
4338 if (flag_section_anchors
4339 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4340 TARGET_NO_FP_IN_TOC = 1;
4342 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4343 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4345 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4346 SUBTARGET_OVERRIDE_OPTIONS;
4347 #endif
4348 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4349 SUBSUBTARGET_OVERRIDE_OPTIONS;
4350 #endif
4351 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4352 SUB3TARGET_OVERRIDE_OPTIONS;
4353 #endif
4355 /* If the ABI has support for PC-relative relocations, enable it by default.
4356 This test depends on the sub-target tests above setting the code model to
4357 medium for ELF v2 systems. */
4358 if (PCREL_SUPPORTED_BY_OS
4359 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4360 rs6000_isa_flags |= OPTION_MASK_PCREL;
4362 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4363 after the subtarget override options are done. */
4364 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4366 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4367 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4369 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4372 /* Enable -mmma by default on power10 systems. */
4373 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4374 rs6000_isa_flags |= OPTION_MASK_MMA;
4376 /* Turn off vector pair/mma options on non-power10 systems. */
4377 else if (!TARGET_POWER10 && TARGET_MMA)
4379 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4380 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4382 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4385 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4386 generating power10 instructions. */
4387 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4389 if (rs6000_tune == PROCESSOR_POWER10
4390 || rs6000_tune == PROCESSOR_POWER11)
4391 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4392 else
4393 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4396 /* MMA requires SIMD support, as ISA 3.1 states, and our implementation
4397 of insns such as "*movoo" uses vector pair accesses, which use VSX
4398 registers. So make MMA require VSX support here. */
4399 if (TARGET_MMA && !TARGET_VSX)
4401 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4402 error ("%qs requires %qs", "-mmma", "-mvsx");
4403 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4406 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4407 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4409 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4410 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4412 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4413 && rs6000_tune != PROCESSOR_POWER5
4414 && rs6000_tune != PROCESSOR_POWER6
4415 && rs6000_tune != PROCESSOR_POWER7
4416 && rs6000_tune != PROCESSOR_POWER8
4417 && rs6000_tune != PROCESSOR_POWER9
4418 && rs6000_tune != PROCESSOR_POWER10
4419 && rs6000_tune != PROCESSOR_POWER11
4420 && rs6000_tune != PROCESSOR_PPCA2
4421 && rs6000_tune != PROCESSOR_CELL
4422 && rs6000_tune != PROCESSOR_PPC476);
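/* POWER4/5/7/8 dispatch instructions in fixed-size groups, so the
   scheduler models dispatch-group boundaries on those cpus.  */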
4423 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4424 || rs6000_tune == PROCESSOR_POWER5
4425 || rs6000_tune == PROCESSOR_POWER7
4426 || rs6000_tune == PROCESSOR_POWER8);
4427 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4428 || rs6000_tune == PROCESSOR_POWER5
4429 || rs6000_tune == PROCESSOR_POWER6
4430 || rs6000_tune == PROCESSOR_POWER7
4431 || rs6000_tune == PROCESSOR_POWER8
4432 || rs6000_tune == PROCESSOR_POWER9
4433 || rs6000_tune == PROCESSOR_POWER10
4434 || rs6000_tune == PROCESSOR_POWER11
4435 || rs6000_tune == PROCESSOR_PPCE500MC
4436 || rs6000_tune == PROCESSOR_PPCE500MC64
4437 || rs6000_tune == PROCESSOR_PPCE5500
4438 || rs6000_tune == PROCESSOR_PPCE6500);
4440 /* Allow debug switches to override the above settings. These are set to -1
4441 in rs6000.opt to indicate the user hasn't directly set the switch. */
4442 if (TARGET_ALWAYS_HINT >= 0)
4443 rs6000_always_hint = TARGET_ALWAYS_HINT;
4445 if (TARGET_SCHED_GROUPS >= 0)
4446 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4448 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4449 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4451 rs6000_sched_restricted_insns_priority
4452 = (rs6000_sched_groups ? 1 : 0);
4454 /* Handle -msched-costly-dep option. */
4455 rs6000_sched_costly_dep
4456 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4458 if (rs6000_sched_costly_dep_str)
4460 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4461 rs6000_sched_costly_dep = no_dep_costly;
4462 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4463 rs6000_sched_costly_dep = all_deps_costly;
4464 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4465 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4466 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4467 rs6000_sched_costly_dep = store_to_load_dep_costly;
4468 else
4469 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4470 atoi (rs6000_sched_costly_dep_str));
4473 /* Handle -minsert-sched-nops option. */
4474 rs6000_sched_insert_nops
4475 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4477 if (rs6000_sched_insert_nops_str)
4479 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4480 rs6000_sched_insert_nops = sched_finish_none;
4481 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4482 rs6000_sched_insert_nops = sched_finish_pad_groups;
4483 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4484 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4485 else
4486 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4487 atoi (rs6000_sched_insert_nops_str));
4490 /* Handle the stack protector options. */
4491 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4492 #ifdef TARGET_THREAD_SSP_OFFSET
4493 rs6000_stack_protector_guard = SSP_TLS;
4494 #else
4495 rs6000_stack_protector_guard = SSP_GLOBAL;
4496 #endif
4498 #ifdef TARGET_THREAD_SSP_OFFSET
4499 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4500 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
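/* r13 is the TLS thread pointer for the 64-bit ABI and r2 for 32-bit,
   so the guard lives at a fixed offset from the thread pointer.  */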
4501 #endif
4503 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4505 char *endp;
4506 const char *str = rs6000_stack_protector_guard_offset_str;
4508 errno = 0;
4509 long offset = strtol (str, &endp, 0);
4510 if (!*str || *endp || errno)
4511 error ("%qs is not a valid number in %qs", str,
4512 "-mstack-protector-guard-offset=");
4514 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4515 || (TARGET_64BIT && (offset & 3)))
4516 error ("%qs is not a valid offset in %qs", str,
4517 "-mstack-protector-guard-offset=");
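/* The 16-bit range check matches the signed displacement field of the
   load; in 64-bit mode the DS-form ld additionally requires the low two
   bits of the offset to be zero.  */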
4519 rs6000_stack_protector_guard_offset = offset;
4522 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4524 const char *str = rs6000_stack_protector_guard_reg_str;
4525 int reg = decode_reg_name (str);
4527 if (!IN_RANGE (reg, 1, 31))
4528 error ("%qs is not a valid base register in %qs", str,
4529 "-mstack-protector-guard-reg=");
4531 rs6000_stack_protector_guard_reg = reg;
4534 if (rs6000_stack_protector_guard == SSP_TLS
4535 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4536 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4538 if (global_init_p)
4540 #ifdef TARGET_REGNAMES
4541 /* If the user desires alternate register names, copy in the
4542 alternate names now. */
4543 if (TARGET_REGNAMES)
4544 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4545 #endif
4547 /* Set aix_struct_return last, after the ABI is determined.
4548 If -maix-struct-return or -msvr4-struct-return was explicitly
4549 used, don't override with the ABI default. */
4550 if (!OPTION_SET_P (aix_struct_return))
4551 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4553 #if 0
4554 /* IBM XL compiler defaults to unsigned bitfields. */
4555 if (TARGET_XL_COMPAT)
4556 flag_signed_bitfields = 0;
4557 #endif
4559 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4560 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
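/* Pre-compute the name of the TOC anchor label (e.g. "LCTOC1").  */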
4562 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4564 /* We can only guarantee the availability of DI pseudo-ops when
4565 assembling for 64-bit targets. */
4566 if (!TARGET_64BIT)
4568 targetm.asm_out.aligned_op.di = NULL;
4569 targetm.asm_out.unaligned_op.di = NULL;
4573 /* Set branch target alignment, if not optimizing for size. */
4574 if (!optimize_size)
4576 /* Cell wants 8-byte alignment for dual issue. Titan wants 8-byte
4577 alignment to avoid misprediction by the branch predictor. */
4578 if (rs6000_tune == PROCESSOR_TITAN
4579 || rs6000_tune == PROCESSOR_CELL)
4581 if (flag_align_functions && !str_align_functions)
4582 str_align_functions = "8";
4583 if (flag_align_jumps && !str_align_jumps)
4584 str_align_jumps = "8";
4585 if (flag_align_loops && !str_align_loops)
4586 str_align_loops = "8";
4588 if (rs6000_align_branch_targets)
4590 if (flag_align_functions && !str_align_functions)
4591 str_align_functions = "16";
4592 if (flag_align_jumps && !str_align_jumps)
4593 str_align_jumps = "16";
4594 if (flag_align_loops && !str_align_loops)
4596 can_override_loop_align = 1;
4597 str_align_loops = "16";
4602 /* Arrange to save and restore machine status around nested functions. */
4603 init_machine_status = rs6000_init_machine_status;
4605 /* We should always be splitting complex arguments, but we can't break
4606 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4607 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4608 targetm.calls.split_complex_arg = NULL;
4610 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4611 if (DEFAULT_ABI == ABI_AIX)
4612 targetm.calls.custom_function_descriptors = 0;
4615 /* Initialize rs6000_cost with the appropriate target costs. */
4616 if (optimize_size)
4617 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4618 else
4619 switch (rs6000_tune)
4621 case PROCESSOR_RS64A:
4622 rs6000_cost = &rs64a_cost;
4623 break;
4625 case PROCESSOR_MPCCORE:
4626 rs6000_cost = &mpccore_cost;
4627 break;
4629 case PROCESSOR_PPC403:
4630 rs6000_cost = &ppc403_cost;
4631 break;
4633 case PROCESSOR_PPC405:
4634 rs6000_cost = &ppc405_cost;
4635 break;
4637 case PROCESSOR_PPC440:
4638 rs6000_cost = &ppc440_cost;
4639 break;
4641 case PROCESSOR_PPC476:
4642 rs6000_cost = &ppc476_cost;
4643 break;
4645 case PROCESSOR_PPC601:
4646 rs6000_cost = &ppc601_cost;
4647 break;
4649 case PROCESSOR_PPC603:
4650 rs6000_cost = &ppc603_cost;
4651 break;
4653 case PROCESSOR_PPC604:
4654 rs6000_cost = &ppc604_cost;
4655 break;
4657 case PROCESSOR_PPC604e:
4658 rs6000_cost = &ppc604e_cost;
4659 break;
4661 case PROCESSOR_PPC620:
4662 rs6000_cost = &ppc620_cost;
4663 break;
4665 case PROCESSOR_PPC630:
4666 rs6000_cost = &ppc630_cost;
4667 break;
4669 case PROCESSOR_CELL:
4670 rs6000_cost = &ppccell_cost;
4671 break;
4673 case PROCESSOR_PPC750:
4674 case PROCESSOR_PPC7400:
4675 rs6000_cost = &ppc750_cost;
4676 break;
4678 case PROCESSOR_PPC7450:
4679 rs6000_cost = &ppc7450_cost;
4680 break;
4682 case PROCESSOR_PPC8540:
4683 case PROCESSOR_PPC8548:
4684 rs6000_cost = &ppc8540_cost;
4685 break;
4687 case PROCESSOR_PPCE300C2:
4688 case PROCESSOR_PPCE300C3:
4689 rs6000_cost = &ppce300c2c3_cost;
4690 break;
4692 case PROCESSOR_PPCE500MC:
4693 rs6000_cost = &ppce500mc_cost;
4694 break;
4696 case PROCESSOR_PPCE500MC64:
4697 rs6000_cost = &ppce500mc64_cost;
4698 break;
4700 case PROCESSOR_PPCE5500:
4701 rs6000_cost = &ppce5500_cost;
4702 break;
4704 case PROCESSOR_PPCE6500:
4705 rs6000_cost = &ppce6500_cost;
4706 break;
4708 case PROCESSOR_TITAN:
4709 rs6000_cost = &titan_cost;
4710 break;
4712 case PROCESSOR_POWER4:
4713 case PROCESSOR_POWER5:
4714 rs6000_cost = &power4_cost;
4715 break;
4717 case PROCESSOR_POWER6:
4718 rs6000_cost = &power6_cost;
4719 break;
4721 case PROCESSOR_POWER7:
4722 rs6000_cost = &power7_cost;
4723 break;
4725 case PROCESSOR_POWER8:
4726 rs6000_cost = &power8_cost;
4727 break;
4729 case PROCESSOR_POWER9:
4730 rs6000_cost = &power9_cost;
4731 break;
4733 case PROCESSOR_POWER10:
4734 case PROCESSOR_POWER11:
4735 rs6000_cost = &power10_cost;
4736 break;
4738 case PROCESSOR_PPCA2:
4739 rs6000_cost = &ppca2_cost;
4740 break;
4742 default:
4743 gcc_unreachable ();
4746 if (global_init_p)
4748 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4749 param_simultaneous_prefetches,
4750 rs6000_cost->simultaneous_prefetches);
4751 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4752 param_l1_cache_size,
4753 rs6000_cost->l1_cache_size);
4754 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4755 param_l1_cache_line_size,
4756 rs6000_cost->cache_line_size);
4757 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4758 param_l2_cache_size,
4759 rs6000_cost->l2_cache_size);
4761 /* Increase loop peeling limits based on performance analysis. */
4762 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4763 param_max_peeled_insns, 400);
4764 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4765 param_max_completely_peeled_insns, 400);
4767 /* The lxvl/stxvl instructions don't perform well before Power10. */
4768 if (TARGET_POWER10)
4769 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4770 param_vect_partial_vector_usage, 1);
4771 else
4772 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4773 param_vect_partial_vector_usage, 0);
4775 /* Use the 'model' -fsched-pressure algorithm by default. */
4776 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4777 param_sched_pressure_algorithm,
4778 SCHED_PRESSURE_MODEL);
4780 /* If using typedef char *va_list, signal that
4781 __builtin_va_start (&ap, 0) can be optimized to
4782 ap = __builtin_next_arg (0). */
4783 if (DEFAULT_ABI != ABI_V4)
4784 targetm.expand_builtin_va_start = NULL;
4787 rs6000_override_options_after_change ();
4789 /* If not explicitly specified via option, decide whether to generate indexed
4790 load/store instructions. A value of -1 indicates that the
4791 initial value of this variable has not been overwritten. During
4792 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4793 if (TARGET_AVOID_XFORM == -1)
4794 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4795 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4796 need indexed accesses and the type used is the scalar type of the element
4797 being loaded or stored. */
4798 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4799 && !TARGET_ALTIVEC);
4801 /* Set the -mrecip options. */
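/* For example, -mrecip=rsqrtf,!divd enables the float reciprocal square
   root estimate while disabling the double-precision divide estimate;
   a leading '!' inverts the named option.  */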
4802 if (rs6000_recip_name)
4804 char *p = ASTRDUP (rs6000_recip_name);
4805 char *q;
4806 unsigned int mask, i;
4807 bool invert;
4809 while ((q = strtok (p, ",")) != NULL)
4811 p = NULL;
4812 if (*q == '!')
4814 invert = true;
4815 q++;
4817 else
4818 invert = false;
4820 if (!strcmp (q, "default"))
4821 mask = ((TARGET_RECIP_PRECISION)
4822 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4823 else
4825 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4826 if (!strcmp (q, recip_options[i].string))
4828 mask = recip_options[i].mask;
4829 break;
4832 if (i == ARRAY_SIZE (recip_options))
4834 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4835 invert = false;
4836 mask = 0;
4837 ret = false;
4841 if (invert)
4842 rs6000_recip_control &= ~mask;
4843 else
4844 rs6000_recip_control |= mask;
4848 /* We only support ROP protection on certain targets. */
4849 if (rs6000_rop_protect)
4851 /* Disallow CPU targets we don't support. */
4852 if (!TARGET_POWER8)
4853 error ("%<-mrop-protect%> requires %<-mcpu=power8%> or later");
4855 /* Disallow ABI targets we don't support. */
4856 if (DEFAULT_ABI != ABI_ELFv2)
4857 error ("%<-mrop-protect%> requires the ELFv2 ABI");
4860 /* Initialize all of the registers. */
4861 rs6000_init_hard_regno_mode_ok (global_init_p);
4863 /* Save the initial options in case the user uses function-specific options. */
4864 if (global_init_p)
4865 target_option_default_node = target_option_current_node
4866 = build_target_option_node (&global_options, &global_options_set);
4868 /* If not explicitly specified via option, decide whether to generate the
4869 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4870 if (TARGET_LINK_STACK == -1)
4871 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4873 /* Deprecate use of -mno-speculate-indirect-jumps. */
4874 if (!rs6000_speculate_indirect_jumps)
4875 warning (0, "%qs is deprecated and not recommended in any circumstances",
4876 "-mno-speculate-indirect-jumps");
4878 return ret;
4881 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4882 define the target cpu type. */
4884 static void
4885 rs6000_option_override (void)
4887 (void) rs6000_option_override_internal (true);
4891 /* Implement LOOP_ALIGN. */
4892 align_flags
4893 rs6000_loop_align (rtx label)
4895 basic_block bb;
4896 int ninsns;
4898 /* Don't override loop alignment if -falign-loops was specified. */
4899 if (!can_override_loop_align)
4900 return align_loops;
4902 bb = BLOCK_FOR_INSN (label);
4903 ninsns = num_loop_insns(bb->loop_father);
4905 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
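/* align_flags (5) below requests 2**5 == 32-byte alignment.  */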
4906 if (ninsns > 4 && ninsns <= 8
4907 && (rs6000_tune == PROCESSOR_POWER4
4908 || rs6000_tune == PROCESSOR_POWER5
4909 || rs6000_tune == PROCESSOR_POWER6
4910 || rs6000_tune == PROCESSOR_POWER7
4911 || rs6000_tune == PROCESSOR_POWER8))
4912 return align_flags (5);
4913 else
4914 return align_loops;
4917 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4918 after applying N iterations. This routine does not determine
4919 how many iterations are required to reach the desired alignment. */
4921 static bool
4922 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4924 if (is_packed)
4925 return false;
4927 if (TARGET_32BIT)
4929 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4930 return true;
4932 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4933 return true;
4935 return false;
4937 else
4939 if (TARGET_MACHO)
4940 return false;
4942 /* Assuming that all other types are naturally aligned. CHECKME! */
4943 return true;
4947 /* Return true if the vector misalignment factor is supported by the
4948 target. */
4949 static bool
4950 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4951 const_tree type,
4952 int misalignment,
4953 bool is_packed)
4955 if (TARGET_VSX)
4957 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4958 return true;
4960 /* Return false if the movmisalign pattern is not supported for this mode. */
4961 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4962 return false;
4964 if (misalignment == -1)
4966 /* Misalignment factor is unknown at compile time but we know
4967 it's word aligned. */
4968 if (rs6000_vector_alignment_reachable (type, is_packed))
4970 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4972 if (element_size == 64 || element_size == 32)
4973 return true;
4976 return false;
4979 /* VSX supports word-aligned vectors. */
4980 if (misalignment % 4 == 0)
4981 return true;
4983 return false;
4986 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4987 static int
4988 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4989 tree vectype, int misalign)
4991 unsigned elements;
4992 tree elem_type;
4994 switch (type_of_cost)
4996 case scalar_stmt:
4997 case scalar_store:
4998 case vector_stmt:
4999 case vector_store:
5000 case vec_to_scalar:
5001 case scalar_to_vec:
5002 case cond_branch_not_taken:
5003 return 1;
5004 case scalar_load:
5005 case vector_load:
5006 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5007 return 2;
5009 case vec_perm:
5010 /* Power7 has only one permute unit, make it a bit expensive. */
5011 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5012 return 3;
5013 else
5014 return 1;
5016 case vec_promote_demote:
5017 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5018 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5019 return 4;
5020 else
5021 return 1;
5023 case cond_branch_taken:
5024 return 3;
5026 case unaligned_load:
5027 case vector_gather_load:
5028 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5029 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5030 return 2;
5032 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5034 elements = TYPE_VECTOR_SUBPARTS (vectype);
5035 /* See PR102767, consider V1TI to keep consistency. */
5036 if (elements == 2 || elements == 1)
5037 /* Double word aligned. */
5038 return 4;
5040 if (elements == 4)
5042 switch (misalign)
5044 case 8:
5045 /* Double word aligned. */
5046 return 4;
5048 case -1:
5049 /* Unknown misalignment. */
5050 case 4:
5051 case 12:
5052 /* Word aligned. */
5053 return 33;
5055 default:
5056 gcc_unreachable ();
5061 if (TARGET_ALTIVEC)
5062 /* Misaligned loads are not supported. */
5063 gcc_unreachable ();
5065 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5066 return 4;
5068 case unaligned_store:
5069 case vector_scatter_store:
5070 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5071 return 1;
5073 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5075 elements = TYPE_VECTOR_SUBPARTS (vectype);
5076 /* See PR102767, consider V1TI to keep consistency. */
5077 if (elements == 2 || elements == 1)
5078 /* Double word aligned. */
5079 return 2;
5081 if (elements == 4)
5083 switch (misalign)
5085 case 8:
5086 /* Double word aligned. */
5087 return 2;
5089 case -1:
5090 /* Unknown misalignment. */
5091 case 4:
5092 case 12:
5093 /* Word aligned. */
5094 return 23;
5096 default:
5097 gcc_unreachable ();
5102 if (TARGET_ALTIVEC)
5103 /* Misaligned stores are not supported. */
5104 gcc_unreachable ();
5106 return 2;
5108 case vec_construct:
5109 /* This is a rough approximation assuming non-constant elements
5110 constructed into a vector via element insertion. FIXME:
5111 vec_construct is not granular enough for uniformly good
5112 decisions. If the initialization is a splat, this is
5113 cheaper than we estimate. Improve this someday. */
5114 elem_type = TREE_TYPE (vectype);
5115 /* 32-bit vectors loaded into registers are stored as double
5116 precision, so we need 2 permutes, 2 converts, and 1 merge
5117 to construct a vector of short floats from them. */
5118 if (SCALAR_FLOAT_TYPE_P (elem_type)
5119 && TYPE_PRECISION (elem_type) == 32)
5120 return 5;
5121 /* On POWER9, integer vector types are built up in GPRs and then
5122 use a direct move (2 cycles). For POWER8 this is even worse,
5123 as we need two direct moves and a merge, and the direct moves
5124 are five cycles. */
5125 else if (INTEGRAL_TYPE_P (elem_type))
5127 if (TARGET_P9_VECTOR)
5128 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5129 else
5130 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5132 else
5133 /* V2DFmode doesn't need a direct move. */
5134 return 2;
5136 default:
5137 gcc_unreachable ();
5141 /* Implement targetm.vectorize.preferred_simd_mode. */
5143 static machine_mode
5144 rs6000_preferred_simd_mode (scalar_mode mode)
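/* AltiVec/VSX vectors are 16 bytes wide, so ask for a vector mode with
   16 / GET_MODE_SIZE (mode) elements.  */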
5146 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5148 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5149 return vmode.require ();
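/* Otherwise fall back to word_mode, which tells the vectorizer there is
   no preferred SIMD mode for this element type.  */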
5151 return word_mode;
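/* For example, for SFmode this asks for mode_for_vector (SFmode, 4) and
returns V4SFmode when a vector unit handles it; DFmode maps to V2DFmode
on VSX and QImode to V16QImode.  Without a usable vector unit for the
mode, we fall back to word_mode, i.e. no preferred vector mode.  */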
5154 class rs6000_cost_data : public vector_costs
5156 public:
5157 using vector_costs::vector_costs;
5159 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5160 stmt_vec_info stmt_info, slp_tree, tree vectype,
5161 int misalign,
5162 vect_cost_model_location where) override;
5163 void finish_cost (const vector_costs *) override;
5165 protected:
5166 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5167 vect_cost_model_location, unsigned int);
5168 void density_test (loop_vec_info);
5169 void adjust_vect_cost_per_loop (loop_vec_info);
5170 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5172 /* Total number of vectorized stmts (loop only). */
5173 unsigned m_nstmts = 0;
5174 /* Total number of loads (loop only). */
5175 unsigned m_nloads = 0;
5176 /* Total number of stores (loop only). */
5177 unsigned m_nstores = 0;
5178 /* Reduction factor for suggesting unroll factor (loop only). */
5179 unsigned m_reduc_factor = 0;
5180 /* Possible extra penalized cost on vector construction (loop only). */
5181 unsigned m_extra_ctor_cost = 0;
5182 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5183 instruction is needed by the vectorization. */
5184 bool m_vect_nonmem = false;
5185 /* If this loop gets vectorized with emulated gather load. */
5186 bool m_gather_load = false;
5189 /* Test for likely overcommitment of vector hardware resources. If a
5190 loop iteration is relatively large, and too large a percentage of
5191 instructions in the loop are vectorized, the cost model may not
5192 adequately reflect delays from unavailable vector resources.
5193 Penalize the loop body cost for this case. */
5195 void
5196 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5198 /* This density test only cares about the cost of the vector version of
5199 the loop, so immediately return if we are passed costing for the scalar
5200 version (namely computing the single scalar iteration cost). */
5201 if (m_costing_for_scalar)
5202 return;
5204 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5205 basic_block *bbs = get_loop_body (loop);
5206 int nbbs = loop->num_nodes;
5207 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5209 for (int i = 0; i < nbbs; i++)
5211 basic_block bb = bbs[i];
5212 gimple_stmt_iterator gsi;
5214 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5216 gimple *stmt = gsi_stmt (gsi);
5217 if (is_gimple_debug (stmt))
5218 continue;
5220 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5222 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5223 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5224 not_vec_cost++;
5228 free (bbs);
5229 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5231 if (density_pct > rs6000_density_pct_threshold
5232 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5234 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5235 if (dump_enabled_p ())
5236 dump_printf_loc (MSG_NOTE, vect_location,
5237 "density %d%%, cost %d exceeds threshold, penalizing "
5238 "loop body cost by %u%%\n", density_pct,
5239 vec_cost + not_vec_cost, rs6000_density_penalty);
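/* For illustration (parameter values hypothetical): if vec_cost is 90
and not_vec_cost is 5, the density is (90 * 100) / 95 = 94%.  With a
percent threshold of 85, a size threshold of 70 and a penalty of 10,
the body cost 90 would become 90 * 110 / 100 = 99.  */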
5242 /* Check whether we need to penalize the body cost to account
5243 for excess strided or elementwise loads. */
5244 if (m_extra_ctor_cost > 0)
5246 gcc_assert (m_nloads <= m_nstmts);
5247 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5249 /* The loop is likely to be bound by the latency and execution resources
5250 of the many scalar loads used for strided or elementwise loads
5251 into a vector, if both conditions below hold:
5252 1. there are many loads, so it is easy to end up waiting a long
5253 time on the load units;
5254 2. loads make up a big proportion of all vectorized statements,
5255 so it is hard to schedule other statements to spread among
5256 the loads.
5257 One typical case is the innermost loop of the hotspot of SPEC2017
5258 503.bwaves_r without loop interchange. */
5259 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5260 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5262 m_costs[vect_body] += m_extra_ctor_cost;
5263 if (dump_enabled_p ())
5264 dump_printf_loc (MSG_NOTE, vect_location,
5265 "Found %u loads and "
5266 "load pct. %u%% exceed "
5267 "the threshold, "
5268 "penalizing loop body "
5269 "cost by extra cost %u "
5270 "for ctor.\n",
5271 m_nloads, load_pct,
5272 m_extra_ctor_cost);
5277 /* Implement targetm.vectorize.create_costs. */
5279 static vector_costs *
5280 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5282 return new rs6000_cost_data (vinfo, costing_for_scalar);
5285 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5286 For some statements, we would like to further fine-tune the cost on
5287 top of the rs6000_builtin_vectorization_cost handling, which has no
5288 information on statement operation codes etc. One typical case here is
5289 COND_EXPR: it is costed like a simple FXU instruction when evaluated
5290 for scalar cost, but it should be priced higher since it is transformed
5291 into either compare + branch or compare + isel instructions. */
5293 static unsigned
5294 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5295 struct _stmt_vec_info *stmt_info)
5297 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5298 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5300 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5301 if (subcode == COND_EXPR)
5302 return 2;
5305 return 0;
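/* For example, when costing the scalar version of
     x = (a > b) ? c : d;
(a GIMPLE_ASSIGN whose RHS code is COND_EXPR), the 2 returned here is
added on top of the generic cost, reflecting the compare + branch or
compare + isel sequence the statement expands to.  */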
5308 /* Helper function for add_stmt_cost. Check each statement cost
5309 entry, gather information and update the target_cost fields
5310 accordingly. */
5311 void
5312 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5313 stmt_vec_info stmt_info,
5314 vect_cost_model_location where,
5315 unsigned int orig_count)
5318 /* Check whether we're doing something other than just a copy loop.
5319 Not all such loops may be profitably vectorized; see
5320 rs6000_finish_cost. */
5321 if (kind == vec_to_scalar
5322 || kind == vec_perm
5323 || kind == vec_promote_demote
5324 || kind == vec_construct
5325 || kind == scalar_to_vec
5326 || (where == vect_body && kind == vector_stmt))
5327 m_vect_nonmem = true;
5329 /* Gather some information when we are costing the vectorized instruction
5330 for the statements located in a loop body. */
5331 if (!m_costing_for_scalar
5332 && is_a<loop_vec_info> (m_vinfo)
5333 && where == vect_body)
5335 m_nstmts += orig_count;
5337 if (kind == scalar_load
5338 || kind == vector_load
5339 || kind == unaligned_load
5340 || kind == vector_gather_load)
5342 m_nloads += orig_count;
5343 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5344 m_gather_load = true;
5346 else if (kind == scalar_store
5347 || kind == vector_store
5348 || kind == unaligned_store
5349 || kind == vector_scatter_store)
5350 m_nstores += orig_count;
5351 else if ((kind == scalar_stmt
5352 || kind == vector_stmt
5353 || kind == vec_to_scalar)
5354 && stmt_info
5355 && vect_is_reduction (stmt_info))
5357 /* Loop body contains normal int or fp operations and epilogue
5358 contains vector reduction. For simplicity, we assume int
5359 operation takes one cycle and fp operation takes one more. */
5360 tree lhs = gimple_get_lhs (stmt_info->stmt);
5361 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5362 unsigned int basic_cost = is_float ? 2 : 1;
5363 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5366 /* Power processors do not currently have instructions for strided
5367 and elementwise loads, and instead we must generate multiple
5368 scalar loads. This leads to undercounting of the cost. We
5369 account for this by scaling the construction cost by the number
5370 of elements involved, and saving this as extra cost that we may
5371 or may not need to apply. When finalizing the cost of the loop,
5372 the extra penalty is applied when the load density heuristics
5373 are satisfied. */
5374 if (kind == vec_construct && stmt_info
5375 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5376 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5377 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5379 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5380 unsigned int nunits = vect_nunits_for_cost (vectype);
5381 /* As PR103702 shows, it's possible that the vectorizer wants to
5382 cost only one unit here; there is no need to penalize that
5383 case, so simply return early. */
5384 if (nunits == 1)
5385 return;
5386 /* The i386 port adopts nunits * stmt_cost as the penalized cost
5387 for this kind of penalization; we used to follow it, but
5388 found it could result in an unreliable body cost, especially
5389 for V16QI/V8HI modes. To do better, we use this new
5390 heuristic: for each scalar load, use 2 as the penalized
5391 cost for the case with 2 nunits and 1 for the other
5392 cases. There is not much supporting theory; it is mainly
5393 concluded from broad performance evaluations on Power8,
5394 Power9 and Power10. One possibly related point is that
5395 vector construction for more units uses more insns, which
5396 gives more chances to schedule them better (even run in
5397 parallel when enough units are available at that time), so
5398 it seems reasonable not to penalize them as much. */
5399 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5400 unsigned int extra_cost = nunits * adjusted_cost;
5401 m_extra_ctor_cost += extra_cost;
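/* For instance, a V4SImode vec_construct fed by elementwise loads has
nunits 4, giving adjusted_cost 1 and extra_cost 4; a V2DImode one has
nunits 2, giving adjusted_cost 2 and likewise extra_cost 4.  These
amounts accumulate in m_extra_ctor_cost and are only applied if the
load density heuristics in density_test fire.  */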
5406 unsigned
5407 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5408 stmt_vec_info stmt_info, slp_tree,
5409 tree vectype, int misalign,
5410 vect_cost_model_location where)
5412 unsigned retval = 0;
5414 if (flag_vect_cost_model)
5416 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5417 misalign);
5418 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5419 /* Statements in an inner loop relative to the loop being
5420 vectorized are weighted more heavily. The value here is
5421 arbitrary and could potentially be improved with analysis. */
5422 unsigned int orig_count = count;
5423 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5424 m_costs[where] += retval;
5426 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5429 return retval;
5432 /* For some target specific vectorization cost which can't be handled per stmt,
5433 we check the requisite conditions and adjust the vectorization cost
5434 accordingly if satisfied. One typical example is to model shift cost for
5435 vector with length by counting number of required lengths under condition
5436 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5438 void
5439 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5441 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5443 rgroup_controls *rgc;
5444 unsigned int num_vectors_m1;
5445 unsigned int shift_cnt = 0;
5446 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5447 if (rgc->type)
5448 /* Each length needs one shift to fill into bits 0-7. */
5449 shift_cnt += num_vectors_m1 + 1;
5451 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5452 NULL_TREE, 0, vect_body);
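/* For example, with two length rgroups in use needing one and two
vectors respectively (num_vectors_m1 of 0 and 1), shift_cnt is 3, so
three scalar_stmt costs are added to the loop body.  */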
5456 /* Determine the suggested unroll factor by considering the factors below:
5458 - unroll option/pragma which can disable unrolling for this loop;
5459 - simple hardware resource model for non-memory vector insns;
5460 - aggressive heuristics when the iteration count is unknown:
5461 - reduction case to break cross-iteration dependency;
5462 - emulated gather load;
5463 - estimated iteration count when the iteration count is unknown;
5467 unsigned int
5468 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5470 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5472 /* Don't unroll if it's specified explicitly not to be unrolled. */
5473 if (loop->unroll == 1
5474 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5475 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5476 return 1;
5478 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
5479 /* Don't unroll if there are no vector instructions other than memory accesses. */
5480 if (nstmts_nonldst == 0)
5481 return 1;
5483 /* Consider breaking cross-iteration dependencies for reduction. */
5484 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5486 /* Use a simple hardware resource model based on how many non-ld/st
5487 vector instructions can be issued per cycle. */
5488 unsigned int issue_width = rs6000_vect_unroll_issue;
5489 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5490 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
5491 /* Make sure it is power of 2. */
5492 uf = 1 << ceil_log2 (uf);
5494 /* If the iteration count is known, the costing is exact enough;
5495 don't worry that unrolling could make it worse. */
5496 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5497 return uf;
5499 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5500 loop if either condition is satisfied:
5501 - reduction factor exceeds the threshold;
5502 - emulated gather load adopted. */
5503 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5504 || m_gather_load)
5505 return uf;
5507 /* Check if we can conclude it's good to unroll from the estimated
5508 iteration count. */
5509 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5510 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5511 unsigned int unrolled_vf = vf * uf;
5512 if (est_niter == -1 || est_niter < unrolled_vf)
5513 /* When the estimated iteration count of this loop is unknown, it's
5514 possible that we could vectorize this loop with the original VF but
5515 fail to vectorize it with the unrolled VF if the actual iteration
5516 count lies in between. */
5517 return 1;
5518 else
5520 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5521 unsigned int epil_niter = est_niter % vf;
5522 /* Even if we have partial vector support, it can still be inefficient
5523 to calculate the length when the iteration count is unknown, so
5524 only expect unrolling to be good when the epilogue iteration count
5525 is not bigger than VF (only one length calculation). */
5526 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5527 && epil_niter_unr <= vf)
5528 return uf;
5529 /* Without partial vector support, conservatively unroll this when
5530 the epilogue iteration count is less than the original one
5531 (epilogue execution time wouldn't be longer than before). */
5532 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5533 && epil_niter_unr <= epil_niter)
5534 return uf;
5537 return 1;
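/* Worked example (parameter values hypothetical): with m_reduc_factor
2, an issue width of 4 and 6 non-ld/st vector stmts, uf is
CEIL (2 * 4, 6) = 2, already a power of 2.  If VF is 4 and the loop is
estimated to run 100 iterations, the unrolled VF is 8 and
epil_niter_unr is 100 % 8 = 4; with partial vector support that is
<= VF, so unrolling by 2 is suggested.  */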
5540 void
5541 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5543 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5545 adjust_vect_cost_per_loop (loop_vinfo);
5546 density_test (loop_vinfo);
5548 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5549 that require versioning for any reason. The vectorization is at
5550 best a wash inside the loop, and the versioning checks make
5551 profitability highly unlikely and potentially quite harmful. */
5552 if (!m_vect_nonmem
5553 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5554 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5555 m_costs[vect_body] += 10000;
5557 m_suggested_unroll_factor
5558 = determine_suggested_unroll_factor (loop_vinfo);
5561 vector_costs::finish_cost (scalar_costs);
5564 /* Implement targetm.loop_unroll_adjust. */
5566 static unsigned
5567 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5569 if (unroll_only_small_loops)
5571 /* TODO: These are hardcoded values right now. We probably should use
5572 a PARAM here. */
5573 if (loop->ninsns <= 6)
5574 return MIN (4, nunroll);
5575 if (loop->ninsns <= 10)
5576 return MIN (2, nunroll);
5578 return 0;
5581 return nunroll;
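/* E.g. with unroll_only_small_loops, a 5-insn loop is unrolled at most
4 times and an 8-insn loop at most twice; anything larger than 10 insns
is not unrolled at all (the hook returns 0).  */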
5584 /* Returns a function decl for a vectorized version of the builtin function
5585 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5586 if it is not available.
5588 Implement targetm.vectorize.builtin_vectorized_function. */
5590 static tree
5591 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5592 tree type_in)
5594 machine_mode in_mode, out_mode;
5595 int in_n, out_n;
5597 if (TARGET_DEBUG_BUILTIN)
5598 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5599 combined_fn_name (combined_fn (fn)),
5600 GET_MODE_NAME (TYPE_MODE (type_out)),
5601 GET_MODE_NAME (TYPE_MODE (type_in)));
5603 /* TODO: Should this be gcc_assert? */
5604 if (TREE_CODE (type_out) != VECTOR_TYPE
5605 || TREE_CODE (type_in) != VECTOR_TYPE)
5606 return NULL_TREE;
5608 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5609 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5610 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5611 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5613 switch (fn)
5615 CASE_CFN_COPYSIGN:
5616 if (VECTOR_UNIT_VSX_P (V2DFmode)
5617 && out_mode == DFmode && out_n == 2
5618 && in_mode == DFmode && in_n == 2)
5619 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5620 if (VECTOR_UNIT_VSX_P (V4SFmode)
5621 && out_mode == SFmode && out_n == 4
5622 && in_mode == SFmode && in_n == 4)
5623 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5624 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5625 && out_mode == SFmode && out_n == 4
5626 && in_mode == SFmode && in_n == 4)
5627 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5628 break;
5629 CASE_CFN_CEIL:
5630 if (VECTOR_UNIT_VSX_P (V2DFmode)
5631 && out_mode == DFmode && out_n == 2
5632 && in_mode == DFmode && in_n == 2)
5633 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5634 if (VECTOR_UNIT_VSX_P (V4SFmode)
5635 && out_mode == SFmode && out_n == 4
5636 && in_mode == SFmode && in_n == 4)
5637 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5638 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5639 && out_mode == SFmode && out_n == 4
5640 && in_mode == SFmode && in_n == 4)
5641 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5642 break;
5643 CASE_CFN_FLOOR:
5644 if (VECTOR_UNIT_VSX_P (V2DFmode)
5645 && out_mode == DFmode && out_n == 2
5646 && in_mode == DFmode && in_n == 2)
5647 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5648 if (VECTOR_UNIT_VSX_P (V4SFmode)
5649 && out_mode == SFmode && out_n == 4
5650 && in_mode == SFmode && in_n == 4)
5651 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5652 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5653 && out_mode == SFmode && out_n == 4
5654 && in_mode == SFmode && in_n == 4)
5655 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5656 break;
5657 CASE_CFN_FMA:
5658 if (VECTOR_UNIT_VSX_P (V2DFmode)
5659 && out_mode == DFmode && out_n == 2
5660 && in_mode == DFmode && in_n == 2)
5661 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5662 if (VECTOR_UNIT_VSX_P (V4SFmode)
5663 && out_mode == SFmode && out_n == 4
5664 && in_mode == SFmode && in_n == 4)
5665 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5666 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5667 && out_mode == SFmode && out_n == 4
5668 && in_mode == SFmode && in_n == 4)
5669 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5670 break;
5671 CASE_CFN_TRUNC:
5672 if (VECTOR_UNIT_VSX_P (V2DFmode)
5673 && out_mode == DFmode && out_n == 2
5674 && in_mode == DFmode && in_n == 2)
5675 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5676 if (VECTOR_UNIT_VSX_P (V4SFmode)
5677 && out_mode == SFmode && out_n == 4
5678 && in_mode == SFmode && in_n == 4)
5679 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5680 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5681 && out_mode == SFmode && out_n == 4
5682 && in_mode == SFmode && in_n == 4)
5683 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5684 break;
5685 CASE_CFN_NEARBYINT:
5686 if (VECTOR_UNIT_VSX_P (V2DFmode)
5687 && flag_unsafe_math_optimizations
5688 && out_mode == DFmode && out_n == 2
5689 && in_mode == DFmode && in_n == 2)
5690 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5691 if (VECTOR_UNIT_VSX_P (V4SFmode)
5692 && flag_unsafe_math_optimizations
5693 && out_mode == SFmode && out_n == 4
5694 && in_mode == SFmode && in_n == 4)
5695 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5696 break;
5697 CASE_CFN_RINT:
5698 if (VECTOR_UNIT_VSX_P (V2DFmode)
5699 && !flag_trapping_math
5700 && out_mode == DFmode && out_n == 2
5701 && in_mode == DFmode && in_n == 2)
5702 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5703 if (VECTOR_UNIT_VSX_P (V4SFmode)
5704 && !flag_trapping_math
5705 && out_mode == SFmode && out_n == 4
5706 && in_mode == SFmode && in_n == 4)
5707 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5708 break;
5709 default:
5710 break;
5713 /* Generate calls to libmass if appropriate. */
5714 if (rs6000_veclib_handler)
5715 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5717 return NULL_TREE;
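/* For instance, a loop such as
     for (int i = 0; i < n; i++)
       a[i] = __builtin_copysign (b[i], c[i]);
over doubles on a VSX target queries CFN_COPYSIGN with V2DFmode for
both type_in and type_out and receives the decl for RS6000_BIF_CPSGNDP,
so one vector copysign covers two elements.  */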
5720 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5721 library with vectorized intrinsics. */
5723 static tree
5724 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5725 tree type_in)
5727 char name[32];
5728 const char *suffix = NULL;
5729 tree fntype, new_fndecl, bdecl = NULL_TREE;
5730 int n_args = 1;
5731 const char *bname;
5732 machine_mode el_mode, in_mode;
5733 int n, in_n;
5735 /* Libmass is suitable for unsafe math only as it does not correctly support
5736 parts of IEEE with the required precision such as denormals. Only support
5737 it if we have VSX to use the simd d2 or f4 functions.
5738 XXX: Add variable length support. */
5739 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5740 return NULL_TREE;
5742 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5743 n = TYPE_VECTOR_SUBPARTS (type_out);
5744 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5745 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5746 if (el_mode != in_mode
5747 || n != in_n)
5748 return NULL_TREE;
5750 switch (fn)
5752 CASE_CFN_ATAN2:
5753 CASE_CFN_HYPOT:
5754 CASE_CFN_POW:
5755 n_args = 2;
5756 gcc_fallthrough ();
5758 CASE_CFN_ACOS:
5759 CASE_CFN_ACOSH:
5760 CASE_CFN_ASIN:
5761 CASE_CFN_ASINH:
5762 CASE_CFN_ATAN:
5763 CASE_CFN_ATANH:
5764 CASE_CFN_CBRT:
5765 CASE_CFN_COS:
5766 CASE_CFN_COSH:
5767 CASE_CFN_ERF:
5768 CASE_CFN_ERFC:
5769 CASE_CFN_EXP2:
5770 CASE_CFN_EXP:
5771 CASE_CFN_EXPM1:
5772 CASE_CFN_LGAMMA:
5773 CASE_CFN_LOG10:
5774 CASE_CFN_LOG1P:
5775 CASE_CFN_LOG2:
5776 CASE_CFN_LOG:
5777 CASE_CFN_SIN:
5778 CASE_CFN_SINH:
5779 CASE_CFN_SQRT:
5780 CASE_CFN_TAN:
5781 CASE_CFN_TANH:
5782 if (el_mode == DFmode && n == 2)
5784 bdecl = mathfn_built_in (double_type_node, fn);
5785 suffix = "d2"; /* pow -> powd2 */
5787 else if (el_mode == SFmode && n == 4)
5789 bdecl = mathfn_built_in (float_type_node, fn);
5790 suffix = "4"; /* powf -> powf4 */
5792 else
5793 return NULL_TREE;
5794 if (!bdecl)
5795 return NULL_TREE;
5796 break;
5798 default:
5799 return NULL_TREE;
5802 gcc_assert (suffix != NULL);
5803 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5804 if (!bname)
5805 return NULL_TREE;
5807 strcpy (name, bname + strlen ("__builtin_"));
5808 strcat (name, suffix);
5810 if (n_args == 1)
5811 fntype = build_function_type_list (type_out, type_in, NULL);
5812 else if (n_args == 2)
5813 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5814 else
5815 gcc_unreachable ();
5817 /* Build a function declaration for the vectorized function. */
5818 new_fndecl = build_decl (BUILTINS_LOCATION,
5819 FUNCTION_DECL, get_identifier (name), fntype);
5820 TREE_PUBLIC (new_fndecl) = 1;
5821 DECL_EXTERNAL (new_fndecl) = 1;
5822 DECL_IS_NOVOPS (new_fndecl) = 1;
5823 TREE_READONLY (new_fndecl) = 1;
5825 return new_fndecl;
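/* For example, with -mveclibabi=mass (which installs this handler) and
-ffast-math, vectorizing pow for V2DFmode finds __builtin_pow, appends
the "d2" suffix to get the MASS routine name "powd2", and declares it
with the two-argument type V2DF (V2DF, V2DF).  */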
5829 /* Default CPU string for rs6000*_file_start functions. */
5830 static const char *rs6000_default_cpu;
5832 #ifdef USING_ELFOS_H
5833 const char *rs6000_machine;
5835 const char *
5836 rs6000_machine_from_flags (void)
5838 /* e300 and e500 */
5839 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5840 return "e300";
5841 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5842 return "e500";
5843 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5844 return "e500mc";
5845 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5846 return "e500mc64";
5847 if (rs6000_cpu == PROCESSOR_PPCE5500)
5848 return "e5500";
5849 if (rs6000_cpu == PROCESSOR_PPCE6500)
5850 return "e6500";
5852 /* 400 series */
5853 if (rs6000_cpu == PROCESSOR_PPC403)
5854 return "\"403\"";
5855 if (rs6000_cpu == PROCESSOR_PPC405)
5856 return "\"405\"";
5857 if (rs6000_cpu == PROCESSOR_PPC440)
5858 return "\"440\"";
5859 if (rs6000_cpu == PROCESSOR_PPC476)
5860 return "\"476\"";
5862 /* A2 */
5863 if (rs6000_cpu == PROCESSOR_PPCA2)
5864 return "a2";
5866 /* Cell BE */
5867 if (rs6000_cpu == PROCESSOR_CELL)
5868 return "cell";
5870 /* Titan */
5871 if (rs6000_cpu == PROCESSOR_TITAN)
5872 return "titan";
5874 /* 500 series and 800 series */
5875 if (rs6000_cpu == PROCESSOR_MPCCORE)
5876 return "\"821\"";
5878 #if 0
5879 /* This (and ppc64 below) are disabled here (for now at least) because
5880 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5881 are #define'd as some of these. Untangling that is a job for later. */
5883 /* 600 series and 700 series, "classic" */
5884 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5885 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5886 || rs6000_cpu == PROCESSOR_PPC750)
5887 return "ppc";
5888 #endif
5890 /* Classic with AltiVec, "G4" */
5891 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5892 return "\"7450\"";
5894 #if 0
5895 /* The older 64-bit CPUs */
5896 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5897 || rs6000_cpu == PROCESSOR_RS64A)
5898 return "ppc64";
5899 #endif
5901 HOST_WIDE_INT flags = rs6000_isa_flags;
5903 /* Disable the flags that should never influence the .machine selection. */
5904 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
5905 | OPTION_MASK_ALTIVEC);
5907 if ((flags & (POWER11_MASKS_SERVER & ~ISA_3_1_MASKS_SERVER)) != 0)
5908 return "power11";
5909 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5910 return "power10";
5911 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5912 return "power9";
5913 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5914 return "power8";
5915 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5916 return "power7";
5917 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5918 return "power6";
5919 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5920 return "power5";
5921 if ((flags & ISA_2_1_MASKS) != 0)
5922 return "power4";
5923 if ((flags & OPTION_MASK_POWERPC64) != 0)
5924 return "ppc64";
5925 return "ppc";
5928 void
5929 emit_asm_machine (void)
5931 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5932 if (TARGET_ALTIVEC)
5933 fprintf (asm_out_file, "\t.machine altivec\n");
5935 #endif
5937 /* Do anything needed at the start of the asm file. */
5939 static void
5940 rs6000_file_start (void)
5942 char buffer[80];
5943 const char *start = buffer;
5944 FILE *file = asm_out_file;
5946 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5948 default_file_start ();
5950 if (flag_verbose_asm)
5952 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5954 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5956 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5957 start = "";
5960 if (OPTION_SET_P (rs6000_cpu_index))
5962 fprintf (file, "%s -mcpu=%s", start,
5963 processor_target_table[rs6000_cpu_index].name);
5964 start = "";
5967 if (OPTION_SET_P (rs6000_tune_index))
5969 fprintf (file, "%s -mtune=%s", start,
5970 processor_target_table[rs6000_tune_index].name);
5971 start = "";
5974 if (PPC405_ERRATUM77)
5976 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5977 start = "";
5980 #ifdef USING_ELFOS_H
5981 switch (rs6000_sdata)
5983 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5984 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5985 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5986 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5989 if (rs6000_sdata && g_switch_value)
5991 fprintf (file, "%s -G %d", start,
5992 g_switch_value);
5993 start = "";
5995 #endif
5997 if (*start == '\0')
5998 putc ('\n', file);
6001 #ifdef USING_ELFOS_H
6002 rs6000_machine = rs6000_machine_from_flags ();
6003 emit_asm_machine ();
6004 #endif
6006 if (DEFAULT_ABI == ABI_ELFv2)
6007 fprintf (file, "\t.abiversion 2\n");
6011 /* Return nonzero if this function is known to have a null epilogue. */
6014 direct_return (void)
6016 if (reload_completed)
6018 rs6000_stack_t *info = rs6000_stack_info ();
6020 if (info->first_gp_reg_save == 32
6021 && info->first_fp_reg_save == 64
6022 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6023 && ! info->lr_save_p
6024 && ! info->cr_save_p
6025 && info->vrsave_size == 0
6026 && ! info->push_p)
6027 return 1;
6030 return 0;
6033 /* Helper for num_insns_constant. Calculate number of instructions to
6034 load VALUE to a single gpr using combinations of addi, addis, ori,
6035 oris, sldi and rldimi instructions. */
6037 static int
6038 num_insns_constant_gpr (HOST_WIDE_INT value)
6040 /* signed constant loadable with addi */
6041 if (SIGNED_INTEGER_16BIT_P (value))
6042 return 1;
6044 /* constant loadable with addis */
6045 else if ((value & 0xffff) == 0
6046 && (value >> 31 == -1 || value >> 31 == 0))
6047 return 1;
6049 /* PADDI can support up to 34 bit signed integers. */
6050 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6051 return 1;
6053 else if (TARGET_POWERPC64)
6055 int num_insns = 0;
6056 rs6000_emit_set_long_const (nullptr, value, &num_insns);
6057 return num_insns;
6060 else
6061 return 2;
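/* Examples: 0x7fff loads with a single addi; 0x12340000 with a single
addis; 0x12345678 needs addis + ori, i.e. 2 insns; and with prefixed
instructions a 34-bit value such as 0x1ffffffff is a single paddi.  */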
6064 /* Helper for num_insns_constant. Allow constants formed by the
6065 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6066 and handle modes that require multiple gprs. */
6068 static int
6069 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6071 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6072 int total = 0;
6073 while (nregs-- > 0)
6075 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6076 int insns = num_insns_constant_gpr (low);
6077 if (insns > 2
6078 /* We won't get more than 2 from num_insns_constant_gpr
6079 except when TARGET_POWERPC64 and mode is DImode or
6080 wider, so the register mode must be DImode. */
6081 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6082 insns = 2;
6083 total += insns;
6084 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6085 it all at once would be UB. */
6086 value >>= (BITS_PER_WORD - 1);
6087 value >>= 1;
6089 return total;
6092 /* Return the number of instructions it takes to form a constant in as
6093 many gprs as are needed for MODE. */
6096 num_insns_constant (rtx op, machine_mode mode)
6098 HOST_WIDE_INT val;
6100 switch (GET_CODE (op))
6102 case CONST_INT:
6103 val = INTVAL (op);
6104 break;
6106 case CONST_WIDE_INT:
6108 int insns = 0;
6109 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6110 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6111 DImode);
6112 return insns;
6115 case CONST_DOUBLE:
6117 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6119 if (mode == SFmode || mode == SDmode)
6121 long l;
6123 if (mode == SDmode)
6124 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6125 else
6126 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6127 /* See the first define_split in rs6000.md handling a
6128 const_double_operand. */
6129 val = l;
6130 mode = SImode;
6132 else if (mode == DFmode || mode == DDmode)
6134 long l[2];
6136 if (mode == DDmode)
6137 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6138 else
6139 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6141 /* See the second (32-bit) and third (64-bit) define_split
6142 in rs6000.md handling a const_double_operand. */
6143 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6144 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6145 mode = DImode;
6147 else if (mode == TFmode || mode == TDmode
6148 || mode == KFmode || mode == IFmode)
6150 long l[4];
6151 int insns;
6153 if (mode == TDmode)
6154 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6155 else
6156 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6158 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6159 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6160 insns = num_insns_constant_multi (val, DImode);
6161 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6162 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6163 insns += num_insns_constant_multi (val, DImode);
6164 return insns;
6166 else
6167 gcc_unreachable ();
6169 break;
6171 default:
6172 gcc_unreachable ();
6175 return num_insns_constant_multi (val, mode);
6178 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6179 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6180 corresponding element of the vector, but for V4SFmode, the
6181 corresponding "float" is interpreted as an SImode integer. */
6183 HOST_WIDE_INT
6184 const_vector_elt_as_int (rtx op, unsigned int elt)
6186 rtx tmp;
6188 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6189 gcc_assert (GET_MODE (op) != V2DImode
6190 && GET_MODE (op) != V2DFmode);
6192 tmp = CONST_VECTOR_ELT (op, elt);
6193 if (GET_MODE (op) == V4SFmode)
6194 tmp = gen_lowpart (SImode, tmp);
6195 return INTVAL (tmp);
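/* E.g. the element 1.0f of a V4SFmode constant comes back as
0x3f800000, the IEEE single-precision bit pattern, so splat analysis
can treat float and integer vectors uniformly.  */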
6198 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6199 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6200 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6201 all items are set to the same value and contain COPIES replicas of the
6202 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6203 operand and the others are set to the value of the operand's msb. */
6205 static bool
6206 vspltis_constant (rtx op, unsigned step, unsigned copies)
6208 machine_mode mode = GET_MODE (op);
6209 machine_mode inner = GET_MODE_INNER (mode);
6211 unsigned i;
6212 unsigned nunits;
6213 unsigned bitsize;
6214 unsigned mask;
6216 HOST_WIDE_INT val;
6217 HOST_WIDE_INT splat_val;
6218 HOST_WIDE_INT msb_val;
6220 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6221 return false;
6223 nunits = GET_MODE_NUNITS (mode);
6224 bitsize = GET_MODE_BITSIZE (inner);
6225 mask = GET_MODE_MASK (inner);
6227 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6228 splat_val = val;
6229 msb_val = val >= 0 ? 0 : -1;
6231 if (val == 0 && step > 1)
6233 /* Special case for loading the most significant bit with step > 1.
6234 In that case, match 0s in all elements except every step-th one,
6235 which must match EASY_VECTOR_MSB. */
6236 for (i = 1; i < nunits; ++i)
6238 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6239 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6240 if ((i & (step - 1)) == step - 1)
6242 if (!EASY_VECTOR_MSB (elt_val, inner))
6243 break;
6245 else if (elt_val)
6246 break;
6248 if (i == nunits)
6249 return true;
6252 /* Construct the value to be splatted, if possible. If not, return 0. */
6253 for (i = 2; i <= copies; i *= 2)
6255 HOST_WIDE_INT small_val;
6256 bitsize /= 2;
6257 small_val = splat_val >> bitsize;
6258 mask >>= bitsize;
6259 if (splat_val != ((HOST_WIDE_INT)
6260 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6261 | (small_val & mask)))
6262 return false;
6263 splat_val = small_val;
6264 inner = smallest_int_mode_for_size (bitsize);
6267 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6268 if (EASY_VECTOR_15 (splat_val))
6271 /* Also check if we can splat, and then add the result to itself. Do so if
6272 the value is positive, or if the splat instruction is using OP's mode;
6273 for splat_val < 0, the splat and the add should use the same mode. */
6274 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6275 && (splat_val >= 0 || (step == 1 && copies == 1)))
6278 /* Also check if we are loading up the most significant bit, which can be
6279 done by loading up -1 and shifting the value left by -1. Only do this
6280 for step 1 here; for larger steps it is done earlier. */
6281 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6284 else
6285 return false;
6287 /* Check if VAL is present in every STEP-th element, and the
6288 other elements are filled with its most significant bit. */
6289 for (i = 1; i < nunits; ++i)
6291 HOST_WIDE_INT desired_val;
6292 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6293 if ((i & (step - 1)) == 0)
6294 desired_val = val;
6295 else
6296 desired_val = msb_val;
6298 if (desired_val != const_vector_elt_as_int (op, elt))
6299 return false;
6302 return true;
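/* Examples: a V8HImode vector of eight 5s matches with step 1 and
copies 1 and is generated by a single vspltish 5, while a V8HImode
vector of eight 0x0101s matches with step 1 and copies 2, since
vspltisb 1 produces exactly that byte pattern.  */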
6305 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6306 instruction, filling in the bottom elements with 0 or -1.
6308 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6309 for the number of zeroes to shift in, or negative for the number of 0xff
6310 bytes to shift in.
6312 OP is a CONST_VECTOR. */
6315 vspltis_shifted (rtx op)
6317 machine_mode mode = GET_MODE (op);
6318 machine_mode inner = GET_MODE_INNER (mode);
6320 unsigned i, j;
6321 unsigned nunits;
6322 unsigned mask;
6324 HOST_WIDE_INT val;
6326 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6327 return false;
6329 /* We need to create pseudo registers to do the shift, so don't recognize
6330 shift vector constants after reload. Don't match it even before RA
6331 after split1 is done, because there won't be a further splitting
6332 pass before RA to do the splitting. */
6333 if (!can_create_pseudo_p ()
6334 || (cfun->curr_properties & PROP_rtl_split_insns))
6335 return false;
6337 nunits = GET_MODE_NUNITS (mode);
6338 mask = GET_MODE_MASK (inner);
6340 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6342 /* Check if the value can really be the operand of a vspltis[bhw]. */
6343 if (EASY_VECTOR_15 (val))
6346 /* Also check if we are loading up the most significant bit which can be done
6347 by loading up -1 and shifting the value left by -1. */
6348 else if (EASY_VECTOR_MSB (val, inner))
6351 else
6352 return 0;
6354 /* Check if VAL is present in every STEP-th element until we find elements
6355 that are 0 or all 1 bits. */
6356 for (i = 1; i < nunits; ++i)
6358 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6359 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6361 /* If the value isn't the splat value, check for the remaining elements
6362 being 0/-1. */
6363 if (val != elt_val)
6365 if (elt_val == 0)
6367 for (j = i+1; j < nunits; ++j)
6369 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6370 if (const_vector_elt_as_int (op, elt2) != 0)
6371 return 0;
6374 return (nunits - i) * GET_MODE_SIZE (inner);
6377 else if ((elt_val & mask) == mask)
6379 for (j = i+1; j < nunits; ++j)
6381 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6382 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6383 return 0;
6386 return -((nunits - i) * GET_MODE_SIZE (inner));
6389 else
6390 return 0;
6394 /* If all elements are equal, we don't need to do VSLDOI. */
6395 return 0;
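/* For example, the big-endian V16QImode constant {5, 0, 0, ..., 0}
returns 15: splat 5 into every byte with vspltisb, then shift in 15
zero bytes with vsldoi.  A constant whose trailing elements are all
1 bits instead yields a negative byte count.  */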
6399 /* Return non-zero (element mode byte size) if OP is of the given MODE
6400 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6403 easy_altivec_constant (rtx op, machine_mode mode)
6405 unsigned step, copies;
6407 if (mode == VOIDmode)
6408 mode = GET_MODE (op);
6409 else if (mode != GET_MODE (op))
6410 return 0;
6412 /* V2DI/V2DF were added with VSX. Only allow 0 and all 1's as easy
6413 constants. */
6414 if (mode == V2DFmode)
6415 return zero_constant (op, mode) ? 8 : 0;
6417 else if (mode == V2DImode)
6419 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6420 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6421 return 0;
6423 if (zero_constant (op, mode))
6424 return 8;
6426 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6427 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6428 return 8;
6430 return 0;
6433 /* V1TImode is a special container for TImode. Ignore for now. */
6434 else if (mode == V1TImode)
6435 return 0;
6437 /* Start with a vspltisw. */
6438 step = GET_MODE_NUNITS (mode) / 4;
6439 copies = 1;
6441 if (vspltis_constant (op, step, copies))
6442 return 4;
6444 /* Then try with a vspltish. */
6445 if (step == 1)
6446 copies <<= 1;
6447 else
6448 step >>= 1;
6450 if (vspltis_constant (op, step, copies))
6451 return 2;
6453 /* And finally a vspltisb. */
6454 if (step == 1)
6455 copies <<= 1;
6456 else
6457 step >>= 1;
6459 if (vspltis_constant (op, step, copies))
6460 return 1;
6462 if (vspltis_shifted (op) != 0)
6463 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6465 return 0;
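/* E.g. a V4SImode vector of four 7s returns 4 (vspltisw 7), a V8HImode
splat of 7 returns 2 (vspltish 7), and the V2DFmode zero vector
returns 8.  */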
6468 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6469 result is OP. Abort if it is not possible. */
6472 gen_easy_altivec_constant (rtx op)
6474 machine_mode mode = GET_MODE (op);
6475 int nunits = GET_MODE_NUNITS (mode);
6476 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6477 unsigned step = nunits / 4;
6478 unsigned copies = 1;
6480 /* Start with a vspltisw. */
6481 if (vspltis_constant (op, step, copies))
6482 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6484 /* Then try with a vspltish. */
6485 if (step == 1)
6486 copies <<= 1;
6487 else
6488 step >>= 1;
6490 if (vspltis_constant (op, step, copies))
6491 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6493 /* And finally a vspltisb. */
6494 if (step == 1)
6495 copies <<= 1;
6496 else
6497 step >>= 1;
6499 if (vspltis_constant (op, step, copies))
6500 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6502 gcc_unreachable ();
6505 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6506 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6508 Return the number of instructions needed (1 or 2) in the location
6509 pointed to by NUM_INSNS_PTR.
6511 Return the constant that is being split via CONSTANT_PTR. */
6513 bool
6514 xxspltib_constant_p (rtx op,
6515 machine_mode mode,
6516 int *num_insns_ptr,
6517 int *constant_ptr)
6519 size_t nunits = GET_MODE_NUNITS (mode);
6520 size_t i;
6521 HOST_WIDE_INT value;
6522 rtx element;
6524 /* Set the returned values to out-of-bounds values. */
6525 *num_insns_ptr = -1;
6526 *constant_ptr = 256;
6528 if (!TARGET_P9_VECTOR)
6529 return false;
6531 if (mode == VOIDmode)
6532 mode = GET_MODE (op);
6534 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6535 return false;
6537 /* Handle (vec_duplicate <constant>). */
6538 if (GET_CODE (op) == VEC_DUPLICATE)
6540 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6541 && mode != V2DImode)
6542 return false;
6544 element = XEXP (op, 0);
6545 if (!CONST_INT_P (element))
6546 return false;
6548 value = INTVAL (element);
6549 if (!IN_RANGE (value, -128, 127))
6550 return false;
6553 /* Handle (const_vector [...]). */
6554 else if (GET_CODE (op) == CONST_VECTOR)
6556 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6557 && mode != V2DImode)
6558 return false;
6560 element = CONST_VECTOR_ELT (op, 0);
6561 if (!CONST_INT_P (element))
6562 return false;
6564 value = INTVAL (element);
6565 if (!IN_RANGE (value, -128, 127))
6566 return false;
6568 for (i = 1; i < nunits; i++)
6570 element = CONST_VECTOR_ELT (op, i);
6571 if (!CONST_INT_P (element))
6572 return false;
6574 if (value != INTVAL (element))
6575 return false;
6579 /* Handle integer constants being loaded into the upper part of the VSX
6580 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6581 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6582 else if (CONST_INT_P (op))
6584 if (!SCALAR_INT_MODE_P (mode))
6585 return false;
6587 value = INTVAL (op);
6588 if (!IN_RANGE (value, -128, 127))
6589 return false;
6591 if (!IN_RANGE (value, -1, 0))
6593 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6594 return false;
6596 if (EASY_VECTOR_15 (value))
6597 return false;
6601 else
6602 return false;
6604 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6605 sign extend. Special case 0/-1 to allow getting any VSX register instead
6606 of an Altivec register. */
6607 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6608 && EASY_VECTOR_15 (value))
6609 return false;
6611 /* Return # of instructions and the constant byte for XXSPLTIB. */
6612 if (mode == V16QImode)
6613 *num_insns_ptr = 1;
6615 else if (IN_RANGE (value, -1, 0))
6616 *num_insns_ptr = 1;
6618 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6619 single XXSPLTIW or XXSPLTIDP instruction. */
6620 else if (vsx_prefixed_constant (op, mode))
6621 return false;
6623 /* Return XXSPLTIB followed by a sign-extend operation to convert the
6624 constant to V8HImode or V4SImode. */
6625 else
6626 *num_insns_ptr = 2;
6628 *constant_ptr = (int) value;
6629 return true;
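/* Examples: a V16QImode splat of 37 is a single XXSPLTIB; a V4SImode
splat of 100 takes XXSPLTIB plus VEXTSB2W, i.e. 2 insns (assuming no
prefixed XXSPLTIW is available to handle it first); and a V4SImode
splat of 12 is rejected here because VSPLTISW can load it directly.  */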
6632 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6633 instructions vupkhsw and vspltisw.
6635 Return the constant that is being split via CONSTANT_PTR. */
6637 bool
6638 vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
6640 HOST_WIDE_INT value;
6641 rtx elt;
6643 if (!TARGET_P8_VECTOR)
6644 return false;
6646 if (mode != V2DImode)
6647 return false;
6649 if (!const_vec_duplicate_p (op, &elt))
6650 return false;
6652 value = INTVAL (elt);
6653 if (value == 0 || value == 1
6654 || !EASY_VECTOR_15 (value))
6655 return false;
6657 if (constant_ptr)
6658 *constant_ptr = (int) value;
6659 return true;
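/* E.g. a V2DImode splat of 12 qualifies: vspltisw 12 builds
{12, 12, 12, 12} and vupkhsw sign-extends the high words to give
{12, 12} in V2DImode.  Values 0 and 1, and anything outside the signed
5-bit vspltisw range, are rejected.  */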
6662 const char *
6663 output_vec_const_move (rtx *operands)
6665 int shift;
6666 machine_mode mode;
6667 rtx dest, vec;
6669 dest = operands[0];
6670 vec = operands[1];
6671 mode = GET_MODE (dest);
6673 if (TARGET_VSX)
6675 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6676 int xxspltib_value = 256;
6677 int num_insns = -1;
6679 if (zero_constant (vec, mode))
6681 if (TARGET_P9_VECTOR)
6682 return "xxspltib %x0,0";
6684 else if (dest_vmx_p)
6685 return "vspltisw %0,0";
6687 else
6688 return "xxlxor %x0,%x0,%x0";
6691 if (all_ones_constant (vec, mode))
6693 if (TARGET_P9_VECTOR)
6694 return "xxspltib %x0,255";
6696 else if (dest_vmx_p)
6697 return "vspltisw %0,-1";
6699 else if (TARGET_P8_VECTOR)
6700 return "xxlorc %x0,%x0,%x0";
6702 else
6703 gcc_unreachable ();
6706 vec_const_128bit_type vsx_const;
6707 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6709 unsigned imm = constant_generates_lxvkq (&vsx_const);
6710 if (imm)
6712 operands[2] = GEN_INT (imm);
6713 return "lxvkq %x0,%2";
6716 imm = constant_generates_xxspltiw (&vsx_const);
6717 if (imm)
6719 operands[2] = GEN_INT (imm);
6720 return "xxspltiw %x0,%2";
6723 imm = constant_generates_xxspltidp (&vsx_const);
6724 if (imm)
6726 operands[2] = GEN_INT (imm);
6727 return "xxspltidp %x0,%2";
6731 if (TARGET_P9_VECTOR
6732 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6734 if (num_insns == 1)
6736 operands[2] = GEN_INT (xxspltib_value & 0xff);
6737 return "xxspltib %x0,%2";
6740 return "#";
6744 if (TARGET_ALTIVEC)
6746 rtx splat_vec;
6748 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6749 if (zero_constant (vec, mode))
6750 return "vspltisw %0,0";
6752 if (all_ones_constant (vec, mode))
6753 return "vspltisw %0,-1";
6755 /* Do we need to construct a value using VSLDOI? */
6756 shift = vspltis_shifted (vec);
6757 if (shift != 0)
6758 return "#";
6760 splat_vec = gen_easy_altivec_constant (vec);
6761 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6762 operands[1] = XEXP (splat_vec, 0);
6763 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6764 return "#";
6766 switch (GET_MODE (splat_vec))
6768 case E_V4SImode:
6769 return "vspltisw %0,%1";
6771 case E_V8HImode:
6772 return "vspltish %0,%1";
6774 case E_V16QImode:
6775 return "vspltisb %0,%1";
6777 default:
6778 gcc_unreachable ();
6782 gcc_unreachable ();
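/* E.g. the all-zeros vector is emitted as "xxspltib %x0,0" on ISA 3.0
targets, "vspltisw %0,0" for an Altivec destination, and otherwise
"xxlxor %x0,%x0,%x0"; constants that need a vsldoi shift return "#"
so that the insn is split later.  */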
6785 /* Initialize vector TARGET to VALS. */
6787 void
6788 rs6000_expand_vector_init (rtx target, rtx vals)
6790 machine_mode mode = GET_MODE (target);
6791 machine_mode inner_mode = GET_MODE_INNER (mode);
6792 unsigned int n_elts = GET_MODE_NUNITS (mode);
6793 int n_var = 0, one_var = -1;
6794 bool all_same = true, all_const_zero = true;
6795 rtx x, mem;
6796 unsigned int i;
6798 for (i = 0; i < n_elts; ++i)
6800 x = XVECEXP (vals, 0, i);
6801 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6802 ++n_var, one_var = i;
6803 else if (x != CONST0_RTX (inner_mode))
6804 all_const_zero = false;
6806 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6807 all_same = false;
6810 if (n_var == 0)
6812 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6813 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6814 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6816 /* Zero register. */
6817 emit_move_insn (target, CONST0_RTX (mode));
6818 return;
6820 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6822 /* Splat immediate. */
6823 emit_insn (gen_rtx_SET (target, const_vec));
6824 return;
6826 else
6828 /* Load from constant pool. */
6829 emit_move_insn (target, const_vec);
6830 return;
6834 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6835 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6837 rtx op[2];
6838 size_t i;
6839 size_t num_elements = all_same ? 1 : 2;
6840 for (i = 0; i < num_elements; i++)
6842 op[i] = XVECEXP (vals, 0, i);
6843 /* Just in case there is a SUBREG with a smaller mode, do a
6844 conversion. */
6845 if (GET_MODE (op[i]) != inner_mode)
6847 rtx tmp = gen_reg_rtx (inner_mode);
6848 convert_move (tmp, op[i], 0);
6849 op[i] = tmp;
6851 /* Allow load with splat double word. */
6852 else if (MEM_P (op[i]))
6854 if (!all_same)
6855 op[i] = force_reg (inner_mode, op[i]);
6857 else if (!REG_P (op[i]))
6858 op[i] = force_reg (inner_mode, op[i]);
6861 if (all_same)
6863 if (mode == V2DFmode)
6864 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6865 else
6866 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6868 else
6870 if (mode == V2DFmode)
6871 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6872 else
6873 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6875 return;
6878 /* Special case initializing vector int if we are on 64-bit systems with
6879 direct move or we have the ISA 3.0 instructions. */
6880 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6881 && TARGET_DIRECT_MOVE_64BIT)
6883 if (all_same)
6885 rtx element0 = XVECEXP (vals, 0, 0);
6886 if (MEM_P (element0))
6887 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6888 else
6889 element0 = force_reg (SImode, element0);
6891 if (TARGET_P9_VECTOR)
6892 emit_insn (gen_vsx_splat_v4si (target, element0));
6893 else
6895 rtx tmp = gen_reg_rtx (DImode);
6896 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6897 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6899 return;
6901 else
6903 rtx elements[4];
6904 size_t i;
6906 for (i = 0; i < 4; i++)
6907 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6909 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6910 elements[2], elements[3]));
6911 return;
6915 /* With single precision floating point on VSX, internally single
6916 precision is actually represented as a double. Either make 2 V2DF
6917 vectors and convert those vectors to single precision, or do one
6918 conversion and splat the result to the other elements. */
6919 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6921 if (all_same)
6923 rtx element0 = XVECEXP (vals, 0, 0);
6925 if (TARGET_P9_VECTOR)
6927 if (MEM_P (element0))
6928 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6930 emit_insn (gen_vsx_splat_v4sf (target, element0));
6933 else
6935 rtx freg = gen_reg_rtx (V4SFmode);
6936 rtx sreg = force_reg (SFmode, element0);
6937 rtx cvt = (TARGET_XSCVDPSPN
6938 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6939 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6941 emit_insn (cvt);
6942 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6943 const0_rtx));
6946 else
6948 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6950 rtx tmp_sf[4];
6951 rtx tmp_si[4];
6952 rtx tmp_di[4];
6953 rtx mrg_di[4];
6954 for (i = 0; i < 4; i++)
6956 tmp_si[i] = gen_reg_rtx (SImode);
6957 tmp_di[i] = gen_reg_rtx (DImode);
6958 mrg_di[i] = gen_reg_rtx (DImode);
6959 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6960 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6961 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6964 if (!BYTES_BIG_ENDIAN)
6966 std::swap (tmp_di[0], tmp_di[1]);
6967 std::swap (tmp_di[2], tmp_di[3]);
6970 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6971 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6972 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6973 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6975 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6976 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6977 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6979 else
6981 rtx dbl_even = gen_reg_rtx (V2DFmode);
6982 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6983 rtx flt_even = gen_reg_rtx (V4SFmode);
6984 rtx flt_odd = gen_reg_rtx (V4SFmode);
6985 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6986 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6987 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6988 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6990 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6991 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6992 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6993 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6994 rs6000_expand_extract_even (target, flt_even, flt_odd);
6997 return;
7000 /* Special case initializing vector short/char that are splats if we are on
7001 64-bit systems with direct move. */
7002 if (all_same && TARGET_DIRECT_MOVE_64BIT
7003 && (mode == V16QImode || mode == V8HImode))
7005 rtx op0 = XVECEXP (vals, 0, 0);
7006 rtx di_tmp = gen_reg_rtx (DImode);
7008 if (!REG_P (op0))
7009 op0 = force_reg (GET_MODE_INNER (mode), op0);
7011 if (mode == V16QImode)
7013 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7014 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7015 return;
7018 if (mode == V8HImode)
7020 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7021 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7022 return;
7026 /* Store value to stack temp. Load vector element. Splat. However, splat
7027 of 64-bit items is not supported on Altivec. */
7028 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7030 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7031 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7032 XVECEXP (vals, 0, 0));
7033 x = gen_rtx_UNSPEC (VOIDmode,
7034 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7035 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7036 gen_rtvec (2,
7037 gen_rtx_SET (target, mem),
7038 x)));
7039 x = gen_rtx_VEC_SELECT (inner_mode, target,
7040 gen_rtx_PARALLEL (VOIDmode,
7041 gen_rtvec (1, const0_rtx)));
7042 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7043 return;
7046 /* One field is non-constant. Load constant then overwrite
7047 varying field. */
7048 if (n_var == 1)
7050 rtx copy = copy_rtx (vals);
7052 /* Load constant part of vector, substitute neighboring value for
7053 varying element. */
7054 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7055 rs6000_expand_vector_init (target, copy);
7057 /* Insert variable. */
7058 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7059 GEN_INT (one_var));
7060 return;
7063 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7065 rtx op[16];
7066 /* Force the values into word_mode registers. */
7067 for (i = 0; i < n_elts; i++)
7069 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7070 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7071 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7074 /* Take unsigned char on 64-bit big endian as an example for the
7075 construction below; the input values are: A, B, C, D, ..., O, P. */
7077 if (TARGET_DIRECT_MOVE_128)
7079 /* Move to VSX register with vec_concat, each has 2 values.
7080 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7081 vr1[1] = { xxxxxxxC, xxxxxxxD };
7083 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7084 rtx vr1[8];
7085 for (i = 0; i < n_elts / 2; i++)
7087 vr1[i] = gen_reg_rtx (V2DImode);
7088 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7089 op[i * 2 + 1]));
7092 /* Pack vectors with 2 values into vectors with 4 values.
7093 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7094 vr2[1] = { xxxExxxF, xxxGxxxH };
7095 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7096 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7097 rtx vr2[4];
7098 for (i = 0; i < n_elts / 4; i++)
7100 vr2[i] = gen_reg_rtx (V4SImode);
7101 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7102 vr1[i * 2 + 1]));
7105 /* Pack vectors with 4 values into vectors with 8 values.
7106 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7107 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7108 rtx vr3[2];
7109 for (i = 0; i < n_elts / 8; i++)
7111 vr3[i] = gen_reg_rtx (V8HImode);
7112 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7113 vr2[i * 2 + 1]));
7116 /* If the mode is V8HImode, we are done; move the result and return. */
7117 if (mode == V8HImode)
7119 emit_insn (gen_rtx_SET (target, vr3[0]));
7120 return;
7123 /* Pack the two vectors with 8 values into one with 16 values. */
7124 rtx res = gen_reg_rtx (V16QImode);
7125 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7126 emit_insn (gen_rtx_SET (target, res));
7128 else
7130 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7131 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7132 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7133 rtx perm_idx;
7135 /* Set up some common gen routines and values. */
7136 if (BYTES_BIG_ENDIAN)
7138 if (mode == V16QImode)
7140 merge_v16qi = gen_altivec_vmrghb;
7141 merge_v8hi = gen_altivec_vmrglh;
7143 else
7144 merge_v8hi = gen_altivec_vmrghh;
7146 merge_v4si = gen_altivec_vmrglw;
7147 perm_idx = GEN_INT (3);
7149 else
7151 if (mode == V16QImode)
7153 merge_v16qi = gen_altivec_vmrglb;
7154 merge_v8hi = gen_altivec_vmrghh;
7156 else
7157 merge_v8hi = gen_altivec_vmrglh;
7159 merge_v4si = gen_altivec_vmrghw;
7160 perm_idx = GEN_INT (0);
7163 /* Move to VSX register with direct move.
7164 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7165 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7167 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7168 rtx vr_qi[16];
7169 for (i = 0; i < n_elts; i++)
7171 vr_qi[i] = gen_reg_rtx (V16QImode);
7172 if (TARGET_POWERPC64)
7173 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7174 else
7175 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7178 /* Merge/move to vector short.
7179 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7180 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7182 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7183 rtx vr_hi[8];
7184 for (i = 0; i < 8; i++)
7186 rtx tmp = vr_qi[i];
7187 if (mode == V16QImode)
7189 tmp = gen_reg_rtx (V16QImode);
7190 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7192 vr_hi[i] = gen_reg_rtx (V8HImode);
7193 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7196 /* Merge vector short to vector int.
7197 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7198 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7200 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7201 rtx vr_si[4];
7202 for (i = 0; i < 4; i++)
7204 rtx tmp = gen_reg_rtx (V8HImode);
7205 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7206 vr_si[i] = gen_reg_rtx (V4SImode);
7207 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7210 /* Merge vector int to vector long.
7211 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7212 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7213 rtx vr_di[2];
7214 for (i = 0; i < 2; i++)
7216 rtx tmp = gen_reg_rtx (V4SImode);
7217 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7218 vr_di[i] = gen_reg_rtx (V2DImode);
7219 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7222 rtx res = gen_reg_rtx (V2DImode);
7223 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7224 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7227 return;
7230 /* Construct the vector in memory one field at a time
7231 and load the whole vector. */
7232 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7233 for (i = 0; i < n_elts; i++)
7234 emit_move_insn (adjust_address_nv (mem, inner_mode,
7235 i * GET_MODE_SIZE (inner_mode)),
7236 XVECEXP (vals, 0, i));
7237 emit_move_insn (target, mem);
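/* A hedged usage sketch (illustrative, not part of the build): the
   expansions above are what a GNU C vector constructor reaches, e.g.

     vector float
     make_v4sf (float a, float b, float c, float d)
     {
       return (vector float) { a, b, c, d };
     }

   With all elements variable this takes the concat/xvcvdpsp/extract-even
   path on VSX; when every element is the same it takes one of the splat
   paths; and targets without a better option fall back to building the
   vector in a stack temporary one field at a time.  */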
7240 /* Insert VAL at index IDX of TARGET. VAL has the same size as a vector element;
7241 IDX is variable and counts in vector-element units. For Power9 and above. */
7243 static void
7244 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7246 machine_mode mode = GET_MODE (target);
7248 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7250 machine_mode inner_mode = GET_MODE (val);
7252 int width = GET_MODE_SIZE (inner_mode);
7254 gcc_assert (width >= 1 && width <= 8);
7256 int shift = exact_log2 (width);
7258 machine_mode idx_mode = GET_MODE (idx);
7260 machine_mode shift_mode;
7261 /* Gen function pointers for shifting left and generation of permutation
7262 control vectors. */
7263 rtx (*gen_ashl) (rtx, rtx, rtx);
7264 rtx (*gen_pcvr1) (rtx, rtx);
7265 rtx (*gen_pcvr2) (rtx, rtx);
7267 if (TARGET_POWERPC64)
7269 shift_mode = DImode;
7270 gen_ashl = gen_ashldi3;
7271 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7272 : gen_altivec_lvsr_reg_di;
7273 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7274 : gen_altivec_lvsl_reg_di;
7276 else
7278 shift_mode = SImode;
7279 gen_ashl = gen_ashlsi3;
7280 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7281 : gen_altivec_lvsr_reg_si;
7282 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7283 : gen_altivec_lvsl_reg_si;
7285 /* Generate the byte index for the permute shift; WIDTH is the vector
7286 element size: idx = idx * width. */
7287 rtx tmp = gen_reg_rtx (shift_mode);
7288 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7290 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7292 /* Generate one permutation control vector used for rotating the element
7293 at the to-insert position to element zero of the target vector. lvsl
7294 is used for big endian while lvsr is used for little endian:
7295 lvs[lr] v1,0,idx. */
7296 rtx pcvr1 = gen_reg_rtx (V16QImode);
7297 emit_insn (gen_pcvr1 (pcvr1, tmp));
7299 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7300 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7301 pcvr1);
7302 emit_insn (perm1);
7304 /* Insert VAL into element 0 of the target vector. */
7305 rs6000_expand_vector_set (target, val, const0_rtx);
7307 /* Rotate back with a reversed permutation control vector generated from:
7308 lvs[rl] v2,0,idx. */
7309 rtx pcvr2 = gen_reg_rtx (V16QImode);
7310 emit_insn (gen_pcvr2 (pcvr2, tmp));
7312 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7313 pcvr2);
7314 emit_insn (perm2);
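/* Hypothetical builtin-level view (an assumption, not from the sources):
   on Power9,

     vector unsigned char
     put (vector unsigned char v, unsigned char x, unsigned long i)
     {
       return vec_insert (x, v, i);
     }

   with a variable I reaches this expansion: rotate element I to slot 0
   with an lvsl/lvsr control vector and vperm, insert X at element 0,
   then rotate back with the reversed control vector.  */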
7317 /* As above, but for Power7/Power8: insert VAL at variable index IDX of
7318 TARGET, where VAL has the vector element size and IDX counts in element units. */
7320 static void
7321 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7323 machine_mode mode = GET_MODE (target);
7325 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7327 machine_mode inner_mode = GET_MODE (val);
7328 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7330 int width = GET_MODE_SIZE (inner_mode);
7331 gcc_assert (width >= 1 && width <= 4);
7333 int shift = exact_log2 (width);
7335 machine_mode idx_mode = GET_MODE (idx);
7337 machine_mode shift_mode;
7338 rtx (*gen_ashl)(rtx, rtx, rtx);
7339 rtx (*gen_add)(rtx, rtx, rtx);
7340 rtx (*gen_sub)(rtx, rtx, rtx);
7341 rtx (*gen_lvsl)(rtx, rtx);
7343 if (TARGET_POWERPC64)
7345 shift_mode = DImode;
7346 gen_ashl = gen_ashldi3;
7347 gen_add = gen_adddi3;
7348 gen_sub = gen_subdi3;
7349 gen_lvsl = gen_altivec_lvsl_reg_di;
7351 else
7353 shift_mode = SImode;
7354 gen_ashl = gen_ashlsi3;
7355 gen_add = gen_addsi3;
7356 gen_sub = gen_subsi3;
7357 gen_lvsl = gen_altivec_lvsl_reg_si;
7360 /* idx = idx * width. */
7361 rtx tmp = gen_reg_rtx (shift_mode);
7362 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7364 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7366 /* For LE: idx = idx + 8. For BE: idx = 24 - width - idx. */
7367 if (!BYTES_BIG_ENDIAN)
7368 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7369 else
7370 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7372 /* lxv vs33, mask.
7373 DImode: 0xffffffffffffffff0000000000000000
7374 SImode: 0x00000000ffffffff0000000000000000
7375 HImode: 0x000000000000ffff0000000000000000.
7376 QImode: 0x00000000000000ff0000000000000000. */
7377 rtx mask = gen_reg_rtx (V16QImode);
7378 rtx mask_v2di = gen_reg_rtx (V2DImode);
7379 rtvec v = rtvec_alloc (2);
7380 if (!BYTES_BIG_ENDIAN)
7382 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7383 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7385 else
7387 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7388 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7390 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7391 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7392 emit_insn (gen_rtx_SET (mask, sub_mask));
7394 /* mtvsrd[wz] f0,tmp_val. */
7395 rtx tmp_val = gen_reg_rtx (SImode);
7396 if (inner_mode == E_SFmode)
7397 if (TARGET_DIRECT_MOVE_64BIT)
7398 emit_insn (gen_movsi_from_sf (tmp_val, val));
7399 else
7401 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7402 emit_insn (gen_movsf_hardfloat (stack, val));
7403 rtx stack2 = copy_rtx (stack);
7404 PUT_MODE (stack2, SImode);
7405 emit_move_insn (tmp_val, stack2);
7407 else
7408 tmp_val = force_reg (SImode, val);
7410 rtx val_v16qi = gen_reg_rtx (V16QImode);
7411 rtx val_v2di = gen_reg_rtx (V2DImode);
7412 rtvec vec_val = rtvec_alloc (2);
7413 if (!BYTES_BIG_ENDIAN)
7415 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7416 RTVEC_ELT (vec_val, 1) = tmp_val;
7418 else
7420 RTVEC_ELT (vec_val, 0) = tmp_val;
7421 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7423 emit_insn (
7424 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7425 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7426 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7428 /* lvsl 13,0,idx. */
7429 rtx pcv = gen_reg_rtx (V16QImode);
7430 emit_insn (gen_lvsl (pcv, tmp));
7432 /* vperm 1,1,1,13. */
7433 /* vperm 0,0,0,13. */
7434 rtx val_perm = gen_reg_rtx (V16QImode);
7435 rtx mask_perm = gen_reg_rtx (V16QImode);
7436 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7437 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7439 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7441 /* xxsel 34,34,32,33. */
7442 emit_insn (
7443 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
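/* In scalar terms the final xxsel computes, for each byte J of the
   target (the rotated mask bytes are all-zeros or all-ones):

     target[j] = mask_perm[j] ? val_perm[j] : target[j];

   so the mask selects exactly the WIDTH bytes being replaced by the
   rotated value.  */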
7446 /* Set field ELT_RTX of TARGET to VAL. */
7448 void
7449 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7451 machine_mode mode = GET_MODE (target);
7452 machine_mode inner_mode = GET_MODE_INNER (mode);
7453 rtx reg = gen_reg_rtx (mode);
7454 rtx mask, mem, x;
7455 int width = GET_MODE_SIZE (inner_mode);
7456 int i;
7458 val = force_reg (GET_MODE (val), val);
7460 if (VECTOR_MEM_VSX_P (mode))
7462 if (!CONST_INT_P (elt_rtx))
7464 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7465 when elt_rtx is variable. */
7466 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7468 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7469 return;
7471 else if (TARGET_VSX)
7473 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7474 return;
7476 else
7477 gcc_assert (CONST_INT_P (elt_rtx));
7480 rtx insn = NULL_RTX;
7482 if (mode == V2DFmode)
7483 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7485 else if (mode == V2DImode)
7486 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7488 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7490 if (mode == V4SImode)
7491 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7492 else if (mode == V8HImode)
7493 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7494 else if (mode == V16QImode)
7495 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7496 else if (mode == V4SFmode)
7497 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7500 if (insn)
7502 emit_insn (insn);
7503 return;
7507 /* Simplify setting single-element vectors like V1TImode. */
7508 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7509 && INTVAL (elt_rtx) == 0)
7511 emit_move_insn (target, gen_lowpart (mode, val));
7512 return;
7515 /* Load single variable value. */
7516 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7517 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7518 x = gen_rtx_UNSPEC (VOIDmode,
7519 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7520 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7521 gen_rtvec (2,
7522 gen_rtx_SET (reg, mem),
7523 x)));
7525 /* Linear sequence. */
7526 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7527 for (i = 0; i < 16; ++i)
7528 XVECEXP (mask, 0, i) = GEN_INT (i);
7530 /* Set permute mask to insert element into target. */
7531 for (i = 0; i < width; ++i)
7532 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7533 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7535 if (BYTES_BIG_ENDIAN)
7536 x = gen_rtx_UNSPEC (mode,
7537 gen_rtvec (3, target, reg,
7538 force_reg (V16QImode, x)),
7539 UNSPEC_VPERM);
7540 else
7542 if (TARGET_P9_VECTOR)
7543 x = gen_rtx_UNSPEC (mode,
7544 gen_rtvec (3, reg, target,
7545 force_reg (V16QImode, x)),
7546 UNSPEC_VPERMR);
7547 else
7549 /* Invert selector. We prefer to generate VNAND on P8 so
7550 that future fusion opportunities can kick in, but must
7551 generate VNOR elsewhere. */
7552 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7553 rtx iorx = (TARGET_P8_VECTOR
7554 ? gen_rtx_IOR (V16QImode, notx, notx)
7555 : gen_rtx_AND (V16QImode, notx, notx));
7556 rtx tmp = gen_reg_rtx (V16QImode);
7557 emit_insn (gen_rtx_SET (tmp, iorx));
7559 /* Permute with operands reversed and adjusted selector. */
7560 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7561 UNSPEC_VPERM);
7565 emit_insn (gen_rtx_SET (target, x));
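/* Minimal sketch of the constant-index case handled above (assumed
   example, not from the sources):

     vector int
     set2 (vector int v, int x)
     {
       return vec_insert (x, v, 2);
     }

   Here ELT_RTX is (const_int 2); on 64-bit Power9 this maps to the
   vsx_set_v4si_p9 pattern, and otherwise it goes through the
   store/LVE/VPERM sequence at the end of the function.  */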
7568 /* Extract field ELT from VEC into TARGET. */
7570 void
7571 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7573 machine_mode mode = GET_MODE (vec);
7574 machine_mode inner_mode = GET_MODE_INNER (mode);
7575 rtx mem;
7577 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7579 switch (mode)
7581 default:
7582 break;
7583 case E_V1TImode:
7584 emit_move_insn (target, gen_lowpart (TImode, vec));
7585 break;
7586 case E_V2DFmode:
7587 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7588 return;
7589 case E_V2DImode:
7590 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7591 return;
7592 case E_V4SFmode:
7593 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7594 return;
7595 case E_V16QImode:
7596 if (TARGET_DIRECT_MOVE_64BIT)
7598 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7599 return;
7601 else
7602 break;
7603 case E_V8HImode:
7604 if (TARGET_DIRECT_MOVE_64BIT)
7606 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7607 return;
7609 else
7610 break;
7611 case E_V4SImode:
7612 if (TARGET_DIRECT_MOVE_64BIT)
7614 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7615 return;
7617 break;
7620 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7621 && TARGET_DIRECT_MOVE_64BIT)
7623 if (GET_MODE (elt) != DImode)
7625 rtx tmp = gen_reg_rtx (DImode);
7626 convert_move (tmp, elt, 0);
7627 elt = tmp;
7629 else if (!REG_P (elt))
7630 elt = force_reg (DImode, elt);
7632 switch (mode)
7634 case E_V1TImode:
7635 emit_move_insn (target, gen_lowpart (TImode, vec));
7636 return;
7638 case E_V2DFmode:
7639 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7640 return;
7642 case E_V2DImode:
7643 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7644 return;
7646 case E_V4SFmode:
7647 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7648 return;
7650 case E_V4SImode:
7651 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7652 return;
7654 case E_V8HImode:
7655 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7656 return;
7658 case E_V16QImode:
7659 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7660 return;
7662 default:
7663 gcc_unreachable ();
7667 /* Allocate mode-sized buffer. */
7668 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7670 emit_move_insn (mem, vec);
7671 if (CONST_INT_P (elt))
7673 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7675 /* Add offset to field within buffer matching vector element. */
7676 mem = adjust_address_nv (mem, inner_mode,
7677 modulo_elt * GET_MODE_SIZE (inner_mode));
7678 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7680 else
7682 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7683 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7685 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7686 if (ele_size > 1)
7687 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7688 rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7689 new_addr = change_address (mem, inner_mode, new_addr);
7690 emit_move_insn (target, new_addr);
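/* Assumed usage sketch: both of these funnel through the code above, the
   first via a vsx_extract_* pattern and the second (a variable index
   without direct move) via the stack temporary at the end:

     int f (vector int v) { return vec_extract (v, 3); }
     int g (vector int v, unsigned long i) { return vec_extract (v, i); }
*/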
7694 /* Return the offset within a memory object (MEM) of a vector type to a given
7695 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7696 the element is constant, we return a constant integer.
7698 Otherwise, we use a base register temporary to calculate the offset after
7699 masking it to fit within the bounds of the vector and scaling it. The
7700 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7701 built-in function. */
7703 static rtx
7704 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7706 if (CONST_INT_P (element))
7707 return GEN_INT (INTVAL (element) * scalar_size);
7709 /* All insns should use the 'Q' constraint (address is a single register) if
7710 the element number is not a constant. */
7711 gcc_assert (satisfies_constraint_Q (mem));
7713 /* Mask the element to make sure the element number is between 0 and the
7714 maximum number of elements - 1 so that we don't generate an address
7715 outside the vector. */
7716 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7717 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7718 emit_insn (gen_rtx_SET (base_tmp, and_op));
7720 /* Shift the element to get the byte offset from the element number. */
7721 int shift = exact_log2 (scalar_size);
7722 gcc_assert (shift >= 0);
7724 if (shift > 0)
7726 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7727 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7730 return base_tmp;
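/* Worked example (illustrative): for a V4SI vector with SCALAR_SIZE 4, a
   constant ELEMENT of 3 yields GEN_INT (12), while a variable ELEMENT
   computes BASE_TMP = (ELEMENT & 3) << 2, i.e. the masking required by
   the ELFv2 vec_extract rules followed by the scaling.  */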
7733 /* Helper function to update PC-relative addresses when we are adjusting a
7734 memory address (ADDR) of a vector to point to a scalar field within the
7735 vector with a constant offset (ELEMENT_OFFSET). If the address is not
7736 valid, we can use the base register temporary (BASE_TMP) to form it. */
7738 static rtx
7739 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7741 rtx new_addr = NULL;
7743 gcc_assert (CONST_INT_P (element_offset));
7745 if (GET_CODE (addr) == CONST)
7746 addr = XEXP (addr, 0);
7748 if (GET_CODE (addr) == PLUS)
7750 rtx op0 = XEXP (addr, 0);
7751 rtx op1 = XEXP (addr, 1);
7753 if (CONST_INT_P (op1))
7755 HOST_WIDE_INT offset
7756 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7758 if (offset == 0)
7759 new_addr = op0;
7761 else
7763 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7764 new_addr = gen_rtx_CONST (Pmode, plus);
7768 else
7770 emit_move_insn (base_tmp, addr);
7771 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7775 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7777 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7778 new_addr = gen_rtx_CONST (Pmode, plus);
7781 else
7782 gcc_unreachable ();
7784 return new_addr;
7787 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7788 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7789 temporary (BASE_TMP) to fix up the address. Return the new memory address
7790 that is valid for reads or writes to a given register (SCALAR_REG).
7792 This function is expected to be called after reload is completed when we are
7793 splitting insns. The temporary BASE_TMP might be set multiple times with
7794 this code. */
7796 rtx
7797 rs6000_adjust_vec_address (rtx scalar_reg,
7798 rtx mem,
7799 rtx element,
7800 rtx base_tmp,
7801 machine_mode scalar_mode)
7803 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7804 rtx addr = XEXP (mem, 0);
7805 rtx new_addr;
7807 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7808 gcc_assert (!reg_mentioned_p (base_tmp, element));
7810 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7811 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7813 /* Calculate what we need to add to the address to get the element
7814 address. */
7815 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7817 /* Create the new address pointing to the element within the vector. If we
7818 are adding 0, we don't have to change the address. */
7819 if (element_offset == const0_rtx)
7820 new_addr = addr;
7822 /* A simple indirect address can be converted into a reg + offset
7823 address. */
7824 else if (REG_P (addr) || SUBREG_P (addr))
7825 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7827 /* For references to local static variables, fold a constant offset into the
7828 address. */
7829 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7830 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7832 /* Optimize D-FORM addresses with a constant offset and a constant element
7833 number, to include the element offset in the address directly. */
7834 else if (GET_CODE (addr) == PLUS)
7836 rtx op0 = XEXP (addr, 0);
7837 rtx op1 = XEXP (addr, 1);
7839 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7840 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7842 /* op0 should never be r0, because r0+offset is not valid. But it
7843 doesn't hurt to make sure it is not r0. */
7844 gcc_assert (reg_or_subregno (op0) != 0);
7846 /* D-FORM address with constant element number. */
7847 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7848 rtx offset_rtx = GEN_INT (offset);
7849 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7851 else
7853 /* If we don't have a D-FORM address with a constant element number,
7854 add the two elements in the current address. Then add the offset.
7856 Previously, we tried to add the offset to OP1 and change the
7857 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7858 complicated because we had to verify that op1 was not GPR0 and we
7859 had a constant element offset (due to the way ADDI is defined).
7860 By doing the add of OP0 and OP1 first, and then adding in the
7861 offset, it has the benefit that if D-FORM instructions are
7862 allowed, the offset is part of the memory access to the vector
7863 element. */
7864 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7865 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7869 else
7871 emit_move_insn (base_tmp, addr);
7872 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7875 /* If the address isn't valid, move the address into the temporary base
7876 register. Some reasons it could not be valid include:
7878 The address offset overflowed the 16 or 34 bit offset size;
7879 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7880 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7881 Only X-FORM loads can be done, and the address is D-FORM. */
7883 enum insn_form iform
7884 = address_to_insn_form (new_addr, scalar_mode,
7885 reg_to_non_prefixed (scalar_reg, scalar_mode));
7887 if (iform == INSN_FORM_BAD)
7889 emit_move_insn (base_tmp, new_addr);
7890 new_addr = base_tmp;
7893 return change_address (mem, scalar_mode, new_addr);
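/* Illustrative example (an assumption, not from the sources): pulling
   element 2 of a V4SI vector stored at (mem (plus r31 16)) folds the
   offsets and yields (mem:SI (plus r31 24)); had the combined offset not
   fit the required instruction form, the sum would have been built in
   BASE_TMP instead.  */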
7896 /* Split a variable vec_extract operation into the component instructions. */
7898 void
7899 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7900 rtx tmp_altivec)
7902 machine_mode mode = GET_MODE (src);
7903 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7904 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7905 int byte_shift = exact_log2 (scalar_size);
7907 gcc_assert (byte_shift >= 0);
7909 /* If we are given a memory address, optimize to load just the element. We
7910 don't have to adjust the vector element number on little endian
7911 systems. */
7912 if (MEM_P (src))
7914 emit_move_insn (dest,
7915 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7916 scalar_mode));
7917 return;
7920 else if (REG_P (src) || SUBREG_P (src))
7922 int num_elements = GET_MODE_NUNITS (mode);
7923 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7924 int bit_shift = 7 - exact_log2 (num_elements);
7925 rtx element2;
7926 unsigned int dest_regno = reg_or_subregno (dest);
7927 unsigned int src_regno = reg_or_subregno (src);
7928 unsigned int element_regno = reg_or_subregno (element);
7930 gcc_assert (REG_P (tmp_gpr));
7932 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7933 a general purpose register. */
7934 if (TARGET_P9_VECTOR
7935 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7936 && INT_REGNO_P (dest_regno)
7937 && ALTIVEC_REGNO_P (src_regno)
7938 && INT_REGNO_P (element_regno))
7940 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7941 rtx element_si = gen_rtx_REG (SImode, element_regno);
7943 if (mode == V16QImode)
7944 emit_insn (BYTES_BIG_ENDIAN
7945 ? gen_vextublx (dest_si, element_si, src)
7946 : gen_vextubrx (dest_si, element_si, src));
7948 else if (mode == V8HImode)
7950 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7951 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7952 emit_insn (BYTES_BIG_ENDIAN
7953 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7954 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7958 else
7960 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7961 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7962 emit_insn (BYTES_BIG_ENDIAN
7963 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7964 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7967 return;
7971 gcc_assert (REG_P (tmp_altivec));
7973 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7974 an XOR, otherwise we need to subtract. The shift amount is chosen so
7975 that VSLO will shift the element into the upper position (adding 3 to
7976 convert a byte shift into a bit shift). */
7977 if (scalar_size == 8)
7979 if (!BYTES_BIG_ENDIAN)
7981 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7982 element2 = tmp_gpr;
7984 else
7985 element2 = element;
7987 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7988 bit. */
7989 emit_insn (gen_rtx_SET (tmp_gpr,
7990 gen_rtx_AND (DImode,
7991 gen_rtx_ASHIFT (DImode,
7992 element2,
7993 GEN_INT (6)),
7994 GEN_INT (64))));
7996 else
7998 if (!BYTES_BIG_ENDIAN)
8000 rtx num_ele_m1 = GEN_INT (num_elements - 1);
8002 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8003 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8004 element2 = tmp_gpr;
8006 else
8007 element2 = element;
8009 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8012 /* Get the value into the lower byte of the Altivec register where VSLO
8013 expects it. */
8014 if (TARGET_P9_VECTOR)
8015 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8016 else if (can_create_pseudo_p ())
8017 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8018 else
8020 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8021 emit_move_insn (tmp_di, tmp_gpr);
8022 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8025 /* Do the VSLO to get the value into the final location. */
8026 switch (mode)
8028 case E_V2DFmode:
8029 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8030 return;
8032 case E_V2DImode:
8033 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8034 return;
8036 case E_V4SFmode:
8038 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8039 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8040 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8041 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8042 tmp_altivec));
8044 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8045 return;
8048 case E_V4SImode:
8049 case E_V8HImode:
8050 case E_V16QImode:
8052 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8053 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8054 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8055 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8056 tmp_altivec));
8057 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8058 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8059 GEN_INT (64 - bits_in_element)));
8060 return;
8063 default:
8064 gcc_unreachable ();
8067 return;
8069 else
8070 gcc_unreachable ();
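/* Rough shape of the register path above for V4SI on a little-endian
   Power9 target (a sketch, not verbatim compiler output):

     andi.   tmp, elt, 3        # elt &= nelts - 1
     subfic  tmp, tmp, 3        # LE element reordering
     sldi    tmp, tmp, 5        # byte shift -> bit shift
     mtvsrdd vtmp, tmp, tmp     # shift count into the Altivec register
     vslo    vd, vs, vtmp       # rotate the element to the top
     mfvsrd  rd, vd             # move the upper doubleword to a GPR
     srdi    rd, rd, 32         # right-justify the 32-bit element
*/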
8073 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
8074 selects whether the alignment is ABI-mandated, optional, or
8075 both ABI-mandated and optional alignment. */
8077 unsigned int
8078 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8080 if (how != align_opt)
8082 if (VECTOR_TYPE_P (type) && align < 128)
8083 align = 128;
8086 if (how != align_abi)
8088 if (TREE_CODE (type) == ARRAY_TYPE
8089 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8091 if (align < BITS_PER_WORD)
8092 align = BITS_PER_WORD;
8096 return align;
8099 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8100 instructions simply ignore the low bits; VSX memory instructions
8101 are aligned to 4 or 8 bytes. */
8103 static bool
8104 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8106 return (STRICT_ALIGNMENT
8107 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8108 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8109 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8110 && (int) align < VECTOR_ALIGN (mode)))));
8113 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8115 unsigned int
8116 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8118 if (computed <= 32 || TYPE_PACKED (type))
8119 return computed;
8121 /* Strip initial arrays. */
8122 while (TREE_CODE (type) == ARRAY_TYPE)
8123 type = TREE_TYPE (type);
8125 /* If RECORD or UNION, recursively find the first field. */
8126 while (AGGREGATE_TYPE_P (type))
8128 tree field = TYPE_FIELDS (type);
8130 /* Skip all non-field decls. */
8131 while (field != NULL
8132 && (TREE_CODE (field) != FIELD_DECL
8133 || DECL_FIELD_ABI_IGNORED (field)))
8134 field = DECL_CHAIN (field);
8136 if (! field)
8137 break;
8139 /* A packed field does not contribute any extra alignment. */
8140 if (DECL_PACKED (field))
8141 return computed;
8143 type = TREE_TYPE (field);
8145 /* Strip arrays. */
8146 while (TREE_CODE (type) == ARRAY_TYPE)
8147 type = TREE_TYPE (type);
8150 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8151 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8152 computed = MIN (computed, 32);
8154 return computed;
8157 /* AIX increases natural record alignment to doubleword if the innermost first
8158 field is an FP double, while the FP fields remain word-aligned.
8159 Only called if TYPE initially is a RECORD or UNION. */
8161 unsigned int
8162 rs6000_special_round_type_align (tree type, unsigned int computed,
8163 unsigned int specified)
8165 unsigned int align = MAX (computed, specified);
8167 if (TYPE_PACKED (type) || align >= 64)
8168 return align;
8170 /* If RECORD or UNION, recursively find the first field. */
8171 do
8173 tree field = TYPE_FIELDS (type);
8175 /* Skip all non-field decls. */
8176 while (field != NULL
8177 && (TREE_CODE (field) != FIELD_DECL
8178 || DECL_FIELD_ABI_IGNORED (field)))
8179 field = DECL_CHAIN (field);
8181 if (! field)
8182 break;
8184 /* A packed field does not contribute any extra alignment. */
8185 if (DECL_PACKED (field))
8186 return align;
8188 type = TREE_TYPE (field);
8190 /* Strip arrays. */
8191 while (TREE_CODE (type) == ARRAY_TYPE)
8192 type = TREE_TYPE (type);
8193 } while (AGGREGATE_TYPE_P (type));
8195 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8196 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8197 align = MAX (align, 64);
8199 return align;
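/* Hedged example: on AIX,

     struct s { double d; int i; };

   has its record alignment rounded up to 64 bits because the first field
   is a DFmode double, even though the double itself stays word-aligned
   inside records (see rs6000_special_adjust_field_align above).  */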
8202 /* Darwin increases record alignment to the natural alignment of
8203 the first field. */
8205 unsigned int
8206 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8207 unsigned int specified)
8209 unsigned int align = MAX (computed, specified);
8211 if (TYPE_PACKED (type))
8212 return align;
8214 /* Find the first field, looking down into aggregates. */
8215 do {
8216 tree field = TYPE_FIELDS (type);
8217 /* Skip all non-field decls. */
8218 while (field != NULL
8219 && (TREE_CODE (field) != FIELD_DECL
8220 || DECL_FIELD_ABI_IGNORED (field)))
8221 field = DECL_CHAIN (field);
8222 if (! field)
8223 break;
8224 /* A packed field does not contribute any extra alignment. */
8225 if (DECL_PACKED (field))
8226 return align;
8227 type = TREE_TYPE (field);
8228 while (TREE_CODE (type) == ARRAY_TYPE)
8229 type = TREE_TYPE (type);
8230 } while (AGGREGATE_TYPE_P (type));
8232 if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
8233 && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
8234 align = MAX (align, TYPE_ALIGN (type));
8236 return align;
8239 /* Return 1 for an operand in small memory on V.4/eabi. */
8241 int
8242 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8243 machine_mode mode ATTRIBUTE_UNUSED)
8245 #if TARGET_ELF
8246 rtx sym_ref;
8248 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8249 return 0;
8251 if (DEFAULT_ABI != ABI_V4)
8252 return 0;
8254 if (SYMBOL_REF_P (op))
8255 sym_ref = op;
8257 else if (GET_CODE (op) != CONST
8258 || GET_CODE (XEXP (op, 0)) != PLUS
8259 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8260 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8261 return 0;
8263 else
8265 rtx sum = XEXP (op, 0);
8266 HOST_WIDE_INT summand;
8268 /* We have to be careful here, because it is the referenced address
8269 that must be 32k from _SDA_BASE_, not just the symbol. */
8270 summand = INTVAL (XEXP (sum, 1));
8271 if (summand < 0 || summand > g_switch_value)
8272 return 0;
8274 sym_ref = XEXP (sum, 0);
8277 return SYMBOL_REF_SMALL_P (sym_ref);
8278 #else
8279 return 0;
8280 #endif
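/* Illustration (an assumption, not from the sources): under the V.4/eabi
   small-data ABI with the default -G 8, a reference to a 4-byte static
   object placed in .sdata, written as "x" or "x+2", satisfies this
   predicate as long as the referenced address stays within 32k of
   _SDA_BASE_ and the addend does not exceed g_switch_value.  */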
8283 /* Return true if either operand is a general purpose register. */
8285 bool
8286 gpr_or_gpr_p (rtx op0, rtx op1)
8288 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8289 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8292 /* Return true if this is a direct move operation between GPR registers and
8293 floating point/VSX registers. */
8295 bool
8296 direct_move_p (rtx op0, rtx op1)
8298 if (!REG_P (op0) || !REG_P (op1))
8299 return false;
8301 if (!TARGET_DIRECT_MOVE)
8302 return false;
8304 int regno0 = REGNO (op0);
8305 int regno1 = REGNO (op1);
8306 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8307 return false;
8309 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8310 return true;
8312 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8313 return true;
8315 return false;
8318 /* Return true if the ADDR is an acceptable address for a quad memory
8319 operation of mode MODE (either LQ/STQ for general purpose registers, or
8320 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8321 address must be strictly valid, i.e. if only hard registers are
8322 acceptable as base registers. */
8324 bool
8325 quad_address_p (rtx addr, machine_mode mode, bool strict)
8327 rtx op0, op1;
8329 if (GET_MODE_SIZE (mode) < 16)
8330 return false;
8332 if (legitimate_indirect_address_p (addr, strict))
8333 return true;
8335 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8336 return false;
8338 /* Is this a valid prefixed address? If the bottom four bits of the offset
8339 are non-zero, we could use a prefixed instruction (which does not have the
8340 DQ-form constraint that the traditional instruction had) instead of
8341 forcing the unaligned offset to a GPR. */
8342 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8343 return true;
8345 if (GET_CODE (addr) != PLUS)
8346 return false;
8348 op0 = XEXP (addr, 0);
8349 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8350 return false;
8352 op1 = XEXP (addr, 1);
8353 if (!CONST_INT_P (op1))
8354 return false;
8356 return quad_address_offset_p (INTVAL (op1));
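/* Worked example (illustrative): for LXV/STXV, (plus r9 48) is a valid
   quad address because 48 is a signed-16-bit multiple of 16, whereas
   (plus r9 50) is not; with prefixed instructions available, the
   misaligned offset can instead be handled by the address_is_prefixed
   test above.  */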
8359 /* Return true if this is a load or store quad operation. This function does
8360 not handle the atomic quad memory instructions. */
8362 bool
8363 quad_load_store_p (rtx op0, rtx op1)
8365 bool ret;
8367 if (!TARGET_QUAD_MEMORY)
8368 ret = false;
8370 else if (REG_P (op0) && MEM_P (op1))
8371 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8372 && quad_memory_operand (op1, GET_MODE (op1))
8373 && !reg_overlap_mentioned_p (op0, op1));
8375 else if (MEM_P (op0) && REG_P (op1))
8376 ret = (quad_memory_operand (op0, GET_MODE (op0))
8377 && quad_int_reg_operand (op1, GET_MODE (op1)));
8379 else
8380 ret = false;
8382 if (TARGET_DEBUG_ADDR)
8384 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8385 ret ? "true" : "false");
8386 debug_rtx (gen_rtx_SET (op0, op1));
8389 return ret;
8392 /* Given an address, return a constant offset term if one exists. */
8394 static rtx
8395 address_offset (rtx op)
8397 if (GET_CODE (op) == PRE_INC
8398 || GET_CODE (op) == PRE_DEC)
8399 op = XEXP (op, 0);
8400 else if (GET_CODE (op) == PRE_MODIFY
8401 || GET_CODE (op) == LO_SUM)
8402 op = XEXP (op, 1);
8404 if (GET_CODE (op) == CONST)
8405 op = XEXP (op, 0);
8407 if (GET_CODE (op) == PLUS)
8408 op = XEXP (op, 1);
8410 if (CONST_INT_P (op))
8411 return op;
8413 return NULL_RTX;
8416 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8417 the mode. If we can't find (or don't know) the alignment of the symbol
8418 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8419 should be pessimistic]. Offsets are validated in the same way as for
8420 reg + offset. */
8421 static bool
8422 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8424 /* We should not get here with this. */
8425 gcc_checking_assert (! mode_supports_dq_form (mode));
8427 if (GET_CODE (x) == CONST)
8428 x = XEXP (x, 0);
8430 /* If we are building PIC code, then any symbol must be wrapped in an
8431 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8432 bool machopic_offs_p = false;
8433 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8435 x = XVECEXP (x, 0, 0);
8436 machopic_offs_p = true;
8439 rtx sym = NULL_RTX;
8440 unsigned HOST_WIDE_INT offset = 0;
8442 if (GET_CODE (x) == PLUS)
8444 sym = XEXP (x, 0);
8445 if (! SYMBOL_REF_P (sym))
8446 return false;
8447 if (!CONST_INT_P (XEXP (x, 1)))
8448 return false;
8449 offset = INTVAL (XEXP (x, 1));
8451 else if (SYMBOL_REF_P (x))
8452 sym = x;
8453 else if (CONST_INT_P (x))
8454 offset = INTVAL (x);
8455 else if (GET_CODE (x) == LABEL_REF)
8456 offset = 0; // We assume code labels are Pmode aligned
8457 else
8458 return false; // not sure what we have here.
8460 /* If we don't know the alignment of the thing to which the symbol refers,
8461 we assume optimistically it is "enough".
8462 ??? maybe we should be pessimistic instead. */
8463 unsigned align = 0;
8465 if (sym)
8467 tree decl = SYMBOL_REF_DECL (sym);
8468 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8469 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8470 return false;
8471 #if TARGET_MACHO
8472 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8473 /* The decl in an indirection symbol is the original one, which might
8474 be less aligned than the indirection. Our indirections are always
8475 pointer-aligned. */
8477 else
8478 #endif
8479 if (decl && DECL_ALIGN (decl))
8480 align = DECL_ALIGN_UNIT (decl);
8483 unsigned int extra = 0;
8484 switch (mode)
8486 case E_DFmode:
8487 case E_DDmode:
8488 case E_DImode:
8489 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8490 addressing. */
8491 if (VECTOR_MEM_VSX_P (mode))
8492 return false;
8494 if (!TARGET_POWERPC64)
8495 extra = 4;
8496 else if ((offset & 3) || (align & 3))
8497 return false;
8498 break;
8500 case E_TFmode:
8501 case E_IFmode:
8502 case E_KFmode:
8503 case E_TDmode:
8504 case E_TImode:
8505 case E_PTImode:
8506 extra = 8;
8507 if (!TARGET_POWERPC64)
8508 extra = 12;
8509 else if ((offset & 3) || (align & 3))
8510 return false;
8511 break;
8513 default:
8514 break;
8517 /* We only care if the access(es) would cause a change to the high part. */
8518 offset = sext_hwi (offset, 16);
8519 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8522 /* Return true if the MEM operand is a memory operand suitable for use
8523 with a (full width, possibly multiple) gpr load/store. On
8524 powerpc64 this means the offset must be divisible by 4.
8525 Implements 'Y' constraint.
8527 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8528 a constraint function we know the operand has satisfied a suitable
8529 memory predicate.
8531 Offsetting a lo_sum should not be allowed, except where we know by
8532 alignment that a 32k boundary is not crossed. Note that by
8533 "offsetting" here we mean a further offset to access parts of the
8534 MEM. It's fine to have a lo_sum where the inner address is offset
8535 from a sym, since the same sym+offset will appear in the high part
8536 of the address calculation. */
8538 bool
8539 mem_operand_gpr (rtx op, machine_mode mode)
8541 unsigned HOST_WIDE_INT offset;
8542 int extra;
8543 rtx addr = XEXP (op, 0);
8545 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8546 if (TARGET_UPDATE
8547 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8548 && mode_supports_pre_incdec_p (mode)
8549 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8550 return true;
8552 /* Allow prefixed instructions if supported. If the bottom two bits of the
8553 offset are non-zero, we could use a prefixed instruction (which does not
8554 have the DS-form constraint that the traditional instruction had) instead
8555 of forcing the unaligned offset to a GPR. */
8556 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8557 return true;
8559 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8560 really OK. Doing this early avoids teaching all the other machinery
8561 about them. */
8562 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8563 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8565 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8566 if (!rs6000_offsettable_memref_p (op, mode, false))
8567 return false;
8569 op = address_offset (addr);
8570 if (op == NULL_RTX)
8571 return true;
8573 offset = INTVAL (op);
8574 if (TARGET_POWERPC64 && (offset & 3) != 0)
8575 return false;
8577 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8578 if (extra < 0)
8579 extra = 0;
8581 if (GET_CODE (addr) == LO_SUM)
8582 /* For lo_sum addresses, we must allow any offset except one that
8583 causes a wrap, so test only the low 16 bits. */
8584 offset = sext_hwi (offset, 16);
8586 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
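/* Example of the divisibility rule above (illustrative): on powerpc64 a
   DImode access at (plus r3 12) is accepted, while (plus r3 10) is
   rejected, because LD/STD are DS-form and need the low two bits of the
   offset clear (unless a prefixed form is available, handled earlier).  */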
8589 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8590 enforce an offset divisible by 4 even for 32-bit. */
8592 bool
8593 mem_operand_ds_form (rtx op, machine_mode mode)
8595 unsigned HOST_WIDE_INT offset;
8596 int extra;
8597 rtx addr = XEXP (op, 0);
8599 /* Allow prefixed instructions if supported. If the bottom two bits of the
8600 offset are non-zero, we could use a prefixed instruction (which does not
8601 have the DS-form constraint that the traditional instruction had) instead
8602 of forcing the unaligned offset to a GPR. */
8603 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8604 return true;
8606 if (!offsettable_address_p (false, mode, addr))
8607 return false;
8609 op = address_offset (addr);
8610 if (op == NULL_RTX)
8611 return true;
8613 offset = INTVAL (op);
8614 if ((offset & 3) != 0)
8615 return false;
8617 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8618 if (extra < 0)
8619 extra = 0;
8621 if (GET_CODE (addr) == LO_SUM)
8622 /* For lo_sum addresses, we must allow any offset except one that
8623 causes a wrap, so test only the low 16 bits. */
8624 offset = sext_hwi (offset, 16);
8626 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8629 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8631 static bool
8632 reg_offset_addressing_ok_p (machine_mode mode)
8634 switch (mode)
8636 case E_V16QImode:
8637 case E_V8HImode:
8638 case E_V4SFmode:
8639 case E_V4SImode:
8640 case E_V2DFmode:
8641 case E_V2DImode:
8642 case E_V1TImode:
8643 case E_TImode:
8644 case E_TFmode:
8645 case E_KFmode:
8646 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8647 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8648 a vector mode, if we want to use the VSX registers to move it around,
8649 we need to restrict ourselves to reg+reg addressing. Similarly for
8650 IEEE 128-bit floating point that is passed in a single vector
8651 register. */
8652 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8653 return mode_supports_dq_form (mode);
8654 break;
8656 /* The vector pair/quad types support offset addressing if the
8657 underlying vectors support offset addressing. */
8658 case E_OOmode:
8659 case E_XOmode:
8660 return TARGET_MMA;
8662 case E_SDmode:
8663 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8664 addressing for the LFIWZX and STFIWX instructions. */
8665 if (TARGET_NO_SDMODE_STACK)
8666 return false;
8667 break;
8669 default:
8670 break;
8673 return true;
8676 static bool
8677 virtual_stack_registers_memory_p (rtx op)
8679 int regnum;
8681 if (REG_P (op))
8682 regnum = REGNO (op);
8684 else if (GET_CODE (op) == PLUS
8685 && REG_P (XEXP (op, 0))
8686 && CONST_INT_P (XEXP (op, 1)))
8687 regnum = REGNO (XEXP (op, 0));
8689 else
8690 return false;
8692 return (regnum >= FIRST_VIRTUAL_REGISTER
8693 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8696 /* Return true if a MODE-sized memory access to OP plus OFFSET
8697 is known not to straddle a 32k boundary. This function is used
8698 to determine whether -mcmodel=medium code can use TOC pointer
8699 relative addressing for OP. This means the alignment of the TOC
8700 pointer must also be taken into account, and unfortunately that is
8701 only 8 bytes. */
8703 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8704 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8705 #endif
8707 static bool
8708 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8709 machine_mode mode)
8711 tree decl;
8712 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8714 if (!SYMBOL_REF_P (op))
8715 return false;
8717 /* ISA 3.0 vector d-form addressing is restricted; don't allow
8718 SYMBOL_REF. */
8719 if (mode_supports_dq_form (mode))
8720 return false;
8722 dsize = GET_MODE_SIZE (mode);
8723 decl = SYMBOL_REF_DECL (op);
8724 if (!decl)
8726 if (dsize == 0)
8727 return false;
8729 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8730 replacing memory addresses with an anchor plus offset. We
8731 could find the decl by rummaging around in the block->objects
8732 VEC for the given offset but that seems like too much work. */
8733 dalign = BITS_PER_UNIT;
8734 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8735 && SYMBOL_REF_ANCHOR_P (op)
8736 && SYMBOL_REF_BLOCK (op) != NULL)
8738 struct object_block *block = SYMBOL_REF_BLOCK (op);
8740 dalign = block->alignment;
8741 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8743 else if (CONSTANT_POOL_ADDRESS_P (op))
8745 /* It would be nice to have get_pool_align (). */
8746 machine_mode cmode = get_pool_mode (op);
8748 dalign = GET_MODE_ALIGNMENT (cmode);
8751 else if (DECL_P (decl))
8753 dalign = DECL_ALIGN (decl);
8755 if (dsize == 0)
8757 /* Allow BLKmode when the entire object is known to not
8758 cross a 32k boundary. */
8759 if (!DECL_SIZE_UNIT (decl))
8760 return false;
8762 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8763 return false;
8765 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8766 if (dsize > 32768)
8767 return false;
8769 dalign /= BITS_PER_UNIT;
8770 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8771 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8772 return dalign >= dsize;
8775 else
8776 gcc_unreachable ();
8778 /* Find how many bits of the alignment we know for this access. */
8779 dalign /= BITS_PER_UNIT;
8780 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8781 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8782 mask = dalign - 1;
8783 lsb = offset & -offset;
8784 mask &= lsb - 1;
8785 dalign = mask + 1;
8787 return dalign >= dsize;
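/* Worked example of the mask arithmetic above (illustrative): with the
   TOC pointer alignment capped at 8 and OFFSET 40, lsb is 8, so the
   known alignment of the access is 8 bytes; a DFmode access (dsize 8)
   passes while a TImode access (dsize 16) does not.  */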
8790 static bool
8791 constant_pool_expr_p (rtx op)
8793 rtx base, offset;
8795 split_const (op, &base, &offset);
8796 return (SYMBOL_REF_P (base)
8797 && CONSTANT_POOL_ADDRESS_P (base)
8798 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8801 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8802 use that as the register to put the HIGH value into if register allocation
8803 is already done. */
8805 rtx
8806 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8808 rtx tocrel, tocreg, hi;
8810 gcc_assert (TARGET_TOC);
8812 if (TARGET_DEBUG_ADDR)
8814 if (SYMBOL_REF_P (symbol))
8815 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8816 XSTR (symbol, 0));
8817 else
8819 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8820 GET_RTX_NAME (GET_CODE (symbol)));
8821 debug_rtx (symbol);
8825 if (!can_create_pseudo_p ())
8826 df_set_regs_ever_live (TOC_REGISTER, true);
8828 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8829 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8830 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8831 return tocrel;
8833 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8834 if (largetoc_reg != NULL)
8836 emit_move_insn (largetoc_reg, hi);
8837 hi = largetoc_reg;
8839 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
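/* Shape of the result for -mcmodel=medium after register allocation
   (a sketch, not verbatim RTL):

     (lo_sum (high (unspec [sym tocreg] UNSPEC_TOCREL))
             (unspec [sym tocreg] UNSPEC_TOCREL))

   which is eventually rendered as an addis against the TOC register
   followed by an addi or a d-form memory access using the low part.  */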
8842 /* These are only used to pass through from print_operand/print_operand_address
8843 to rs6000_output_addr_const_extra over the intervening function
8844 output_addr_const which is not target code. */
8845 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8847 /* Return true if OP is a toc pointer relative address (the output
8848 of create_TOC_reference). If STRICT, do not match non-split
8849 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8850 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8851 TOCREL_OFFSET_RET respectively. */
8853 bool
8854 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8855 const_rtx *tocrel_offset_ret)
8857 if (!TARGET_TOC)
8858 return false;
8860 if (TARGET_CMODEL != CMODEL_SMALL)
8862 /* When STRICT, ensure we have everything tidy. */
8863 if (strict
8864 && !(GET_CODE (op) == LO_SUM
8865 && REG_P (XEXP (op, 0))
8866 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8867 return false;
8869 /* When not strict, allow non-split TOC addresses and also allow
8870 (lo_sum (high ..)) TOC addresses created during reload. */
8871 if (GET_CODE (op) == LO_SUM)
8872 op = XEXP (op, 1);
8875 const_rtx tocrel_base = op;
8876 const_rtx tocrel_offset = const0_rtx;
8878 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8880 tocrel_base = XEXP (op, 0);
8881 tocrel_offset = XEXP (op, 1);
8884 if (tocrel_base_ret)
8885 *tocrel_base_ret = tocrel_base;
8886 if (tocrel_offset_ret)
8887 *tocrel_offset_ret = tocrel_offset;
8889 return (GET_CODE (tocrel_base) == UNSPEC
8890 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8891 && REG_P (XVECEXP (tocrel_base, 0, 1))
8892 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8895 /* Return true if X is a constant pool address, and also for cmodel=medium
8896 if X is a toc-relative address known to be offsettable within MODE. */
8898 bool
8899 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8900 bool strict)
8902 const_rtx tocrel_base, tocrel_offset;
8903 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8904 && (TARGET_CMODEL != CMODEL_MEDIUM
8905 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8906 || mode == QImode
8907 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8908 INTVAL (tocrel_offset), mode)));
8911 static bool
8912 legitimate_small_data_p (machine_mode mode, rtx x)
8914 return (DEFAULT_ABI == ABI_V4
8915 && !flag_pic && !TARGET_TOC
8916 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8917 && small_data_operand (x, mode));
8920 bool
8921 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8922 bool strict, bool worst_case)
8924 unsigned HOST_WIDE_INT offset;
8925 unsigned int extra;
8927 if (GET_CODE (x) != PLUS)
8928 return false;
8929 if (!REG_P (XEXP (x, 0)))
8930 return false;
8931 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8932 return false;
8933 if (mode_supports_dq_form (mode))
8934 return quad_address_p (x, mode, strict);
8935 if (!reg_offset_addressing_ok_p (mode))
8936 return virtual_stack_registers_memory_p (x);
8937 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8938 return true;
8939 if (!CONST_INT_P (XEXP (x, 1)))
8940 return false;
8942 offset = INTVAL (XEXP (x, 1));
8943 extra = 0;
8944 switch (mode)
8946 case E_DFmode:
8947 case E_DDmode:
8948 case E_DImode:
8949 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8950 addressing. */
8951 if (VECTOR_MEM_VSX_P (mode))
8952 return false;
8954 if (!worst_case)
8955 break;
8956 if (!TARGET_POWERPC64)
8957 extra = 4;
8958 else if (offset & 3)
8959 return false;
8960 break;
8962 case E_TFmode:
8963 case E_IFmode:
8964 case E_KFmode:
8965 case E_TDmode:
8966 case E_TImode:
8967 case E_PTImode:
8968 extra = 8;
8969 if (!worst_case)
8970 break;
8971 if (!TARGET_POWERPC64)
8972 extra = 12;
8973 else if (offset & 3)
8974 return false;
8975 break;
8977 default:
8978 break;
8981 if (TARGET_PREFIXED)
8982 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8983 else
8984 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
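/* Example of the worst-case padding above (illustrative): for TFmode on
   32-bit, EXTRA is 12, so an offset of 32756 is rejected (the last word
   of the access would not be reachable with a 16-bit displacement) while
   32752 is accepted; with TARGET_PREFIXED the test widens to a 34-bit
   displacement.  */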
8987 bool
8988 legitimate_indexed_address_p (rtx x, int strict)
8990 rtx op0, op1;
8992 if (GET_CODE (x) != PLUS)
8993 return false;
8995 op0 = XEXP (x, 0);
8996 op1 = XEXP (x, 1);
8998 return (REG_P (op0) && REG_P (op1)
8999 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9000 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9001 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9002 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9005 bool
9006 avoiding_indexed_address_p (machine_mode mode)
9008 unsigned int msize = GET_MODE_SIZE (mode);
9010 /* Avoid indexed addressing for modes that have non-indexed load/store
9011 instruction forms. On power10, vector pairs have an indexed
9012 form, but vector quads don't. */
9013 if (msize > 16)
9014 return msize != 32;
9016 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9019 bool
9020 legitimate_indirect_address_p (rtx x, int strict)
9022 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9025 bool
9026 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9028 if (!TARGET_MACHO || !flag_pic
9029 || mode != SImode || !MEM_P (x))
9030 return false;
9031 x = XEXP (x, 0);
9033 if (GET_CODE (x) != LO_SUM)
9034 return false;
9035 if (!REG_P (XEXP (x, 0)))
9036 return false;
9037 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9038 return false;
9039 x = XEXP (x, 1);
9041 return CONSTANT_P (x);
9044 static bool
9045 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9047 if (GET_CODE (x) != LO_SUM)
9048 return false;
9049 if (!REG_P (XEXP (x, 0)))
9050 return false;
9051 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9052 return false;
9053 /* Quad-word addresses are restricted; we can't use LO_SUM. */
9054 if (mode_supports_dq_form (mode))
9055 return false;
9056 x = XEXP (x, 1);
9058 if (TARGET_ELF)
9060 bool large_toc_ok;
9062 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9063 return false;
9064 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9065 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9066 recognizes some LO_SUM addresses as valid although this
9067 function says the opposite. In most cases, LRA through different
9068 transformations can generate correct code for address reloads.
9069 It cannot manage only some LO_SUM cases. So we need to add
9070 code here saying that some addresses are still valid. */
9071 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9072 && small_toc_ref (x, VOIDmode));
9073 if (TARGET_TOC && ! large_toc_ok)
9074 return false;
9075 if (GET_MODE_NUNITS (mode) != 1)
9076 return false;
9077 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9078 && !(/* ??? Assume floating point reg based on mode? */
9079 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9080 return false;
9082 return CONSTANT_P (x) || large_toc_ok;
9084 else if (TARGET_MACHO)
9086 if (GET_MODE_NUNITS (mode) != 1)
9087 return false;
9088 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9089 && !(/* see above */
9090 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9091 return false;
9092 #if TARGET_MACHO
9093 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9094 return CONSTANT_P (x);
9095 #endif
9096 /* Mach-O PIC code from here. */
9097 if (GET_CODE (x) == CONST)
9098 x = XEXP (x, 0);
9100 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9101 if (SYMBOL_REF_P (x))
9102 return false;
9104 /* So this is OK if the wrapped object is const. */
9105 if (GET_CODE (x) == UNSPEC
9106 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9107 return CONSTANT_P (XVECEXP (x, 0, 0));
9108 return CONSTANT_P (x);
9110 return false;
9114 /* Try machine-dependent ways of modifying an illegitimate address
9115 to be legitimate. If we find one, return the new, valid address.
9116 This is used from only one place: `memory_address' in explow.cc.
9118 OLDX is the address as it was before break_out_memory_refs was
9119 called. In some cases it is useful to look at this to decide what
9120 needs to be done.
9122 It is always safe for this function to do nothing. It exists to
9123 recognize opportunities to optimize the output.
9125 On RS/6000, first check for the sum of a register with a constant
9126 integer that is out of range. If so, generate code to add the
9127 constant with the low-order 16 bits masked to the register and force
9128 this result into another register (this can be done with `cau').
9129 Then generate an address of REG+(CONST&0xffff), allowing for the
9130 possibility of bit 16 being a one.
9132 Then check for the sum of a register and something not constant, try to
9133 load the other things into a register and return the sum. */
9135 static rtx
9136 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9137 machine_mode mode)
9139 unsigned int extra;
9141 if (!reg_offset_addressing_ok_p (mode)
9142 || mode_supports_dq_form (mode))
9144 if (virtual_stack_registers_memory_p (x))
9145 return x;
9147 /* In theory we should not be seeing addresses of the form reg+0,
9148 but just in case it is generated, optimize it away. */
9149 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9150 return force_reg (Pmode, XEXP (x, 0));
9152 /* For TImode with load/store quad, restrict addresses to just a single
9153 pointer, so it works with both GPRs and VSX registers. */
9154 /* Make sure both operands are registers. */
9155 else if (GET_CODE (x) == PLUS
9156 && (mode != TImode || !TARGET_VSX))
9157 return gen_rtx_PLUS (Pmode,
9158 force_reg (Pmode, XEXP (x, 0)),
9159 force_reg (Pmode, XEXP (x, 1)));
9160 else
9161 return force_reg (Pmode, x);
9163 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9165 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9166 if (model != 0)
9167 return rs6000_legitimize_tls_address (x, model);
9170 extra = 0;
9171 switch (mode)
9173 case E_TFmode:
9174 case E_TDmode:
9175 case E_TImode:
9176 case E_PTImode:
9177 case E_IFmode:
9178 case E_KFmode:
9179 /* As in legitimate_offset_address_p we do not assume
9180 worst-case. The mode here is just a hint as to the registers
9181 used. A TImode is usually in gprs, but may actually be in
9182 fprs. Leave worst-case scenario for reload to handle via
9183 insn constraints. PTImode is only GPRs. */
9184 extra = 8;
9185 break;
9186 default:
9187 break;
9190 if (GET_CODE (x) == PLUS
9191 && REG_P (XEXP (x, 0))
9192 && CONST_INT_P (XEXP (x, 1))
9193 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9194 >= 0x10000 - extra))
9196 HOST_WIDE_INT high_int, low_int;
9197 rtx sum;
9198 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9199 if (low_int >= 0x8000 - extra)
9200 low_int = 0;
9201 high_int = INTVAL (XEXP (x, 1)) - low_int;
9202 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9203 gen_int_mode (high_int, Pmode)), 0);
9204 return plus_constant (Pmode, sum, low_int);
9206 else if (GET_CODE (x) == PLUS
9207 && REG_P (XEXP (x, 0))
9208 && !CONST_INT_P (XEXP (x, 1))
9209 && GET_MODE_NUNITS (mode) == 1
9210 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9211 || (/* ??? Assume floating point reg based on mode? */
9212 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9213 && !avoiding_indexed_address_p (mode))
9215 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9216 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9218 else if ((TARGET_ELF
9219 #if TARGET_MACHO
9220 || !MACHO_DYNAMIC_NO_PIC_P
9221 #endif
9223 && TARGET_32BIT
9224 && TARGET_NO_TOC_OR_PCREL
9225 && !flag_pic
9226 && !CONST_INT_P (x)
9227 && !CONST_WIDE_INT_P (x)
9228 && !CONST_DOUBLE_P (x)
9229 && CONSTANT_P (x)
9230 && GET_MODE_NUNITS (mode) == 1
9231 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9232 || (/* ??? Assume floating point reg based on mode? */
9233 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9235 rtx reg = gen_reg_rtx (Pmode);
9236 if (TARGET_ELF)
9237 emit_insn (gen_elf_high (reg, x));
9238 else
9239 emit_insn (gen_macho_high (Pmode, reg, x));
9240 return gen_rtx_LO_SUM (Pmode, reg, x);
9242 else if (TARGET_TOC
9243 && SYMBOL_REF_P (x)
9244 && constant_pool_expr_p (x)
9245 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9246 return create_TOC_reference (x, NULL_RTX);
9247 else
9248 return x;
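/* Illustrative sketch (editorial): the high/low split performed above.
   The low 16 bits are sign-extended; subtracting them from the offset
   leaves a multiple of 0x10000 for addis to add.  */
static inline long long
example_split_high_part (long long off)
{
  long long low = (short) (off & 0xffff);  /* sign-extend low 16 bits */
  return off - low;                        /* multiple of 0x10000 */
}
/* e.g. off = 0x18000: low = -0x8000 and the high part is rounded up to
   0x20000, so high + low still reassembles the original offset even when
   bit 15 of the low half is set.  */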
9251 /* Debug version of rs6000_legitimize_address. */
9252 static rtx
9253 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9255 rtx ret;
9256 rtx_insn *insns;
9258 start_sequence ();
9259 ret = rs6000_legitimize_address (x, oldx, mode);
9260 insns = get_insns ();
9261 end_sequence ();
9263 if (ret != x)
9265 fprintf (stderr,
9266 "\nrs6000_legitimize_address: mode %s, old code %s, "
9267 "new code %s, modified\n",
9268 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9269 GET_RTX_NAME (GET_CODE (ret)));
9271 fprintf (stderr, "Original address:\n");
9272 debug_rtx (x);
9274 fprintf (stderr, "oldx:\n");
9275 debug_rtx (oldx);
9277 fprintf (stderr, "New address:\n");
9278 debug_rtx (ret);
9280 if (insns)
9282 fprintf (stderr, "Insns added:\n");
9283 debug_rtx_list (insns, 20);
9286 else
9288 fprintf (stderr,
9289 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9290 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9292 debug_rtx (x);
9295 if (insns)
9296 emit_insn (insns);
9298 return ret;
9301 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9302 We need to emit DTP-relative relocations. */
9304 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9305 static void
9306 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9308 switch (size)
9310 case 4:
9311 fputs ("\t.long\t", file);
9312 break;
9313 case 8:
9314 fputs (DOUBLE_INT_ASM_OP, file);
9315 break;
9316 default:
9317 gcc_unreachable ();
9319 output_addr_const (file, x);
9320 if (TARGET_ELF)
9321 fputs ("@dtprel+0x8000", file);
9324 /* Return true if X is a symbol that refers to real (rather than emulated)
9325 TLS. */
9327 static bool
9328 rs6000_real_tls_symbol_ref_p (rtx x)
9330 return (SYMBOL_REF_P (x)
9331 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9334 /* In the name of slightly smaller debug output, and to cater to
9335 general assembler lossage, recognize various UNSPEC sequences
9336 and turn them back into a direct symbol reference. */
9338 static rtx
9339 rs6000_delegitimize_address (rtx orig_x)
9341 rtx x, y, offset;
9343 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9344 encodes loading up the high part of the address of a TOC reference along
9345 with a load of a GPR using the same base register used for the load. We
9346 return the original SYMBOL_REF.
9348 (set (reg:INT1 <reg>)
9349 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))
9351 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9352 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9353 We return the original SYMBOL_REF.
9355 (parallel [(set (reg:DI <base-reg>)
9356 (unspec:DI [(symbol_ref <symbol>)
9357 (const_int <marker>)]
9358 UNSPEC_PCREL_OPT_LD_ADDR))
9359 (set (reg:DI <load-reg>)
9360 (unspec:DI [(const_int 0)]
9361 UNSPEC_PCREL_OPT_LD_DATA))])
9363 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9364 GPR being loaded is the same as the GPR used to hold the external address.
9366 (set (reg:DI <base-reg>)
9367 (unspec:DI [(symbol_ref <symbol>)
9368 (const_int <marker>)]
9369 UNSPEC_PCREL_OPT_LD_SAME_REG))
9371 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9372 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9373 We return the original SYMBOL_REF.
9375 (parallel [(set (reg:DI <base-reg>)
9376 (unspec:DI [(symbol_ref <symbol>)
9377 (const_int <marker>)]
9378 UNSPEC_PCREL_OPT_ST_ADDR))
9379 (use (reg <store-reg>))]) */
9381 if (GET_CODE (orig_x) == UNSPEC)
9382 switch (XINT (orig_x, 1))
9384 case UNSPEC_FUSION_GPR:
9385 case UNSPEC_PCREL_OPT_LD_ADDR:
9386 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9387 case UNSPEC_PCREL_OPT_ST_ADDR:
9388 orig_x = XVECEXP (orig_x, 0, 0);
9389 break;
9391 default:
9392 break;
9395 orig_x = delegitimize_mem_from_attrs (orig_x);
9397 x = orig_x;
9398 if (MEM_P (x))
9399 x = XEXP (x, 0);
9401 y = x;
9402 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9403 y = XEXP (y, 1);
9405 offset = NULL_RTX;
9406 if (GET_CODE (y) == PLUS
9407 && GET_MODE (y) == Pmode
9408 && CONST_INT_P (XEXP (y, 1)))
9410 offset = XEXP (y, 1);
9411 y = XEXP (y, 0);
9414 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9416 y = XVECEXP (y, 0, 0);
9418 #ifdef HAVE_AS_TLS
9419 /* Do not associate thread-local symbols with the original
9420 constant pool symbol. */
9421 if (TARGET_XCOFF
9422 && SYMBOL_REF_P (y)
9423 && CONSTANT_POOL_ADDRESS_P (y)
9424 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9425 return orig_x;
9426 #endif
9428 if (offset != NULL_RTX)
9429 y = gen_rtx_PLUS (Pmode, y, offset);
9430 if (!MEM_P (orig_x))
9431 return y;
9432 else
9433 return replace_equiv_address_nv (orig_x, y);
9436 if (TARGET_MACHO
9437 && GET_CODE (orig_x) == LO_SUM
9438 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9440 y = XEXP (XEXP (orig_x, 1), 0);
9441 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9442 return XVECEXP (y, 0, 0);
9445 return orig_x;
9448 /* Return true if X shouldn't be emitted into the debug info.
9449 The linker doesn't like .toc section references from
9450 .debug_* sections, so reject .toc section symbols. */
9452 static bool
9453 rs6000_const_not_ok_for_debug_p (rtx x)
9455 if (GET_CODE (x) == UNSPEC)
9456 return true;
9457 if (SYMBOL_REF_P (x)
9458 && CONSTANT_POOL_ADDRESS_P (x))
9460 rtx c = get_pool_constant (x);
9461 machine_mode cmode = get_pool_mode (x);
9462 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9463 return true;
9466 return false;
9469 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9471 static bool
9472 rs6000_legitimate_combined_insn (rtx_insn *insn)
9474 int icode = INSN_CODE (insn);
9476 /* Reject creating doloop insns. Combine should not be allowed
9477 to create these for a number of reasons:
9478 1) In a nested loop, if combine creates one of these in an
9479 outer loop and the register allocator happens to allocate ctr
9480 to the outer loop insn, then the inner loop can't use ctr.
9481 Inner loops ought to be more highly optimized.
9482 2) Combine often wants to create one of these from what was
9483 originally a three insn sequence, first combining the three
9484 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9485 allocated ctr, the splitter takes us back to the three insn
9486 sequence. It's better to stop combine at the two insn
9487 sequence.
9488 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9489 insns, the register allocator sometimes uses floating point
9490 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9491 jump insn and output reloads are not implemented for jumps,
9492 the ctrsi/ctrdi splitters need to handle all possible cases.
9493 That's a pain, and it gets to be seriously difficult when a
9494 splitter that runs after reload needs memory to transfer from
9495 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9496 for the difficult case. It's better to not create problems
9497 in the first place. */
9498 if (icode != CODE_FOR_nothing
9499 && (icode == CODE_FOR_bdz_si
9500 || icode == CODE_FOR_bdz_di
9501 || icode == CODE_FOR_bdnz_si
9502 || icode == CODE_FOR_bdnz_di
9503 || icode == CODE_FOR_bdztf_si
9504 || icode == CODE_FOR_bdztf_di
9505 || icode == CODE_FOR_bdnztf_si
9506 || icode == CODE_FOR_bdnztf_di))
9507 return false;
9509 return true;
9512 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9514 static GTY(()) rtx rs6000_tls_symbol;
9515 static rtx
9516 rs6000_tls_get_addr (void)
9518 if (!rs6000_tls_symbol)
9519 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9521 return rs6000_tls_symbol;
9524 /* Construct the SYMBOL_REF for TLS GOT references. */
9526 static GTY(()) rtx rs6000_got_symbol;
9527 static rtx
9528 rs6000_got_sym (void)
9530 if (!rs6000_got_symbol)
9532 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9533 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9534 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9537 return rs6000_got_symbol;
9540 /* AIX Thread-Local Address support. */
9542 static rtx
9543 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9545 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9546 const char *name;
9547 char *tlsname;
9549 /* Place addr into TOC constant pool. */
9550 sym = force_const_mem (GET_MODE (addr), addr);
9552 /* Output the TOC entry and create the MEM referencing the value. */
9553 if (constant_pool_expr_p (XEXP (sym, 0))
9554 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9556 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9557 mem = gen_const_mem (Pmode, tocref);
9558 set_mem_alias_set (mem, get_TOC_alias_set ());
9560 else
9561 return sym;
9563 /* Use global-dynamic for local-dynamic. */
9564 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9565 || model == TLS_MODEL_LOCAL_DYNAMIC)
9567 /* Create a new TOC reference for the @m symbol. */
9568 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9569 tlsname = XALLOCAVEC (char, strlen (name) + 2);
9570 strcpy (tlsname, "*LCM");
9571 strcat (tlsname, name + 3);
9572 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9573 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9574 tocref = create_TOC_reference (modaddr, NULL_RTX);
9575 rtx modmem = gen_const_mem (Pmode, tocref);
9576 set_mem_alias_set (modmem, get_TOC_alias_set ());
9578 rtx modreg = gen_reg_rtx (Pmode);
9579 emit_insn (gen_rtx_SET (modreg, modmem));
9581 tmpreg = gen_reg_rtx (Pmode);
9582 emit_insn (gen_rtx_SET (tmpreg, mem));
9584 dest = gen_reg_rtx (Pmode);
9585 if (TARGET_32BIT)
9586 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9587 else
9588 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9589 return dest;
9591 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9592 else if (TARGET_32BIT)
9594 tlsreg = gen_reg_rtx (SImode);
9595 emit_insn (gen_tls_get_tpointer (tlsreg));
9597 else
9599 tlsreg = gen_rtx_REG (DImode, 13);
9600 xcoff_tls_exec_model_detected = true;
9603 /* Load the TOC value into temporary register. */
9604 tmpreg = gen_reg_rtx (Pmode);
9605 emit_insn (gen_rtx_SET (tmpreg, mem));
9606 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9607 gen_rtx_MINUS (Pmode, addr, tlsreg));
9609 /* Add TOC symbol value to TLS pointer. */
9610 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9612 return dest;
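/* Editorial sketch of the renaming done above for the @m (module)
   symbol, assuming the usual "*LC..n" spelling of XCOFF TOC entries,
   so that e.g. "*LC..0" becomes "*LCM..0".  Plain C; needs <string.h>
   and a buffer of strlen (name) + 2 bytes.  */
static inline void
example_lcm_name (const char *name, char *buf)
{
  strcpy (buf, "*LCM");   /* new prefix */
  strcat (buf, name + 3); /* skip "*LC", keep the rest */
}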
9615 /* Passes the tls arg value for global dynamic and local dynamic
9616 emit_library_call_value in rs6000_legitimize_tls_address to
9617 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9618 marker relocs put on __tls_get_addr calls. */
9619 static rtx global_tlsarg;
9621 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9622 this (thread-local) address. */
9624 static rtx
9625 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9627 rtx dest, insn;
9629 if (TARGET_XCOFF)
9630 return rs6000_legitimize_tls_address_aix (addr, model);
9632 dest = gen_reg_rtx (Pmode);
9633 if (model == TLS_MODEL_LOCAL_EXEC
9634 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9636 rtx tlsreg;
9638 if (TARGET_64BIT)
9640 tlsreg = gen_rtx_REG (Pmode, 13);
9641 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9643 else
9645 tlsreg = gen_rtx_REG (Pmode, 2);
9646 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9648 emit_insn (insn);
9650 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9652 rtx tlsreg, tmp;
9654 tmp = gen_reg_rtx (Pmode);
9655 if (TARGET_64BIT)
9657 tlsreg = gen_rtx_REG (Pmode, 13);
9658 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9660 else
9662 tlsreg = gen_rtx_REG (Pmode, 2);
9663 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9665 emit_insn (insn);
9666 if (TARGET_64BIT)
9667 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9668 else
9669 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9670 emit_insn (insn);
9672 else
9674 rtx got, tga, tmp1, tmp2;
9676 /* We currently use relocations like @got@tlsgd for tls, which
9677 means the linker will handle allocation of tls entries, placing
9678 them in the .got section. So use a pointer to the .got section,
9679 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9680 or to secondary GOT sections used by 32-bit -fPIC. */
9681 if (rs6000_pcrel_p ())
9682 got = const0_rtx;
9683 else if (TARGET_64BIT)
9684 got = gen_rtx_REG (Pmode, 2);
9685 else
9687 if (flag_pic == 1)
9688 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9689 else
9691 rtx gsym = rs6000_got_sym ();
9692 got = gen_reg_rtx (Pmode);
9693 if (flag_pic == 0)
9694 rs6000_emit_move (got, gsym, Pmode);
9695 else
9697 rtx mem, lab;
9699 tmp1 = gen_reg_rtx (Pmode);
9700 tmp2 = gen_reg_rtx (Pmode);
9701 mem = gen_const_mem (Pmode, tmp1);
9702 lab = gen_label_rtx ();
9703 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9704 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9705 if (TARGET_LINK_STACK)
9706 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9707 emit_move_insn (tmp2, mem);
9708 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9709 set_unique_reg_note (last, REG_EQUAL, gsym);
9714 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9716 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9717 UNSPEC_TLSGD);
9718 tga = rs6000_tls_get_addr ();
9719 rtx argreg = gen_rtx_REG (Pmode, 3);
9720 emit_insn (gen_rtx_SET (argreg, arg));
9721 global_tlsarg = arg;
9722 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9723 global_tlsarg = NULL_RTX;
9725 /* Make a note so that the result of this call can be CSEd. */
9726 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9727 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9728 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9730 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9732 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9733 tga = rs6000_tls_get_addr ();
9734 tmp1 = gen_reg_rtx (Pmode);
9735 rtx argreg = gen_rtx_REG (Pmode, 3);
9736 emit_insn (gen_rtx_SET (argreg, arg));
9737 global_tlsarg = arg;
9738 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9739 global_tlsarg = NULL_RTX;
9741 /* Make a note so that the result of this call can be CSEd. */
9742 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9743 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9744 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9746 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9748 if (TARGET_64BIT)
9749 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9750 else
9751 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9753 else if (rs6000_tls_size == 32)
9755 tmp2 = gen_reg_rtx (Pmode);
9756 if (TARGET_64BIT)
9757 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9758 else
9759 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9760 emit_insn (insn);
9761 if (TARGET_64BIT)
9762 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9763 else
9764 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9766 else
9768 tmp2 = gen_reg_rtx (Pmode);
9769 if (TARGET_64BIT)
9770 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9771 else
9772 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9773 emit_insn (insn);
9774 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9776 emit_insn (insn);
9778 else
9780 /* IE, or 64-bit offset LE. */
9781 tmp2 = gen_reg_rtx (Pmode);
9782 if (TARGET_64BIT)
9783 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9784 else
9785 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9786 emit_insn (insn);
9787 if (rs6000_pcrel_p ())
9789 if (TARGET_64BIT)
9790 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9791 else
9792 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9794 else if (TARGET_64BIT)
9795 insn = gen_tls_tls_64 (dest, tmp2, addr);
9796 else
9797 insn = gen_tls_tls_32 (dest, tmp2, addr);
9798 emit_insn (insn);
9802 return dest;
9805 /* Only create the global variable for the stack protect guard if we are using
9806 the global flavor of that guard. */
9807 static tree
9808 rs6000_init_stack_protect_guard (void)
9810 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9811 return default_stack_protect_guard ();
9813 return NULL_TREE;
9816 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9818 static bool
9819 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9821 /* If GET_CODE (x) is HIGH, then X represents the high part of a symbol_ref.
9822 It cannot be put into a constant pool. E.g.
9823 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9824 (high:DI (symbol_ref:DI ("var")..)). */
9825 if (GET_CODE (x) == HIGH)
9826 return true;
9828 /* A TLS symbol in the TOC cannot contain a sum. */
9829 if (GET_CODE (x) == CONST
9830 && GET_CODE (XEXP (x, 0)) == PLUS
9831 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9832 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9833 return true;
9835 /* Allow AIX TOC TLS symbols in the constant pool,
9836 but not ELF TLS symbols. */
9837 return TARGET_ELF && tls_referenced_p (x);
9840 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9841 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9842 can be addressed relative to the toc pointer. */
9844 static bool
9845 use_toc_relative_ref (rtx sym, machine_mode mode)
9847 return ((constant_pool_expr_p (sym)
9848 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9849 get_pool_mode (sym)))
9850 || (TARGET_CMODEL == CMODEL_MEDIUM
9851 && SYMBOL_REF_LOCAL_P (sym)
9852 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9855 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9856 that is a valid memory address for an instruction.
9857 The MODE argument is the machine mode for the MEM expression
9858 that wants to use this address.
9860 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9861 refers to a constant pool entry of an address (or the sum of it
9862 plus a constant), a short (16-bit signed) constant plus a register,
9863 the sum of two registers, or a register indirect, possibly with an
9864 auto-increment. For DFmode, DDmode and DImode with a constant plus
9865 register, we must ensure that both words are addressable, or on
9866 PowerPC64 that the offset is word aligned.
9868 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9869 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9870 because adjacent memory cells are accessed by adding word-sized offsets
9871 during assembly output. */
9872 static bool
9873 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9874 code_helper ch = ERROR_MARK)
9876 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9877 bool quad_offset_p = mode_supports_dq_form (mode);
9879 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9880 return 0;
9882 /* lxvl and stxvl don't support any addressing modes with PLUS. */
9883 if (ch.is_internal_fn ()
9884 && (ch == IFN_LEN_LOAD || ch == IFN_LEN_STORE)
9885 && GET_CODE (x) == PLUS)
9886 return 0;
9888 /* Handle unaligned altivec lvx/stvx type addresses. */
9889 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9890 && GET_CODE (x) == AND
9891 && CONST_INT_P (XEXP (x, 1))
9892 && INTVAL (XEXP (x, 1)) == -16)
9894 x = XEXP (x, 0);
9895 return (legitimate_indirect_address_p (x, reg_ok_strict)
9896 || legitimate_indexed_address_p (x, reg_ok_strict)
9897 || virtual_stack_registers_memory_p (x));
9900 if (legitimate_indirect_address_p (x, reg_ok_strict))
9901 return 1;
9902 if (TARGET_UPDATE
9903 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9904 && mode_supports_pre_incdec_p (mode)
9905 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9906 return 1;
9908 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9909 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9910 return 1;
9912 /* Handle restricted vector d-form offsets in ISA 3.0. */
9913 if (quad_offset_p)
9915 if (quad_address_p (x, mode, reg_ok_strict))
9916 return 1;
9918 else if (virtual_stack_registers_memory_p (x))
9919 return 1;
9921 else if (reg_offset_p)
9923 if (legitimate_small_data_p (mode, x))
9924 return 1;
9925 if (legitimate_constant_pool_address_p (x, mode,
9926 reg_ok_strict || lra_in_progress))
9927 return 1;
9930 /* For TImode, if we have TImode in VSX registers, only allow register
9931 indirect addresses. This will allow the values to go in either GPRs
9932 or VSX registers without reloading. The vector types would tend to
9933 go into VSX registers, so we allow REG+REG, while TImode seems
9934 somewhat split, in that some uses are GPR based, and some VSX based. */
9935 /* FIXME: We could loosen this by changing the following to
9936 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9937 but currently we cannot allow REG+REG addressing for TImode. See
9938 PR72827 for complete details on how this ends up hoodwinking DSE. */
9939 if (mode == TImode && TARGET_VSX)
9940 return 0;
9941 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9942 if (! reg_ok_strict
9943 && reg_offset_p
9944 && GET_CODE (x) == PLUS
9945 && REG_P (XEXP (x, 0))
9946 && (XEXP (x, 0) == virtual_stack_vars_rtx
9947 || XEXP (x, 0) == arg_pointer_rtx)
9948 && CONST_INT_P (XEXP (x, 1)))
9949 return 1;
9950 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9951 return 1;
9952 if (!FLOAT128_2REG_P (mode)
9953 && (TARGET_HARD_FLOAT
9954 || TARGET_POWERPC64
9955 || (mode != DFmode && mode != DDmode))
9956 && (TARGET_POWERPC64 || mode != DImode)
9957 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9958 && mode != PTImode
9959 && !avoiding_indexed_address_p (mode)
9960 && legitimate_indexed_address_p (x, reg_ok_strict))
9961 return 1;
9962 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9963 && mode_supports_pre_modify_p (mode)
9964 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9965 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9966 reg_ok_strict, false)
9967 || (!avoiding_indexed_address_p (mode)
9968 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9969 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9971 /* There is no prefixed version of the load/store with update. */
9972 rtx addr = XEXP (x, 1);
9973 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9975 if (reg_offset_p && !quad_offset_p
9976 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9977 return 1;
9978 return 0;
9981 /* Debug version of rs6000_legitimate_address_p. */
9982 static bool
9983 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9984 code_helper ch)
9986 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
9987 fprintf (stderr,
9988 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9989 "strict = %d, reload = %s, code = %s\n",
9990 ret ? "true" : "false",
9991 GET_MODE_NAME (mode),
9992 reg_ok_strict,
9993 (reload_completed ? "after" : "before"),
9994 GET_RTX_NAME (GET_CODE (x)));
9995 debug_rtx (x);
9997 return ret;
10000 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10002 static bool
10003 rs6000_mode_dependent_address_p (const_rtx addr,
10004 addr_space_t as ATTRIBUTE_UNUSED)
10006 return rs6000_mode_dependent_address_ptr (addr);
10009 /* Go to LABEL if ADDR (a legitimate address expression)
10010 has an effect that depends on the machine mode it is used for.
10012 On the RS/6000 this is true of all integral offsets (since AltiVec
10013 and VSX modes don't allow them) and of pre-increment or decrement addresses.
10015 ??? Except that due to conceptual problems in offsettable_address_p
10016 we can't really report the problems of integral offsets. So leave
10017 this assuming that the adjustable offset must be valid for the
10018 sub-words of a TFmode operand, which is what we had before. */
10020 static bool
10021 rs6000_mode_dependent_address (const_rtx addr)
10023 switch (GET_CODE (addr))
10025 case PLUS:
10026 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10027 is considered a legitimate address before reload, so there
10028 are no offset restrictions in that case. Note that this
10029 condition is safe in strict mode because any address involving
10030 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10031 been rejected as illegitimate. */
10032 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10033 && XEXP (addr, 0) != arg_pointer_rtx
10034 && CONST_INT_P (XEXP (addr, 1)))
10036 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10037 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10038 if (TARGET_PREFIXED)
10039 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10040 else
10041 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10043 break;
10045 case LO_SUM:
10046 /* Anything in the constant pool is sufficiently aligned that
10047 all bytes have the same high part address. */
10048 return !legitimate_constant_pool_address_p (addr, QImode, false);
10050 /* Auto-increment cases are now treated generically in recog.cc. */
10051 case PRE_MODIFY:
10052 return TARGET_UPDATE;
10054 /* AND is only allowed in Altivec loads. */
10055 case AND:
10056 return true;
10058 default:
10059 break;
10062 return false;
10065 /* Debug version of rs6000_mode_dependent_address. */
10066 static bool
10067 rs6000_debug_mode_dependent_address (const_rtx addr)
10069 bool ret = rs6000_mode_dependent_address (addr);
10071 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10072 ret ? "true" : "false");
10073 debug_rtx (addr);
10075 return ret;
10078 /* Implement FIND_BASE_TERM. */
10080 rtx
10081 rs6000_find_base_term (rtx op)
10083 rtx base;
10085 base = op;
10086 if (GET_CODE (base) == CONST)
10087 base = XEXP (base, 0);
10088 if (GET_CODE (base) == PLUS)
10089 base = XEXP (base, 0);
10090 if (GET_CODE (base) == UNSPEC)
10091 switch (XINT (base, 1))
10093 case UNSPEC_TOCREL:
10094 case UNSPEC_MACHOPIC_OFFSET:
10095 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10096 for aliasing purposes. */
10097 return XVECEXP (base, 0, 0);
10100 return op;
10103 /* More elaborate version of recog's offsettable_memref_p predicate
10104 that works around the ??? note of rs6000_mode_dependent_address.
10105 In particular it accepts
10107 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10109 in 32-bit mode, which the recog predicate rejects. */
10111 static bool
10112 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10114 bool worst_case;
10116 if (!MEM_P (op))
10117 return false;
10119 /* First mimic offsettable_memref_p. */
10120 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10121 return true;
10123 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10124 the latter predicate knows nothing about the mode of the memory
10125 reference and, therefore, assumes that it is the largest supported
10126 mode (TFmode). As a consequence, legitimate offsettable memory
10127 references are rejected. rs6000_legitimate_offset_address_p contains
10128 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10129 at least with a little bit of help here given that we know the
10130 actual registers used. */
10131 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10132 || GET_MODE_SIZE (reg_mode) == 4);
10133 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10134 strict, worst_case);
10137 /* Determine the reassociation width to be used in reassociate_bb.
10138 This takes into account how many parallel operations we
10139 can actually do of a given type, and also the latency.
10141 int add/sub 6/cycle
10142 mul 2/cycle
10143 vect add/sub/mul 2/cycle
10144 fp add/sub/mul 2/cycle
10145 dfp 1/cycle
10148 static int
10149 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10150 machine_mode mode)
10152 switch (rs6000_tune)
10154 case PROCESSOR_POWER8:
10155 case PROCESSOR_POWER9:
10156 case PROCESSOR_POWER10:
10157 case PROCESSOR_POWER11:
10158 if (DECIMAL_FLOAT_MODE_P (mode))
10159 return 1;
10160 if (VECTOR_MODE_P (mode))
10161 return 4;
10162 if (INTEGRAL_MODE_P (mode))
10163 return 1;
10164 if (FLOAT_MODE_P (mode))
10165 return 4;
10166 break;
10167 default:
10168 break;
10170 return 1;
10173 /* Change register usage conditional on target flags. */
10174 static void
10175 rs6000_conditional_register_usage (void)
10177 int i;
10179 if (TARGET_DEBUG_TARGET)
10180 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10182 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10183 if (TARGET_64BIT)
10184 fixed_regs[13] = call_used_regs[13] = 1;
10186 /* Conditionally disable FPRs. */
10187 if (TARGET_SOFT_FLOAT)
10188 for (i = 32; i < 64; i++)
10189 fixed_regs[i] = call_used_regs[i] = 1;
10191 /* For non-PC-relative code, GPR2 is unavailable for register allocation. */
10192 if (FIXED_R2 && !rs6000_pcrel_p ())
10193 fixed_regs[2] = 1;
10195 /* The TOC register is not killed across calls in a way that is
10196 visible to the compiler. */
10197 if (fixed_regs[2] && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2))
10198 call_used_regs[2] = 0;
10200 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10201 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10203 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10204 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10205 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10207 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10208 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10209 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10211 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10212 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10214 if (!TARGET_ALTIVEC && !TARGET_VSX)
10216 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10217 fixed_regs[i] = call_used_regs[i] = 1;
10218 call_used_regs[VRSAVE_REGNO] = 1;
10221 if (TARGET_ALTIVEC || TARGET_VSX)
10222 global_regs[VSCR_REGNO] = 1;
10224 if (TARGET_ALTIVEC_ABI)
10226 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10227 call_used_regs[i] = 1;
10229 /* AIX reserves VR20:31 in non-extended ABI mode. */
10230 if (TARGET_XCOFF && !rs6000_aix_extabi)
10231 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10232 fixed_regs[i] = call_used_regs[i] = 1;
10237 /* Output insns to set DEST equal to the constant SOURCE as a series of
10238 lis, ori and shl instructions and return TRUE. */
10240 bool
10241 rs6000_emit_set_const (rtx dest, rtx source)
10243 machine_mode mode = GET_MODE (dest);
10244 rtx temp, set;
10245 rtx_insn *insn;
10246 HOST_WIDE_INT c;
10248 gcc_checking_assert (CONST_INT_P (source));
10249 c = INTVAL (source);
10250 switch (mode)
10252 case E_QImode:
10253 case E_HImode:
10254 emit_insn (gen_rtx_SET (dest, source));
10255 return true;
10257 case E_SImode:
10258 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10260 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10261 emit_insn (gen_rtx_SET (dest,
10262 gen_rtx_IOR (SImode, temp,
10263 GEN_INT (c & 0xffff))));
10264 break;
10266 case E_DImode:
10267 if (!TARGET_POWERPC64)
10269 rtx hi, lo;
10271 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10272 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10273 emit_move_insn (hi, GEN_INT (c >> 32));
10274 c = sext_hwi (c, 32);
10275 emit_move_insn (lo, GEN_INT (c));
10277 else
10278 rs6000_emit_set_long_const (dest, c);
10279 break;
10281 default:
10282 gcc_unreachable ();
10285 insn = get_last_insn ();
10286 set = single_set (insn);
10287 if (! CONSTANT_P (SET_SRC (set)))
10288 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10290 return true;
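/* Editorial example: for SImode, c = 0x12345678 is emitted above as

       lis rD, 0x1234        ; rD = 0x12340000
       ori rD, rD, 0x5678    ; rD = 0x12345678

   The IOR is safe because the lis value has zeros in its low half.  */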
10293 /* Check if C can be rotated to a negative value which the 'lis' instruction is
10294 able to load: 1..1xx0..0. If so, set *ROT to the number by which C is
10295 rotated, and return true. Return false otherwise. */
10297 static bool
10298 can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
10300 /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's. */
10301 int leading_ones = clz_hwi (~c);
10302 int tailing_ones = ctz_hwi (~c);
10303 int middle_zeros = ctz_hwi (c >> tailing_ones);
10304 if (middle_zeros >= 16 && leading_ones + tailing_ones >= 33)
10306 *rot = HOST_BITS_PER_WIDE_INT - tailing_ones;
10307 return true;
10310 /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
10311 rotated over the highest bit. */
10312 int pos_one = clz_hwi ((c << 16) >> 16);
10313 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
10314 int middle_ones = clz_hwi (~(c << pos_one));
10315 if (middle_zeros >= 16 && middle_ones >= 33)
10317 *rot = pos_one;
10318 return true;
10321 return false;
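/* Editorial example: a "negative lis value" is a sign-extended imm << 16,
   i.e. 1..1xx0..0 with at least 33 leading ones and 16 trailing zeros;
   lis rD,0xc000 yields 0xffffffffc0000000.  The constant
   c = 0xffffffff80000001 matches case a: rotating it left by *ROT = 63
   (i.e. right by one bit) gives exactly that lis-loadable value.  */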
10324 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10325 another is rotldi.
10327 If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
10328 is set to the mask operand of rotldi(rldicl), and return true.
10329 Return false otherwise. */
10331 static bool
10332 can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c, int *shift,
10333 HOST_WIDE_INT *mask)
10335 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10336 to/from a positive or negative value that 'li' is able to load. */
10337 int n;
10338 if (can_be_rotated_to_lowbits (c, 15, &n)
10339 || can_be_rotated_to_lowbits (~c, 15, &n)
10340 || can_be_rotated_to_negative_lis (c, &n))
10342 *mask = HOST_WIDE_INT_M1;
10343 *shift = HOST_BITS_PER_WIDE_INT - n;
10344 return true;
10347 return false;
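/* Editorial sketch (plain C): the 64-bit rotate used by this path.  */
static inline unsigned long long
example_rotl64 (unsigned long long v, unsigned n)
{
  n &= 63;
  return n ? (v << n) | (v >> (64 - n)) : v;
}
/* e.g. c = 0x3ffc000000000000 is built as

       li rD, 0x0fff
       rotldi rD, rD, 50     ; example_rotl64 (0xfff, 50) == c

   with *MASK = -1 (nothing cleared) and *SHIFT = 50.  */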
10350 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10351 another is rldicl.
10353 If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
10354 the mask operand of rldicl, and return true.
10355 Return false otherwise. */
10357 static bool
10358 can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c, int *shift,
10359 HOST_WIDE_INT *mask)
10361 /* Leading zeros can be cleared by rldicl with a mask. Change leading zeros
10362 to ones and then recheck it. */
10363 int lz = clz_hwi (c);
10365 /* If lz == 0, the left shift is undefined. */
10366 if (!lz)
10367 return false;
10369 HOST_WIDE_INT unmask_c
10370 = c | (HOST_WIDE_INT_M1U << (HOST_BITS_PER_WIDE_INT - lz));
10371 int n;
10372 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10373 || can_be_rotated_to_negative_lis (unmask_c, &n))
10375 *mask = HOST_WIDE_INT_M1U >> lz;
10376 *shift = n == 0 ? 0 : HOST_BITS_PER_WIDE_INT - n;
10377 return true;
10380 return false;
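/* Editorial example: c = 0x00007ffdffffffff has 17 leading zeros; with
   those filled in, the value rotates down to -2, which 'li' can load:

       li rD, -2
       rldicl rD, rD, 33, 17 ; rotate left 33, clear the top 17 bits

   here *SHIFT = 33 and *MASK = 0x00007fffffffffff.  */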
10383 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10384 another is rldicr.
10386 If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
10387 the mask operand of rldicr, and return true.
10388 Return false otherwise. */
10390 static bool
10391 can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, int *shift,
10392 HOST_WIDE_INT *mask)
10394 /* Trailing zeros can be cleared by rldicr with a mask. Change trailing zeros
10395 to ones and then recheck it. */
10396 int tz = ctz_hwi (c);
10398 /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10399 if (tz >= HOST_BITS_PER_WIDE_INT)
10400 return false;
10402 HOST_WIDE_INT unmask_c = c | ((HOST_WIDE_INT_1U << tz) - 1);
10403 int n;
10404 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10405 || can_be_rotated_to_negative_lis (unmask_c, &n))
10407 *mask = HOST_WIDE_INT_M1U << tz;
10408 *shift = HOST_BITS_PER_WIDE_INT - n;
10409 return true;
10412 return false;
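/* Editorial example: c = 0x7fffffff00000000 has 32 trailing zeros; with
   those filled in, the value rotates (left by one) to -2:

       li rD, -2
       rldicr rD, rD, 63, 31 ; rotate left 63, clear the low 32 bits

   here *SHIFT = 63 and *MASK = 0xffffffff00000000.  */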
10415 /* Check if value C can be built by 2 instructions: one is 'li', another is
10416 rldic.
10418 If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
10419 to the mask value about the 'mb' operand of rldic; and return true.
10420 Return false otherwise. */
10422 static bool
10423 can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
10425 /* A negative value loaded by 'li' has at least 49 successive leading ones (bits 63..15). */
10426 int ones = 49;
10428 /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
10429 right bits are shifted in as 0's, and the left 1's (and x's) are cleared. */
10430 int tz = ctz_hwi (c);
10431 int lz = clz_hwi (c);
10433 /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10434 if (lz >= HOST_BITS_PER_WIDE_INT)
10435 return false;
10437 int middle_ones = clz_hwi (~(c << lz));
10438 if (tz + lz + middle_ones >= ones
10439 && (tz - lz) < HOST_BITS_PER_WIDE_INT
10440 && tz < HOST_BITS_PER_WIDE_INT)
10442 *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
10443 *shift = tz;
10444 return true;
10447 /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's (following the x's) are cleared. */
10448 int leading_ones = clz_hwi (~c);
10449 int tailing_ones = ctz_hwi (~c);
10450 int middle_zeros = ctz_hwi (c >> tailing_ones);
10451 if (leading_ones + tailing_ones + middle_zeros >= ones
10452 && middle_zeros < HOST_BITS_PER_WIDE_INT)
10454 *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
10455 *shift = tailing_ones + middle_zeros;
10456 return true;
10459 /* xx1..1xx --> xx0..01..1xx: some 1's (following the x's) are cleared. */
10460 /* Get the position of the first bit of the successive-ones run.
10461 Bit 24 falls within either the run of 0's or the run of 1's. */
10462 HOST_WIDE_INT low_mask = (HOST_WIDE_INT_1U << 24) - HOST_WIDE_INT_1U;
10463 int pos_first_1 = ((c & (low_mask + 1)) == 0)
10464 ? clz_hwi (c & low_mask)
10465 : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));
10467 /* Make sure the left and right shifts are defined. */
10468 if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT-1))
10469 return false;
10471 middle_ones = clz_hwi (~c << pos_first_1);
10472 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
10473 if (pos_first_1 < HOST_BITS_PER_WIDE_INT
10474 && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
10475 && middle_ones + middle_zeros >= ones)
10477 *mask = ~(((1ULL << middle_zeros) - 1LL)
10478 << (HOST_BITS_PER_WIDE_INT - pos_first_1));
10479 *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
10480 return true;
10483 return false;
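/* Editorial example for the first case above: c = 0x0000fffd00000000
   is a 1..1x1 run framed by 16 leading and 32 trailing zeros, so

       li rD, -3
       rldic rD, rD, 32, 16  ; rotate left 32, clear 16 high/32 low bits

   with *SHIFT = 32 and *MASK = 0x0000ffff00000000.  */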
10486 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10487 Output insns to set DEST equal to the constant C as a series of
10488 lis, ori and shl instructions. If NUM_INSNS is not NULL, then
10489 only increase *NUM_INSNS as the number of insns, and do not emit
10490 any insns. */
10492 static void
10493 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
10495 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10497 ud1 = c & 0xffff;
10498 ud2 = (c >> 16) & 0xffff;
10499 ud3 = (c >> 32) & 0xffff;
10500 ud4 = (c >> 48) & 0xffff;
10502 /* This lambda either emits one insn or just increases the insn count.
10503 When only counting insns, nothing is emitted. */
10504 auto count_or_emit_insn = [&num_insns] (rtx dest_or_insn, rtx src = nullptr) {
10505 if (num_insns)
10507 (*num_insns)++;
10508 return;
10511 if (src)
10512 emit_move_insn (dest_or_insn, src);
10513 else
10514 emit_insn (dest_or_insn);
10517 if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c))
10519 /* li/lis/pli */
10520 count_or_emit_insn (dest, GEN_INT (c));
10521 return;
10524 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10525 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
10527 /* li */
10528 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10529 return;
10532 rtx temp
10533 = (num_insns || !can_create_pseudo_p ()) ? dest : gen_reg_rtx (DImode);
10535 if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10536 || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
10538 /* lis[; ori] */
10539 count_or_emit_insn (ud1 != 0 ? temp : dest,
10540 GEN_INT (sext_hwi (ud2 << 16, 32)));
10541 if (ud1 != 0)
10542 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10543 return;
10546 if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
10548 /* lis; xoris */
10549 count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
10550 count_or_emit_insn (dest,
10551 gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
10552 return;
10555 if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10557 /* li; xoris */
10558 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10559 count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
10560 GEN_INT ((ud2 ^ 0xffff) << 16)));
10561 return;
10564 int shift;
10565 HOST_WIDE_INT mask;
10566 if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
10567 || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
10568 || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
10569 || can_be_built_by_li_and_rldic (c, &shift, &mask))
10571 /* li/lis; rldicX */
10572 unsigned HOST_WIDE_INT imm = (c | ~mask);
10573 imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
10575 count_or_emit_insn (temp, GEN_INT (imm));
10576 if (shift != 0)
10577 temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
10578 if (mask != HOST_WIDE_INT_M1)
10579 temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
10580 count_or_emit_insn (dest, temp);
10582 return;
10585 if (ud3 == 0 && ud4 == 0)
10587 gcc_assert ((ud2 & 0x8000) && ud1 != 0);
10588 if (!(ud1 & 0x8000))
10590 /* li; oris */
10591 count_or_emit_insn (temp, GEN_INT (ud1));
10592 count_or_emit_insn (dest,
10593 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10594 return;
10597 /* lis; ori; rldicl */
10598 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10599 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10600 count_or_emit_insn (dest,
10601 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10602 return;
10605 if (ud1 == ud3 && ud2 == ud4)
10607 /* Load the low 32 bits first, e.g. "lis; ori", then "rldimi". */
10608 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10609 rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
10611 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp,
10612 GEN_INT (0xffffffff));
10613 count_or_emit_insn (rldimi);
10614 return;
10617 if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
10619 /* lis; [ori;] sldi 16 [; ori]. */
10620 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10621 if (ud2 != 0)
10622 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10623 count_or_emit_insn (ud1 != 0 ? temp : dest,
10624 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10625 if (ud1 != 0)
10626 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10627 return;
10630 if (TARGET_PREFIXED)
10632 if (can_create_pseudo_p ())
10634 /* pli A,L; pli B,H; rldimi A,B,32,0. */
10635 rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
10636 count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10637 count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10638 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10639 GEN_INT (0xffffffff));
10640 count_or_emit_insn (rldimi);
10641 return;
10644 /* pli A,H; sldi A,32; paddi A,A,L. */
10645 count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10646 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10648 bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
10649 /* Use paddi for the low 32 bits. */
10650 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10651 count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
10652 GEN_INT ((ud2 << 16) | ud1)));
10653 /* Use oris and/or ori for the low 32 bits. */
10654 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10655 count_or_emit_insn (dest,
10656 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10657 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10658 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10659 return;
10662 if (can_create_pseudo_p ())
10664 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10665 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10666 rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
10667 rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
10668 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10669 rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
10670 num = (ud4 << 16) | ud3;
10671 rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
10673 rtx rldimi = gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
10674 GEN_INT (0xffffffff));
10675 count_or_emit_insn (rldimi);
10676 return;
10679 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10680 oris DEST,UD2 ; ori DEST,UD1. */
10681 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
10682 if (ud3 != 0)
10683 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
10685 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10686 if (ud2 != 0)
10687 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10688 if (ud1 != 0)
10689 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10691 return;
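/* Editorial sketch (plain C, not GCC internals): the generic lis/ori
   plus rldimi path above, worked for c = 0x0123456789abcdef.  */
static inline unsigned long long
example_build_64bit_const (void)
{
  long long high = 0x01230000;       /* lis  rH, 0x0123 */
  high |= 0x4567;                    /* ori  rH, rH, 0x4567 */
  long long low = (int) 0x89ab0000;  /* lis  rL, 0x89ab (sign-extends) */
  low |= 0xcdef;                     /* ori  rL, rL, 0xcdef */
  /* rldimi rL, rH, 32, 0: insert rH into the high 32 bits of rL.  */
  return ((unsigned long long) high << 32) | (low & 0xffffffffULL);
}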
10694 /* Helper for the following. Get rid of [r+r] memory refs
10695 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10697 static void
10698 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10700 if (MEM_P (operands[0])
10701 && !REG_P (XEXP (operands[0], 0))
10702 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10703 GET_MODE (operands[0]), false))
10704 operands[0]
10705 = replace_equiv_address (operands[0],
10706 copy_addr_to_reg (XEXP (operands[0], 0)));
10708 if (MEM_P (operands[1])
10709 && !REG_P (XEXP (operands[1], 0))
10710 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10711 GET_MODE (operands[1]), false))
10712 operands[1]
10713 = replace_equiv_address (operands[1],
10714 copy_addr_to_reg (XEXP (operands[1], 0)));
10717 /* Generate a vector of constants to permute MODE for a little-endian
10718 storage operation by swapping the two halves of a vector. */
10719 static rtvec
10720 rs6000_const_vec (machine_mode mode)
10722 int i, subparts;
10723 rtvec v;
10725 switch (mode)
10727 case E_V1TImode:
10728 subparts = 1;
10729 break;
10730 case E_V2DFmode:
10731 case E_V2DImode:
10732 subparts = 2;
10733 break;
10734 case E_V4SFmode:
10735 case E_V4SImode:
10736 subparts = 4;
10737 break;
10738 case E_V8HImode:
10739 subparts = 8;
10740 break;
10741 case E_V16QImode:
10742 subparts = 16;
10743 break;
10744 default:
10745 gcc_unreachable();
10748 v = rtvec_alloc (subparts);
10750 for (i = 0; i < subparts / 2; ++i)
10751 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10752 for (i = subparts / 2; i < subparts; ++i)
10753 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10755 return v;
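/* Editorial example: for V4SImode (subparts == 4) the selector built
   above is { 2, 3, 0, 1 }: the two 64-bit halves of the vector swap
   places while the order within each half is preserved.  */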
10758 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10759 store operation. */
10760 void
10761 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10763 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10764 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10766 /* Scalar permutations are easier to express in integer modes rather than
10767 floating-point modes, so cast them here. We use V1TImode instead
10768 of TImode to ensure that the values don't go through GPRs. */
10769 if (FLOAT128_VECTOR_P (mode))
10771 dest = gen_lowpart (V1TImode, dest);
10772 source = gen_lowpart (V1TImode, source);
10773 mode = V1TImode;
10776 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10777 scalar. */
10778 if (mode == TImode || mode == V1TImode)
10779 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10780 GEN_INT (64))));
10781 else
10783 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10784 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10788 /* Emit a little-endian load from vector memory location SOURCE to VSX
10789 register DEST in mode MODE. The load is done with two permuting
10790 insns that represent an lxvd2x and an xxpermdi. */
10791 void
10792 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10794 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10795 V1TImode). */
10796 if (mode == TImode || mode == V1TImode)
10798 mode = V2DImode;
10799 dest = gen_lowpart (V2DImode, dest);
10800 source = adjust_address (source, V2DImode, 0);
10803 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10804 rs6000_emit_le_vsx_permute (tmp, source, mode);
10805 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10808 /* Emit a little-endian store to vector memory location DEST from VSX
10809 register SOURCE in mode MODE. The store is done with two permuting
10810 insns that represent an xxpermdi and an stxvd2x. */
10811 void
10812 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10814 /* This should never be called after LRA. */
10815 gcc_assert (can_create_pseudo_p ());
10817 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10818 V1TImode). */
10819 if (mode == TImode || mode == V1TImode)
10821 mode = V2DImode;
10822 dest = adjust_address (dest, V2DImode, 0);
10823 source = gen_lowpart (V2DImode, source);
10826 rtx tmp = gen_reg_rtx_and_attrs (source);
10827 rs6000_emit_le_vsx_permute (tmp, source, mode);
10828 rs6000_emit_le_vsx_permute (dest, tmp, mode);
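/* Editorial note: on little-endian, lxvd2x/stxvd2x transfer the two
   doublewords in big-endian order, so a plain load arrives with its
   halves swapped; the second permute above swaps them back:

       lxvd2x   vT, 0, rA      ; halves arrive swapped
       xxpermdi vD, vT, vT, 2  ; swap them back (xxswapd)

   The store sequence is the mirror image.  */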
10831 /* Emit a sequence representing a little-endian VSX load or store,
10832 moving data from SOURCE to DEST in mode MODE. This is done
10833 separately from rs6000_emit_move to ensure it is called only
10834 during expand. LE VSX loads and stores introduced later are
10835 handled with a split. The expand-time RTL generation allows
10836 us to optimize away redundant pairs of register-permutes. */
10837 void
10838 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10840 gcc_assert (!BYTES_BIG_ENDIAN
10841 && VECTOR_MEM_VSX_P (mode)
10842 && !TARGET_P9_VECTOR
10843 && !gpr_or_gpr_p (dest, source)
10844 && (MEM_P (source) ^ MEM_P (dest)));
10846 if (MEM_P (source))
10848 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10849 rs6000_emit_le_vsx_load (dest, source, mode);
10851 else
10853 if (!REG_P (source))
10854 source = force_reg (mode, source);
10855 rs6000_emit_le_vsx_store (dest, source, mode);
10859 /* Return whether a SFmode or SImode move can be done without converting one
10860 mode to another. This arises when we have:
10862 (SUBREG:SF (REG:SI ...))
10863 (SUBREG:SI (REG:SF ...))
10865 and one of the values is in a floating point/vector register, where SFmode
10866 scalars are stored in DFmode format. */
10868 bool
10869 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10871 if (TARGET_ALLOW_SF_SUBREG)
10872 return true;
10874 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10875 return true;
10877 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10878 return true;
10880 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10881 if (SUBREG_P (dest))
10883 rtx dest_subreg = SUBREG_REG (dest);
10884 rtx src_subreg = SUBREG_REG (src);
10885 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10888 return false;
10892 /* Helper function to change moves with:
10894 (SUBREG:SF (REG:SI)) and
10895 (SUBREG:SI (REG:SF))
10897 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10898 values are stored as DFmode values in the VSX registers. We need to convert
10899 the bits before we can use a direct move or operate on the bits in the
10900 vector register as an integer type.
10902 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10904 static bool
10905 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10907 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10908 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10909 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10911 rtx inner_source = SUBREG_REG (source);
10912 machine_mode inner_mode = GET_MODE (inner_source);
10914 if (mode == SImode && inner_mode == SFmode)
10916 emit_insn (gen_movsi_from_sf (dest, inner_source));
10917 return true;
10920 if (mode == SFmode && inner_mode == SImode)
10922 emit_insn (gen_movsf_from_si (dest, inner_source));
10923 return true;
10927 return false;
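/* Illustrative transform (a sketch): the RTL

       (set (reg:SI r) (subreg:SI (reg:SF f) 0))

   is replaced by the equivalent of gen_movsi_from_sf (r, f), whose
   pattern, per the comment above, converts the DFmode-format scalar to
   the SFmode memory layout before the 32-bit pattern is moved.  */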
10930 /* Emit a move from SOURCE to DEST in mode MODE. */
10931 void
10932 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10934 rtx operands[2];
10935 operands[0] = dest;
10936 operands[1] = source;
10938 if (TARGET_DEBUG_ADDR)
10940 fprintf (stderr,
10941 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10942 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10943 GET_MODE_NAME (mode),
10944 lra_in_progress,
10945 reload_completed,
10946 can_create_pseudo_p ());
10947 debug_rtx (dest);
10948 fprintf (stderr, "source:\n");
10949 debug_rtx (source);
10952 /* Check that we get CONST_WIDE_INT only when we should. */
10953 if (CONST_WIDE_INT_P (operands[1])
10954 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10955 gcc_unreachable ();
10957 #ifdef HAVE_AS_GNU_ATTRIBUTE
10958 /* If we use a long double type, set the flags in .gnu_attribute that say
10959 what the long double type is. This is to allow the linker's warning
10960 message for the wrong long double to be useful, even if the function does
10961 not do a call (for example, doing a 128-bit add on power9 if the long
10962 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10963 are used and they aren't the default long double type. */
10964 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10966 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10967 rs6000_passes_float = rs6000_passes_long_double = true;
10969 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10970 rs6000_passes_float = rs6000_passes_long_double = true;
10972 #endif
10974 /* See if we need to special case SImode/SFmode SUBREG moves. */
10975 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10976 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10977 return;
10979 /* Check if GCC is setting up a block move that will end up using FP
10980 registers as temporaries. We must make sure this is acceptable. */
10981 if (MEM_P (operands[0])
10982 && MEM_P (operands[1])
10983 && mode == DImode
10984 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10985 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10986 && ! (rs6000_slow_unaligned_access (SImode,
10987 (MEM_ALIGN (operands[0]) > 32
10988 ? 32 : MEM_ALIGN (operands[0])))
10989 || rs6000_slow_unaligned_access (SImode,
10990 (MEM_ALIGN (operands[1]) > 32
10991 ? 32 : MEM_ALIGN (operands[1]))))
10992 && ! MEM_VOLATILE_P (operands [0])
10993 && ! MEM_VOLATILE_P (operands [1]))
10995 emit_move_insn (adjust_address (operands[0], SImode, 0),
10996 adjust_address (operands[1], SImode, 0));
10997 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10998 adjust_address (copy_rtx (operands[1]), SImode, 4));
10999 return;
11002 if (can_create_pseudo_p () && MEM_P (operands[0])
11003 && !gpc_reg_operand (operands[1], mode))
11004 operands[1] = force_reg (mode, operands[1]);
11006 /* Recognize the case where operand[1] is a reference to thread-local
11007 data and load its address to a register. */
11008 if (tls_referenced_p (operands[1]))
11010 enum tls_model model;
11011 rtx tmp = operands[1];
11012 rtx addend = NULL;
11014 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11016 addend = XEXP (XEXP (tmp, 0), 1);
11017 tmp = XEXP (XEXP (tmp, 0), 0);
11020 gcc_assert (SYMBOL_REF_P (tmp));
11021 model = SYMBOL_REF_TLS_MODEL (tmp);
11022 gcc_assert (model != 0);
11024 tmp = rs6000_legitimize_tls_address (tmp, model);
11025 if (addend)
11027 tmp = gen_rtx_PLUS (mode, tmp, addend);
11028 tmp = force_operand (tmp, operands[0]);
11030 operands[1] = tmp;
11033 /* 128-bit constant floating-point values on Darwin should really be loaded
11034 as two parts. However, this premature splitting is a problem when DFmode
11035 values can go into Altivec registers. */
11036 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
11037 && !reg_addr[DFmode].scalar_in_vmx_p)
11039 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11040 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11041 DFmode);
11042 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11043 GET_MODE_SIZE (DFmode)),
11044 simplify_gen_subreg (DFmode, operands[1], mode,
11045 GET_MODE_SIZE (DFmode)),
11046 DFmode);
11047 return;
11050 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11051 p1:SD) if p1 is not of floating point class and p0 is spilled, as
11052 we can have no analogous movsd_store for this. */
11053 if (lra_in_progress && mode == DDmode
11054 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11055 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11056 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
11057 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11059 enum reg_class cl;
11060 int regno = REGNO (SUBREG_REG (operands[1]));
11062 if (!HARD_REGISTER_NUM_P (regno))
11064 cl = reg_preferred_class (regno);
11065 regno = reg_renumber[regno];
11066 if (regno < 0)
11067 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11069 if (regno >= 0 && ! FP_REGNO_P (regno))
11071 mode = SDmode;
11072 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11073 operands[1] = SUBREG_REG (operands[1]);
11076 if (lra_in_progress
11077 && mode == SDmode
11078 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11079 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11080 && (REG_P (operands[1])
11081 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
11083 int regno = reg_or_subregno (operands[1]);
11084 enum reg_class cl;
11086 if (!HARD_REGISTER_NUM_P (regno))
11088 cl = reg_preferred_class (regno);
11089 gcc_assert (cl != NO_REGS);
11090 regno = reg_renumber[regno];
11091 if (regno < 0)
11092 regno = ira_class_hard_regs[cl][0];
11094 if (FP_REGNO_P (regno))
11096 if (GET_MODE (operands[0]) != DDmode)
11097 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11098 emit_insn (gen_movsd_store (operands[0], operands[1]));
11100 else if (INT_REGNO_P (regno))
11101 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11102 else
11103 gcc_unreachable ();
11104 return;
11106 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11107 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
11108 we can have no analogous movsd_load for this. */
11109 if (lra_in_progress && mode == DDmode
11110 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
11111 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11112 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11113 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11115 enum reg_class cl;
11116 int regno = REGNO (SUBREG_REG (operands[0]));
11118 if (!HARD_REGISTER_NUM_P (regno))
11120 cl = reg_preferred_class (regno);
11121 regno = reg_renumber[regno];
11122 if (regno < 0)
11123 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11125 if (regno >= 0 && ! FP_REGNO_P (regno))
11127 mode = SDmode;
11128 operands[0] = SUBREG_REG (operands[0]);
11129 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11132 if (lra_in_progress
11133 && mode == SDmode
11134 && (REG_P (operands[0])
11135 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
11136 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11137 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11139 int regno = reg_or_subregno (operands[0]);
11140 enum reg_class cl;
11142 if (!HARD_REGISTER_NUM_P (regno))
11144 cl = reg_preferred_class (regno);
11145 gcc_assert (cl != NO_REGS);
11146 regno = reg_renumber[regno];
11147 if (regno < 0)
11148 regno = ira_class_hard_regs[cl][0];
11150 if (FP_REGNO_P (regno))
11152 if (GET_MODE (operands[1]) != DDmode)
11153 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11154 emit_insn (gen_movsd_load (operands[0], operands[1]));
11156 else if (INT_REGNO_P (regno))
11157 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11158 else
11159 gcc_unreachable ();
11160 return;
11163 /* FIXME: In the long term, this switch statement should go away
11164 and be replaced by a sequence of tests based on things like
11165 mode == Pmode. */
11166 switch (mode)
11168 case E_HImode:
11169 case E_QImode:
11170 if (CONSTANT_P (operands[1])
11171 && !CONST_INT_P (operands[1]))
11172 operands[1] = force_const_mem (mode, operands[1]);
11173 break;
11175 case E_TFmode:
11176 case E_TDmode:
11177 case E_IFmode:
11178 case E_KFmode:
11179 if (FLOAT128_2REG_P (mode))
11180 rs6000_eliminate_indexed_memrefs (operands);
11181 /* fall through */
11183 case E_DFmode:
11184 case E_DDmode:
11185 case E_SFmode:
11186 case E_SDmode:
11187 if (CONSTANT_P (operands[1])
11188 && ! easy_fp_constant (operands[1], mode))
11189 operands[1] = force_const_mem (mode, operands[1]);
11190 break;
11192 case E_V16QImode:
11193 case E_V8HImode:
11194 case E_V4SFmode:
11195 case E_V4SImode:
11196 case E_V2DFmode:
11197 case E_V2DImode:
11198 case E_V1TImode:
11199 if (CONSTANT_P (operands[1])
11200 && !easy_vector_constant (operands[1], mode))
11201 operands[1] = force_const_mem (mode, operands[1]);
11202 break;
11204 case E_OOmode:
11205 case E_XOmode:
11206 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
11207 error ("%qs is an opaque type, and you cannot set it to other values",
11208 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
11209 break;
11211 case E_SImode:
11212 case E_DImode:
11213 /* Use default pattern for address of ELF small data. */
11214 if (TARGET_ELF
11215 && mode == Pmode
11216 && DEFAULT_ABI == ABI_V4
11217 && (SYMBOL_REF_P (operands[1])
11218 || GET_CODE (operands[1]) == CONST)
11219 && small_data_operand (operands[1], mode))
11221 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11222 return;
11225 /* Use the default pattern for loading up PC-relative addresses. */
11226 if (TARGET_PCREL && mode == Pmode
11227 && pcrel_local_or_external_address (operands[1], Pmode))
11229 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11230 return;
11233 if (DEFAULT_ABI == ABI_V4
11234 && mode == Pmode && mode == SImode
11235 && flag_pic == 1 && got_operand (operands[1], mode))
11237 emit_insn (gen_movsi_got (operands[0], operands[1]));
11238 return;
11241 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11242 && TARGET_NO_TOC_OR_PCREL
11243 && ! flag_pic
11244 && mode == Pmode
11245 && CONSTANT_P (operands[1])
11246 && GET_CODE (operands[1]) != HIGH
11247 && !CONST_INT_P (operands[1]))
11249 rtx target = (!can_create_pseudo_p ()
11250 ? operands[0]
11251 : gen_reg_rtx (mode));
11253 /* If this is a function address on -mcall-aixdesc,
11254 convert it to the address of the descriptor. */
11255 if (DEFAULT_ABI == ABI_AIX
11256 && SYMBOL_REF_P (operands[1])
11257 && XSTR (operands[1], 0)[0] == '.')
11259 const char *name = XSTR (operands[1], 0);
11260 rtx new_ref;
11261 while (*name == '.')
11262 name++;
11263 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11264 CONSTANT_POOL_ADDRESS_P (new_ref)
11265 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11266 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11267 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11268 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11269 operands[1] = new_ref;
11272 if (DEFAULT_ABI == ABI_DARWIN)
11274 #if TARGET_MACHO
11275 /* This is not PIC code, but could require the subset of
11276 indirections used by mdynamic-no-pic. */
11277 if (MACHO_DYNAMIC_NO_PIC_P)
11279 /* Take care of any required data indirection. */
11280 operands[1] = rs6000_machopic_legitimize_pic_address (
11281 operands[1], mode, operands[0]);
11282 if (operands[0] != operands[1])
11283 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11284 return;
11286 #endif
11287 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11288 emit_insn (gen_macho_low (Pmode, operands[0],
11289 target, operands[1]));
11290 return;
11293 emit_insn (gen_elf_high (target, operands[1]));
11294 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11295 return;
11298 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11299 and we have put it in the TOC, we just need to make a TOC-relative
11300 reference to it. */
11301 if (TARGET_TOC
11302 && SYMBOL_REF_P (operands[1])
11303 && use_toc_relative_ref (operands[1], mode))
11304 operands[1] = create_TOC_reference (operands[1], operands[0]);
11305 else if (mode == Pmode
11306 && CONSTANT_P (operands[1])
11307 && GET_CODE (operands[1]) != HIGH
11308 && ((REG_P (operands[0])
11309 && FP_REGNO_P (REGNO (operands[0])))
11310 || !CONST_INT_P (operands[1])
11311 || (num_insns_constant (operands[1], mode)
11312 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11313 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11314 && (TARGET_CMODEL == CMODEL_SMALL
11315 || can_create_pseudo_p ()
11316 || (REG_P (operands[0])
11317 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11320 #if TARGET_MACHO
11321 /* Darwin uses a special PIC legitimizer. */
11322 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11324 operands[1] =
11325 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11326 operands[0]);
11327 if (operands[0] != operands[1])
11328 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11329 return;
11331 #endif
11333 /* If we are to limit the number of things we put in the TOC and
11334 this is a symbol plus a constant we can add in one insn,
11335 just put the symbol in the TOC and add the constant. */
11336 if (GET_CODE (operands[1]) == CONST
11337 && TARGET_NO_SUM_IN_TOC
11338 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11339 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11340 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11341 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11342 && ! side_effects_p (operands[0]))
11344 rtx sym =
11345 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11346 rtx other = XEXP (XEXP (operands[1], 0), 1);
11348 sym = force_reg (mode, sym);
11349 emit_insn (gen_add3_insn (operands[0], sym, other));
11350 return;
11353 operands[1] = force_const_mem (mode, operands[1]);
11355 if (TARGET_TOC
11356 && SYMBOL_REF_P (XEXP (operands[1], 0))
11357 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11359 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11360 operands[0]);
11361 operands[1] = gen_const_mem (mode, tocref);
11362 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11365 break;
11367 case E_TImode:
11368 if (!VECTOR_MEM_VSX_P (TImode))
11369 rs6000_eliminate_indexed_memrefs (operands);
11370 break;
11372 case E_PTImode:
11373 rs6000_eliminate_indexed_memrefs (operands);
11374 break;
11376 default:
11377 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11380 /* Above, we may have called force_const_mem which may have returned
11381 an invalid address. If we can, fix this up; otherwise, reload will
11382 have to deal with it. */
11383 if (MEM_P (operands[1]))
11384 operands[1] = validize_mem (operands[1]);
11386 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11390 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11391 static void
11392 init_float128_ibm (machine_mode mode)
11394 if (!TARGET_XL_COMPAT)
11396 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11397 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11398 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11399 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11401 if (!TARGET_HARD_FLOAT)
11403 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11404 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11405 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11406 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11407 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11408 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11409 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11410 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11412 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11413 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11414 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11415 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11416 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11417 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11418 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11419 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11422 else
11424 set_optab_libfunc (add_optab, mode, "_xlqadd");
11425 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11426 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11427 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11430 /* Add various conversions for IFmode to use the traditional TFmode
11431 names. */
11432 if (mode == IFmode)
11434 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11435 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11436 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11437 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11438 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11439 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11441 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11442 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11444 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11445 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11447 if (TARGET_POWERPC64)
11449 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11450 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11451 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11452 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11457 /* Set up IEEE 128-bit floating point routines. Use different names if the
11458 arguments can be passed in a vector register. The historical PowerPC
11459 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11460 continue to use that if we aren't using vector registers to pass IEEE
11461 128-bit floating point. */
11463 static void
11464 init_float128_ieee (machine_mode mode)
11466 if (FLOAT128_VECTOR_P (mode))
11468 set_optab_libfunc (add_optab, mode, "__addkf3");
11469 set_optab_libfunc (sub_optab, mode, "__subkf3");
11470 set_optab_libfunc (neg_optab, mode, "__negkf2");
11471 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11472 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11473 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11474 set_optab_libfunc (abs_optab, mode, "__abskf2");
11475 set_optab_libfunc (powi_optab, mode, "__powikf2");
11477 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11478 set_optab_libfunc (ne_optab, mode, "__nekf2");
11479 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11480 set_optab_libfunc (ge_optab, mode, "__gekf2");
11481 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11482 set_optab_libfunc (le_optab, mode, "__lekf2");
11483 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11485 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11486 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11487 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11488 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11490 set_conv_libfunc (trunc_optab, mode, IFmode, "__trunctfkf2");
11491 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11492 set_conv_libfunc (trunc_optab, mode, TFmode, "__trunctfkf2");
11494 set_conv_libfunc (sext_optab, IFmode, mode, "__extendkftf2");
11495 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11496 set_conv_libfunc (sext_optab, TFmode, mode, "__extendkftf2");
11498 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11499 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11500 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11501 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11502 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11503 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11505 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11506 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11507 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11508 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11510 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11511 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11512 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11513 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11515 if (TARGET_POWERPC64)
11517 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11518 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11519 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11520 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11524 else
11526 set_optab_libfunc (add_optab, mode, "_q_add");
11527 set_optab_libfunc (sub_optab, mode, "_q_sub");
11528 set_optab_libfunc (neg_optab, mode, "_q_neg");
11529 set_optab_libfunc (smul_optab, mode, "_q_mul");
11530 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11531 if (TARGET_PPC_GPOPT)
11532 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11534 set_optab_libfunc (eq_optab, mode, "_q_feq");
11535 set_optab_libfunc (ne_optab, mode, "_q_fne");
11536 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11537 set_optab_libfunc (ge_optab, mode, "_q_fge");
11538 set_optab_libfunc (lt_optab, mode, "_q_flt");
11539 set_optab_libfunc (le_optab, mode, "_q_fle");
11541 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11542 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11543 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11544 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11545 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11546 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11547 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11548 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11552 static void
11553 rs6000_init_libfuncs (void)
11555 /* __float128 support. */
11556 if (TARGET_FLOAT128_TYPE)
11558 init_float128_ibm (IFmode);
11559 init_float128_ieee (KFmode);
11562 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11563 if (TARGET_LONG_DOUBLE_128)
11565 if (!TARGET_IEEEQUAD)
11566 init_float128_ibm (TFmode);
11568 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11569 else
11570 init_float128_ieee (TFmode);
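/* For example (illustrative): with IEEE 128-bit long double on a target
   that passes the type in vector registers, a TFmode addition that is
   not open-coded becomes a call to __addkf3; with the IBM extended
   format it becomes __gcc_qadd (or _xlqadd under -mxl-compat).  */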
11574 /* Emit a potentially record-form instruction, setting DST from SRC.
11575 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11576 signed comparison of DST with zero. If DOT is 1, the generated RTL
11577 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11578 is CR0, do a single dot insn (as a PARALLEL); otherwise, do a SET and
11579 a separate COMPARE. */
11581 void
11582 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11584 if (dot == 0)
11586 emit_move_insn (dst, src);
11587 return;
11590 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11592 emit_move_insn (dst, src);
11593 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11594 return;
11597 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11598 if (dot == 1)
11600 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11601 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11603 else
11605 rtx set = gen_rtx_SET (dst, src);
11606 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
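/* E.g. (a sketch): with DOT == 1 and CCREG being CR0, the PARALLEL
   built above corresponds to a record-form instruction such as

	rldicl. 0,3,0,63

   where only the CR0 result matters and the GPR destination is merely
   a clobber.  */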
11611 /* A validation routine: say whether CODE, a condition code, and MODE
11612 match. The other alternatives either don't make sense or should
11613 never be generated. */
11615 void
11616 validate_condition_mode (enum rtx_code code, machine_mode mode)
11618 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11619 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11620 && GET_MODE_CLASS (mode) == MODE_CC);
11622 /* These don't make sense. */
11623 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11624 || mode != CCUNSmode);
11626 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11627 || mode == CCUNSmode);
11629 gcc_assert (mode == CCFPmode
11630 || (code != ORDERED && code != UNORDERED
11631 && code != UNEQ && code != LTGT
11632 && code != UNGT && code != UNLT
11633 && code != UNGE && code != UNLE));
11635 /* These are invalid; the information is not there. */
11636 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11640 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11641 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11642 not zero, store there the bit offset (counted from the right) where
11643 the single stretch of 1 bits begins; and similarly for B, the bit
11644 offset where it ends. */
11646 bool
11647 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11649 unsigned HOST_WIDE_INT val = INTVAL (mask);
11650 unsigned HOST_WIDE_INT bit;
11651 int nb, ne;
11652 int n = GET_MODE_PRECISION (mode);
11654 if (mode != DImode && mode != SImode)
11655 return false;
11657 if (INTVAL (mask) >= 0)
11659 bit = val & -val;
11660 ne = exact_log2 (bit);
11661 nb = exact_log2 (val + bit);
11663 else if (val + 1 == 0)
11665 nb = n;
11666 ne = 0;
11668 else if (val & 1)
11670 val = ~val;
11671 bit = val & -val;
11672 nb = exact_log2 (bit);
11673 ne = exact_log2 (val + bit);
11675 else
11677 bit = val & -val;
11678 ne = exact_log2 (bit);
11679 if (val + bit == 0)
11680 nb = n;
11681 else
11682 nb = 0;
11685 nb--;
11687 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11688 return false;
11690 if (b)
11691 *b = nb;
11692 if (e)
11693 *e = ne;
11695 return true;
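/* Worked example (illustrative): in SImode,

     MASK = 0x00000ff0  ->  *b = 11, *e = 4

   since the single stretch of ones starts at bit 4 and ends at bit 11,
   counted from the right.  A wrap-around DImode mask such as
   0xff000000000000ff is also accepted, with *b = 7 and *e = 56: the
   stretch begins at bit 56 and wraps past bit 63 to end at bit 7.  */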
11698 bool
11699 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11701 int nb, ne;
11702 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11704 if (TARGET_64BIT)
11705 return true;
11706 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11707 <= 0x7fffffff. */
11708 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11711 return false;
11714 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11715 or rldicr instruction, to implement an AND with it in mode MODE. */
11717 bool
11718 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11720 int nb, ne;
11722 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11723 return false;
11725 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11726 does not wrap. */
11727 if (mode == DImode)
11728 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11730 /* For SImode, rlwinm can do everything. */
11731 if (mode == SImode)
11732 return (nb < 32 && ne < 32);
11734 return false;
11737 /* Return the instruction template for an AND with mask in mode MODE, with
11738 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11740 const char *
11741 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11743 int nb, ne;
11745 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11746 gcc_unreachable ();
11748 if (mode == DImode && ne == 0)
11750 operands[3] = GEN_INT (63 - nb);
11751 if (dot)
11752 return "rldicl. %0,%1,0,%3";
11753 return "rldicl %0,%1,0,%3";
11756 if (mode == DImode && nb == 63)
11758 operands[3] = GEN_INT (63 - ne);
11759 if (dot)
11760 return "rldicr. %0,%1,0,%3";
11761 return "rldicr %0,%1,0,%3";
11764 if (nb < 32 && ne < 32)
11766 operands[3] = GEN_INT (31 - nb);
11767 operands[4] = GEN_INT (31 - ne);
11768 if (dot)
11769 return "rlwinm. %0,%1,0,%3,%4";
11770 return "rlwinm %0,%1,0,%3,%4";
11773 gcc_unreachable ();
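/* Worked example (illustrative): in DImode with operands[2] = 0xff,
   ne == 0 and nb == 7, so the template returned is

	rldicl %0,%1,0,56

   which clears all but the low 8 bits.  */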
11776 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11777 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11778 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11780 bool
11781 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11783 int nb, ne;
11785 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11786 return false;
11788 int n = GET_MODE_PRECISION (mode);
11789 int sh = -1;
11791 if (CONST_INT_P (XEXP (shift, 1)))
11793 sh = INTVAL (XEXP (shift, 1));
11794 if (sh < 0 || sh >= n)
11795 return false;
11798 rtx_code code = GET_CODE (shift);
11800 /* Convert any shift by 0 to a rotate, to simplify below code. */
11801 if (sh == 0)
11802 code = ROTATE;
11804 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11805 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11806 code = ASHIFT;
11807 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11809 code = LSHIFTRT;
11810 sh = n - sh;
11813 /* DImode rotates need rld*. */
11814 if (mode == DImode && code == ROTATE)
11815 return (nb == 63 || ne == 0 || ne == sh);
11817 /* SImode rotates need rlw*. */
11818 if (mode == SImode && code == ROTATE)
11819 return (nb < 32 && ne < 32 && sh < 32);
11821 /* Wrap-around masks are only okay for rotates. */
11822 if (ne > nb)
11823 return false;
11825 /* Variable shifts are only okay for rotates. */
11826 if (sh < 0)
11827 return false;
11829 /* Don't allow ASHIFT if the mask is wrong for that. */
11830 if (code == ASHIFT && ne < sh)
11831 return false;
11833 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11834 if the mask is wrong for that. */
11835 if (nb < 32 && ne < 32 && sh < 32
11836 && !(code == LSHIFTRT && nb >= 32 - sh))
11837 return true;
11839 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11840 if the mask is wrong for that. */
11841 if (code == LSHIFTRT)
11842 sh = 64 - sh;
11843 if (nb == 63 || ne == 0 || ne == sh)
11844 return !(code == LSHIFTRT && nb >= sh);
11846 return false;
11849 /* Return the instruction template for a shift with mask in mode MODE, with
11850 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11852 const char *
11853 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11855 int nb, ne;
11857 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11858 gcc_unreachable ();
11860 if (mode == DImode && ne == 0)
11862 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11863 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11864 operands[3] = GEN_INT (63 - nb);
11865 if (dot)
11866 return "rld%I2cl. %0,%1,%2,%3";
11867 return "rld%I2cl %0,%1,%2,%3";
11870 if (mode == DImode && nb == 63)
11872 operands[3] = GEN_INT (63 - ne);
11873 if (dot)
11874 return "rld%I2cr. %0,%1,%2,%3";
11875 return "rld%I2cr %0,%1,%2,%3";
11878 if (mode == DImode
11879 && GET_CODE (operands[4]) != LSHIFTRT
11880 && CONST_INT_P (operands[2])
11881 && ne == INTVAL (operands[2]))
11883 operands[3] = GEN_INT (63 - nb);
11884 if (dot)
11885 return "rld%I2c. %0,%1,%2,%3";
11886 return "rld%I2c %0,%1,%2,%3";
11889 if (nb < 32 && ne < 32)
11891 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11892 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11893 operands[3] = GEN_INT (31 - nb);
11894 operands[4] = GEN_INT (31 - ne);
11895 /* This insn can also be a 64-bit rotate with mask that really makes
11896 it just a shift right (with mask); the %h below are to adjust for
11897 that situation (shift count is >= 32 in that case). */
11898 if (dot)
11899 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11900 return "rlw%I2nm %0,%1,%h2,%3,%4";
11903 gcc_unreachable ();
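/* Worked example (illustrative): for a DImode (lshiftrt:DI x 8) with
   mask 0x00ffffffffffffff, ne == 0 and nb == 55, so operands[2] is
   rewritten to 64 - 8 = 56 and the template gives

	rldicl %0,%1,56,8

   i.e. the canonical encoding of srdi %0,%1,8.  */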
11906 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11907 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11908 ASHIFT, or LSHIFTRT) in mode MODE. */
11910 bool
11911 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11913 int nb, ne;
11915 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11916 return false;
11918 int n = GET_MODE_PRECISION (mode);
11920 int sh = INTVAL (XEXP (shift, 1));
11921 if (sh < 0 || sh >= n)
11922 return false;
11924 rtx_code code = GET_CODE (shift);
11926 /* Convert any shift by 0 to a rotate, to simplify below code. */
11927 if (sh == 0)
11928 code = ROTATE;
11930 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11931 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11932 code = ASHIFT;
11933 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11935 code = LSHIFTRT;
11936 sh = n - sh;
11939 /* DImode rotates need rldimi. */
11940 if (mode == DImode && code == ROTATE)
11941 return (ne == sh);
11943 /* SImode rotates need rlwimi. */
11944 if (mode == SImode && code == ROTATE)
11945 return (nb < 32 && ne < 32 && sh < 32);
11947 /* Wrap-around masks are only okay for rotates. */
11948 if (ne > nb)
11949 return false;
11951 /* Don't allow ASHIFT if the mask is wrong for that. */
11952 if (code == ASHIFT && ne < sh)
11953 return false;
11955 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11956 if the mask is wrong for that. */
11957 if (nb < 32 && ne < 32 && sh < 32
11958 && !(code == LSHIFTRT && nb >= 32 - sh))
11959 return true;
11961 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11962 if the mask is wrong for that. */
11963 if (code == LSHIFTRT)
11964 sh = 64 - sh;
11965 if (ne == sh)
11966 return !(code == LSHIFTRT && nb >= sh);
11968 return false;
11971 /* Return the instruction template for an insert with mask in mode MODE, with
11972 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11974 const char *
11975 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11977 int nb, ne;
11979 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11980 gcc_unreachable ();
11982 /* Prefer rldimi because rlwimi is cracked. */
11983 if (TARGET_POWERPC64
11984 && (!dot || mode == DImode)
11985 && GET_CODE (operands[4]) != LSHIFTRT
11986 && ne == INTVAL (operands[2]))
11988 operands[3] = GEN_INT (63 - nb);
11989 if (dot)
11990 return "rldimi. %0,%1,%2,%3";
11991 return "rldimi %0,%1,%2,%3";
11994 if (nb < 32 && ne < 32)
11996 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11997 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11998 operands[3] = GEN_INT (31 - nb);
11999 operands[4] = GEN_INT (31 - ne);
12000 if (dot)
12001 return "rlwimi. %0,%1,%2,%3,%4";
12002 return "rlwimi %0,%1,%2,%3,%4";
12005 gcc_unreachable ();
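/* Worked example (illustrative): inserting with (ashift:SI x 8) and
   mask 0x0000ff00 gives nb == 15 and ne == 8, so on a 32-bit target
   (where the preferred rldimi is unavailable) the template is

	rlwimi %0,%1,8,16,23

   which rotates the source left by 8 and inserts bits 16..23 (in
   big-endian bit numbering) into the destination.  */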
12008 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
12009 using two machine instructions. */
12011 bool
12012 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
12014 /* There are two kinds of AND we can handle with two insns:
12015 1) those we can do with two rl* insns;
12016 2) ori[s];xori[s].
12018 We do not handle that last case yet. */
12020 /* If there is just one stretch of ones, we can do it. */
12021 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
12022 return true;
12024 /* Otherwise, fill in the lowest "hole"; if we can do the result with
12025 one insn, we can do the whole thing with two. */
12026 unsigned HOST_WIDE_INT val = INTVAL (c);
12027 unsigned HOST_WIDE_INT bit1 = val & -val;
12028 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12029 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12030 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12031 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
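/* Worked example (illustrative): C = 0xff0f has a hole at bits 4..7.
   Filling that lowest hole yields 0xffff, a single stretch of ones that
   one rl* instruction can handle, so the whole AND can be done in two
   insns.  */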
12034 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
12035 If EXPAND is true, split rotate-and-mask instructions we generate to
12036 their constituent parts as well (this is used during expand); if DOT
12037 is 1, make the last insn a record-form instruction clobbering the
12038 destination GPR and setting the CC reg (from operands[3]); if 2, set
12039 that GPR as well as the CC reg. */
12041 void
12042 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
12044 gcc_assert (!(expand && dot));
12046 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
12048 /* If it is one stretch of ones, it is DImode; shift left, mask, then
12049 shift right. This generates better code than doing the masks without
12050 shifts, or shifting first right and then left. */
12051 int nb, ne;
12052 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
12054 gcc_assert (mode == DImode);
12056 int shift = 63 - nb;
12057 if (expand)
12059 rtx tmp1 = gen_reg_rtx (DImode);
12060 rtx tmp2 = gen_reg_rtx (DImode);
12061 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
12062 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
12063 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
12065 else
12067 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
12068 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
12069 emit_move_insn (operands[0], tmp);
12070 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
12071 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12073 return;
12076 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
12077 that does the rest. */
12078 unsigned HOST_WIDE_INT bit1 = val & -val;
12079 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12080 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12081 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12083 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
12084 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
12086 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
12088 /* Two "no-rotate"-and-mask instructions, for SImode. */
12089 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
12091 gcc_assert (mode == SImode);
12093 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12094 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
12095 emit_move_insn (reg, tmp);
12096 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12097 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12098 return;
12101 gcc_assert (mode == DImode);
12103 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
12104 insns; we have to do the first in SImode, because it wraps. */
12105 if (mask2 <= 0xffffffff
12106 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
12108 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12109 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
12110 GEN_INT (mask1));
12111 rtx reg_low = gen_lowpart (SImode, reg);
12112 emit_move_insn (reg_low, tmp);
12113 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12114 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12115 return;
12118 /* Two rld* insns: rotate, clear the hole in the middle (which now is
12119 at the top end), rotate back and clear the other hole. */
12120 int right = exact_log2 (bit3);
12121 int left = 64 - right;
12123 /* Rotate the mask too. */
12124 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
12126 if (expand)
12128 rtx tmp1 = gen_reg_rtx (DImode);
12129 rtx tmp2 = gen_reg_rtx (DImode);
12130 rtx tmp3 = gen_reg_rtx (DImode);
12131 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
12132 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
12133 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
12134 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
12136 else
12138 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
12139 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
12140 emit_move_insn (operands[0], tmp);
12141 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
12142 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
12143 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12147 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
12148 for lfq and stfq insns iff the registers are hard registers. */
12150 int
12151 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
12153 /* We might have been passed a SUBREG. */
12154 if (!REG_P (reg1) || !REG_P (reg2))
12155 return 0;
12157 /* We might have been passed non floating point registers. */
12158 if (!FP_REGNO_P (REGNO (reg1))
12159 || !FP_REGNO_P (REGNO (reg2)))
12160 return 0;
12162 return (REGNO (reg1) == REGNO (reg2) - 1);
12165 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
12166 addr1 and addr2 must be in consecutive memory locations
12167 (addr2 == addr1 + 8). */
12169 int
12170 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
12172 rtx addr1, addr2;
12173 unsigned int reg1, reg2;
12174 int offset1, offset2;
12176 /* The mems cannot be volatile. */
12177 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
12178 return 0;
12180 addr1 = XEXP (mem1, 0);
12181 addr2 = XEXP (mem2, 0);
12183 /* Extract an offset (if used) from the first addr. */
12184 if (GET_CODE (addr1) == PLUS)
12186 /* If not a REG, return zero. */
12187 if (!REG_P (XEXP (addr1, 0)))
12188 return 0;
12189 else
12191 reg1 = REGNO (XEXP (addr1, 0));
12192 /* The offset must be constant! */
12193 if (!CONST_INT_P (XEXP (addr1, 1)))
12194 return 0;
12195 offset1 = INTVAL (XEXP (addr1, 1));
12198 else if (!REG_P (addr1))
12199 return 0;
12200 else
12202 reg1 = REGNO (addr1);
12203 /* This was a simple (mem (reg)) expression. Offset is 0. */
12204 offset1 = 0;
12207 /* And now for the second addr. */
12208 if (GET_CODE (addr2) == PLUS)
12210 /* If not a REG, return zero. */
12211 if (!REG_P (XEXP (addr2, 0)))
12212 return 0;
12213 else
12215 reg2 = REGNO (XEXP (addr2, 0));
12216 /* The offset must be constant. */
12217 if (!CONST_INT_P (XEXP (addr2, 1)))
12218 return 0;
12219 offset2 = INTVAL (XEXP (addr2, 1));
12222 else if (!REG_P (addr2))
12223 return 0;
12224 else
12226 reg2 = REGNO (addr2);
12227 /* This was a simple (mem (reg)) expression. Offset is 0. */
12228 offset2 = 0;
12231 /* Both of these must have the same base register. */
12232 if (reg1 != reg2)
12233 return 0;
12235 /* The offset for the second addr must be 8 more than the first addr. */
12236 if (offset2 != offset1 + 8)
12237 return 0;
12239 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12240 instructions. */
12241 return 1;
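/* Illustrative pair that passes (hypothetical RTL): with a common base
   register r9,

       mem1 = (mem:DF (plus:DI (reg:DI r9) (const_int 16)))
       mem2 = (mem:DF (plus:DI (reg:DI r9) (const_int 24)))

   the offsets differ by exactly 8, so the two accesses are candidates
   for a single lfq/stfq.  */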
12244 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
12245 need to use DDmode, in all other cases we can use the same mode. */
12246 static machine_mode
12247 rs6000_secondary_memory_needed_mode (machine_mode mode)
12249 if (lra_in_progress && mode == SDmode)
12250 return DDmode;
12251 return mode;
12254 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12255 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12256 only work on the traditional altivec registers, note if an altivec register
12257 was chosen. */
12259 static enum rs6000_reg_type
12260 register_to_reg_type (rtx reg, bool *is_altivec)
12262 HOST_WIDE_INT regno;
12263 enum reg_class rclass;
12265 if (SUBREG_P (reg))
12266 reg = SUBREG_REG (reg);
12268 if (!REG_P (reg))
12269 return NO_REG_TYPE;
12271 regno = REGNO (reg);
12272 if (!HARD_REGISTER_NUM_P (regno))
12274 if (!lra_in_progress && !reload_completed)
12275 return PSEUDO_REG_TYPE;
12277 regno = true_regnum (reg);
12278 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12279 return PSEUDO_REG_TYPE;
12282 gcc_assert (regno >= 0);
12284 if (is_altivec && ALTIVEC_REGNO_P (regno))
12285 *is_altivec = true;
12287 rclass = rs6000_regno_regclass[regno];
12288 return reg_class_to_reg_type[(int)rclass];
12291 /* Helper function to return the cost of adding a TOC entry address. */
12293 static inline int
12294 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12296 int ret;
12298 if (TARGET_CMODEL != CMODEL_SMALL)
12299 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12301 else
12302 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12304 return ret;
12307 /* Helper function for rs6000_secondary_reload to determine whether the memory
12308 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12309 needs reloading. Return negative if the memory is not handled by the memory
12310 helper functions (so a different reload method should be tried), 0 if no
12311 additional instructions are needed, and positive to give the extra cost for the
12312 memory. */
12314 static int
12315 rs6000_secondary_reload_memory (rtx addr,
12316 enum reg_class rclass,
12317 machine_mode mode)
12319 int extra_cost = 0;
12320 rtx reg, and_arg, plus_arg0, plus_arg1;
12321 addr_mask_type addr_mask;
12322 const char *type = NULL;
12323 const char *fail_msg = NULL;
12325 if (GPR_REG_CLASS_P (rclass))
12326 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12328 else if (rclass == FLOAT_REGS)
12329 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12331 else if (rclass == ALTIVEC_REGS)
12332 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12334 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12335 else if (rclass == VSX_REGS)
12336 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12337 & ~RELOAD_REG_AND_M16);
12339 /* If the register allocator hasn't made up its mind yet on the register
12340 class to use, settle on defaults.
12341 else if (rclass == NO_REGS)
12343 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12344 & ~RELOAD_REG_AND_M16);
12346 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12347 addr_mask &= ~(RELOAD_REG_INDEXED
12348 | RELOAD_REG_PRE_INCDEC
12349 | RELOAD_REG_PRE_MODIFY);
12352 else
12353 addr_mask = 0;
12355 /* If the register isn't valid in this register class, just return now. */
12356 if ((addr_mask & RELOAD_REG_VALID) == 0)
12358 if (TARGET_DEBUG_ADDR)
12360 fprintf (stderr,
12361 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12362 "not valid in class\n",
12363 GET_MODE_NAME (mode), reg_class_names[rclass]);
12364 debug_rtx (addr);
12367 return -1;
12370 switch (GET_CODE (addr))
12372 /* Does the register class support auto update forms for this mode? We
12373 don't need a scratch register, since the PowerPC only supports
12374 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12375 case PRE_INC:
12376 case PRE_DEC:
12377 reg = XEXP (addr, 0);
12378 if (!base_reg_operand (addr, GET_MODE (reg)))
12380 fail_msg = "no base register #1";
12381 extra_cost = -1;
12384 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12386 extra_cost = 1;
12387 type = "update";
12389 break;
12391 case PRE_MODIFY:
12392 reg = XEXP (addr, 0);
12393 plus_arg1 = XEXP (addr, 1);
12394 if (!base_reg_operand (reg, GET_MODE (reg))
12395 || GET_CODE (plus_arg1) != PLUS
12396 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12398 fail_msg = "bad PRE_MODIFY";
12399 extra_cost = -1;
12402 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12404 extra_cost = 1;
12405 type = "update";
12407 break;
12409 /* Do we need to simulate AND -16 to clear the bottom address bits used
12410 in VMX load/stores? Only allow the AND for vector sizes. */
12411 case AND:
12412 and_arg = XEXP (addr, 0);
12413 if (GET_MODE_SIZE (mode) != 16
12414 || !CONST_INT_P (XEXP (addr, 1))
12415 || INTVAL (XEXP (addr, 1)) != -16)
12417 fail_msg = "bad Altivec AND #1";
12418 extra_cost = -1;
12421 if (rclass != ALTIVEC_REGS)
12423 if (legitimate_indirect_address_p (and_arg, false))
12424 extra_cost = 1;
12426 else if (legitimate_indexed_address_p (and_arg, false))
12427 extra_cost = 2;
12429 else
12431 fail_msg = "bad Altivec AND #2";
12432 extra_cost = -1;
12435 type = "and";
12437 break;
12439 /* If this is an indirect address, make sure it is a base register. */
12440 case REG:
12441 case SUBREG:
12442 if (!legitimate_indirect_address_p (addr, false))
12444 extra_cost = 1;
12445 type = "move";
12447 break;
12449 /* If this is an indexed address, make sure the register class can handle
12450 indexed addresses for this mode. */
12451 case PLUS:
12452 plus_arg0 = XEXP (addr, 0);
12453 plus_arg1 = XEXP (addr, 1);
12455 /* (plus (plus (reg) (constant)) (constant)) is generated during
12456 push_reload processing, so handle it now. */
12457 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12459 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12461 extra_cost = 1;
12462 type = "offset";
12466 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12467 push_reload processing, so handle it now. */
12468 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12470 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12472 extra_cost = 1;
12473 type = "indexed #2";
12477 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12479 fail_msg = "no base register #2";
12480 extra_cost = -1;
12483 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12485 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12486 || !legitimate_indexed_address_p (addr, false))
12488 extra_cost = 1;
12489 type = "indexed";
12493 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12494 && CONST_INT_P (plus_arg1))
12496 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12498 extra_cost = 1;
12499 type = "vector d-form offset";
12503 /* Make sure the register class can handle offset addresses. */
12504 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12506 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12508 extra_cost = 1;
12509 type = "offset #2";
12513 else
12515 fail_msg = "bad PLUS";
12516 extra_cost = -1;
12519 break;
12521 case LO_SUM:
12522 /* Quad offsets are restricted and can't handle normal addresses. */
12523 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12525 extra_cost = -1;
12526 type = "vector d-form lo_sum";
12529 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12531 fail_msg = "bad LO_SUM";
12532 extra_cost = -1;
12535 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12537 extra_cost = 1;
12538 type = "lo_sum";
12540 break;
12542 /* Static addresses need to create a TOC entry. */
12543 case CONST:
12544 case SYMBOL_REF:
12545 case LABEL_REF:
12546 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12548 extra_cost = -1;
12549 type = "vector d-form lo_sum #2";
12552 else
12554 type = "address";
12555 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12557 break;
12559 /* TOC references look like offsetable memory. */
12560 case UNSPEC:
12561 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12563 fail_msg = "bad UNSPEC";
12564 extra_cost = -1;
12567 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12569 extra_cost = -1;
12570 type = "vector d-form lo_sum #3";
12573 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12575 extra_cost = 1;
12576 type = "toc reference";
12578 break;
12580 default:
12582 fail_msg = "bad address";
12583 extra_cost = -1;
12587 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12589 if (extra_cost < 0)
12590 fprintf (stderr,
12591 "rs6000_secondary_reload_memory error: mode = %s, "
12592 "class = %s, addr_mask = '%s', %s\n",
12593 GET_MODE_NAME (mode),
12594 reg_class_names[rclass],
12595 rs6000_debug_addr_mask (addr_mask, false),
12596 (fail_msg != NULL) ? fail_msg : "<bad address>");
12598 else
12599 fprintf (stderr,
12600 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12601 "addr_mask = '%s', extra cost = %d, %s\n",
12602 GET_MODE_NAME (mode),
12603 reg_class_names[rclass],
12604 rs6000_debug_addr_mask (addr_mask, false),
12605 extra_cost,
12606 (type) ? type : "<none>");
12608 debug_rtx (addr);
12611 return extra_cost;
12614 /* Helper function for rs6000_secondary_reload to return true if a move to a
12615 different register class is really a simple move. */
12617 static bool
12618 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12619 enum rs6000_reg_type from_type,
12620 machine_mode mode)
12622 int size = GET_MODE_SIZE (mode);
12624 /* Add support for various direct moves available. In this function, we only
12625 look at cases where we don't need any extra registers, and one or more
12626 simple move insns are issued. Originally small integers are not allowed
12627 in FPR/VSX registers. Single precision binary floating point is not a simple
12628 move because we need to convert to the single precision memory layout.
12629 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12630 need special direct move handling, which we do not support yet. */
12631 if (TARGET_DIRECT_MOVE
12632 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12633 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12635 if (TARGET_POWERPC64)
12637 /* ISA 2.07: MTVSRD or MVFVSRD. */
12638 if (size == 8)
12639 return true;
12641 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12642 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12643 return true;
12646 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12647 if (TARGET_P8_VECTOR)
12649 if (mode == SImode)
12650 return true;
12652 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12653 return true;
12656 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12657 if (mode == SDmode)
12658 return true;
12661 /* Move to/from SPR. */
12662 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12663 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12664 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12665 return true;
12667 return false;
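/* For instance (illustrative): a DImode move between a GPR and a VSX
   register on a 64-bit ISA 2.07 target is "simple" because a single
   mtvsrd or mfvsrd does the job, so no scratch register is needed.  */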
12670 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12671 special direct moves that involve allocating an extra register.  Return
12672 true and set up SRI if there is a helper pattern for the move, or false
12673 if not. */
12675 static bool
12676 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12677 enum rs6000_reg_type from_type,
12678 machine_mode mode,
12679 secondary_reload_info *sri,
12680 bool altivec_p)
12682 bool ret = false;
12683 enum insn_code icode = CODE_FOR_nothing;
12684 int cost = 0;
12685 int size = GET_MODE_SIZE (mode);
12687 if (TARGET_POWERPC64 && size == 16)
12689 /* Handle moving 128-bit values from GPRs to VSX registers on
12690 ISA 2.07 (power8, power9) when running in 64-bit mode using
12691 XXPERMDI to glue the two 64-bit values back together. */
12692 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12694 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12695 icode = reg_addr[mode].reload_vsx_gpr;
12698 /* Handle moving 128-bit values from VSX registers to GPRs on
12699 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12700 bottom 64-bit value. */
12701 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12703 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12704 icode = reg_addr[mode].reload_gpr_vsx;
12708 else if (TARGET_POWERPC64 && mode == SFmode)
12710 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12712 cost = 3; /* xscvdpspn, mfvsrd, and. */
12713 icode = reg_addr[mode].reload_gpr_vsx;
12716 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12718 cost = 2; /* mtvsrz, xscvspdpn. */
12719 icode = reg_addr[mode].reload_vsx_gpr;
12723 else if (!TARGET_POWERPC64 && size == 8)
12725 /* Handle moving 64-bit values from GPRs to floating point registers on
12726 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12727 32-bit values back together. Altivec register classes must be handled
12728 specially since a different instruction is used, and the secondary
12729 reload support requires a single instruction class in the scratch
12730 register constraint. However, right now TFmode is not allowed in
12731 Altivec registers, so the pattern will never match. */
12732 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12734 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12735 icode = reg_addr[mode].reload_fpr_gpr;
12739 if (icode != CODE_FOR_nothing)
12741 ret = true;
12742 if (sri)
12744 sri->icode = icode;
12745 sri->extra_cost = cost;
12749 return ret;
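/* For instance (illustrative): moving a TImode value from GPRs to a VSX
   register on a 64-bit power8 uses the reload_vsx_gpr helper, costed
   above as two mtvsrd's plus one xxpermdi to glue the halves back
   together.  */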
12752 /* Return whether a move between two register classes can be done either
12753 directly (simple move) or via a pattern that uses a single extra temporary
12754 (using ISA 2.07's direct move in this case). */
12756 static bool
12757 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12758 enum rs6000_reg_type from_type,
12759 machine_mode mode,
12760 secondary_reload_info *sri,
12761 bool altivec_p)
12763 /* Fall back to load/store reloads if either type is not a register. */
12764 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12765 return false;
12767 /* If we haven't allocated registers yet, assume the move can be done for the
12768 standard register types. */
12769 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12770 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12771 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12772 return true;
12774 /* A move within the same set of registers is a simple move for
12775    non-specialized registers. */
12776 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12777 return true;
12779 /* Check whether a simple move can be done directly. */
12780 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12782 if (sri)
12784 sri->icode = CODE_FOR_nothing;
12785 sri->extra_cost = 0;
12787 return true;
12790 /* Now check if we can do it in a few steps. */
12791 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12792 altivec_p);
12795 /* Inform reload about cases where moving X with a mode MODE to a register in
12796 RCLASS requires an extra scratch or immediate register. Return the class
12797 needed for the immediate register.
12799 For VSX and Altivec, we may need a register to convert sp+offset into
12800 reg+sp.
12802 For misaligned 64-bit gpr loads and stores we need a register to
12803 convert an offset address to indirect. */
12805 static reg_class_t
12806 rs6000_secondary_reload (bool in_p,
12807 rtx x,
12808 reg_class_t rclass_i,
12809 machine_mode mode,
12810 secondary_reload_info *sri)
12812 enum reg_class rclass = (enum reg_class) rclass_i;
12813 reg_class_t ret = ALL_REGS;
12814 enum insn_code icode;
12815 bool default_p = false;
12816 bool done_p = false;
12818 /* Allow subreg of memory before/during reload. */
12819 bool memory_p = (MEM_P (x)
12820 || (!reload_completed && SUBREG_P (x)
12821 && MEM_P (SUBREG_REG (x))));
12823 sri->icode = CODE_FOR_nothing;
12824 sri->t_icode = CODE_FOR_nothing;
12825 sri->extra_cost = 0;
12826 icode = ((in_p)
12827 ? reg_addr[mode].reload_load
12828 : reg_addr[mode].reload_store);
12830 if (REG_P (x) || register_operand (x, mode))
12832 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12833 bool altivec_p = (rclass == ALTIVEC_REGS);
12834 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12836 if (!in_p)
12837 std::swap (to_type, from_type);
12839 /* Can we do a direct move of some sort? */
12840 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12841 altivec_p))
12843 icode = (enum insn_code)sri->icode;
12844 default_p = false;
12845 done_p = true;
12846 ret = NO_REGS;
12850 /* Make sure 0.0 is not reloaded or forced into memory. */
12851 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12853 ret = NO_REGS;
12854 default_p = false;
12855 done_p = true;
12858 /* If this is a scalar floating point value and we want to load it into the
12859 traditional Altivec registers, do it via a move via a traditional floating
12860 point register, unless we have D-form addressing. Also make sure that
12861 non-zero constants use an FPR. */
12862 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12863 && !mode_supports_vmx_dform (mode)
12864 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12865 && (memory_p || CONST_DOUBLE_P (x)))
12867 ret = FLOAT_REGS;
12868 default_p = false;
12869 done_p = true;
12872 /* Handle reload of load/stores if we have reload helper functions. */
12873 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12875 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12876 mode);
12878 if (extra_cost >= 0)
12880 done_p = true;
12881 ret = NO_REGS;
12882 if (extra_cost > 0)
12884 sri->extra_cost = extra_cost;
12885 sri->icode = icode;
12890 /* Handle unaligned loads and stores of integer registers. */
12891 if (!done_p && TARGET_POWERPC64
12892 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12893 && memory_p
12894 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12896 rtx addr = XEXP (x, 0);
12897 rtx off = address_offset (addr);
12899 if (off != NULL_RTX)
12901 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12902 unsigned HOST_WIDE_INT offset = INTVAL (off);
12904 /* We need a secondary reload when our legitimate_address_p
12905 says the address is good (as otherwise the entire address
12906 will be reloaded), and the offset is not a multiple of
12907 four or we have an address wrap. Address wrap will only
12908 occur for LO_SUMs since legitimate_offset_address_p
12909 rejects addresses for 16-byte mems that will wrap. */
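	  /* Worked example: a DImode access at offset 6 has extra == 0,
	     passes the range check (6 + 0x8000 < 0x10000), and has
	     (6 & 3) != 0, so we flag a secondary reload; DS-form
	     instructions such as ld/std require a displacement that is a
	     multiple of 4.  */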
12910 if (GET_CODE (addr) == LO_SUM
12911 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12912 && ((offset & 3) != 0
12913 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12914 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12915 && (offset & 3) != 0))
12917 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12918 if (in_p)
12919 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12920 : CODE_FOR_reload_di_load);
12921 else
12922 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12923 : CODE_FOR_reload_di_store);
12924 sri->extra_cost = 2;
12925 ret = NO_REGS;
12926 done_p = true;
12928 else
12929 default_p = true;
12931 else
12932 default_p = true;
12935 if (!done_p && !TARGET_POWERPC64
12936 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12937 && memory_p
12938 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12940 rtx addr = XEXP (x, 0);
12941 rtx off = address_offset (addr);
12943 if (off != NULL_RTX)
12945 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12946 unsigned HOST_WIDE_INT offset = INTVAL (off);
12948 /* We need a secondary reload when our legitimate_address_p
12949 says the address is good (as otherwise the entire address
12950 will be reloaded), and we have a wrap.
12952 legitimate_lo_sum_address_p allows LO_SUM addresses to
12953 have any offset so test for wrap in the low 16 bits.
12955 legitimate_offset_address_p checks for the range
12956 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12957 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12958 [0x7ff4,0x7fff] respectively, so test for the
12959 intersection of these ranges, [0x7ffc,0x7fff] and
12960 [0x7ff4,0x7ff7] respectively.
12962 Note that the address we see here may have been
12963 manipulated by legitimize_reload_address. */
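	  /* Worked example (32-bit, mode size 8, so extra == 4): an offset
	     of 0x7ffc places the second word at 0x8000, outside the 16-bit
	     signed displacement range.  The unsigned test below computes
	     0x7ffc - (0x8000 - 4) == 0, which is < UNITS_PER_WORD, so the
	     reload is used; at offset 0x7ff8 the subtraction wraps to a
	     large unsigned value and no reload is needed.  */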
12964 if (GET_CODE (addr) == LO_SUM
12965 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12966 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12968 if (in_p)
12969 sri->icode = CODE_FOR_reload_si_load;
12970 else
12971 sri->icode = CODE_FOR_reload_si_store;
12972 sri->extra_cost = 2;
12973 ret = NO_REGS;
12974 done_p = true;
12976 else
12977 default_p = true;
12979 else
12980 default_p = true;
12983 if (!done_p)
12984 default_p = true;
12986 if (default_p)
12987 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12989 gcc_assert (ret != ALL_REGS);
12991 if (TARGET_DEBUG_ADDR)
12993 fprintf (stderr,
12994 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12995 "mode = %s",
12996 reg_class_names[ret],
12997 in_p ? "true" : "false",
12998 reg_class_names[rclass],
12999 GET_MODE_NAME (mode));
13001 if (reload_completed)
13002 fputs (", after reload", stderr);
13004 if (!done_p)
13005 fputs (", done_p not set", stderr);
13007 if (default_p)
13008 fputs (", default secondary reload", stderr);
13010 if (sri->icode != CODE_FOR_nothing)
13011 fprintf (stderr, ", reload func = %s, extra cost = %d",
13012 insn_data[sri->icode].name, sri->extra_cost);
13014 else if (sri->extra_cost > 0)
13015 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
13017 fputs ("\n", stderr);
13018 debug_rtx (x);
13021 return ret;
13024 /* Better tracing for rs6000_secondary_reload_inner. */
13026 static void
13027 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
13028 bool store_p)
13030 rtx set, clobber;
13032 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
13034 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
13035 store_p ? "store" : "load");
13037 if (store_p)
13038 set = gen_rtx_SET (mem, reg);
13039 else
13040 set = gen_rtx_SET (reg, mem);
13042 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
13043 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
13046 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
13047 ATTRIBUTE_NORETURN;
13049 static void
13050 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
13051 bool store_p)
13053 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
13054 gcc_unreachable ();
13057 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
13058 reload helper functions. These were identified in
13059 rs6000_secondary_reload_memory, and if reload decided to use the secondary
13060 reload, it calls the insns:
13061 reload_<RELOAD:mode>_<P:mptrsize>_store
13062 reload_<RELOAD:mode>_<P:mptrsize>_load
13064 which in turn calls this function, to do whatever is necessary to create
13065 valid addresses. */
13067 void
13068 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
13070 int regno = true_regnum (reg);
13071 machine_mode mode = GET_MODE (reg);
13072 addr_mask_type addr_mask;
13073 rtx addr;
13074 rtx new_addr;
13075 rtx op_reg, op0, op1;
13076 rtx and_op;
13077 rtx cc_clobber;
13078 rtvec rv;
13080 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
13081 || !base_reg_operand (scratch, GET_MODE (scratch)))
13082 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13084 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
13085 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
13087 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
13088 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
13090 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
13091 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
13093 else
13094 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13096 /* Make sure the mode is valid in this register class. */
13097 if ((addr_mask & RELOAD_REG_VALID) == 0)
13098 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13100 if (TARGET_DEBUG_ADDR)
13101 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
13103 new_addr = addr = XEXP (mem, 0);
13104 switch (GET_CODE (addr))
13106 /* Does the register class support auto update forms for this mode? If
13107 not, do the update now. We don't need a scratch register, since the
13108 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
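    /* For instance (illustrative only), a (pre_dec (reg)) DFmode access in
       a class without update forms is rewritten here as an explicit
       "addi base,base,-8" (delta == -GET_MODE_SIZE (DFmode)) followed by a
       plain 0(base) access.  */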
13109 case PRE_INC:
13110 case PRE_DEC:
13111 op_reg = XEXP (addr, 0);
13112 if (!base_reg_operand (op_reg, Pmode))
13113 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13115 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
13117 int delta = GET_MODE_SIZE (mode);
13118 if (GET_CODE (addr) == PRE_DEC)
13119 delta = -delta;
13120 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
13121 new_addr = op_reg;
13123 break;
13125 case PRE_MODIFY:
13126 op0 = XEXP (addr, 0);
13127 op1 = XEXP (addr, 1);
13128 if (!base_reg_operand (op0, Pmode)
13129 || GET_CODE (op1) != PLUS
13130 || !rtx_equal_p (op0, XEXP (op1, 0)))
13131 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13133 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
13135 emit_insn (gen_rtx_SET (op0, op1));
13136 new_addr = reg;
13138 break;
13140 /* Do we need to simulate AND -16 to clear the bottom address bits used
13141 in VMX load/stores? */
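    /* When the class lacks the AND -16 form, the code below emits the
       masking explicitly, roughly as
	   (parallel [(set (reg scratch) (and (reg base) (const_int -16)))
		      (clobber (scratch:CC))])
       and then uses the scratch register as the address.  */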
13142 case AND:
13143 op0 = XEXP (addr, 0);
13144 op1 = XEXP (addr, 1);
13145 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
13147 if (REG_P (op0) || SUBREG_P (op0))
13148 op_reg = op0;
13150 else if (GET_CODE (op1) == PLUS)
13152 emit_insn (gen_rtx_SET (scratch, op1));
13153 op_reg = scratch;
13156 else
13157 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13159 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
13160 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
13161 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
13162 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
13163 new_addr = scratch;
13165 break;
13167 /* If this is an indirect address, make sure it is a base register. */
13168 case REG:
13169 case SUBREG:
13170 if (!base_reg_operand (addr, GET_MODE (addr)))
13172 emit_insn (gen_rtx_SET (scratch, addr));
13173 new_addr = scratch;
13175 break;
13177 /* If this is an indexed address, make sure the register class can handle
13178 indexed addresses for this mode. */
13179 case PLUS:
13180 op0 = XEXP (addr, 0);
13181 op1 = XEXP (addr, 1);
13182 if (!base_reg_operand (op0, Pmode))
13183 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13185 else if (int_reg_operand (op1, Pmode))
13187 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13189 emit_insn (gen_rtx_SET (scratch, addr));
13190 new_addr = scratch;
13194 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
13196 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
13197 || !quad_address_p (addr, mode, false))
13199 emit_insn (gen_rtx_SET (scratch, addr));
13200 new_addr = scratch;
13204 /* Make sure the register class can handle offset addresses. */
13205 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
13207 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13209 emit_insn (gen_rtx_SET (scratch, addr));
13210 new_addr = scratch;
13214 else
13215 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13217 break;
13219 case LO_SUM:
13220 op0 = XEXP (addr, 0);
13221 op1 = XEXP (addr, 1);
13222 if (!base_reg_operand (op0, Pmode))
13223 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13225 else if (int_reg_operand (op1, Pmode))
13227 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13229 emit_insn (gen_rtx_SET (scratch, addr));
13230 new_addr = scratch;
13234 /* Quad offsets are restricted and can't handle normal addresses. */
13235 else if (mode_supports_dq_form (mode))
13237 emit_insn (gen_rtx_SET (scratch, addr));
13238 new_addr = scratch;
13241 /* Make sure the register class can handle offset addresses. */
13242 else if (legitimate_lo_sum_address_p (mode, addr, false))
13244 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13246 emit_insn (gen_rtx_SET (scratch, addr));
13247 new_addr = scratch;
13251 else
13252 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13254 break;
13256 case SYMBOL_REF:
13257 case CONST:
13258 case LABEL_REF:
13259 rs6000_emit_move (scratch, addr, Pmode);
13260 new_addr = scratch;
13261 break;
13263 default:
13264 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13267 /* Adjust the address if it changed. */
13268 if (addr != new_addr)
13270 mem = replace_equiv_address_nv (mem, new_addr);
13271 if (TARGET_DEBUG_ADDR)
13272 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13275 /* Now create the move. */
13276 if (store_p)
13277 emit_insn (gen_rtx_SET (mem, reg));
13278 else
13279 emit_insn (gen_rtx_SET (reg, mem));
13281 return;
13284 /* Convert reloads involving 64-bit gprs and misaligned offset
13285 addressing, or multiple 32-bit gprs and offsets that are too large,
13286 to use indirect addressing. */
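/* For example (hypothetical registers), a misaligned DImode access at
   (plus (reg 9) (const_int 6)) has its address moved into the scratch
   register first ("addi scratch,r9,6"), after which the access itself
   becomes a simple 0(scratch) indirect load or store.  */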
13288 void
13289 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13291 int regno = true_regnum (reg);
13292 enum reg_class rclass;
13293 rtx addr;
13294 rtx scratch_or_premodify = scratch;
13296 if (TARGET_DEBUG_ADDR)
13298 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13299 store_p ? "store" : "load");
13300 fprintf (stderr, "reg:\n");
13301 debug_rtx (reg);
13302 fprintf (stderr, "mem:\n");
13303 debug_rtx (mem);
13304 fprintf (stderr, "scratch:\n");
13305 debug_rtx (scratch);
13308 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13309 gcc_assert (MEM_P (mem));
13310 rclass = REGNO_REG_CLASS (regno);
13311 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13312 addr = XEXP (mem, 0);
13314 if (GET_CODE (addr) == PRE_MODIFY)
13316 gcc_assert (REG_P (XEXP (addr, 0))
13317 && GET_CODE (XEXP (addr, 1)) == PLUS
13318 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13319 scratch_or_premodify = XEXP (addr, 0);
13320 addr = XEXP (addr, 1);
13322 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13324 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13326 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13328 /* Now create the move. */
13329 if (store_p)
13330 emit_insn (gen_rtx_SET (mem, reg));
13331 else
13332 emit_insn (gen_rtx_SET (reg, mem));
13334 return;
13337 /* Given an rtx X being reloaded into a reg required to be
13338 in class CLASS, return the class of reg to actually use.
13339 In general this is just CLASS; but on some machines
13340 in some cases it is preferable to use a more restrictive class.
13342 On the RS/6000, we have to return NO_REGS when we want to reload a
13343 floating-point CONST_DOUBLE to force it to be copied to memory.
13345 We also don't want to reload integer values into floating-point
13346 registers if we can at all help it. In fact, this can
13347 cause reload to die, if it tries to generate a reload of CTR
13348 into a FP register and discovers it doesn't have the memory location
13349 required.
13351 ??? Would it be a good idea to have reload do the converse, that is
13352 try to reload floating modes into FP registers if possible?
13355 static enum reg_class
13356 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13358 machine_mode mode = GET_MODE (x);
13359 bool is_constant = CONSTANT_P (x);
13361 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13362 reload class for it. */
13363 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13364 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13365 return NO_REGS;
13367 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13368 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13369 return NO_REGS;
13371 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13372 the reloading of address expressions using PLUS into floating point
13373 registers. */
13374 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13376 if (is_constant)
13378 /* Zero is always allowed in all VSX registers. */
13379 if (x == CONST0_RTX (mode))
13380 return rclass;
13382 /* If this is a vector constant that can be formed with a few Altivec
13383 instructions, we want Altivec registers. */
13384 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13385 return ALTIVEC_REGS;
13387 /* If this is an integer constant that can easily be loaded into
13388 vector registers, allow it. */
13389 if (CONST_INT_P (x))
13391 HOST_WIDE_INT value = INTVAL (x);
13393 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13394 2.06 can generate it in the Altivec registers with
13395 VSPLTI<x>. */
13396 if (value == -1)
13398 if (TARGET_P8_VECTOR)
13399 return rclass;
13400 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13401 return ALTIVEC_REGS;
13402 else
13403 return NO_REGS;
13406 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13407 a sign extend in the Altivec registers. */
13408 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13409 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13410 return ALTIVEC_REGS;
13413 /* Force constant to memory. */
13414 return NO_REGS;
13417 /* D-form addressing can easily reload the value. */
13418 if (mode_supports_vmx_dform (mode)
13419 || mode_supports_dq_form (mode))
13420 return rclass;
13422 /* If this is a scalar floating point value and we don't have D-form
13423 addressing, prefer the traditional floating point registers so that we
13424 can use D-form (register+offset) addressing. */
13425 if (rclass == VSX_REGS
13426 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13427 return FLOAT_REGS;
13429 /* Prefer the Altivec registers if Altivec is handling the vector
13430 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13431 loads. */
13432 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13433 || mode == V1TImode)
13434 return ALTIVEC_REGS;
13436 return rclass;
13439 if (is_constant || GET_CODE (x) == PLUS)
13441 if (reg_class_subset_p (GENERAL_REGS, rclass))
13442 return GENERAL_REGS;
13443 if (reg_class_subset_p (BASE_REGS, rclass))
13444 return BASE_REGS;
13445 return NO_REGS;
13448 /* For the vector pair and vector quad modes, prefer their natural register
13449 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13450 the GPR registers. */
13451 if (rclass == GEN_OR_FLOAT_REGS)
13453 if (mode == OOmode)
13454 return VSX_REGS;
13456 if (mode == XOmode)
13457 return FLOAT_REGS;
13459 if (GET_MODE_CLASS (mode) == MODE_INT)
13460 return GENERAL_REGS;
13463 return rclass;
13466 /* Debug version of rs6000_preferred_reload_class. */
13467 static enum reg_class
13468 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13470 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13472 fprintf (stderr,
13473 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13474 "mode = %s, x:\n",
13475 reg_class_names[ret], reg_class_names[rclass],
13476 GET_MODE_NAME (GET_MODE (x)));
13477 debug_rtx (x);
13479 return ret;
13482 /* If we are copying between FP or AltiVec registers and anything else, we need
13483 a memory location. The exception is when we are targeting ppc64 and the
13484 direct moves between FPRs and GPRs are available.  Also, under VSX, you
13485 can copy vector registers from the FP register set to the Altivec register
13486 set and vice versa. */
13488 static bool
13489 rs6000_secondary_memory_needed (machine_mode mode,
13490 reg_class_t from_class,
13491 reg_class_t to_class)
13493 enum rs6000_reg_type from_type, to_type;
13494 bool altivec_p = ((from_class == ALTIVEC_REGS)
13495 || (to_class == ALTIVEC_REGS));
13497 /* If a simple/direct move is available, we don't need secondary memory.  */
13498 from_type = reg_class_to_reg_type[(int)from_class];
13499 to_type = reg_class_to_reg_type[(int)to_class];
13501 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13502 (secondary_reload_info *)0, altivec_p))
13503 return false;
13505 /* If we have a floating point or vector register class, we need to use
13506 memory to transfer the data. */
13507 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13508 return true;
13510 return false;
13513 /* Debug version of rs6000_secondary_memory_needed. */
13514 static bool
13515 rs6000_debug_secondary_memory_needed (machine_mode mode,
13516 reg_class_t from_class,
13517 reg_class_t to_class)
13519 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13521 fprintf (stderr,
13522 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13523 "to_class = %s, mode = %s\n",
13524 ret ? "true" : "false",
13525 reg_class_names[from_class],
13526 reg_class_names[to_class],
13527 GET_MODE_NAME (mode));
13529 return ret;
13532 /* Return the register class of a scratch register needed to copy IN into
13533 or out of a register in RCLASS in MODE. If it can be done directly,
13534 NO_REGS is returned. */
13536 static enum reg_class
13537 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13538 rtx in)
13540 int regno;
13542 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13543 #if TARGET_MACHO
13544 && MACHOPIC_INDIRECT
13545 #endif
13548 /* We cannot copy a symbolic operand directly into anything
13549 other than BASE_REGS for TARGET_ELF. So indicate that a
13550 register from BASE_REGS is needed as an intermediate
13551 register.
13553 On Darwin, pic addresses require a load from memory, which
13554 needs a base register. */
13555 if (rclass != BASE_REGS
13556 && (SYMBOL_REF_P (in)
13557 || GET_CODE (in) == HIGH
13558 || GET_CODE (in) == LABEL_REF
13559 || GET_CODE (in) == CONST))
13560 return BASE_REGS;
13563 if (REG_P (in))
13565 regno = REGNO (in);
13566 if (!HARD_REGISTER_NUM_P (regno))
13568 regno = true_regnum (in);
13569 if (!HARD_REGISTER_NUM_P (regno))
13570 regno = -1;
13573 else if (SUBREG_P (in))
13575 regno = true_regnum (in);
13576 if (!HARD_REGISTER_NUM_P (regno))
13577 regno = -1;
13579 else
13580 regno = -1;
13582 /* If we have VSX register moves, prefer moving scalar values between
13583 Altivec registers and GPR by going via an FPR (and then via memory)
13584 instead of reloading the secondary memory address for Altivec moves. */
13585 if (TARGET_VSX
13586 && GET_MODE_SIZE (mode) < 16
13587 && !mode_supports_vmx_dform (mode)
13588 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13589 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13590 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13591 && (regno >= 0 && INT_REGNO_P (regno)))))
13592 return FLOAT_REGS;
13594 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13595 into anything. */
13596 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13597 || (regno >= 0 && INT_REGNO_P (regno)))
13598 return NO_REGS;
13600 /* Constants, memory, and VSX registers can go into VSX registers (both the
13601 traditional floating point and the altivec registers). */
13602 if (rclass == VSX_REGS
13603 && (regno == -1 || VSX_REGNO_P (regno)))
13604 return NO_REGS;
13606 /* Constants, memory, and FP registers can go into FP registers. */
13607 if ((regno == -1 || FP_REGNO_P (regno))
13608 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13609 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13611 /* Memory and AltiVec registers can go into AltiVec registers. */
13612 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13613 && rclass == ALTIVEC_REGS)
13614 return NO_REGS;
13616 /* We can copy among the CR registers. */
13617 if ((rclass == CR_REGS || rclass == CR0_REGS)
13618 && regno >= 0 && CR_REGNO_P (regno))
13619 return NO_REGS;
13621 /* Otherwise, we need GENERAL_REGS. */
13622 return GENERAL_REGS;
13625 /* Debug version of rs6000_secondary_reload_class. */
13626 static enum reg_class
13627 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13628 machine_mode mode, rtx in)
13630 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13631 fprintf (stderr,
13632 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13633 "mode = %s, input rtx:\n",
13634 reg_class_names[ret], reg_class_names[rclass],
13635 GET_MODE_NAME (mode));
13636 debug_rtx (in);
13638 return ret;
13641 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13643 static bool
13644 rs6000_can_change_mode_class (machine_mode from,
13645 machine_mode to,
13646 reg_class_t rclass)
13648 unsigned from_size = GET_MODE_SIZE (from);
13649 unsigned to_size = GET_MODE_SIZE (to);
13651 if (from_size != to_size)
13653 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13655 if (reg_classes_intersect_p (xclass, rclass))
13657 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13658 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13659 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13660 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13662 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13663 single register under VSX because the scalar part of the register
13664 is in the upper 64-bits, and not the lower 64-bits. Types like
13665 TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
13666 IEEE floating point can't overlap, and neither can small
13667 values. */
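	  /* Concrete instance: a DFmode <-> V2DImode subreg is rejected
	     below, since DFmode occupies the upper 64 bits of a single VSX
	     register while the subreg machinery would expect it in the low
	     half: from_size == 8 but 8 * to_nregs != to_size.  */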
13669 if (to_float128_vector_p && from_float128_vector_p)
13670 return true;
13672 else if (to_float128_vector_p || from_float128_vector_p)
13673 return false;
13675 /* TDmode in floating-mode registers must always go into a register
13676 pair with the most significant word in the even-numbered register
13677 to match ISA requirements. In little-endian mode, this does not
13678 match subreg numbering, so we cannot allow subregs. */
13679 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13680 return false;
13682 /* Allow SD<->DD changes, since SDmode values are stored in
13683 the low half of the DDmode, just like target-independent
13684 code expects. We need to allow at least SD->DD since
13685 rs6000_secondary_memory_needed_mode asks for that change
13686 to be made for SD reloads. */
13687 if ((to == DDmode && from == SDmode)
13688 || (to == SDmode && from == DDmode))
13689 return true;
13691 if (from_size < 8 || to_size < 8)
13692 return false;
13694 if (from_size == 8 && (8 * to_nregs) != to_size)
13695 return false;
13697 if (to_size == 8 && (8 * from_nregs) != from_size)
13698 return false;
13700 return true;
13702 else
13703 return true;
13706 /* Since the VSX register set includes traditional floating point registers
13707 and altivec registers, just check for the size being different instead of
13708 trying to check whether the modes are vector modes. Otherwise it won't
13709 allow say DF and DI to change classes. For types like TFmode and TDmode
13710 that take 2 64-bit registers, rather than a single 128-bit register, don't
13711 allow subregs of those types to other 128-bit types. */
13712 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13714 unsigned num_regs = (from_size + 15) / 16;
13715 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13716 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13717 return false;
13719 return (from_size == 8 || from_size == 16);
13722 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13723 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13724 return false;
13726 return true;
13729 /* Debug version of rs6000_can_change_mode_class. */
13730 static bool
13731 rs6000_debug_can_change_mode_class (machine_mode from,
13732 machine_mode to,
13733 reg_class_t rclass)
13735 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13737 fprintf (stderr,
13738 "rs6000_can_change_mode_class, return %s, from = %s, "
13739 "to = %s, rclass = %s\n",
13740 ret ? "true" : "false",
13741 GET_MODE_NAME (from), GET_MODE_NAME (to),
13742 reg_class_names[rclass]);
13744 return ret;
13747 /* Return a string to do a move operation of 128 bits of data. */
13749 const char *
13750 rs6000_output_move_128bit (rtx operands[])
13752 rtx dest = operands[0];
13753 rtx src = operands[1];
13754 machine_mode mode = GET_MODE (dest);
13755 int dest_regno;
13756 int src_regno;
13757 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13758 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13760 if (REG_P (dest))
13762 dest_regno = REGNO (dest);
13763 dest_gpr_p = INT_REGNO_P (dest_regno);
13764 dest_fp_p = FP_REGNO_P (dest_regno);
13765 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13766 dest_vsx_p = dest_fp_p | dest_vmx_p;
13768 else
13770 dest_regno = -1;
13771 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13774 if (REG_P (src))
13776 src_regno = REGNO (src);
13777 src_gpr_p = INT_REGNO_P (src_regno);
13778 src_fp_p = FP_REGNO_P (src_regno);
13779 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13780 src_vsx_p = src_fp_p | src_vmx_p;
13782 else
13784 src_regno = -1;
13785 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13788 /* Register moves. */
13789 if (dest_regno >= 0 && src_regno >= 0)
13791 if (dest_gpr_p)
13793 if (src_gpr_p)
13794 return "#";
13796 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13797 return (WORDS_BIG_ENDIAN
13798 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13799 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13801 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13802 return "#";
13805 else if (TARGET_VSX && dest_vsx_p)
13807 if (src_vsx_p)
13808 return "xxlor %x0,%x1,%x1";
13810 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13811 return (WORDS_BIG_ENDIAN
13812 ? "mtvsrdd %x0,%1,%L1"
13813 : "mtvsrdd %x0,%L1,%1");
13815 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13816 return "#";
13819 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13820 return "vor %0,%1,%1";
13822 else if (dest_fp_p && src_fp_p)
13823 return "#";
13826 /* Loads. */
13827 else if (dest_regno >= 0 && MEM_P (src))
13829 if (dest_gpr_p)
13831 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13832 return "lq %0,%1";
13833 else
13834 return "#";
13837 else if (TARGET_ALTIVEC && dest_vmx_p
13838 && altivec_indexed_or_indirect_operand (src, mode))
13839 return "lvx %0,%y1";
13841 else if (TARGET_VSX && dest_vsx_p)
13843 if (mode_supports_dq_form (mode)
13844 && quad_address_p (XEXP (src, 0), mode, true))
13845 return "lxv %x0,%1";
13847 else if (TARGET_P9_VECTOR)
13848 return "lxvx %x0,%y1";
13850 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13851 return "lxvw4x %x0,%y1";
13853 else
13854 return "lxvd2x %x0,%y1";
13857 else if (TARGET_ALTIVEC && dest_vmx_p)
13858 return "lvx %0,%y1";
13860 else if (dest_fp_p)
13861 return "#";
13864 /* Stores. */
13865 else if (src_regno >= 0 && MEM_P (dest))
13867 if (src_gpr_p)
13869 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13870 return "stq %1,%0";
13871 else
13872 return "#";
13875 else if (TARGET_ALTIVEC && src_vmx_p
13876 && altivec_indexed_or_indirect_operand (dest, mode))
13877 return "stvx %1,%y0";
13879 else if (TARGET_VSX && src_vsx_p)
13881 if (mode_supports_dq_form (mode)
13882 && quad_address_p (XEXP (dest, 0), mode, true))
13883 return "stxv %x1,%0";
13885 else if (TARGET_P9_VECTOR)
13886 return "stxvx %x1,%y0";
13888 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13889 return "stxvw4x %x1,%y0";
13891 else
13892 return "stxvd2x %x1,%y0";
13895 else if (TARGET_ALTIVEC && src_vmx_p)
13896 return "stvx %1,%y0";
13898 else if (src_fp_p)
13899 return "#";
13902 /* Constants. */
13903 else if (dest_regno >= 0
13904 && (CONST_INT_P (src)
13905 || CONST_WIDE_INT_P (src)
13906 || CONST_DOUBLE_P (src)
13907 || GET_CODE (src) == CONST_VECTOR))
13909 if (dest_gpr_p)
13910 return "#";
13912 else if ((dest_vmx_p && TARGET_ALTIVEC)
13913 || (dest_vsx_p && TARGET_VSX))
13914 return output_vec_const_move (operands);
13917 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13920 /* Validate a 128-bit move. */
13921 bool
13922 rs6000_move_128bit_ok_p (rtx operands[])
13924 machine_mode mode = GET_MODE (operands[0]);
13925 return (gpc_reg_operand (operands[0], mode)
13926 || gpc_reg_operand (operands[1], mode));
13929 /* Return true if a 128-bit move needs to be split. */
13930 bool
13931 rs6000_split_128bit_ok_p (rtx operands[])
13933 if (!reload_completed)
13934 return false;
13936 if (!gpr_or_gpr_p (operands[0], operands[1]))
13937 return false;
13939 if (quad_load_store_p (operands[0], operands[1]))
13940 return false;
13942 return true;
13946 /* Given a comparison operation, return the bit number in CCR to test. We
13947 know this is a valid comparison.
13949 SCC_P is 1 if this is for an scc. That means that %D will have been
13950 used instead of %C, so the bits will be in different places.
13952 Return -1 if OP isn't a valid comparison for some reason. */
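/* For instance, a GT comparison against CR2 yields base_bit == 8 and
   returns bit 9 (base_bit + 1); with SCC_P set, a GE comparison returns
   the bit in the unordered position, base_bit + 3 == 11.  */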
13954 int
13955 ccr_bit (rtx op, int scc_p)
13957 enum rtx_code code = GET_CODE (op);
13958 machine_mode cc_mode;
13959 int cc_regnum;
13960 int base_bit;
13961 rtx reg;
13963 if (!COMPARISON_P (op))
13964 return -1;
13966 reg = XEXP (op, 0);
13968 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13969 return -1;
13971 cc_mode = GET_MODE (reg);
13972 cc_regnum = REGNO (reg);
13973 base_bit = 4 * (cc_regnum - CR0_REGNO);
13975 validate_condition_mode (code, cc_mode);
13977 /* When generating a sCOND operation, only positive conditions are
13978 allowed. */
13979 if (scc_p)
13980 switch (code)
13982 case EQ:
13983 case GT:
13984 case LT:
13985 case UNORDERED:
13986 case GTU:
13987 case LTU:
13988 break;
13989 default:
13990 return -1;
13993 switch (code)
13995 case NE:
13996 return scc_p ? base_bit + 3 : base_bit + 2;
13997 case EQ:
13998 return base_bit + 2;
13999 case GT: case GTU: case UNLE:
14000 return base_bit + 1;
14001 case LT: case LTU: case UNGE:
14002 return base_bit;
14003 case ORDERED: case UNORDERED:
14004 return base_bit + 3;
14006 case GE: case GEU:
14007 /* If scc, we will have done a cror to put the bit in the
14008 unordered position. So test that bit. For integer, this is ! LT
14009 unless this is an scc insn. */
14010 return scc_p ? base_bit + 3 : base_bit;
14012 case LE: case LEU:
14013 return scc_p ? base_bit + 3 : base_bit + 1;
14015 default:
14016 return -1;
14020 /* Return the GOT register. */
14022 rtx
14023 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
14025 /* The second flow pass currently (June 1999) can't update
14026 regs_ever_live without disturbing other parts of the compiler, so
14027 update it here to make the prolog/epilogue code happy. */
14028 if (!can_create_pseudo_p ()
14029 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
14030 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
14032 crtl->uses_pic_offset_table = 1;
14034 return pic_offset_table_rtx;
14037 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
14039 /* Write out a function code label. */
14041 void
14042 rs6000_output_function_entry (FILE *file, const char *fname)
14044 if (fname[0] != '.')
14046 switch (DEFAULT_ABI)
14048 default:
14049 gcc_unreachable ();
14051 case ABI_AIX:
14052 if (DOT_SYMBOLS)
14053 putc ('.', file);
14054 else
14055 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
14056 break;
14058 case ABI_ELFv2:
14059 case ABI_V4:
14060 case ABI_DARWIN:
14061 break;
14065 RS6000_OUTPUT_BASENAME (file, fname);
14068 /* Print an operand. Recognize special options, documented below. */
14070 #if TARGET_ELF
14071 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
14072 only introduced by the linker, when applying the sda21
14073 relocation. */
14074 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
14075 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
14076 #else
14077 #define SMALL_DATA_RELOC "sda21"
14078 #define SMALL_DATA_REG 0
14079 #endif
14081 void
14082 print_operand (FILE *file, rtx x, int code)
14084 int i;
14085 unsigned HOST_WIDE_INT uval;
14087 switch (code)
14089 /* %a is output_address. */
14091 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14092 output_operand. */
14094 case 'A':
14095 /* Write the MMA accumulator number associated with VSX register X. */
14096 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
14097 output_operand_lossage ("invalid %%A value");
14098 else
14099 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
14100 return;
14102 case 'D':
14103 /* Like 'J' but get to the GT bit only. */
14104 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14106 output_operand_lossage ("invalid %%D value");
14107 return;
14110 /* Bit 1 is GT bit. */
14111 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
14113 /* Add one for shift count in rlinm for scc. */
14114 fprintf (file, "%d", i + 1);
14115 return;
14117 case 'e':
14118 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14119 if (! INT_P (x))
14121 output_operand_lossage ("invalid %%e value");
14122 return;
14125 uval = INTVAL (x);
14126 if ((uval & 0xffff) == 0 && uval != 0)
14127 putc ('s', file);
14128 return;
14130 case 'E':
14131 /* X is a CR register.  Print the number of the EQ bit of the CR. */
14132 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14133 output_operand_lossage ("invalid %%E value");
14134 else
14135 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
14136 return;
14138 case 'f':
14139 /* X is a CR register. Print the shift count needed to move it
14140 to the high-order four bits. */
14141 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14142 output_operand_lossage ("invalid %%f value");
14143 else
14144 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
14145 return;
14147 case 'F':
14148 /* Similar, but print the count for the rotate in the opposite
14149 direction. */
14150 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14151 output_operand_lossage ("invalid %%F value");
14152 else
14153 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
14154 return;
14156 case 'G':
14157 /* X is a constant integer. If it is negative, print "m",
14158 otherwise print "z". This is to make an aze or ame insn. */
14159 if (!CONST_INT_P (x))
14160 output_operand_lossage ("invalid %%G value");
14161 else if (INTVAL (x) >= 0)
14162 putc ('z', file);
14163 else
14164 putc ('m', file);
14165 return;
14167 case 'h':
14168 /* If constant, output low-order five bits. Otherwise, write
14169 normally. */
14170 if (INT_P (x))
14171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
14172 else
14173 print_operand (file, x, 0);
14174 return;
14176 case 'H':
14177 /* If constant, output low-order six bits. Otherwise, write
14178 normally. */
14179 if (INT_P (x))
14180 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
14181 else
14182 print_operand (file, x, 0);
14183 return;
14185 case 'I':
14186 /* Print `i' if this is a constant, else nothing. */
14187 if (INT_P (x))
14188 putc ('i', file);
14189 return;
14191 case 'j':
14192 /* Write the bit number in CCR for jump. */
14193 i = ccr_bit (x, 0);
14194 if (i == -1)
14195 output_operand_lossage ("invalid %%j code");
14196 else
14197 fprintf (file, "%d", i);
14198 return;
14200 case 'J':
14201 /* Similar, but add one for shift count in rlinm for scc and pass
14202 scc flag to `ccr_bit'. */
14203 i = ccr_bit (x, 1);
14204 if (i == -1)
14205 output_operand_lossage ("invalid %%J code");
14206 else
14207 /* If we want bit 31, write a shift count of zero, not 32. */
14208 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14209 return;
14211 case 'k':
14212 /* X must be a constant. Write the 1's complement of the
14213 constant. */
14214 if (! INT_P (x))
14215 output_operand_lossage ("invalid %%k value");
14216 else
14217 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
14218 return;
14220 case 'K':
14221 /* X must be a symbolic constant on ELF. Write an
14222 expression suitable for an 'addi' that adds in the low 16
14223 bits of the MEM. */
14224 if (GET_CODE (x) == CONST)
14226 if (GET_CODE (XEXP (x, 0)) != PLUS
14227 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
14228 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
14229 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
14230 output_operand_lossage ("invalid %%K value");
14232 print_operand_address (file, x);
14233 fputs ("@l", file);
14234 return;
14236 /* %l is output_asm_label. */
14238 case 'L':
14239 /* Write second word of DImode or DFmode reference. Works on register
14240 or non-indexed memory only. */
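      /* Illustration: on a 32-bit target a DImode value living in r5/r6
	 prints its second word's register, reg_names[REGNO (x) + 1],
	 i.e. r6.  */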
14241 if (REG_P (x))
14242 fputs (reg_names[REGNO (x) + 1], file);
14243 else if (MEM_P (x))
14245 machine_mode mode = GET_MODE (x);
14246 /* Handle possible auto-increment. Since it is pre-increment and
14247 we have already done it, we can just use an offset of one word. */
14248 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14249 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14250 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14251 UNITS_PER_WORD));
14252 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14253 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14254 UNITS_PER_WORD));
14255 else
14256 output_address (mode, XEXP (adjust_address_nv (x, SImode,
14257 UNITS_PER_WORD),
14258 0));
14260 if (small_data_operand (x, GET_MODE (x)))
14261 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14262 reg_names[SMALL_DATA_REG]);
14264 return;
14266 case 'N': /* Unused */
14267 /* Write the number of elements in the vector times 4. */
14268 if (GET_CODE (x) != PARALLEL)
14269 output_operand_lossage ("invalid %%N value");
14270 else
14271 fprintf (file, "%d", XVECLEN (x, 0) * 4);
14272 return;
14274 case 'O': /* Unused */
14275 /* Similar, but subtract 1 first. */
14276 if (GET_CODE (x) != PARALLEL)
14277 output_operand_lossage ("invalid %%O value");
14278 else
14279 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
14280 return;
14282 case 'p':
14283 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14284 if (! INT_P (x)
14285 || INTVAL (x) < 0
14286 || (i = exact_log2 (INTVAL (x))) < 0)
14287 output_operand_lossage ("invalid %%p value");
14288 else
14289 fprintf (file, "%d", i);
14290 return;
14292 case 'P':
14293 /* The operand must be an indirect memory reference. The result
14294 is the register name. */
14295 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14296 || REGNO (XEXP (x, 0)) >= 32)
14297 output_operand_lossage ("invalid %%P value");
14298 else
14299 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14300 return;
14302 case 'q':
14303 /* This outputs the logical code corresponding to a boolean
14304 expression. The expression may have one or both operands
14305 negated (if one, only the first one). For condition register
14306 logical operations, it will also treat the negated
14307 CR codes as NOTs, but not handle NOTs of them. */
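      /* Examples of the mapping implemented by the table below:
	     (and a b)		    -> "and"
	     (and (not a) b)	    -> "andc"
	     (ior (not a) (not b))  -> "nand"  (~a | ~b == ~(a & b)).  */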
14309 const char *const *t = 0;
14310 const char *s;
14311 enum rtx_code code = GET_CODE (x);
14312 static const char * const tbl[3][3] = {
14313 { "and", "andc", "nor" },
14314 { "or", "orc", "nand" },
14315 { "xor", "eqv", "xor" } };
14317 if (code == AND)
14318 t = tbl[0];
14319 else if (code == IOR)
14320 t = tbl[1];
14321 else if (code == XOR)
14322 t = tbl[2];
14323 else
14324 output_operand_lossage ("invalid %%q value");
14326 if (GET_CODE (XEXP (x, 0)) != NOT)
14327 s = t[0];
14328 else
14330 if (GET_CODE (XEXP (x, 1)) == NOT)
14331 s = t[2];
14332 else
14333 s = t[1];
14336 fputs (s, file);
14338 return;
14340 case 'Q':
14341 if (! TARGET_MFCRF)
14342 return;
14343 fputc (',', file);
14344 /* FALLTHRU */
14346 case 'R':
14347 /* X is a CR register. Print the mask for `mtcrf'. */
14348 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14349 output_operand_lossage ("invalid %%R value");
14350 else
14351 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14352 return;
14354 case 's':
14355 /* Low 5 bits of 32 - value */
14356 if (! INT_P (x))
14357 output_operand_lossage ("invalid %%s value");
14358 else
14359 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14360 return;
14362 case 't':
14363 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14364 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14366 output_operand_lossage ("invalid %%t value");
14367 return;
14370 /* Bit 3 is OV bit. */
14371 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14373 /* If we want bit 31, write a shift count of zero, not 32. */
14374 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14375 return;
14377 case 'T':
14378 /* Print the symbolic name of a branch target register. */
14379 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14380 x = XVECEXP (x, 0, 0);
14381 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14382 && REGNO (x) != CTR_REGNO))
14383 output_operand_lossage ("invalid %%T value");
14384 else if (REGNO (x) == LR_REGNO)
14385 fputs ("lr", file);
14386 else
14387 fputs ("ctr", file);
14388 return;
14390 case 'u':
14391 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14392 for use in unsigned operand. */
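      /* E.g. 0x12340000 prints as 0x1234 (high half, since the low half
	 is zero), while 0x5678 prints as 0x5678.  */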
14393 if (! INT_P (x))
14395 output_operand_lossage ("invalid %%u value");
14396 return;
14399 uval = INTVAL (x);
14400 if ((uval & 0xffff) == 0)
14401 uval >>= 16;
14403 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14404 return;
14406 case 'v':
14407 /* High-order 16 bits of constant for use in signed operand. */
14408 if (! INT_P (x))
14409 output_operand_lossage ("invalid %%v value");
14410 else
14411 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14412 (INTVAL (x) >> 16) & 0xffff);
14413 return;
14415 case 'U':
14416 /* Print `u' if this has an auto-increment or auto-decrement. */
14417 if (MEM_P (x)
14418 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14419 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14420 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14421 putc ('u', file);
14422 return;
14424 case 'V':
14425 /* Print the trap code for this operand. */
14426 switch (GET_CODE (x))
14428 case EQ:
14429 fputs ("eq", file); /* 4 */
14430 break;
14431 case NE:
14432 fputs ("ne", file); /* 24 */
14433 break;
14434 case LT:
14435 fputs ("lt", file); /* 16 */
14436 break;
14437 case LE:
14438 fputs ("le", file); /* 20 */
14439 break;
14440 case GT:
14441 fputs ("gt", file); /* 8 */
14442 break;
14443 case GE:
14444 fputs ("ge", file); /* 12 */
14445 break;
14446 case LTU:
14447 fputs ("llt", file); /* 2 */
14448 break;
14449 case LEU:
14450 fputs ("lle", file); /* 6 */
14451 break;
14452 case GTU:
14453 fputs ("lgt", file); /* 1 */
14454 break;
14455 case GEU:
14456 fputs ("lge", file); /* 5 */
14457 break;
14458 default:
14459 output_operand_lossage ("invalid %%V value");
14461 break;
14463 case 'w':
14464 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14465 normally. */
14466 if (INT_P (x))
14467 fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
14468 else
14469 print_operand (file, x, 0);
14470 return;
14472 case 'x':
14473 /* X is a FPR or Altivec register used in a VSX context. */
14474 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14475 output_operand_lossage ("invalid %%x value");
14476 else
14478 int reg = REGNO (x);
14479 int vsx_reg = (FP_REGNO_P (reg)
14480 ? reg - 32
14481 : reg - FIRST_ALTIVEC_REGNO + 32);
14483 #ifdef TARGET_REGNAMES
14484 if (TARGET_REGNAMES)
14485 fprintf (file, "%%vs%d", vsx_reg);
14486 else
14487 #endif
14488 fprintf (file, "%d", vsx_reg);
14490 return;
14492 case 'X':
14493 if (MEM_P (x)
14494 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14495 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14496 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14497 putc ('x', file);
14498 return;
14500 case 'Y':
14501 /* Like 'L', for third word of TImode/PTImode. */
14502 if (REG_P (x))
14503 fputs (reg_names[REGNO (x) + 2], file);
14504 else if (MEM_P (x))
14506 machine_mode mode = GET_MODE (x);
14507 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14508 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14509 output_address (mode, plus_constant (Pmode,
14510 XEXP (XEXP (x, 0), 0), 8));
14511 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14512 output_address (mode, plus_constant (Pmode,
14513 XEXP (XEXP (x, 0), 0), 8));
14514 else
14515 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14516 if (small_data_operand (x, GET_MODE (x)))
14517 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14518 reg_names[SMALL_DATA_REG]);
14520 return;
14522 case 'z':
14523 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14524 x = XVECEXP (x, 0, 1);
14525 /* X is a SYMBOL_REF. Write out the name preceded by a
14526 period and without any trailing data in brackets. Used for function
14527 names. If we are configured for System V (or the embedded ABI) on
14528 the PowerPC, do not emit the period, since those systems do not use
14529 TOCs and the like. */
14530 if (!SYMBOL_REF_P (x))
14532 output_operand_lossage ("invalid %%z value");
14533 return;
14536 /* For macho, check to see if we need a stub. */
14537 if (TARGET_MACHO)
14539 const char *name = XSTR (x, 0);
14540 #if TARGET_MACHO
14541 if (darwin_symbol_stubs
14542 && MACHOPIC_INDIRECT
14543 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14544 name = machopic_indirection_name (x, /*stub_p=*/true);
14545 #endif
14546 assemble_name (file, name);
14548 else if (!DOT_SYMBOLS)
14549 assemble_name (file, XSTR (x, 0));
14550 else
14551 rs6000_output_function_entry (file, XSTR (x, 0));
14552 return;
14554 case 'Z':
14555 /* Like 'L', for last word of TImode/PTImode. */
14556 if (REG_P (x))
14557 fputs (reg_names[REGNO (x) + 3], file);
14558 else if (MEM_P (x))
14560 machine_mode mode = GET_MODE (x);
14561 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14562 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14563 output_address (mode, plus_constant (Pmode,
14564 XEXP (XEXP (x, 0), 0), 12));
14565 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14566 output_address (mode, plus_constant (Pmode,
14567 XEXP (XEXP (x, 0), 0), 12));
14568 else
14569 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14570 if (small_data_operand (x, GET_MODE (x)))
14571 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14572 reg_names[SMALL_DATA_REG]);
14574 return;
14576 /* Print AltiVec memory operand. */
14577 case 'y':
14579 rtx tmp;
14581 gcc_assert (MEM_P (x));
14583 tmp = XEXP (x, 0);
14585 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14586 && GET_CODE (tmp) == AND
14587 && CONST_INT_P (XEXP (tmp, 1))
14588 && INTVAL (XEXP (tmp, 1)) == -16)
14589 tmp = XEXP (tmp, 0);
14590 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14591 && GET_CODE (tmp) == PRE_MODIFY)
14592 tmp = XEXP (tmp, 1);
14593 if (REG_P (tmp))
14594 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14595 else
14597 if (GET_CODE (tmp) != PLUS
14598 || !REG_P (XEXP (tmp, 0))
14599 || !REG_P (XEXP (tmp, 1)))
14601 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14602 break;
14605 if (REGNO (XEXP (tmp, 0)) == 0)
14606 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14607 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14608 else
14609 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14610 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14612 break;
14615 case 0:
14616 if (REG_P (x))
14617 fprintf (file, "%s", reg_names[REGNO (x)]);
14618 else if (MEM_P (x))
14620 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14621 know the width from the mode. */
14622 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14623 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14624 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14625 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14626 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14627 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14628 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14629 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14630 else
14631 output_address (GET_MODE (x), XEXP (x, 0));
14633 else if (toc_relative_expr_p (x, false,
14634 &tocrel_base_oac, &tocrel_offset_oac))
14635 /* This hack along with a corresponding hack in
14636 rs6000_output_addr_const_extra arranges to output addends
14637 where the assembler expects to find them. eg.
14638 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14639 without this hack would be output as "x@toc+4". We
14640 want "x+4@toc". */
14641 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14642 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14643 output_addr_const (file, XVECEXP (x, 0, 0));
14644 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14645 output_addr_const (file, XVECEXP (x, 0, 1));
14646 else
14647 output_addr_const (file, x);
14648 return;
14650 case '&':
14651 if (const char *name = get_some_local_dynamic_name ())
14652 assemble_name (file, name);
14653 else
14654 output_operand_lossage ("'%%&' used without any "
14655 "local dynamic TLS references");
14656 return;
14658 default:
14659 output_operand_lossage ("invalid %%xn code");
14663 /* Print the address of an operand. */
14665 void
14666 print_operand_address (FILE *file, rtx x)
14668 if (REG_P (x))
14669 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14671 /* Is it a PC-relative address? */
14672 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14674 HOST_WIDE_INT offset;
14676 if (GET_CODE (x) == CONST)
14677 x = XEXP (x, 0);
14679 if (GET_CODE (x) == PLUS)
14681 offset = INTVAL (XEXP (x, 1));
14682 x = XEXP (x, 0);
14684 else
14685 offset = 0;
14687 output_addr_const (file, x);
14689 if (offset)
14690 fprintf (file, "%+" PRId64, offset);
14692 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14693 fprintf (file, "@got");
14695 fprintf (file, "@pcrel");
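      /* So a local symbol with an offset prints e.g. "x+8@pcrel", while
	 an external symbol goes through the GOT and prints "y@got@pcrel".  */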
14697 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14698 || GET_CODE (x) == LABEL_REF)
14700 output_addr_const (file, x);
14701 if (small_data_operand (x, GET_MODE (x)))
14702 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14703 reg_names[SMALL_DATA_REG]);
14704 else
14706 /* Getting an address directly from the TOC is not supported; emit an
14707    error.  No more work is needed for !TARGET_TOC. */
14708 if (TARGET_TOC)
14709 output_operand_lossage ("%%a requires an address of memory");
14712 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14713 && REG_P (XEXP (x, 1)))
14715 if (REGNO (XEXP (x, 0)) == 0)
14716 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14717 reg_names[ REGNO (XEXP (x, 0)) ]);
14718 else
14719 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14720 reg_names[ REGNO (XEXP (x, 1)) ]);
14722 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14723 && CONST_INT_P (XEXP (x, 1)))
14724 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14725 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14726 #if TARGET_MACHO
14727 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14728 && CONSTANT_P (XEXP (x, 1)))
14730 fprintf (file, "lo16(");
14731 output_addr_const (file, XEXP (x, 1));
14732 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14734 #endif
14735 #if TARGET_ELF
14736 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14737 && CONSTANT_P (XEXP (x, 1)))
14739 output_addr_const (file, XEXP (x, 1));
14740 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14742 #endif
14743 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14745 /* This hack along with a corresponding hack in
14746 rs6000_output_addr_const_extra arranges to output addends
14747 where the assembler expects to find them. eg.
14748 (lo_sum (reg 9)
14749 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14750 without this hack would be output as "x@toc+8@l(9)". We
14751 want "x+8@toc@l(9)". */
14752 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14753 if (GET_CODE (x) == LO_SUM)
14754 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14755 else
14756 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14758 else
14759 output_addr_const (file, x);
14762 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14764 bool
14765 rs6000_output_addr_const_extra (FILE *file, rtx x)
14767 if (GET_CODE (x) == UNSPEC)
14768 switch (XINT (x, 1))
14770 case UNSPEC_TOCREL:
14771 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14772 && REG_P (XVECEXP (x, 0, 1))
14773 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14774 output_addr_const (file, XVECEXP (x, 0, 0));
14775 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14777 if (INTVAL (tocrel_offset_oac) >= 0)
14778 fprintf (file, "+");
14779 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14781 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14783 putc ('-', file);
14784 assemble_name (file, toc_label_name);
14785 need_toc_init = 1;
14787 else if (TARGET_ELF)
14788 fputs ("@toc", file);
14789 return true;
14791 #if TARGET_MACHO
14792 case UNSPEC_MACHOPIC_OFFSET:
14793 output_addr_const (file, XVECEXP (x, 0, 0));
14794 putc ('-', file);
14795 machopic_output_function_base_name (file);
14796 return true;
14797 #endif
14799 return false;
14802 /* Target hook for assembling integer objects. The PowerPC version has
14803 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14804 is defined. It also needs to handle DI-mode objects on 64-bit
14805 targets. */
14807 static bool
14808 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14810 #ifdef RELOCATABLE_NEEDS_FIXUP
14811 /* Special handling for SI values. */
14812 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14814 static int recurse = 0;
14816 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14817 the .fixup section. Since the TOC section is already relocated, we
14818 don't need to mark it here. We used to skip the text section, but it
14819 should never be valid for relocated addresses to be placed in the text
14820 section. */
14821 if (DEFAULT_ABI == ABI_V4
14822 && (TARGET_RELOCATABLE || flag_pic > 1)
14823 && in_section != toc_section
14824 && !recurse
14825 && !CONST_SCALAR_INT_P (x)
14826 && CONSTANT_P (x))
14828 char buf[256];
14830 recurse = 1;
14831 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14832 fixuplabelno++;
14833 ASM_OUTPUT_LABEL (asm_out_file, buf);
14834 fprintf (asm_out_file, "\t.long\t(");
14835 output_addr_const (asm_out_file, x);
14836 fprintf (asm_out_file, ")@fixup\n");
14837 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14838 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14839 fprintf (asm_out_file, "\t.long\t");
14840 assemble_name (asm_out_file, buf);
14841 fprintf (asm_out_file, "\n\t.previous\n");
14842 recurse = 0;
14843 return true;
14845 /* Remove initial .'s to turn a -mcall-aixdesc function
14846 address into the address of the descriptor, not the function
14847 itself. */
14848 else if (SYMBOL_REF_P (x)
14849 && XSTR (x, 0)[0] == '.'
14850 && DEFAULT_ABI == ABI_AIX)
14852 const char *name = XSTR (x, 0);
14853 while (*name == '.')
14854 name++;
14856 fprintf (asm_out_file, "\t.long\t%s\n", name);
14857 return true;
14860 #endif /* RELOCATABLE_NEEDS_FIXUP */
14861 return default_assemble_integer (x, size, aligned_p);
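/* For reference, a sketch of what the fixup records emitted above look
   like in the assembly output (label name illustrative):

       .LCP1:
               .long   (sym)@fixup
               .section ".fixup","aw"
               .align  2
               .long   .LCP1
               .previous

   i.e. each relocatable word is given a label, and the .fixup section
   collects pointers back to all such words.  */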
14864 /* Return a template string for assembly to emit when making an
14865 external call. FUNOP is the call mem argument operand number. */
14867 static const char *
14868 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14870 /* -Wformat-overflow workaround, without which gcc thinks that %u
14871 might produce 10 digits. */
14872 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14874 char arg[12];
14875 arg[0] = 0;
14876 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14878 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14879 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14880 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14881 sprintf (arg, "(%%&@tlsld)");
14884 /* The magic 32768 offset here corresponds to the offset of
14885 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14886 char z[11];
14887 sprintf (z, "%%z%u%s", funop,
14888 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14889 ? "+32768" : ""));
14891 static char str[32]; /* 1 spare */
14892 if (rs6000_pcrel_p ())
14893 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14894 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14895 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14896 sibcall ? "" : "\n\tnop");
14897 else if (DEFAULT_ABI == ABI_V4)
14898 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14899 flag_pic ? "@plt" : "");
14900 #if TARGET_MACHO
14901 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14902 else if (DEFAULT_ABI == ABI_DARWIN)
14904 /* The cookie is in operand funop + 2. */
14905 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14906 int cookie = INTVAL (operands[funop + 2]);
14907 if (cookie & CALL_LONG)
14909 tree funname = get_identifier (XSTR (operands[funop], 0));
14910 tree labelname = get_prev_label (funname);
14911 gcc_checking_assert (labelname && !sibcall);
14913 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14914 instruction will reach 'foo', otherwise link as 'bl L42'".
14915 "L42" should be a 'branch island', that will do a far jump to
14916 'foo'. Branch islands are generated in
14917 macho_branch_islands(). */
14918 sprintf (str, "jbsr %%z%u,%.10s", funop,
14919 IDENTIFIER_POINTER (labelname));
14921 else
14922 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14923 after the call. */
14924 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14926 #endif
14927 else
14928 gcc_unreachable ();
14929 return str;
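/* For reference, a sketch of the sequences the template above expands
   to ("foo" stands in for the %z call operand):

       pcrel:          bl foo@notoc
       AIX / ELFv2:    bl foo
                       nop
       SVR4 -fPIC:     bl foo@plt

   Sibling calls use "b" instead of "bl" and omit the trailing nop.  */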
14932 const char *
14933 rs6000_call_template (rtx *operands, unsigned int funop)
14935 return rs6000_call_template_1 (operands, funop, false);
14938 const char *
14939 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14941 return rs6000_call_template_1 (operands, funop, true);
14944 /* As above, for indirect calls. */
14946 static const char *
14947 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14948 bool sibcall)
14950 /* -Wformat-overflow workaround, without which gcc thinks that %u
14951 might produce 10 digits. Note that -Wformat-overflow will not
14952 currently warn here for str[], so do not rely on a warning to
14953 ensure str[] is correctly sized. */
14954 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14956 /* Currently, funop is either 0 or 1. The maximum string is always
14957 a !speculate 64-bit __tls_get_addr call.
14959 ABI_ELFv2, pcrel:
14960 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14961 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14962 . 9 crset 2\n\t
14963 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14964 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14965 . 8 beq%T1l-
14966 .---
14967 .142
14969 ABI_AIX:
14970 . 9 ld 2,%3\n\t
14971 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14972 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14973 . 9 crset 2\n\t
14974 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14975 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14976 . 10 beq%T1l-\n\t
14977 . 10 ld 2,%4(1)
14978 .---
14979 .151
14981 ABI_ELFv2:
14982 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14983 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14984 . 9 crset 2\n\t
14985 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14986 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14987 . 10 beq%T1l-\n\t
14988 . 10 ld 2,%3(1)
14989 .---
14990 .142
14992 ABI_V4:
14993 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14994 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14995 . 9 crset 2\n\t
14996 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14997 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14998 . 8 beq%T1l-
14999 .---
15000 .141 */
15001 static char str[160]; /* 8 spare */
15002 char *s = str;
15003 const char *ptrload = TARGET_64BIT ? "d" : "wz";
15005 if (DEFAULT_ABI == ABI_AIX)
15006 s += sprintf (s,
15007 "l%s 2,%%%u\n\t",
15008 ptrload, funop + 3);
15010 /* We don't need the extra code to stop indirect call speculation if
15011 calling via LR. */
15012 bool speculate = (TARGET_MACHO
15013 || rs6000_speculate_indirect_jumps
15014 || (REG_P (operands[funop])
15015 && REGNO (operands[funop]) == LR_REGNO));
15017 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
15019 const char *rel64 = TARGET_64BIT ? "64" : "";
15020 char tls[29];
15021 tls[0] = 0;
15022 if (GET_CODE (operands[funop + 1]) == UNSPEC)
15024 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
15025 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
15026 rel64, funop + 1);
15027 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
15028 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
15029 rel64);
15032 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
15033 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15034 && flag_pic == 2 ? "+32768" : "");
15035 if (!speculate)
15037 s += sprintf (s,
15038 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
15039 tls, rel64, notoc, funop, addend);
15040 s += sprintf (s, "crset 2\n\t");
15042 s += sprintf (s,
15043 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
15044 tls, rel64, notoc, funop, addend);
15046 else if (!speculate)
15047 s += sprintf (s, "crset 2\n\t");
15049 if (rs6000_pcrel_p ())
15051 if (speculate)
15052 sprintf (s, "b%%T%ul", funop);
15053 else
15054 sprintf (s, "beq%%T%ul-", funop);
15056 else if (DEFAULT_ABI == ABI_AIX)
15058 if (speculate)
15059 sprintf (s,
15060 "b%%T%ul\n\t"
15061 "l%s 2,%%%u(1)",
15062 funop, ptrload, funop + 4);
15063 else
15064 sprintf (s,
15065 "beq%%T%ul-\n\t"
15066 "l%s 2,%%%u(1)",
15067 funop, ptrload, funop + 4);
15069 else if (DEFAULT_ABI == ABI_ELFv2)
15071 if (speculate)
15072 sprintf (s,
15073 "b%%T%ul\n\t"
15074 "l%s 2,%%%u(1)",
15075 funop, ptrload, funop + 3);
15076 else
15077 sprintf (s,
15078 "beq%%T%ul-\n\t"
15079 "l%s 2,%%%u(1)",
15080 funop, ptrload, funop + 3);
15082 else
15084 if (speculate)
15085 sprintf (s,
15086 "b%%T%u%s",
15087 funop, sibcall ? "" : "l");
15088 else
15089 sprintf (s,
15090 "beq%%T%u%s-%s",
15091 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
15093 return str;
15096 const char *
15097 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
15099 return rs6000_indirect_call_template_1 (operands, funop, false);
15102 const char *
15103 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
15105 return rs6000_indirect_call_template_1 (operands, funop, true);
15108 #if HAVE_AS_PLTSEQ
15109 /* Output indirect call insns. WHICH identifies the type of sequence. */
15110 const char *
15111 rs6000_pltseq_template (rtx *operands, int which)
15113 const char *rel64 = TARGET_64BIT ? "64" : "";
15114 char tls[30];
15115 tls[0] = 0;
15116 if (GET_CODE (operands[3]) == UNSPEC)
15118 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
15119 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
15120 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
15121 off, rel64);
15122 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
15123 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
15124 off, rel64);
15127 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
15128 static char str[96]; /* 10 spare */
15129 char off = WORDS_BIG_ENDIAN ? '2' : '4';
15130 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15131 && flag_pic == 2 ? "+32768" : "");
15132 switch (which)
15134 case RS6000_PLTSEQ_TOCSAVE:
15135 sprintf (str,
15136 "st%s\n\t"
15137 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15138 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
15139 tls, rel64);
15140 break;
15141 case RS6000_PLTSEQ_PLT16_HA:
15142 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
15143 sprintf (str,
15144 "lis %%0,0\n\t"
15145 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15146 tls, off, rel64);
15147 else
15148 sprintf (str,
15149 "addis %%0,%%1,0\n\t"
15150 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15151 tls, off, rel64, addend);
15152 break;
15153 case RS6000_PLTSEQ_PLT16_LO:
15154 sprintf (str,
15155 "l%s %%0,0(%%1)\n\t"
15156 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15157 TARGET_64BIT ? "d" : "wz",
15158 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
15159 break;
15160 case RS6000_PLTSEQ_MTCTR:
15161 sprintf (str,
15162 "mtctr %%1\n\t"
15163 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15164 tls, rel64, addend);
15165 break;
15166 case RS6000_PLTSEQ_PLT_PCREL34:
15167 sprintf (str,
15168 "pl%s %%0,0(0),1\n\t"
15169 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15170 TARGET_64BIT ? "d" : "wz",
15171 tls, rel64);
15172 break;
15173 default:
15174 gcc_unreachable ();
15176 return str;
15178 #endif
15180 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
15181 /* Emit an assembler directive to set symbol visibility for DECL to
15182 VISIBILITY_TYPE. */
15184 static void
15185 rs6000_assemble_visibility (tree decl, int vis)
15187 if (TARGET_XCOFF)
15188 return;
15190 /* Functions need to have their entry point symbol visibility set as
15191 well as their descriptor symbol visibility. */
15192 if (DEFAULT_ABI == ABI_AIX
15193 && DOT_SYMBOLS
15194 && TREE_CODE (decl) == FUNCTION_DECL)
15196 static const char * const visibility_types[] = {
15197 NULL, "protected", "hidden", "internal"
15200 const char *name, *type;
15202 name = ((* targetm.strip_name_encoding)
15203 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
15204 type = visibility_types[vis];
15206 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
15207 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
15209 else
15210 default_assemble_visibility (decl, vis);
15212 #endif
15214 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15215 entry. If RECORD_P is true and the target supports named sections,
15216 the location of the NOPs will be recorded in a special object section
15217 called "__patchable_function_entries". This routine may be called
15218 twice per function to put NOPs before and after the function
15219 entry. */
15221 void
15222 rs6000_print_patchable_function_entry (FILE *file,
15223 unsigned HOST_WIDE_INT patch_area_size,
15224 bool record_p)
15226 bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
15227 /* For a function which needs a global entry point, we emit the
15228 patchable area before and after the local entry point under the control of
15229 cfun->machine->global_entry_emitted; see the handling in
15230 rs6000_output_function_prologue. */
15231 if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
15232 default_print_patchable_function_entry (file, patch_area_size, record_p);
15235 enum rtx_code
15236 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
15238 /* Reversal of FP compares takes care -- an ordered compare
15239 becomes an unordered compare and vice versa. */
15240 if (mode == CCFPmode
15241 && (!flag_finite_math_only
15242 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
15243 || code == UNEQ || code == LTGT))
15244 return reverse_condition_maybe_unordered (code);
15245 else
15246 return reverse_condition (code);
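/* A plain-C sketch of why CCFPmode takes the "maybe unordered"
   reversal above: with NaNs honored, the negation of "a >= b" is
   UNLT (true for unordered operands), not LT.  */
static int ge_sketch (double a, double b) { return a >= b; }
static int not_ge_sketch (double a, double b)
{
  return !(a >= b);	/* UNLT: also true when A or B is a NaN.  */
}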
15249 /* Check if C (as a 64-bit integer) can be rotated to a constant that contains
15250 nonzero bits in the LOWBITS low bits only.
15252 Return true if C can be rotated to such a constant; if so, *ROT is set
15253 to the number of bits by which C is rotated.
15254 Return false otherwise. */
15256 bool
15257 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
15259 int clz = HOST_BITS_PER_WIDE_INT - lowbits;
15261 /* case a. 0..0xxx: already at least clz zeros. */
15262 int lz = clz_hwi (c);
15263 if (lz >= clz)
15265 *rot = 0;
15266 return true;
15269 /* case b. 0..0xxx0..0: at least clz zeros. */
15270 int tz = ctz_hwi (c);
15271 if (lz + tz >= clz)
15273 *rot = HOST_BITS_PER_WIDE_INT - tz;
15274 return true;
15277 /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
15278 ^bit -> Vbit, then zeros are at head or tail.
15279 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
15280 const int rot_bits = lowbits + 1;
15281 unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
15282 tz = ctz_hwi (rc);
15283 if (clz_hwi (rc) + tz >= clz)
15285 *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
15286 return true;
15289 return false;
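/* A standalone brute-force reference for the function above, assuming
   64-bit values (the helper and test values are illustrative, not part
   of GCC): search for a left-rotation of C whose nonzero bits all land
   in the LOWBITS low bits, with the same *ROT contract.  */
#include <stdint.h>
#include <stdio.h>

static int
brute_rotate_to_lowbits (uint64_t c, int lowbits, int *rot)
{
  uint64_t limit = (uint64_t) 1 << lowbits;	/* lowbits < 64 here */
  for (int r = 0; r < 64; r++)
    {
      uint64_t v = r ? (c << r) | (c >> (64 - r)) : c;	/* rotl (c, r) */
      if (v < limit)
	{
	  *rot = r;	/* left-rotate count, as in the real function */
	  return 1;
	}
    }
  return 0;
}

int
main (void)
{
  int rot;
  if (brute_rotate_to_lowbits (0xff00000000000000ull, 16, &rot))
    printf ("rot=%d\n", rot);	/* prints rot=8 */
  return 0;
}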
15292 /* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit constant,
15293 i.e. one with 48 leading zero bits and 16 bits of any value. */
15295 bool
15296 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
15298 int rot = 0;
15299 bool res = can_be_rotated_to_lowbits (c, 16, &rot);
15300 return res && rot > 0;
15303 /* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit constant,
15304 i.e. one with 49 leading one bits and 15 bits of any value. */
15306 bool
15307 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
15309 int rot = 0;
15310 bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
15311 return res && rot > 0;
15314 /* Generate a compare for CODE. Return a brand-new rtx that
15315 represents the result of the compare. */
15317 static rtx
15318 rs6000_generate_compare (rtx cmp, machine_mode mode)
15320 machine_mode comp_mode;
15321 rtx compare_result;
15322 enum rtx_code code = GET_CODE (cmp);
15323 rtx op0 = XEXP (cmp, 0);
15324 rtx op1 = XEXP (cmp, 1);
15326 if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))
15327 comp_mode = CCmode;
15328 else if (FLOAT_MODE_P (mode))
15329 comp_mode = CCFPmode;
15330 else if (code == GTU || code == LTU
15331 || code == GEU || code == LEU)
15332 comp_mode = CCUNSmode;
15333 else if ((code == EQ || code == NE)
15334 && unsigned_reg_p (op0)
15335 && (unsigned_reg_p (op1)
15336 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
15337 /* These are unsigned values, perhaps there will be a later
15338 ordering compare that can be shared with this one. */
15339 comp_mode = CCUNSmode;
15340 else
15341 comp_mode = CCmode;
15343 /* If we have an unsigned compare, make sure we don't have a signed value as
15344 an immediate. */
15345 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
15346 && INTVAL (op1) < 0)
15348 op0 = copy_rtx_if_shared (op0);
15349 op1 = force_reg (GET_MODE (op0), op1);
15350 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15353 /* First, the compare. */
15354 compare_result = gen_reg_rtx (comp_mode);
15356 /* IEEE 128-bit support in VSX registers when we do not have hardware
15357 support. */
15358 if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))
15360 rtx libfunc = NULL_RTX;
15361 bool check_nan = false;
15362 rtx dest;
15364 switch (code)
15366 case EQ:
15367 case NE:
15368 libfunc = optab_libfunc (eq_optab, mode);
15369 break;
15371 case GT:
15372 case GE:
15373 libfunc = optab_libfunc (ge_optab, mode);
15374 break;
15376 case LT:
15377 case LE:
15378 libfunc = optab_libfunc (le_optab, mode);
15379 break;
15381 case UNORDERED:
15382 case ORDERED:
15383 libfunc = optab_libfunc (unord_optab, mode);
15384 code = (code == UNORDERED) ? NE : EQ;
15385 break;
15387 case UNGE:
15388 case UNGT:
15389 check_nan = true;
15390 libfunc = optab_libfunc (ge_optab, mode);
15391 code = (code == UNGE) ? GE : GT;
15392 break;
15394 case UNLE:
15395 case UNLT:
15396 check_nan = true;
15397 libfunc = optab_libfunc (le_optab, mode);
15398 code = (code == UNLE) ? LE : LT;
15399 break;
15401 case UNEQ:
15402 case LTGT:
15403 check_nan = true;
15404 libfunc = optab_libfunc (eq_optab, mode);
15405 code = (code == UNEQ) ? EQ : NE;
15406 break;
15408 default:
15409 gcc_unreachable ();
15412 gcc_assert (libfunc);
15414 if (!check_nan)
15415 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15416 SImode, op0, mode, op1, mode);
15418 /* The library signals an exception for signalling NaNs, so we need to
15419 handle isgreater, etc. by first checking isordered. */
15420 else
15422 rtx ne_rtx, normal_dest, unord_dest;
15423 rtx unord_func = optab_libfunc (unord_optab, mode);
15424 rtx join_label = gen_label_rtx ();
15425 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
15426 rtx unord_cmp = gen_reg_rtx (comp_mode);
15429 /* Test for either value being a NaN. */
15430 gcc_assert (unord_func);
15431 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15432 SImode, op0, mode, op1, mode);
15434 /* Set the result to 1 if either value is a NaN, and jump to the
15435 join label. */
15436 dest = gen_reg_rtx (SImode);
15437 emit_move_insn (dest, const1_rtx);
15438 emit_insn (gen_rtx_SET (unord_cmp,
15439 gen_rtx_COMPARE (comp_mode, unord_dest,
15440 const0_rtx)));
15442 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15443 emit_jump_insn (gen_rtx_SET (pc_rtx,
15444 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15445 join_ref,
15446 pc_rtx)));
15448 /* Do the normal comparison, knowing that the values are not
15449 NaNs. */
15450 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15451 SImode, op0, mode, op1, mode);
15453 emit_insn (gen_cstoresi4 (dest,
15454 gen_rtx_fmt_ee (code, SImode, normal_dest,
15455 const0_rtx),
15456 normal_dest, const0_rtx));
15458 /* Join NaN and non-NaN paths. Compare dest against 0. */
15459 emit_label (join_label);
15460 code = NE;
15463 emit_insn (gen_rtx_SET (compare_result,
15464 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15467 else
15469 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15470 CLOBBERs to match cmptf_internal2 pattern. */
15471 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15472 && FLOAT128_IBM_P (GET_MODE (op0))
15473 && TARGET_HARD_FLOAT)
15474 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15475 gen_rtvec (10,
15476 gen_rtx_SET (compare_result,
15477 gen_rtx_COMPARE (comp_mode, op0, op1)),
15478 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15479 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15480 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15481 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15482 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15483 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15484 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15485 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15486 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15487 else if (GET_CODE (op1) == UNSPEC
15488 && XINT (op1, 1) == UNSPEC_SP_TEST)
15490 rtx op1b = XVECEXP (op1, 0, 0);
15491 comp_mode = CCEQmode;
15492 compare_result = gen_reg_rtx (CCEQmode);
15493 if (TARGET_64BIT)
15494 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15495 else
15496 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15498 else if (mode == V16QImode)
15500 gcc_assert (code == EQ || code == NE);
15502 rtx result_vector = gen_reg_rtx (V16QImode);
15503 rtx cc_bit = gen_reg_rtx (SImode);
15504 emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
15505 emit_insn (gen_cr6_test_for_lt (cc_bit));
15506 emit_insn (gen_rtx_SET (compare_result,
15507 gen_rtx_COMPARE (comp_mode, cc_bit,
15508 const1_rtx)));
15510 else
15511 emit_insn (gen_rtx_SET (compare_result,
15512 gen_rtx_COMPARE (comp_mode, op0, op1)));
15515 validate_condition_mode (code, GET_MODE (compare_result));
15517 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
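/* A plain-C sketch of the CHECK_NAN path above, with double standing
   in for the IEEE 128-bit mode: UNGE must be true when either operand
   is a NaN, so the soft-float lowering first tests for unordered
   operands via a libcall and only then performs the ordered
   comparison.  */
static int
unge_sketch (double a, double b)
{
  if (__builtin_isunordered (a, b))	/* either operand a NaN */
    return 1;				/* UNGE is true */
  return a >= b;			/* ordered compare; no NaNs now */
}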
15521 /* Return the diagnostic message string if the binary operation OP is
15522 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15524 static const char*
15525 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15526 const_tree type1,
15527 const_tree type2)
15529 machine_mode mode1 = TYPE_MODE (type1);
15530 machine_mode mode2 = TYPE_MODE (type2);
15532 /* For complex modes, use the inner type. */
15533 if (COMPLEX_MODE_P (mode1))
15534 mode1 = GET_MODE_INNER (mode1);
15536 if (COMPLEX_MODE_P (mode2))
15537 mode2 = GET_MODE_INNER (mode2);
15539 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15540 double to intermix unless -mfloat128-convert. */
15541 if (mode1 == mode2)
15542 return NULL;
15544 if (!TARGET_FLOAT128_CVT)
15546 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15547 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15548 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15549 "point types");
15552 return NULL;
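/* An illustration of the rule above (a sketch, not a testcase from the
   tree): without -mfloat128-convert, mixing the two 128-bit formats in
   one expression is diagnosed.

       __float128 a;
       __ibm128   b;
       ... a + b ...   // error: Invalid mixing of IEEE 128-bit and
                       //        IBM 128-bit floating point types
*/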
15556 /* Expand floating point conversion to/from __float128 and __ibm128. */
15558 void
15559 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15561 machine_mode dest_mode = GET_MODE (dest);
15562 machine_mode src_mode = GET_MODE (src);
15563 convert_optab cvt = unknown_optab;
15564 bool do_move = false;
15565 rtx libfunc = NULL_RTX;
15566 rtx dest2;
15567 typedef rtx (*rtx_2func_t) (rtx, rtx);
15568 rtx_2func_t hw_convert = (rtx_2func_t)0;
15569 size_t kf_or_tf;
15571 struct hw_conv_t {
15572 rtx_2func_t from_df;
15573 rtx_2func_t from_sf;
15574 rtx_2func_t from_si_sign;
15575 rtx_2func_t from_si_uns;
15576 rtx_2func_t from_di_sign;
15577 rtx_2func_t from_di_uns;
15578 rtx_2func_t to_df;
15579 rtx_2func_t to_sf;
15580 rtx_2func_t to_si_sign;
15581 rtx_2func_t to_si_uns;
15582 rtx_2func_t to_di_sign;
15583 rtx_2func_t to_di_uns;
15584 } hw_conversions[2] = {
15585 /* conversions to/from KFmode */
15587 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15588 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15589 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15590 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15591 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15592 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15593 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15594 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15595 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15596 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15597 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15598 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15601 /* conversions to/from TFmode */
15603 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15604 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15605 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15606 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15607 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15608 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15609 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15610 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15611 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15612 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15613 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15614 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15618 if (dest_mode == src_mode)
15619 gcc_unreachable ();
15621 /* Eliminate memory operations. */
15622 if (MEM_P (src))
15623 src = force_reg (src_mode, src);
15625 if (MEM_P (dest))
15627 rtx tmp = gen_reg_rtx (dest_mode);
15628 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15629 rs6000_emit_move (dest, tmp, dest_mode);
15630 return;
15633 /* Convert to IEEE 128-bit floating point. */
15634 if (FLOAT128_IEEE_P (dest_mode))
15636 if (dest_mode == KFmode)
15637 kf_or_tf = 0;
15638 else if (dest_mode == TFmode)
15639 kf_or_tf = 1;
15640 else
15641 gcc_unreachable ();
15643 switch (src_mode)
15645 case E_DFmode:
15646 cvt = sext_optab;
15647 hw_convert = hw_conversions[kf_or_tf].from_df;
15648 break;
15650 case E_SFmode:
15651 cvt = sext_optab;
15652 hw_convert = hw_conversions[kf_or_tf].from_sf;
15653 break;
15655 case E_KFmode:
15656 case E_IFmode:
15657 case E_TFmode:
15658 if (FLOAT128_IBM_P (src_mode))
15659 cvt = trunc_optab;
15660 else
15661 do_move = true;
15662 break;
15664 case E_SImode:
15665 if (unsigned_p)
15667 cvt = ufloat_optab;
15668 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15670 else
15672 cvt = sfloat_optab;
15673 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15675 break;
15677 case E_DImode:
15678 if (unsigned_p)
15680 cvt = ufloat_optab;
15681 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15683 else
15685 cvt = sfloat_optab;
15686 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15688 break;
15690 default:
15691 gcc_unreachable ();
15695 /* Convert from IEEE 128-bit floating point. */
15696 else if (FLOAT128_IEEE_P (src_mode))
15698 if (src_mode == KFmode)
15699 kf_or_tf = 0;
15700 else if (src_mode == TFmode)
15701 kf_or_tf = 1;
15702 else
15703 gcc_unreachable ();
15705 switch (dest_mode)
15707 case E_DFmode:
15708 cvt = trunc_optab;
15709 hw_convert = hw_conversions[kf_or_tf].to_df;
15710 break;
15712 case E_SFmode:
15713 cvt = trunc_optab;
15714 hw_convert = hw_conversions[kf_or_tf].to_sf;
15715 break;
15717 case E_KFmode:
15718 case E_IFmode:
15719 case E_TFmode:
15720 if (FLOAT128_IBM_P (dest_mode))
15721 cvt = sext_optab;
15722 else
15723 do_move = true;
15724 break;
15726 case E_SImode:
15727 if (unsigned_p)
15729 cvt = ufix_optab;
15730 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15732 else
15734 cvt = sfix_optab;
15735 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15737 break;
15739 case E_DImode:
15740 if (unsigned_p)
15742 cvt = ufix_optab;
15743 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15745 else
15747 cvt = sfix_optab;
15748 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15750 break;
15752 default:
15753 gcc_unreachable ();
15757 /* Both IBM format. */
15758 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15759 do_move = true;
15761 else
15762 gcc_unreachable ();
15764 /* Handle conversion between TFmode/KFmode/IFmode. */
15765 if (do_move)
15766 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15768 /* Handle conversion if we have hardware support. */
15769 else if (TARGET_FLOAT128_HW && hw_convert)
15770 emit_insn ((hw_convert) (dest, src));
15772 /* Call an external function to do the conversion. */
15773 else if (cvt != unknown_optab)
15775 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15776 gcc_assert (libfunc != NULL_RTX);
15778 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15779 src, src_mode);
15781 gcc_assert (dest2 != NULL_RTX);
15782 if (!rtx_equal_p (dest, dest2))
15783 emit_move_insn (dest, dest2);
15786 else
15787 gcc_unreachable ();
15789 return;
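/* A sketch of the dispatch above for one direction: converting double
   to __float128 uses the hardware pattern when it is available, and
   otherwise falls back to the registered sext_optab libcall
   (__extenddfkf2 in libgcc).  */
static __float128
to_float128_sketch (double d)
{
  return (__float128) d;	/* hw insn with -mfloat128-hardware,
				   else a call to __extenddfkf2 */
}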
15793 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15794 can be used as that dest register. Return the dest register. */
15797 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15799 if (op2 == const0_rtx)
15800 return op1;
15802 if (GET_CODE (scratch) == SCRATCH)
15803 scratch = gen_reg_rtx (mode);
15805 if (logical_operand (op2, mode))
15806 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15807 else
15808 emit_insn (gen_rtx_SET (scratch,
15809 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15811 return scratch;
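/* The reduction above in plain C, as a sketch: an EQ/NE test against a
   nonzero OP2 becomes a test of SCRATCH against zero, where SCRATCH is
   OP1 ^ OP2 when OP2 is a logical-immediate operand and OP1 - OP2
   otherwise.  */
static long
eqne_scratch_sketch (long op1, long op2)
{
  return op1 ^ op2;	/* or op1 - op2; zero exactly when op1 == op2 */
}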
15814 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15815 requires this. The result is mode MODE. */
15817 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15819 rtx cond[2];
15820 int n = 0;
15821 if (code == LTGT || code == LE || code == UNLT)
15822 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15823 if (code == LTGT || code == GE || code == UNGT)
15824 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15825 if (code == LE || code == GE || code == UNEQ)
15826 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15827 if (code == UNLT || code == UNGT || code == UNEQ)
15828 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15830 gcc_assert (n == 2);
15832 rtx cc = gen_reg_rtx (CCEQmode);
15833 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15834 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15836 return cc;
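/* A plain-C sketch of the CR-bit view behind the cror above: a CCFP
   compare produces four bits (lt, gt, eq, un), and codes with no
   single bit of their own, such as LE, LTGT or UNEQ, are the OR of
   two of them.  */
struct cr_sketch { unsigned lt : 1, gt : 1, eq : 1, un : 1; };
static int fp_le_sketch (struct cr_sketch cc)   { return cc.lt | cc.eq; }
static int fp_ltgt_sketch (struct cr_sketch cc) { return cc.lt | cc.gt; }
static int fp_uneq_sketch (struct cr_sketch cc) { return cc.eq | cc.un; }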
15839 void
15840 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15842 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15843 rtx_code cond_code = GET_CODE (condition_rtx);
15845 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15846 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15848 else if (cond_code == NE
15849 || cond_code == GE || cond_code == LE
15850 || cond_code == GEU || cond_code == LEU
15851 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15853 rtx not_result = gen_reg_rtx (CCEQmode);
15854 rtx not_op, rev_cond_rtx;
15855 machine_mode cc_mode;
15857 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15859 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15860 SImode, XEXP (condition_rtx, 0), const0_rtx);
15861 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15862 emit_insn (gen_rtx_SET (not_result, not_op));
15863 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15866 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15867 if (op_mode == VOIDmode)
15868 op_mode = GET_MODE (XEXP (operands[1], 1));
15870 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15872 PUT_MODE (condition_rtx, DImode);
15873 convert_move (operands[0], condition_rtx, 0);
15875 else
15877 PUT_MODE (condition_rtx, SImode);
15878 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15882 /* Emit a conditional branch to the label given by OPERANDS[3]. */
15884 void
15885 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15887 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15888 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15889 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15890 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15893 /* Return the string to output a conditional branch to LABEL, which is
15894 the operand template of the label, or NULL if the branch is really a
15895 conditional return.
15897 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15898 condition code register and its mode specifies what kind of
15899 comparison we made.
15901 REVERSED is nonzero if we should reverse the sense of the comparison.
15903 INSN is the insn. */
15905 char *
15906 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15908 static char string[64];
15909 enum rtx_code code = GET_CODE (op);
15910 rtx cc_reg = XEXP (op, 0);
15911 machine_mode mode = GET_MODE (cc_reg);
15912 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15913 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15914 int really_reversed = reversed ^ need_longbranch;
15915 char *s = string;
15916 const char *ccode;
15917 const char *pred;
15918 rtx note;
15920 validate_condition_mode (code, mode);
15922 /* Work out which way this really branches. We could use
15923 reverse_condition_maybe_unordered here always but this
15924 makes the resulting assembler clearer. */
15925 if (really_reversed)
15927 /* Reversal of FP compares takes care -- an ordered compare
15928 becomes an unordered compare and vice versa. */
15929 if (mode == CCFPmode)
15930 code = reverse_condition_maybe_unordered (code);
15931 else
15932 code = reverse_condition (code);
15935 switch (code)
15937 /* Not all of these are actually distinct opcodes, but
15938 we distinguish them for clarity of the resulting assembler. */
15939 case NE: case LTGT:
15940 ccode = "ne"; break;
15941 case EQ: case UNEQ:
15942 ccode = "eq"; break;
15943 case GE: case GEU:
15944 ccode = "ge"; break;
15945 case GT: case GTU: case UNGT:
15946 ccode = "gt"; break;
15947 case LE: case LEU:
15948 ccode = "le"; break;
15949 case LT: case LTU: case UNLT:
15950 ccode = "lt"; break;
15951 case UNORDERED: ccode = "un"; break;
15952 case ORDERED: ccode = "nu"; break;
15953 case UNGE: ccode = "nl"; break;
15954 case UNLE: ccode = "ng"; break;
15955 default:
15956 gcc_unreachable ();
15959 /* Maybe we have a guess as to how likely the branch is. */
15960 pred = "";
15961 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15962 if (note != NULL_RTX)
15964 /* PROB is the difference from 50%. */
15965 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15966 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15968 /* Only hint for highly probable/improbable branches on newer cpus when
15969 we have real profile data, as static prediction overrides processor
15970 dynamic prediction. For older cpus we may as well always hint, but
15971 assume not taken for branches that are very close to 50% as a
15972 mispredicted taken branch is more expensive than a
15973 mispredicted not-taken branch. */
15974 if (rs6000_always_hint
15975 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15976 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15977 && br_prob_note_reliable_p (note)))
15979 if (abs (prob) > REG_BR_PROB_BASE / 20
15980 && ((prob > 0) ^ need_longbranch))
15981 pred = "+";
15982 else
15983 pred = "-";
15987 if (label == NULL)
15988 s += sprintf (s, "b%slr%s ", ccode, pred);
15989 else
15990 s += sprintf (s, "b%s%s ", ccode, pred);
15992 /* We need to escape any '%' characters in the reg_names string.
15993 Assume they'd only be the first character.... */
15994 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15995 *s++ = '%';
15996 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15998 if (label != NULL)
16000 /* If the branch distance was too far, we may have to use an
16001 unconditional branch to go the distance. */
16002 if (need_longbranch)
16003 s += sprintf (s, ",$+8\n\tb %s", label);
16004 else
16005 s += sprintf (s, ",%s", label);
16008 return string;
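/* A standalone sketch of the hint selection above (names are
   illustrative; REG_BR_PROB_BASE is 10000 in GCC).  PROB is the scaled
   distance from 50%, and NEED_LONGBRANCH flips the sense because the
   conditional branch then jumps around an unconditional one.  The
   outer gate (always_hint, the 48% band, note reliability) is
   elided.  */
#include <stdlib.h>

#define SKETCH_BR_PROB_BASE 10000

static const char *
branch_hint_sketch (int prob, int need_longbranch)
{
  if (abs (prob) > SKETCH_BR_PROB_BASE / 20	/* beyond ~55%/45% */
      && ((prob > 0) ^ need_longbranch))
    return "+";		/* predict taken */
  return "-";		/* predict not taken */
}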
16011 /* Return insn for VSX or Altivec comparisons. */
16013 static rtx
16014 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
16016 rtx mask;
16017 machine_mode mode = GET_MODE (op0);
16019 switch (code)
16021 default:
16022 break;
16024 case GE:
16025 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16026 return NULL_RTX;
16027 /* FALLTHRU */
16029 case EQ:
16030 case GT:
16031 case GTU:
16032 case ORDERED:
16033 case UNORDERED:
16034 case UNEQ:
16035 case LTGT:
16036 mask = gen_reg_rtx (mode);
16037 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
16038 return mask;
16041 return NULL_RTX;
16044 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
16045 DMODE is expected destination mode. This is a recursive function. */
16047 static rtx
16048 rs6000_emit_vector_compare (enum rtx_code rcode,
16049 rtx op0, rtx op1,
16050 machine_mode dmode)
16052 rtx mask;
16053 bool swap_operands = false;
16054 bool try_again = false;
16056 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
16057 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
16059 /* See if the comparison works as is. */
16060 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16061 if (mask)
16062 return mask;
16064 switch (rcode)
16066 case LT:
16067 rcode = GT;
16068 swap_operands = true;
16069 try_again = true;
16070 break;
16071 case LTU:
16072 rcode = GTU;
16073 swap_operands = true;
16074 try_again = true;
16075 break;
16076 case NE:
16077 case UNLE:
16078 case UNLT:
16079 case UNGE:
16080 case UNGT:
16081 /* Invert condition and try again.
16082 e.g., A != B becomes ~(A==B). */
16084 enum rtx_code rev_code;
16085 enum insn_code nor_code;
16086 rtx mask2;
16088 rev_code = reverse_condition_maybe_unordered (rcode);
16089 if (rev_code == UNKNOWN)
16090 return NULL_RTX;
16092 nor_code = optab_handler (one_cmpl_optab, dmode);
16093 if (nor_code == CODE_FOR_nothing)
16094 return NULL_RTX;
16096 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
16097 if (!mask2)
16098 return NULL_RTX;
16100 mask = gen_reg_rtx (dmode);
16101 emit_insn (GEN_FCN (nor_code) (mask, mask2));
16102 return mask;
16104 break;
16105 case GE:
16106 case GEU:
16107 case LE:
16108 case LEU:
16109 /* Try GT/GTU/LT/LTU OR EQ */
16111 rtx c_rtx, eq_rtx;
16112 enum insn_code ior_code;
16113 enum rtx_code new_code;
16115 switch (rcode)
16117 case GE:
16118 new_code = GT;
16119 break;
16121 case GEU:
16122 new_code = GTU;
16123 break;
16125 case LE:
16126 new_code = LT;
16127 break;
16129 case LEU:
16130 new_code = LTU;
16131 break;
16133 default:
16134 gcc_unreachable ();
16137 ior_code = optab_handler (ior_optab, dmode);
16138 if (ior_code == CODE_FOR_nothing)
16139 return NULL_RTX;
16141 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
16142 if (!c_rtx)
16143 return NULL_RTX;
16145 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
16146 if (!eq_rtx)
16147 return NULL_RTX;
16149 mask = gen_reg_rtx (dmode);
16150 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
16151 return mask;
16153 break;
16154 default:
16155 return NULL_RTX;
16158 if (try_again)
16160 if (swap_operands)
16161 std::swap (op0, op1);
16163 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16164 if (mask)
16165 return mask;
16168 /* You only get two chances. */
16169 return NULL_RTX;
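/* An element-wise sketch of the GE/GEU/LE/LEU fallback above: when
   there is no direct "a >= b" vector compare, OR together the strict
   compare and the equality compare, lane by lane.  */
static void
v_ge_sketch (const int *a, const int *b, int *mask, int n)
{
  for (int i = 0; i < n; i++)
    mask[i] = (a[i] > b[i] ? -1 : 0) | (a[i] == b[i] ? -1 : 0);
}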
16172 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16173 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16174 operands for the relation operation COND. */
16176 static int
16177 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
16178 rtx cond, rtx cc_op0, rtx cc_op1)
16180 machine_mode dest_mode = GET_MODE (dest);
16181 machine_mode mask_mode = GET_MODE (cc_op0);
16182 enum rtx_code rcode = GET_CODE (cond);
16183 rtx mask;
16184 bool invert_move = false;
16186 if (VECTOR_UNIT_NONE_P (dest_mode))
16187 return 0;
16189 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
16190 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
16192 switch (rcode)
16194 /* Swap operands if we can; otherwise do the operation as
16195 specified and use a NOR to invert the test. */
16196 case NE:
16197 case UNLE:
16198 case UNLT:
16199 case UNGE:
16200 case UNGT:
16201 /* Invert condition and try again.
16202 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16203 invert_move = true;
16204 rcode = reverse_condition_maybe_unordered (rcode);
16205 if (rcode == UNKNOWN)
16206 return 0;
16207 break;
16209 case GE:
16210 case LE:
16211 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
16213 /* Invert condition to avoid compound test. */
16214 invert_move = true;
16215 rcode = reverse_condition (rcode);
16217 break;
16219 case GTU:
16220 case GEU:
16221 case LTU:
16222 case LEU:
16224 /* Invert condition to avoid compound test if necessary. */
16225 if (rcode == GEU || rcode == LEU)
16227 invert_move = true;
16228 rcode = reverse_condition (rcode);
16230 break;
16232 default:
16233 break;
16236 /* Get the vector mask for the given relational operations. */
16237 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
16239 if (!mask)
16240 return 0;
16242 if (mask_mode != dest_mode)
16243 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
16245 if (invert_move)
16246 std::swap (op_true, op_false);
16248 /* Optimize vec1 == vec2, where we know the mask generates -1/0. */
16249 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
16250 && (GET_CODE (op_true) == CONST_VECTOR
16251 || GET_CODE (op_false) == CONST_VECTOR))
16253 rtx constant_0 = CONST0_RTX (dest_mode);
16254 rtx constant_m1 = CONSTM1_RTX (dest_mode);
16256 if (op_true == constant_m1 && op_false == constant_0)
16258 emit_move_insn (dest, mask);
16259 return 1;
16262 else if (op_true == constant_0 && op_false == constant_m1)
16264 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
16265 return 1;
16268 /* If we can't use the vector comparison directly, perhaps we can use
16269 the mask for the true or false fields, instead of loading up a
16270 constant. */
16271 if (op_true == constant_m1)
16272 op_true = mask;
16274 if (op_false == constant_0)
16275 op_false = mask;
16278 if (!REG_P (op_true) && !SUBREG_P (op_true))
16279 op_true = force_reg (dest_mode, op_true);
16281 if (!REG_P (op_false) && !SUBREG_P (op_false))
16282 op_false = force_reg (dest_mode, op_false);
16284 rtx tmp = gen_rtx_IOR (dest_mode,
16285 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
16286 op_false),
16287 gen_rtx_AND (dest_mode, mask, op_true));
16288 emit_insn (gen_rtx_SET (dest, tmp));
16289 return 1;
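/* The final select above, element-wise: every lane of MASK is all-ones
   or all-zeros, so (mask & op_true) | (~mask & op_false) is the vector
   conditional move -- the operation xxsel/vsel implement.  */
static void
v_select_sketch (const int *mask, const int *t, const int *f,
		 int *dest, int n)
{
  for (int i = 0; i < n; i++)
    dest[i] = (mask[i] & t[i]) | (~mask[i] & f[i]);
}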
16292 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16293 maximum or minimum with "C" semantics.
16295 Unless you use -ffast-math, you can't use these instructions for forms
16296 that implicitly reverse the condition, because the comparison
16297 might involve a NaN or a signed zero.
16299 I.e. the following can be replaced all of the time
16300 ret = (op1 > op2) ? op1 : op2;  ; generate xsmaxcdp
16301 ret = (op1 >= op2) ? op1 : op2; ; generate xsmaxcdp
16302 ret = (op1 < op2) ? op1 : op2;  ; generate xsmincdp
16303 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16305 The following can be replaced only if -ffast-math is used:
16306 ret = (op1 < op2) ? op2 : op1;  ; generate xsmaxcdp
16307 ret = (op1 <= op2) ? op2 : op1; ; generate xsmaxcdp
16308 ret = (op1 > op2) ? op2 : op1;  ; generate xsmincdp
16309 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16311 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
16312 is nonzero/true, FALSE_COND if it is zero/false.
16314 Return false if we can't generate the appropriate minimum or maximum, and
16315 true if we did emit the minimum or maximum. */
16317 static bool
16318 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16320 enum rtx_code code = GET_CODE (op);
16321 rtx op0 = XEXP (op, 0);
16322 rtx op1 = XEXP (op, 1);
16323 machine_mode compare_mode = GET_MODE (op0);
16324 machine_mode result_mode = GET_MODE (dest);
16326 if (result_mode != compare_mode)
16327 return false;
16329 /* As the comments above note, the checks below expect GE/GT/LE/LT,
16330 so for the reversible equivalents UNLT/UNLE/UNGT/UNGE
16331 we do the reversion first so that the following checks have
16332 fewer cases to support, like:
16334 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16335 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16336 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16337 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16339 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16340 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16341 have to check for fast-math or the like. */
16342 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16344 code = reverse_condition_maybe_unordered (code);
16345 std::swap (true_cond, false_cond);
16348 bool max_p;
16349 if (code == GE || code == GT)
16350 max_p = true;
16351 else if (code == LE || code == LT)
16352 max_p = false;
16353 else
16354 return false;
16356 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16359 /* Only when NaNs and signed zeros are not honored can smax be
16360 used for `op0 < op1 ? op1 : op0`, and smin for
16361 `op0 > op1 ? op1 : op0`. */
16362 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16363 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16364 max_p = !max_p;
16366 else
16367 return false;
16369 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16370 return true;
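/* Why the reversed forms need -ffast-math, as a plain-C sketch: both
   functions below compute "max" for ordered inputs, but when A is a
   NaN the first returns B while the second returns A, so only the
   first matches the "C" semantics of xsmaxcdp unconditionally.  */
static double max_c_sketch (double a, double b)   { return a > b ? a : b; }
static double max_rev_sketch (double a, double b) { return a < b ? b : a; }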
16373 /* Possibly emit a floating point conditional move by generating a compare that
16374 sets a mask instruction and a XXSEL select instruction.
16376 Move TRUE_COND to DEST if OP applied to the operands of the last
16377 comparison is nonzero/true, FALSE_COND if it is zero/false.
16379 Return false if the operation cannot be generated, and true if we could
16380 generate the instruction. */
16382 static bool
16383 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16385 enum rtx_code code = GET_CODE (op);
16386 rtx op0 = XEXP (op, 0);
16387 rtx op1 = XEXP (op, 1);
16388 machine_mode compare_mode = GET_MODE (op0);
16389 machine_mode result_mode = GET_MODE (dest);
16390 rtx compare_rtx;
16391 rtx cmove_rtx;
16392 rtx clobber_rtx;
16394 if (!can_create_pseudo_p ())
16395 return false;
16397 /* We allow the comparison to be either SFmode/DFmode and the true/false
16398 condition to be either SFmode/DFmode. I.e. we allow:
16400 float a, b;
16401 double c, d, r;
16403 r = (a == b) ? c : d;
16405 and:
16407 double a, b;
16408 float c, d, r;
16410 r = (a == b) ? c : d;
16412 but we don't allow intermixing the IEEE 128-bit floating point types with
16413 the 32/64-bit scalar types. */
16415 if (!(compare_mode == result_mode
16416 || (compare_mode == SFmode && result_mode == DFmode)
16417 || (compare_mode == DFmode && result_mode == SFmode)))
16418 return false;
16420 switch (code)
16422 case EQ:
16423 case GE:
16424 case GT:
16425 break;
16427 case NE:
16428 case LT:
16429 case LE:
16430 code = swap_condition (code);
16431 std::swap (op0, op1);
16432 break;
16434 default:
16435 return false;
16438 /* Generate: [(parallel [(set (dest)
16439 (if_then_else (op (cmp1) (cmp2))
16440 (true)
16441 (false)))
16442 (clobber (scratch))])]. */
16444 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16445 cmove_rtx = gen_rtx_SET (dest,
16446 gen_rtx_IF_THEN_ELSE (result_mode,
16447 compare_rtx,
16448 true_cond,
16449 false_cond));
16451 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16452 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16453 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16455 return true;
16458 /* Helper function to return true if the target has a compare-and-set-mask
16459 instruction that can be used with XXSEL to implement a
16460 conditional move. It is assumed that such a target also supports the
16461 "C" minimum and maximum instructions. */
16463 static bool
16464 have_compare_and_set_mask (machine_mode mode)
16466 switch (mode)
16468 case E_SFmode:
16469 case E_DFmode:
16470 return TARGET_P9_MINMAX;
16472 case E_KFmode:
16473 case E_TFmode:
16474 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16476 default:
16477 break;
16480 return false;
16483 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
16484 operands of the last comparison is nonzero/true, FALSE_COND if it
16485 is zero/false. Return false if the hardware has no such operation. */
16487 bool
16488 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16490 enum rtx_code code = GET_CODE (op);
16491 rtx op0 = XEXP (op, 0);
16492 rtx op1 = XEXP (op, 1);
16493 machine_mode compare_mode = GET_MODE (op0);
16494 machine_mode result_mode = GET_MODE (dest);
16495 rtx temp;
16496 bool is_against_zero;
16498 /* These modes should always match. */
16499 if (GET_MODE (op1) != compare_mode
16500 /* In the isel case however, we can use a compare immediate, so
16501 op1 may be a small constant. */
16502 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16503 return false;
16504 if (GET_MODE (true_cond) != result_mode)
16505 return false;
16506 if (GET_MODE (false_cond) != result_mode)
16507 return false;
16509 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16510 instructions. */
16511 if (have_compare_and_set_mask (compare_mode)
16512 && have_compare_and_set_mask (result_mode))
16514 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16515 return true;
16517 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16518 return true;
16521 /* Don't allow using floating point comparisons for integer results for
16522 now. */
16523 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16524 return false;
16526 /* First, work out if the hardware can do this at all, or
16527 if it's too slow.... */
16528 if (!FLOAT_MODE_P (compare_mode))
16530 if (TARGET_ISEL)
16531 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16532 return false;
16535 is_against_zero = op1 == CONST0_RTX (compare_mode);
16537 /* A floating-point subtract might overflow, underflow, or produce
16538 an inexact result, thus changing the floating-point flags, so it
16539 can't be generated if we care about that. It's safe if one side
16540 of the construct is zero, since then no subtract will be
16541 generated. */
16542 if (SCALAR_FLOAT_MODE_P (compare_mode)
16543 && flag_trapping_math && ! is_against_zero)
16544 return false;
16546 /* Eliminate half of the comparisons by switching operands; this
16547 makes the remaining code simpler. */
16548 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16549 || code == LTGT || code == LT || code == UNLE)
16551 code = reverse_condition_maybe_unordered (code);
16552 temp = true_cond;
16553 true_cond = false_cond;
16554 false_cond = temp;
16557 /* UNEQ and LTGT take four instructions for a comparison with zero,
16558 so it'll probably be faster to use a branch here too. */
16559 if (code == UNEQ && HONOR_NANS (compare_mode))
16560 return false;
16562 /* We're going to try to implement comparisons by performing
16563 a subtract, then comparing against zero. Unfortunately,
16564 Inf - Inf is NaN which is not zero, and so if we don't
16565 know that the operand is finite and the comparison
16566 would treat EQ differently from UNORDERED, we can't do it. */
16567 if (HONOR_INFINITIES (compare_mode)
16568 && code != GT && code != UNGE
16569 && (!CONST_DOUBLE_P (op1)
16570 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16571 /* Constructs of the form (a OP b ? a : b) are safe. */
16572 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16573 || (! rtx_equal_p (op0, true_cond)
16574 && ! rtx_equal_p (op1, true_cond))))
16575 return false;
16577 /* At this point we know we can use fsel. */
16579 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16580 is no fsel instruction. */
16581 if (compare_mode != SFmode && compare_mode != DFmode)
16582 return false;
16584 /* Reduce the comparison to a comparison against zero. */
16585 if (! is_against_zero)
16587 temp = gen_reg_rtx (compare_mode);
16588 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16589 op0 = temp;
16590 op1 = CONST0_RTX (compare_mode);
16593 /* If we don't care about NaNs we can reduce some of the comparisons
16594 down to faster ones. */
16595 if (! HONOR_NANS (compare_mode))
16596 switch (code)
16598 case GT:
16599 code = LE;
16600 temp = true_cond;
16601 true_cond = false_cond;
16602 false_cond = temp;
16603 break;
16604 case UNGE:
16605 code = GE;
16606 break;
16607 case UNEQ:
16608 code = EQ;
16609 break;
16610 default:
16611 break;
16614 /* Now, reduce everything down to a GE. */
16615 switch (code)
16617 case GE:
16618 break;
16620 case LE:
16621 temp = gen_reg_rtx (compare_mode);
16622 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16623 op0 = temp;
16624 break;
16626 case ORDERED:
16627 temp = gen_reg_rtx (compare_mode);
16628 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16629 op0 = temp;
16630 break;
16632 case EQ:
16633 temp = gen_reg_rtx (compare_mode);
16634 emit_insn (gen_rtx_SET (temp,
16635 gen_rtx_NEG (compare_mode,
16636 gen_rtx_ABS (compare_mode, op0))));
16637 op0 = temp;
16638 break;
16640 case UNGE:
16641 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16642 temp = gen_reg_rtx (result_mode);
16643 emit_insn (gen_rtx_SET (temp,
16644 gen_rtx_IF_THEN_ELSE (result_mode,
16645 gen_rtx_GE (VOIDmode,
16646 op0, op1),
16647 true_cond, false_cond)));
16648 false_cond = true_cond;
16649 true_cond = temp;
16651 temp = gen_reg_rtx (compare_mode);
16652 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16653 op0 = temp;
16654 break;
16656 case GT:
16657 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16658 temp = gen_reg_rtx (result_mode);
16659 emit_insn (gen_rtx_SET (temp,
16660 gen_rtx_IF_THEN_ELSE (result_mode,
16661 gen_rtx_GE (VOIDmode,
16662 op0, op1),
16663 true_cond, false_cond)));
16664 true_cond = false_cond;
16665 false_cond = temp;
16667 temp = gen_reg_rtx (compare_mode);
16668 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16669 op0 = temp;
16670 break;
16672 default:
16673 gcc_unreachable ();
16676 emit_insn (gen_rtx_SET (dest,
16677 gen_rtx_IF_THEN_ELSE (result_mode,
16678 gen_rtx_GE (VOIDmode,
16679 op0, op1),
16680 true_cond, false_cond)));
16681 return true;
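/* An illustrative sketch (not from the original source): fsel computes
   FRT = (FRA >= 0.0) ? FRC : FRB, so after the LE reduction above a
   DFmode "d = (a <= 0.0) ? t : f" might expand to roughly

       fneg  f0,f1        # op0 = -a, turning the LE into a GE
       fsel  f2,f0,f3,f4  # f2 = (f0 >= 0.0) ? t : f

   Register numbers here are invented; the actual operands depend on
   which of the reductions above fired.  */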
16684 /* Same as above, but for ints (isel). */
16686 bool
16687 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16689 rtx condition_rtx, cr;
16690 machine_mode mode = GET_MODE (dest);
16691 enum rtx_code cond_code;
16692 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16693 bool signedp;
16695 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16696 return false;
16698 /* PR104335: We now need to expect CC-mode "comparisons"
16699 coming from ifcvt. The following code expects proper
16700 comparisons, so we had better bail out here. */
16701 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16702 return false;
16704 /* We still have to do the compare, because isel doesn't do a
16705 compare; it just looks at the CRx bits set by a previous compare
16706 instruction. */
16707 condition_rtx = rs6000_generate_compare (op, mode);
16708 cond_code = GET_CODE (condition_rtx);
16709 cr = XEXP (condition_rtx, 0);
16710 signedp = GET_MODE (cr) == CCmode;
16712 isel_func = (mode == SImode
16713 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16714 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16716 switch (cond_code)
16718 case LT: case GT: case LTU: case GTU: case EQ:
16719 /* isel handles these directly. */
16720 break;
16722 default:
16723 /* We need to swap the sense of the comparison. */
16725 std::swap (false_cond, true_cond);
16726 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16728 break;
16731 false_cond = force_reg (mode, false_cond);
16732 if (true_cond != const0_rtx)
16733 true_cond = force_reg (mode, true_cond);
16735 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16737 return true;
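/* A rough sketch of what the isel path produces for a signed SImode
   "r = (a < b) ? x : y" (register numbers invented):

       cmpw  cr0,r3,r4    # emitted by rs6000_generate_compare
       isel  r5,r6,r7,0   # r5 = cr0.lt ? r6 : r7

   isel never does the compare itself, which is why the code above
   always generates one first.  */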
16740 void
16741 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16743 machine_mode mode = GET_MODE (op0);
16744 enum rtx_code c;
16745 rtx target;
16747 /* VSX/altivec have direct min/max insns. */
16748 if ((code == SMAX || code == SMIN)
16749 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16750 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16751 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16753 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16754 return;
16757 if (code == SMAX || code == SMIN)
16758 c = GE;
16759 else
16760 c = GEU;
16762 if (code == SMAX || code == UMAX)
16763 target = emit_conditional_move (dest, { c, op0, op1, mode },
16764 op0, op1, mode, 0);
16765 else
16766 target = emit_conditional_move (dest, { c, op0, op1, mode },
16767 op1, op0, mode, 0);
16768 gcc_assert (target);
16769 if (target != dest)
16770 emit_move_insn (dest, target);
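/* For example, without a direct min/max instruction an SMAX is emitted
   here as the conditional move "dest = (op0 >= op1) ? op0 : op1", which
   the cmove expanders above then implement with fsel or isel where
   available.  */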
16773 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16774 COND is true. Mark the jump as unlikely to be taken. */
16776 static void
16777 emit_unlikely_jump (rtx cond, rtx label)
16779 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16780 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16781 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16784 /* A subroutine of the atomic operation splitters. Emit a load-locked
16785 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16786 the zero_extend operation. */
16788 static void
16789 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16791 rtx (*fn) (rtx, rtx) = NULL;
16793 switch (mode)
16795 case E_QImode:
16796 fn = gen_load_lockedqi;
16797 break;
16798 case E_HImode:
16799 fn = gen_load_lockedhi;
16800 break;
16801 case E_SImode:
16802 if (GET_MODE (mem) == QImode)
16803 fn = gen_load_lockedqi_si;
16804 else if (GET_MODE (mem) == HImode)
16805 fn = gen_load_lockedhi_si;
16806 else
16807 fn = gen_load_lockedsi;
16808 break;
16809 case E_DImode:
16810 fn = gen_load_lockeddi;
16811 break;
16812 case E_TImode:
16813 fn = gen_load_lockedti;
16814 break;
16815 default:
16816 gcc_unreachable ();
16818 emit_insn (fn (reg, mem));
16821 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16822 instruction in MODE. */
16824 static void
16825 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16827 rtx (*fn) (rtx, rtx, rtx) = NULL;
16829 switch (mode)
16831 case E_QImode:
16832 fn = gen_store_conditionalqi;
16833 break;
16834 case E_HImode:
16835 fn = gen_store_conditionalhi;
16836 break;
16837 case E_SImode:
16838 fn = gen_store_conditionalsi;
16839 break;
16840 case E_DImode:
16841 fn = gen_store_conditionaldi;
16842 break;
16843 case E_TImode:
16844 fn = gen_store_conditionalti;
16845 break;
16846 default:
16847 gcc_unreachable ();
16850 /* Emit sync before stwcx. to address PPC405 Erratum. */
16851 if (PPC405_ERRATUM77)
16852 emit_insn (gen_hwsync ());
16854 emit_insn (fn (res, mem, val));
16857 /* Expand barriers before and after a load_locked/store_cond sequence. */
16859 static rtx
16860 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16862 rtx addr = XEXP (mem, 0);
16864 if (!legitimate_indirect_address_p (addr, reload_completed)
16865 && !legitimate_indexed_address_p (addr, reload_completed))
16867 addr = force_reg (Pmode, addr);
16868 mem = replace_equiv_address_nv (mem, addr);
16871 switch (model)
16873 case MEMMODEL_RELAXED:
16874 case MEMMODEL_CONSUME:
16875 case MEMMODEL_ACQUIRE:
16876 break;
16877 case MEMMODEL_RELEASE:
16878 case MEMMODEL_ACQ_REL:
16879 emit_insn (gen_lwsync ());
16880 break;
16881 case MEMMODEL_SEQ_CST:
16882 emit_insn (gen_hwsync ());
16883 break;
16884 default:
16885 gcc_unreachable ();
16887 return mem;
16890 static void
16891 rs6000_post_atomic_barrier (enum memmodel model)
16893 switch (model)
16895 case MEMMODEL_RELAXED:
16896 case MEMMODEL_CONSUME:
16897 case MEMMODEL_RELEASE:
16898 break;
16899 case MEMMODEL_ACQUIRE:
16900 case MEMMODEL_ACQ_REL:
16901 case MEMMODEL_SEQ_CST:
16902 emit_insn (gen_isync ());
16903 break;
16904 default:
16905 gcc_unreachable ();
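/* Taken together, the two helpers above implement the usual PowerPC
   mapping; e.g. a seq-cst atomic read-modify-write is bracketed as

       hwsync                  # rs6000_pre_atomic_barrier
       ... larx/stcx. loop ...
       isync                   # rs6000_post_atomic_barrier

   while a relaxed operation emits no barriers at all.  */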
16909 /* A subroutine of the various atomic expanders. For sub-word operations,
16910 we must adjust things to operate on SImode. Given the original MEM,
16911 return a new aligned memory. Also build and return the quantities by
16912 which to shift and mask. */
16914 static rtx
16915 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16917 rtx addr, align, shift, mask, mem;
16918 HOST_WIDE_INT shift_mask;
16919 machine_mode mode = GET_MODE (orig_mem);
16921 /* For smaller modes, we have to implement this via SImode. */
16922 shift_mask = (mode == QImode ? 0x18 : 0x10);
16924 addr = XEXP (orig_mem, 0);
16925 addr = force_reg (GET_MODE (addr), addr);
16927 /* Aligned memory containing subword. Generate a new memory. We
16928 do not want any of the existing MEM_ATTR data, as we're now
16929 accessing memory outside the original object. */
16930 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16931 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16932 mem = gen_rtx_MEM (SImode, align);
16933 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16934 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16935 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16937 /* Shift amount for subword relative to aligned word. */
16938 shift = gen_reg_rtx (SImode);
16939 addr = gen_lowpart (SImode, addr);
16940 rtx tmp = gen_reg_rtx (SImode);
16941 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16942 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16943 if (BYTES_BIG_ENDIAN)
16944 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16945 shift, 1, OPTAB_LIB_WIDEN);
16946 *pshift = shift;
16948 /* Mask for insertion. */
16949 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16950 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16951 *pmask = mask;
16953 return mem;
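/* Worked example (little-endian): for a QImode access at address A the
   aligned word is A & -4, the shift is (A & 3) * 8 -- computed above as
   (A << 3) & 0x18 -- and the mask is 0xff << shift.  On big-endian the
   shift is XORed with 0x18 so that byte 0 lands in the most significant
   byte of the word.  */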
16956 /* A subroutine of the various atomic expanders. For sub-word operands,
16957 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16959 static rtx
16960 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16962 rtx x;
16964 x = gen_reg_rtx (SImode);
16965 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16966 gen_rtx_NOT (SImode, mask),
16967 oldval)));
16969 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16971 return x;
16974 /* A subroutine of the various atomic expanders. For sub-word operands,
16975 extract WIDE to NARROW via SHIFT. */
16977 static void
16978 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16980 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16981 wide, 1, OPTAB_LIB_WIDEN);
16982 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16985 /* Expand an atomic compare and swap operation. */
16987 void
16988 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16990 rtx boolval, retval, mem, oldval, newval, cond;
16991 rtx label1, label2, x, mask, shift;
16992 machine_mode mode, orig_mode;
16993 enum memmodel mod_s, mod_f;
16994 bool is_weak;
16996 boolval = operands[0];
16997 retval = operands[1];
16998 mem = operands[2];
16999 oldval = operands[3];
17000 newval = operands[4];
17001 is_weak = (INTVAL (operands[5]) != 0);
17002 mod_s = memmodel_base (INTVAL (operands[6]));
17003 mod_f = memmodel_base (INTVAL (operands[7]));
17004 orig_mode = mode = GET_MODE (mem);
17006 mask = shift = NULL_RTX;
17007 if (mode == QImode || mode == HImode)
17009 /* Before power8, we didn't have access to lbarx/lharx, so generate a
17010 lwarx and shift/mask operations. With power8, we need to do the
17011 comparison in SImode, but the store is still done in QI/HImode. */
17012 oldval = convert_modes (SImode, mode, oldval, 1);
17014 if (!TARGET_SYNC_HI_QI)
17016 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17018 /* Shift and mask OLDVAL into position within the word. */
17019 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
17020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17022 /* Shift and mask NEWVAL into position within the word. */
17023 newval = convert_modes (SImode, mode, newval, 1);
17024 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
17025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17028 /* Prepare to adjust the return value. */
17029 retval = gen_reg_rtx (SImode);
17030 mode = SImode;
17032 else if (reg_overlap_mentioned_p (retval, oldval))
17033 oldval = copy_to_reg (oldval);
17035 if (mode != TImode && !reg_or_short_operand (oldval, mode))
17036 oldval = copy_to_mode_reg (mode, oldval);
17038 if (reg_overlap_mentioned_p (retval, newval))
17039 newval = copy_to_reg (newval);
17041 mem = rs6000_pre_atomic_barrier (mem, mod_s);
17043 label1 = NULL_RTX;
17044 if (!is_weak)
17046 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17047 emit_label (XEXP (label1, 0));
17049 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17051 emit_load_locked (mode, retval, mem);
17053 x = retval;
17054 if (mask)
17055 x = expand_simple_binop (SImode, AND, retval, mask,
17056 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17058 cond = gen_reg_rtx (CCmode);
17059 /* If we have TImode, synthesize a comparison. */
17060 if (mode != TImode)
17061 x = gen_rtx_COMPARE (CCmode, x, oldval);
17062 else
17064 rtx xor1_result = gen_reg_rtx (DImode);
17065 rtx xor2_result = gen_reg_rtx (DImode);
17066 rtx or_result = gen_reg_rtx (DImode);
17067 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
17068 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
17069 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
17070 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
17072 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
17073 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
17074 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
17075 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
17078 emit_insn (gen_rtx_SET (cond, x));
17080 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17081 emit_unlikely_jump (x, label2);
17083 x = newval;
17084 if (mask)
17085 x = rs6000_mask_atomic_subword (retval, newval, mask);
17087 emit_store_conditional (orig_mode, cond, mem, x);
17089 if (!is_weak)
17091 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17092 emit_unlikely_jump (x, label1);
17095 if (!is_mm_relaxed (mod_f))
17096 emit_label (XEXP (label2, 0));
17098 rs6000_post_atomic_barrier (mod_s);
17100 if (is_mm_relaxed (mod_f))
17101 emit_label (XEXP (label2, 0));
17103 if (shift)
17104 rs6000_finish_atomic_subword (operands[1], retval, shift);
17105 else if (mode != GET_MODE (operands[1]))
17106 convert_move (operands[1], retval, 1);
17108 /* In all cases, CR0 contains EQ on success, and NE on failure. */
17109 x = gen_rtx_EQ (SImode, cond, const0_rtx);
17110 emit_insn (gen_rtx_SET (boolval, x));
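/* An illustrative sketch (register numbers invented) of the SImode
   sequence this expander builds for a strong seq-cst compare-and-swap:

       hwsync
    1: lwarx  r9,0,r3     # load-locked the current value
       cmpw   cr0,r9,r4   # compare against OLDVAL
       bne-   cr0,2f      # values differ: fail
       stwcx. r5,0,r3     # try to store NEWVAL
       bne-   cr0,1b      # lost the reservation: retry
    2: isync

   CR0 holds EQ on success and NE on failure, which the final SET of
   BOOLVAL above exposes to the caller.  */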
17113 /* Expand an atomic exchange operation. */
17115 void
17116 rs6000_expand_atomic_exchange (rtx operands[])
17118 rtx retval, mem, val, cond;
17119 machine_mode mode;
17120 enum memmodel model;
17121 rtx label, x, mask, shift;
17123 retval = operands[0];
17124 mem = operands[1];
17125 val = operands[2];
17126 model = memmodel_base (INTVAL (operands[3]));
17127 mode = GET_MODE (mem);
17129 mask = shift = NULL_RTX;
17130 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
17132 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17134 /* Shift and mask VAL into position within the word. */
17135 val = convert_modes (SImode, mode, val, 1);
17136 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17137 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17139 /* Prepare to adjust the return value. */
17140 retval = gen_reg_rtx (SImode);
17141 mode = SImode;
17144 mem = rs6000_pre_atomic_barrier (mem, model);
17146 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17147 emit_label (XEXP (label, 0));
17149 emit_load_locked (mode, retval, mem);
17151 x = val;
17152 if (mask)
17153 x = rs6000_mask_atomic_subword (retval, val, mask);
17155 cond = gen_reg_rtx (CCmode);
17156 emit_store_conditional (mode, cond, mem, x);
17158 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17159 emit_unlikely_jump (x, label);
17161 rs6000_post_atomic_barrier (model);
17163 if (shift)
17164 rs6000_finish_atomic_subword (operands[0], retval, shift);
17167 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17168 to perform. MEM is the memory on which to operate. VAL is the second
17169 operand of the binary operator. BEFORE and AFTER are optional locations to
17170 return the value of MEM either before or after the operation. MODEL_RTX
17171 is a CONST_INT containing the memory model to use. */
17173 void
17174 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
17175 rtx orig_before, rtx orig_after, rtx model_rtx)
17177 enum memmodel model = memmodel_base (INTVAL (model_rtx));
17178 machine_mode mode = GET_MODE (mem);
17179 machine_mode store_mode = mode;
17180 rtx label, x, cond, mask, shift;
17181 rtx before = orig_before, after = orig_after;
17183 mask = shift = NULL_RTX;
17184 /* On power8, we want to use SImode for the operation. On previous systems,
17185 use the operation in a subword and shift/mask to get the proper byte or
17186 halfword. */
17187 if (mode == QImode || mode == HImode)
17189 if (TARGET_SYNC_HI_QI)
17191 val = convert_modes (SImode, mode, val, 1);
17193 /* Prepare to adjust the return value. */
17194 before = gen_reg_rtx (SImode);
17195 if (after)
17196 after = gen_reg_rtx (SImode);
17197 mode = SImode;
17199 else
17201 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17203 /* Shift and mask VAL into position within the word. */
17204 val = convert_modes (SImode, mode, val, 1);
17205 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17206 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17208 switch (code)
17210 case IOR:
17211 case XOR:
17212 /* We've already zero-extended VAL. That is sufficient to
17213 make certain that it does not affect other bits. */
17214 mask = NULL;
17215 break;
17217 case AND:
17218 /* If we make certain that all of the other bits in VAL are
17219 set, that will be sufficient to not affect other bits. */
17220 x = gen_rtx_NOT (SImode, mask);
17221 x = gen_rtx_IOR (SImode, x, val);
17222 emit_insn (gen_rtx_SET (val, x));
17223 mask = NULL;
17224 break;
17226 case NOT:
17227 case PLUS:
17228 case MINUS:
17229 /* These will all affect bits outside the field and need
17230 adjustment via MASK within the loop. */
17231 break;
17233 default:
17234 gcc_unreachable ();
17237 /* Prepare to adjust the return value. */
17238 before = gen_reg_rtx (SImode);
17239 if (after)
17240 after = gen_reg_rtx (SImode);
17241 store_mode = mode = SImode;
17245 mem = rs6000_pre_atomic_barrier (mem, model);
17247 label = gen_label_rtx ();
17248 emit_label (label);
17249 label = gen_rtx_LABEL_REF (VOIDmode, label);
17251 if (before == NULL_RTX)
17252 before = gen_reg_rtx (mode);
17254 emit_load_locked (mode, before, mem);
17256 if (code == NOT)
17258 x = expand_simple_binop (mode, AND, before, val,
17259 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17260 after = expand_simple_unop (mode, NOT, x, after, 1);
17262 else
17264 after = expand_simple_binop (mode, code, before, val,
17265 after, 1, OPTAB_LIB_WIDEN);
17268 x = after;
17269 if (mask)
17271 x = expand_simple_binop (SImode, AND, after, mask,
17272 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17273 x = rs6000_mask_atomic_subword (before, x, mask);
17275 else if (store_mode != mode)
17276 x = convert_modes (store_mode, mode, x, 1);
17278 cond = gen_reg_rtx (CCmode);
17279 emit_store_conditional (store_mode, cond, mem, x);
17281 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17282 emit_unlikely_jump (x, label);
17284 rs6000_post_atomic_barrier (model);
17286 if (shift)
17288 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17289 then do the calculations in a SImode register. */
17290 if (orig_before)
17291 rs6000_finish_atomic_subword (orig_before, before, shift);
17292 if (orig_after)
17293 rs6000_finish_atomic_subword (orig_after, after, shift);
17295 else if (store_mode != mode)
17297 /* QImode/HImode on machines with lbarx/lharx where we do the native
17298 operation and then do the calculations in a SImode register. */
17299 if (orig_before)
17300 convert_move (orig_before, before, 1);
17301 if (orig_after)
17302 convert_move (orig_after, after, 1);
17304 else if (orig_after && after != orig_after)
17305 emit_move_insn (orig_after, after);
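/* A rough sketch of the loop built above for a full-word seq-cst
   atomic fetch-and-add (register numbers invented):

       hwsync
    1: lwarx  r9,0,r3     # BEFORE = *mem
       add    r10,r9,r4   # AFTER = BEFORE + val
       stwcx. r10,0,r3
       bne-   cr0,1b
       isync

   For the sub-word cases the extra mask/insert steps computed above are
   spliced into the loop body between the lwarx and the stwcx.  */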
17308 static GTY(()) alias_set_type TOC_alias_set = -1;
17310 alias_set_type
17311 get_TOC_alias_set (void)
17313 if (TOC_alias_set == -1)
17314 TOC_alias_set = new_alias_set ();
17315 return TOC_alias_set;
17318 /* The mode the ABI uses for a word. This is not the same as word_mode
17319 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17321 static scalar_int_mode
17322 rs6000_abi_word_mode (void)
17324 return TARGET_32BIT ? SImode : DImode;
17327 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17328 static char *
17329 rs6000_offload_options (void)
17331 if (TARGET_64BIT)
17332 return xstrdup ("-foffload-abi=lp64");
17333 else
17334 return xstrdup ("-foffload-abi=ilp32");
17338 /* A quick summary of the various types of 'constant-pool tables'
17339 under PowerPC:
17341 Target      Flags                Name             One table per
17342 AIX         (none)               AIX TOC          object file
17343 AIX         -mfull-toc           AIX TOC          object file
17344 AIX         -mminimal-toc        AIX minimal TOC  translation unit
17345 SVR4/EABI   (none)               SVR4 SDATA       object file
17346 SVR4/EABI   -fpic                SVR4 pic         object file
17347 SVR4/EABI   -fPIC                SVR4 PIC         translation unit
17348 SVR4/EABI   -mrelocatable        EABI TOC         function
17349 SVR4/EABI   -maix                AIX TOC          object file
17350 SVR4/EABI   -maix -mminimal-toc  AIX minimal TOC  translation unit
17353 Name             Reg.  Set by  entries  contains:
17354                                made by  addrs?  fp?      sum?
17356 AIX TOC            2   crt0    as       Y       option   option
17357 AIX minimal TOC   30   prolog  gcc      Y       Y        option
17358 SVR4 SDATA        13   crt0    gcc      N       Y        N
17359 SVR4 pic          30   prolog  ld       Y       not yet  N
17360 SVR4 PIC          30   prolog  gcc      Y       option   option
17361 EABI TOC          30   prolog  gcc      Y       option   option
17365 /* Hash functions for the hash table. */
17367 static unsigned
17368 rs6000_hash_constant (rtx k)
17370 enum rtx_code code = GET_CODE (k);
17371 machine_mode mode = GET_MODE (k);
17372 unsigned result = (code << 3) ^ mode;
17373 const char *format;
17374 int flen, fidx;
17376 format = GET_RTX_FORMAT (code);
17377 flen = strlen (format);
17378 fidx = 0;
17380 switch (code)
17382 case LABEL_REF:
17383 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17385 case CONST_WIDE_INT:
17387 int i;
17388 flen = CONST_WIDE_INT_NUNITS (k);
17389 for (i = 0; i < flen; i++)
17390 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17391 return result;
17394 case CONST_DOUBLE:
17395 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17397 case CODE_LABEL:
17398 fidx = 3;
17399 break;
17401 default:
17402 break;
17405 for (; fidx < flen; fidx++)
17406 switch (format[fidx])
17408 case 's':
17410 unsigned i, len;
17411 const char *str = XSTR (k, fidx);
17412 len = strlen (str);
17413 result = result * 613 + len;
17414 for (i = 0; i < len; i++)
17415 result = result * 613 + (unsigned) str[i];
17416 break;
17418 case 'u':
17419 case 'e':
17420 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17421 break;
17422 case 'i':
17423 case 'n':
17424 result = result * 613 + (unsigned) XINT (k, fidx);
17425 break;
17426 case 'w':
17427 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17428 result = result * 613 + (unsigned) XWINT (k, fidx);
17429 else
17431 size_t i;
17432 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17433 result = result * 613 + (unsigned) (XWINT (k, fidx)
17434 >> CHAR_BIT * i);
17436 break;
17437 case '0':
17438 break;
17439 default:
17440 gcc_unreachable ();
17443 return result;
17446 hashval_t
17447 toc_hasher::hash (toc_hash_struct *thc)
17449 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17452 /* Compare H1 and H2 for equivalence. */
17454 bool
17455 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17457 rtx r1 = h1->key;
17458 rtx r2 = h2->key;
17460 if (h1->key_mode != h2->key_mode)
17461 return 0;
17463 return rtx_equal_p (r1, r2);
17466 /* These are the names given by the C++ front-end to vtables, and
17467 vtable-like objects. Ideally, this logic should not be here;
17468 instead, there should be some programmatic way of inquiring as
17469 to whether or not an object is a vtable. */
17471 #define VTABLE_NAME_P(NAME) \
17472 (startswith (name, "_vt.") \
17473 || startswith (name, "_ZTV") \
17474 || startswith (name, "_ZTT") \
17475 || startswith (name, "_ZTI") \
17476 || startswith (name, "_ZTC"))
17478 #ifdef NO_DOLLAR_IN_LABEL
17479 /* Return a GGC-allocated character string translating dollar signs in
17480 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17482 const char *
17483 rs6000_xcoff_strip_dollar (const char *name)
17485 char *strip, *p;
17486 const char *q;
17487 size_t len;
17489 q = (const char *) strchr (name, '$');
17491 if (q == 0 || q == name)
17492 return name;
17494 len = strlen (name);
17495 strip = XALLOCAVEC (char, len + 1);
17496 strcpy (strip, name);
17497 p = strip + (q - name);
17498 while (p)
17500 *p = '_';
17501 p = strchr (p + 1, '$');
17504 return ggc_alloc_string (strip, len);
17506 #endif
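/* Example (only compiled when NO_DOLLAR_IN_LABEL is defined):
   rs6000_xcoff_strip_dollar ("foo$bar$baz") returns "foo_bar_baz",
   while a name whose first character is '$' is returned unchanged
   because of the q == name check above.  */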
17508 void
17509 rs6000_output_symbol_ref (FILE *file, rtx x)
17511 const char *name = XSTR (x, 0);
17513 /* Currently C++ toc references to vtables can be emitted before it
17514 is decided whether the vtable is public or private. If this is
17515 the case, then the linker will eventually complain that there is
17516 a reference to an unknown section. Thus, for vtables only,
17517 we emit the TOC reference to reference the identifier and not the
17518 symbol. */
17519 if (VTABLE_NAME_P (name))
17521 RS6000_OUTPUT_BASENAME (file, name);
17523 else
17524 assemble_name (file, name);
17527 /* Output a TOC entry. We derive the entry name from what is being
17528 written. */
17530 void
17531 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17533 char buf[256];
17534 const char *name = buf;
17535 rtx base = x;
17536 HOST_WIDE_INT offset = 0;
17538 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17540 /* When the linker won't eliminate them, don't output duplicate
17541 TOC entries (this happens on AIX if there is any kind of TOC,
17542 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17543 CODE_LABELs. */
17544 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17546 struct toc_hash_struct *h;
17548 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17549 time because GGC is not initialized at that point. */
17550 if (toc_hash_table == NULL)
17551 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17553 h = ggc_alloc<toc_hash_struct> ();
17554 h->key = x;
17555 h->key_mode = mode;
17556 h->labelno = labelno;
17558 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17559 if (*found == NULL)
17560 *found = h;
17561 else /* This is indeed a duplicate.
17562 Set this label equal to that label. */
17564 fputs ("\t.set ", file);
17565 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17566 fprintf (file, "%d,", labelno);
17567 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17568 fprintf (file, "%d\n", ((*found)->labelno));
17570 #ifdef HAVE_AS_TLS
17571 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17572 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17573 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17575 fputs ("\t.set ", file);
17576 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17577 fprintf (file, "%d,", labelno);
17578 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17579 fprintf (file, "%d\n", ((*found)->labelno));
17581 #endif
17582 return;
17586 /* If we're going to put a double constant in the TOC, make sure it's
17587 aligned properly when strict alignment is on. */
17588 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17589 && STRICT_ALIGNMENT
17590 && GET_MODE_BITSIZE (mode) >= 64
17591 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
17592 ASM_OUTPUT_ALIGN (file, 3);
17595 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17597 /* Handle FP constants specially. Note that if we have a minimal
17598 TOC, things we put here aren't actually in the TOC, so we can allow
17599 FP constants. */
17600 if (CONST_DOUBLE_P (x)
17601 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17602 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17604 long k[4];
17606 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17607 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17608 else
17609 real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
17611 if (TARGET_64BIT)
17613 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17614 fputs (DOUBLE_INT_ASM_OP, file);
17615 else
17616 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17617 k[0] & 0xffffffff, k[1] & 0xffffffff,
17618 k[2] & 0xffffffff, k[3] & 0xffffffff);
17619 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17620 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17621 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17622 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17623 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17624 return;
17626 else
17628 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17629 fputs ("\t.long ", file);
17630 else
17631 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17632 k[0] & 0xffffffff, k[1] & 0xffffffff,
17633 k[2] & 0xffffffff, k[3] & 0xffffffff);
17634 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17635 k[0] & 0xffffffff, k[1] & 0xffffffff,
17636 k[2] & 0xffffffff, k[3] & 0xffffffff);
17637 return;
17640 else if (CONST_DOUBLE_P (x)
17641 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17643 long k[2];
17645 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17646 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17647 else
17648 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17650 if (TARGET_64BIT)
17652 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17653 fputs (DOUBLE_INT_ASM_OP, file);
17654 else
17655 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17656 k[0] & 0xffffffff, k[1] & 0xffffffff);
17657 fprintf (file, "0x%lx%08lx\n",
17658 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17659 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17660 return;
17662 else
17664 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17665 fputs ("\t.long ", file);
17666 else
17667 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17668 k[0] & 0xffffffff, k[1] & 0xffffffff);
17669 fprintf (file, "0x%lx,0x%lx\n",
17670 k[0] & 0xffffffff, k[1] & 0xffffffff);
17671 return;
17674 else if (CONST_DOUBLE_P (x)
17675 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17677 long l;
17679 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17680 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17681 else
17682 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17684 if (TARGET_64BIT)
17686 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17687 fputs (DOUBLE_INT_ASM_OP, file);
17688 else
17689 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17690 if (WORDS_BIG_ENDIAN)
17691 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17692 else
17693 fprintf (file, "0x%lx\n", l & 0xffffffff);
17694 return;
17696 else
17698 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17699 fputs ("\t.long ", file);
17700 else
17701 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17702 fprintf (file, "0x%lx\n", l & 0xffffffff);
17703 return;
17706 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17708 unsigned HOST_WIDE_INT low;
17709 HOST_WIDE_INT high;
17711 low = INTVAL (x) & 0xffffffff;
17712 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17714 /* TOC entries are always Pmode-sized, so when big-endian
17715 smaller integer constants in the TOC need to be padded.
17716 (This is still a win over putting the constants in
17717 a separate constant pool, because then we'd have
17718 to have both a TOC entry _and_ the actual constant.)
17720 For a 32-bit target, CONST_INT values are loaded and shifted
17721 entirely within `low' and can be stored in one TOC entry. */
17723 /* It would be easy to make this work, but it doesn't now. */
17724 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17726 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17728 low |= high << 32;
17729 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17730 high = (HOST_WIDE_INT) low >> 32;
17731 low &= 0xffffffff;
17734 if (TARGET_64BIT)
17736 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17737 fputs (DOUBLE_INT_ASM_OP, file);
17738 else
17739 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17740 (long) high & 0xffffffff, (long) low & 0xffffffff);
17741 fprintf (file, "0x%lx%08lx\n",
17742 (long) high & 0xffffffff, (long) low & 0xffffffff);
17743 return;
17745 else
17747 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17749 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17750 fputs ("\t.long ", file);
17751 else
17752 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17753 (long) high & 0xffffffff, (long) low & 0xffffffff);
17754 fprintf (file, "0x%lx,0x%lx\n",
17755 (long) high & 0xffffffff, (long) low & 0xffffffff);
17757 else
17759 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17760 fputs ("\t.long ", file);
17761 else
17762 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17763 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17765 return;
17769 if (GET_CODE (x) == CONST)
17771 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17772 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17774 base = XEXP (XEXP (x, 0), 0);
17775 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17778 switch (GET_CODE (base))
17780 case SYMBOL_REF:
17781 name = XSTR (base, 0);
17782 break;
17784 case LABEL_REF:
17785 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17786 CODE_LABEL_NUMBER (XEXP (base, 0)));
17787 break;
17789 case CODE_LABEL:
17790 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17791 break;
17793 default:
17794 gcc_unreachable ();
17797 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17798 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17799 else
17801 fputs ("\t.tc ", file);
17802 RS6000_OUTPUT_BASENAME (file, name);
17804 if (offset < 0)
17805 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17806 else if (offset)
17807 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17809 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17810 after other TOC symbols, reducing overflow of small TOC access
17811 to [TC] symbols. */
17812 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17813 ? "[TE]," : "[TC],", file);
17816 /* Currently C++ toc references to vtables can be emitted before it
17817 is decided whether the vtable is public or private. If this is
17818 the case, then the linker will eventually complain that there is
17819 a TOC reference to an unknown section. Thus, for vtables only,
17820 we emit the TOC reference to reference the symbol and not the
17821 section. */
17822 if (VTABLE_NAME_P (name))
17824 RS6000_OUTPUT_BASENAME (file, name);
17825 if (offset < 0)
17826 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17827 else if (offset > 0)
17828 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17830 else
17831 output_addr_const (file, x);
17833 #if HAVE_AS_TLS
17834 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17836 switch (SYMBOL_REF_TLS_MODEL (base))
17838 case 0:
17839 break;
17840 case TLS_MODEL_LOCAL_EXEC:
17841 fputs ("@le", file);
17842 break;
17843 case TLS_MODEL_INITIAL_EXEC:
17844 fputs ("@ie", file);
17845 break;
17846 /* Use global-dynamic for local-dynamic. */
17847 case TLS_MODEL_GLOBAL_DYNAMIC:
17848 case TLS_MODEL_LOCAL_DYNAMIC:
17849 putc ('\n', file);
17850 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17851 fputs ("\t.tc .", file);
17852 RS6000_OUTPUT_BASENAME (file, name);
17853 fputs ("[TC],", file);
17854 output_addr_const (file, x);
17855 fputs ("@m", file);
17856 break;
17857 default:
17858 gcc_unreachable ();
17861 #endif
17863 putc ('\n', file);
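/* For instance, on 64-bit AIX (big-endian, without -mminimal-toc) the
   DFmode constant 1.0 should come out of the code above as roughly

       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas ELF and -mminimal-toc targets print the raw DOUBLE_INT_ASM_OP
   form after the internal label instead of a named .tc entry.  */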
17866 /* Output an assembler pseudo-op to write an ASCII string of N characters
17867 starting at P to FILE.
17869 On the RS/6000, we have to do this using the .byte operation and
17870 write out special characters outside the quoted string.
17871 Also, the assembler is broken; very long strings are truncated,
17872 so we must artificially break them up early. */
17874 void
17875 output_ascii (FILE *file, const char *p, int n)
17877 char c;
17878 int i, count_string;
17879 const char *for_string = "\t.byte \"";
17880 const char *for_decimal = "\t.byte ";
17881 const char *to_close = NULL;
17883 count_string = 0;
17884 for (i = 0; i < n; i++)
17886 c = *p++;
17887 if (c >= ' ' && c < 0177)
17889 if (for_string)
17890 fputs (for_string, file);
17891 putc (c, file);
17893 /* Write two quotes to get one. */
17894 if (c == '"')
17896 putc (c, file);
17897 ++count_string;
17900 for_string = NULL;
17901 for_decimal = "\"\n\t.byte ";
17902 to_close = "\"\n";
17903 ++count_string;
17905 if (count_string >= 512)
17907 fputs (to_close, file);
17909 for_string = "\t.byte \"";
17910 for_decimal = "\t.byte ";
17911 to_close = NULL;
17912 count_string = 0;
17915 else
17917 if (for_decimal)
17918 fputs (for_decimal, file);
17919 fprintf (file, "%d", c);
17921 for_string = "\n\t.byte \"";
17922 for_decimal = ", ";
17923 to_close = "\n";
17924 count_string = 0;
17928 /* Now close the string if we have written one. Then end the line. */
17929 if (to_close)
17930 fputs (to_close, file);
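/* Example: output_ascii (file, "hi\n", 3) emits

       .byte "hi"
       .byte 10

   -- printable characters are batched into one quoted .byte directive
   and the newline falls back to its decimal value.  */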
17933 /* Generate a unique section name for FILENAME for a section type
17934 represented by SECTION_DESC. Output goes into BUF.
17936 SECTION_DESC can be any string, as long as it is different for each
17937 possible section type.
17939 We name the section in the same manner as xlc. The name begins with an
17940 underscore followed by the filename (after stripping any leading directory
17941 names) with the last period replaced by the string SECTION_DESC. If
17942 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17943 the name. */
17945 void
17946 rs6000_gen_section_name (char **buf, const char *filename,
17947 const char *section_desc)
17949 const char *q, *after_last_slash, *last_period = 0;
17950 char *p;
17951 int len;
17953 after_last_slash = filename;
17954 for (q = filename; *q; q++)
17956 if (*q == '/')
17957 after_last_slash = q + 1;
17958 else if (*q == '.')
17959 last_period = q;
17962 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17963 *buf = (char *) xmalloc (len);
17965 p = *buf;
17966 *p++ = '_';
17968 for (q = after_last_slash; *q; q++)
17970 if (q == last_period)
17972 strcpy (p, section_desc);
17973 p += strlen (section_desc);
17974 break;
17977 else if (ISALNUM (*q))
17978 *p++ = *q;
17981 if (last_period == 0)
17982 strcpy (p, section_desc);
17983 else
17984 *p = '\0';
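/* Example: rs6000_gen_section_name (&buf, "../src/foo.c", "_bss") sets
   *buf to "_foo_bss" -- the directory prefix is dropped, non-alphanumeric
   characters are skipped, and SECTION_DESC replaces everything from the
   last period onward.  */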
17987 /* Emit profile function. */
17989 void
17990 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17992 /* Non-standard profiling for kernels, which just saves LR then calls
17993 _mcount without worrying about arg saves. The idea is to change
17994 the function prologue as little as possible as it isn't easy to
17995 account for arg save/restore code added just for _mcount. */
17996 if (TARGET_PROFILE_KERNEL)
17997 return;
17999 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
18001 #ifndef NO_PROFILE_COUNTERS
18002 # define NO_PROFILE_COUNTERS 0
18003 #endif
18004 if (NO_PROFILE_COUNTERS)
18005 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
18006 LCT_NORMAL, VOIDmode);
18007 else
18009 char buf[30];
18010 const char *label_name;
18011 rtx fun;
18013 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18014 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
18015 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
18017 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
18018 LCT_NORMAL, VOIDmode, fun, Pmode);
18021 else if (DEFAULT_ABI == ABI_DARWIN)
18023 const char *mcount_name = RS6000_MCOUNT;
18024 int caller_addr_regno = LR_REGNO;
18026 /* Be conservative and always set this, at least for now. */
18027 crtl->uses_pic_offset_table = 1;
18029 #if TARGET_MACHO
18030 /* For PIC code, set up a stub and collect the caller's address
18031 from r0, which is where the prologue puts it. */
18032 if (MACHOPIC_INDIRECT
18033 && crtl->uses_pic_offset_table)
18034 caller_addr_regno = 0;
18035 #endif
18036 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
18037 LCT_NORMAL, VOIDmode,
18038 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
18042 /* Write function profiler code. */
18044 void
18045 output_function_profiler (FILE *file, int labelno)
18047 char buf[100];
18049 switch (DEFAULT_ABI)
18051 default:
18052 gcc_unreachable ();
18054 case ABI_V4:
18055 if (!TARGET_32BIT)
18057 warning (0, "no profiling of 64-bit code for this ABI");
18058 return;
18060 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18061 fprintf (file, "\tmflr %s\n", reg_names[0]);
18062 if (NO_PROFILE_COUNTERS)
18064 asm_fprintf (file, "\tstw %s,4(%s)\n",
18065 reg_names[0], reg_names[1]);
18067 else if (TARGET_SECURE_PLT && flag_pic)
18069 if (TARGET_LINK_STACK)
18071 char name[32];
18072 get_ppc476_thunk_name (name);
18073 asm_fprintf (file, "\tbl %s\n", name);
18075 else
18076 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
18077 asm_fprintf (file, "\tstw %s,4(%s)\n",
18078 reg_names[0], reg_names[1]);
18079 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18080 asm_fprintf (file, "\taddis %s,%s,",
18081 reg_names[12], reg_names[12]);
18082 assemble_name (file, buf);
18083 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
18084 assemble_name (file, buf);
18085 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
18087 else if (flag_pic == 1)
18089 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
18090 asm_fprintf (file, "\tstw %s,4(%s)\n",
18091 reg_names[0], reg_names[1]);
18092 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18093 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
18094 assemble_name (file, buf);
18095 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
18097 else if (flag_pic > 1)
18099 asm_fprintf (file, "\tstw %s,4(%s)\n",
18100 reg_names[0], reg_names[1]);
18101 /* Now, we need to get the address of the label. */
18102 if (TARGET_LINK_STACK)
18104 char name[32];
18105 get_ppc476_thunk_name (name);
18106 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
18107 assemble_name (file, buf);
18108 fputs ("-.\n1:", file);
18109 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18110 asm_fprintf (file, "\taddi %s,%s,4\n",
18111 reg_names[11], reg_names[11]);
18113 else
18115 fputs ("\tbcl 20,31,1f\n\t.long ", file);
18116 assemble_name (file, buf);
18117 fputs ("-.\n1:", file);
18118 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18120 asm_fprintf (file, "\tlwz %s,0(%s)\n",
18121 reg_names[0], reg_names[11]);
18122 asm_fprintf (file, "\tadd %s,%s,%s\n",
18123 reg_names[0], reg_names[0], reg_names[11]);
18125 else
18127 asm_fprintf (file, "\tlis %s,", reg_names[12]);
18128 assemble_name (file, buf);
18129 fputs ("@ha\n", file);
18130 asm_fprintf (file, "\tstw %s,4(%s)\n",
18131 reg_names[0], reg_names[1]);
18132 asm_fprintf (file, "\tla %s,", reg_names[0]);
18133 assemble_name (file, buf);
18134 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
18137 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
18138 fprintf (file, "\tbl %s%s\n",
18139 RS6000_MCOUNT, flag_pic ? "@plt" : "");
18140 break;
18142 case ABI_AIX:
18143 case ABI_ELFv2:
18144 case ABI_DARWIN:
18145 /* Don't do anything, done in output_profile_hook (). */
18146 break;
18152 /* The following variable holds the last issued insn. */
18154 static rtx_insn *last_scheduled_insn;
18156 /* The following variable helps to balance issuing of load and
18157 store instructions. */
18159 static int load_store_pendulum;
18161 /* The following variable helps pair divide insns during scheduling. */
18162 static int divide_cnt;
18163 /* The following variable helps pair and alternate vector and vector load
18164 insns during scheduling. */
18165 static int vec_pairing;
18168 /* Power4 load update and store update instructions are cracked into a
18169 load or store and an integer insn which are executed in the same cycle.
18170 Branches have their own dispatch slot which does not count against the
18171 GCC issue rate, but it changes the program flow so there are no other
18172 instructions to issue in this cycle. */
18174 static int
18175 rs6000_variable_issue_1 (rtx_insn *insn, int more)
18177 last_scheduled_insn = insn;
18178 if (GET_CODE (PATTERN (insn)) == USE
18179 || GET_CODE (PATTERN (insn)) == CLOBBER)
18181 cached_can_issue_more = more;
18182 return cached_can_issue_more;
18185 if (insn_terminates_group_p (insn, current_group))
18187 cached_can_issue_more = 0;
18188 return cached_can_issue_more;
18191 /* If the insn has no reservation but we still get here, leave MORE unchanged. */
18192 if (recog_memoized (insn) < 0)
18193 return more;
18195 if (rs6000_sched_groups)
18197 if (is_microcoded_insn (insn))
18198 cached_can_issue_more = 0;
18199 else if (is_cracked_insn (insn))
18200 cached_can_issue_more = more > 2 ? more - 2 : 0;
18201 else
18202 cached_can_issue_more = more - 1;
18204 return cached_can_issue_more;
18207 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
18208 return 0;
18210 cached_can_issue_more = more - 1;
18211 return cached_can_issue_more;
18214 static int
18215 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
18217 int r = rs6000_variable_issue_1 (insn, more);
18218 if (verbose)
18219 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
18220 return r;
18223 /* Adjust the cost of a scheduling dependency. Return the new cost of
18224 a dependency of INSN on DEP_INSN. COST is the current cost. */
18226 static int
18227 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
18228 unsigned int)
18230 enum attr_type attr_type;
18232 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
18233 return cost;
18235 switch (dep_type)
18237 case REG_DEP_TRUE:
18239 /* Data dependency; DEP_INSN writes a register that INSN reads
18240 some cycles later. */
18242 /* Separate a load from a narrower, dependent store. */
18243 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
18244 || rs6000_tune == PROCESSOR_POWER10
18245 || rs6000_tune == PROCESSOR_POWER11)
18246 && GET_CODE (PATTERN (insn)) == SET
18247 && GET_CODE (PATTERN (dep_insn)) == SET
18248 && MEM_P (XEXP (PATTERN (insn), 1))
18249 && MEM_P (XEXP (PATTERN (dep_insn), 0))
18250 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
18251 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
18252 return cost + 14;
18254 attr_type = get_attr_type (insn);
18256 switch (attr_type)
18258 case TYPE_JMPREG:
18259 /* Tell the first scheduling pass about the latency between
18260 a mtctr and bctr (and mtlr and br/blr). The first
18261 scheduling pass will not know about this latency since
18262 the mtctr instruction, which has the latency associated
18263 to it, will be generated by reload. */
18264 return 4;
18265 case TYPE_BRANCH:
18266 /* Leave some extra cycles between a compare and its
18267 dependent branch, to inhibit expensive mispredicts. */
18268 if ((rs6000_tune == PROCESSOR_PPC603
18269 || rs6000_tune == PROCESSOR_PPC604
18270 || rs6000_tune == PROCESSOR_PPC604e
18271 || rs6000_tune == PROCESSOR_PPC620
18272 || rs6000_tune == PROCESSOR_PPC630
18273 || rs6000_tune == PROCESSOR_PPC750
18274 || rs6000_tune == PROCESSOR_PPC7400
18275 || rs6000_tune == PROCESSOR_PPC7450
18276 || rs6000_tune == PROCESSOR_PPCE5500
18277 || rs6000_tune == PROCESSOR_PPCE6500
18278 || rs6000_tune == PROCESSOR_POWER4
18279 || rs6000_tune == PROCESSOR_POWER5
18280 || rs6000_tune == PROCESSOR_POWER7
18281 || rs6000_tune == PROCESSOR_POWER8
18282 || rs6000_tune == PROCESSOR_POWER9
18283 || rs6000_tune == PROCESSOR_POWER10
18284 || rs6000_tune == PROCESSOR_POWER11
18285 || rs6000_tune == PROCESSOR_CELL)
18286 && recog_memoized (dep_insn)
18287 && (INSN_CODE (dep_insn) >= 0))
18289 switch (get_attr_type (dep_insn))
18291 case TYPE_CMP:
18292 case TYPE_FPCOMPARE:
18293 case TYPE_CR_LOGICAL:
18294 return cost + 2;
18295 case TYPE_EXTS:
18296 case TYPE_MUL:
18297 if (get_attr_dot (dep_insn) == DOT_YES)
18298 return cost + 2;
18299 else
18300 break;
18301 case TYPE_SHIFT:
18302 if (get_attr_dot (dep_insn) == DOT_YES
18303 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18304 return cost + 2;
18305 else
18306 break;
18307 default:
18308 break;
18310 break;
18312 case TYPE_STORE:
18313 case TYPE_FPSTORE:
18314 if ((rs6000_tune == PROCESSOR_POWER6)
18315 && recog_memoized (dep_insn)
18316 && (INSN_CODE (dep_insn) >= 0))
18319 if (GET_CODE (PATTERN (insn)) != SET)
18320 /* If this happens, we have to extend this to schedule
18321 optimally. Return default for now. */
18322 return cost;
18324 /* Adjust the cost for the case where the value written
18325 by a fixed point operation is used as the address
18326 gen value on a store. */
18327 switch (get_attr_type (dep_insn))
18329 case TYPE_LOAD:
18330 case TYPE_CNTLZ:
18332 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18333 return get_attr_sign_extend (dep_insn)
18334 == SIGN_EXTEND_YES ? 6 : 4;
18335 break;
18337 case TYPE_SHIFT:
18339 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18340 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18341 6 : 3;
18342 break;
18344 case TYPE_INTEGER:
18345 case TYPE_ADD:
18346 case TYPE_LOGICAL:
18347 case TYPE_EXTS:
18348 case TYPE_INSERT:
18350 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18351 return 3;
18352 break;
18354 case TYPE_STORE:
18355 case TYPE_FPLOAD:
18356 case TYPE_FPSTORE:
18358 if (get_attr_update (dep_insn) == UPDATE_YES
18359 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18360 return 3;
18361 break;
18363 case TYPE_MUL:
18365 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18366 return 17;
18367 break;
18369 case TYPE_DIV:
18371 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18372 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18373 break;
18375 default:
18376 break;
18379 break;
18381 case TYPE_LOAD:
18382 if ((rs6000_tune == PROCESSOR_POWER6)
18383 && recog_memoized (dep_insn)
18384 && (INSN_CODE (dep_insn) >= 0))
18387 /* Adjust the cost for the case where the value written
18388 by a fixed point instruction is used within the address
18389 gen portion of a subsequent load(u)(x). */
18390 switch (get_attr_type (dep_insn))
18392 case TYPE_LOAD:
18393 case TYPE_CNTLZ:
18395 if (set_to_load_agen (dep_insn, insn))
18396 return get_attr_sign_extend (dep_insn)
18397 == SIGN_EXTEND_YES ? 6 : 4;
18398 break;
18400 case TYPE_SHIFT:
18402 if (set_to_load_agen (dep_insn, insn))
18403 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18404 6 : 3;
18405 break;
18407 case TYPE_INTEGER:
18408 case TYPE_ADD:
18409 case TYPE_LOGICAL:
18410 case TYPE_EXTS:
18411 case TYPE_INSERT:
18413 if (set_to_load_agen (dep_insn, insn))
18414 return 3;
18415 break;
18417 case TYPE_STORE:
18418 case TYPE_FPLOAD:
18419 case TYPE_FPSTORE:
18421 if (get_attr_update (dep_insn) == UPDATE_YES
18422 && set_to_load_agen (dep_insn, insn))
18423 return 3;
18424 break;
18426 case TYPE_MUL:
18428 if (set_to_load_agen (dep_insn, insn))
18429 return 17;
18430 break;
18432 case TYPE_DIV:
18434 if (set_to_load_agen (dep_insn, insn))
18435 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18436 break;
18438 default:
18439 break;
18442 break;
18444 default:
18445 break;
18448 /* Fall out to return default cost. */
18450 break;
18452 case REG_DEP_OUTPUT:
18453 /* Output dependency; DEP_INSN writes a register that INSN writes some
18454 cycles later. */
18455 if ((rs6000_tune == PROCESSOR_POWER6)
18456 && recog_memoized (dep_insn)
18457 && (INSN_CODE (dep_insn) >= 0))
18459 attr_type = get_attr_type (insn);
18461 switch (attr_type)
18463 case TYPE_FP:
18464 case TYPE_FPSIMPLE:
18465 if (get_attr_type (dep_insn) == TYPE_FP
18466 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18467 return 1;
18468 break;
18469 default:
18470 break;
18473 /* Fall through, no cost for output dependency. */
18474 /* FALLTHRU */
18476 case REG_DEP_ANTI:
18477 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18478 cycles later. */
18479 return 0;
18481 default:
18482 gcc_unreachable ();
18485 return cost;
18488 /* Debug version of rs6000_adjust_cost. */
18490 static int
18491 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18492 int cost, unsigned int dw)
18494 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18496 if (ret != cost)
18498 const char *dep;
18500 switch (dep_type)
18502 default: dep = "unknown dependency"; break;
18503 case REG_DEP_TRUE: dep = "data dependency"; break;
18504 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18505 case REG_DEP_ANTI: dep = "anti dependency"; break;
18508 fprintf (stderr,
18509 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18510 "%s, insn:\n", ret, cost, dep);
18512 debug_rtx (insn);
18515 return ret;
18518 /* The function returns true if INSN is microcoded.
18519 Return false otherwise. */
18521 static bool
18522 is_microcoded_insn (rtx_insn *insn)
18524 if (!insn || !NONDEBUG_INSN_P (insn)
18525 || GET_CODE (PATTERN (insn)) == USE
18526 || GET_CODE (PATTERN (insn)) == CLOBBER)
18527 return false;
18529 if (rs6000_tune == PROCESSOR_CELL)
18530 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18532 if (rs6000_sched_groups
18533 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18535 enum attr_type type = get_attr_type (insn);
18536 if ((type == TYPE_LOAD
18537 && get_attr_update (insn) == UPDATE_YES
18538 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18539 || ((type == TYPE_LOAD || type == TYPE_STORE)
18540 && get_attr_update (insn) == UPDATE_YES
18541 && get_attr_indexed (insn) == INDEXED_YES)
18542 || type == TYPE_MFCR)
18543 return true;
18546 return false;
18549 /* The function returns true if INSN is cracked into 2 instructions
18550 by the processor (and therefore occupies 2 issue slots). */
18552 static bool
18553 is_cracked_insn (rtx_insn *insn)
18555 if (!insn || !NONDEBUG_INSN_P (insn)
18556 || GET_CODE (PATTERN (insn)) == USE
18557 || GET_CODE (PATTERN (insn)) == CLOBBER)
18558 return false;
18560 if (rs6000_sched_groups
18561 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18563 enum attr_type type = get_attr_type (insn);
18564 if ((type == TYPE_LOAD
18565 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18566 && get_attr_update (insn) == UPDATE_NO)
18567 || (type == TYPE_LOAD
18568 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18569 && get_attr_update (insn) == UPDATE_YES
18570 && get_attr_indexed (insn) == INDEXED_NO)
18571 || (type == TYPE_STORE
18572 && get_attr_update (insn) == UPDATE_YES
18573 && get_attr_indexed (insn) == INDEXED_NO)
18574 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18575 && get_attr_update (insn) == UPDATE_YES)
18576 || (type == TYPE_CR_LOGICAL
18577 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18578 || (type == TYPE_EXTS
18579 && get_attr_dot (insn) == DOT_YES)
18580 || (type == TYPE_SHIFT
18581 && get_attr_dot (insn) == DOT_YES
18582 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18583 || (type == TYPE_MUL
18584 && get_attr_dot (insn) == DOT_YES)
18585 || type == TYPE_DIV
18586 || (type == TYPE_INSERT
18587 && get_attr_size (insn) == SIZE_32))
18588 return true;
18591 return false;
18594 /* The function returns true if INSN can be issued only from
18595 the branch slot. */
18597 static bool
18598 is_branch_slot_insn (rtx_insn *insn)
18600 if (!insn || !NONDEBUG_INSN_P (insn)
18601 || GET_CODE (PATTERN (insn)) == USE
18602 || GET_CODE (PATTERN (insn)) == CLOBBER)
18603 return false;
18605 if (rs6000_sched_groups)
18607 enum attr_type type = get_attr_type (insn);
18608 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18609 return true;
18610 return false;
18613 return false;
18616 /* The function returns true if OUT_INSN sets a value that is
18617 used in the address generation computation of IN_INSN. */
18618 static bool
18619 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18621 rtx out_set, in_set;
18623 /* For performance reasons, only handle the simple case where
18624 both insns are a single_set. */
18625 out_set = single_set (out_insn);
18626 if (out_set)
18628 in_set = single_set (in_insn);
18629 if (in_set)
18630 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18633 return false;
18636 /* Try to determine base/offset/size parts of the given MEM.
18637 Return true if successful, false if the values could not
18638 all be determined.
18640 This function only looks for REG or REG+CONST address forms.
18641 REG+REG address form will return false. */
18643 static bool
18644 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18645 HOST_WIDE_INT *size)
18647 rtx addr_rtx;
18648 if (MEM_SIZE_KNOWN_P (mem))
18649 *size = MEM_SIZE (mem);
18650 else
18651 return false;
18653 addr_rtx = (XEXP (mem, 0));
18654 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18655 addr_rtx = XEXP (addr_rtx, 1);
18657 *offset = 0;
18658 while (GET_CODE (addr_rtx) == PLUS
18659 && CONST_INT_P (XEXP (addr_rtx, 1)))
18661 *offset += INTVAL (XEXP (addr_rtx, 1));
18662 addr_rtx = XEXP (addr_rtx, 0);
18664 if (!REG_P (addr_rtx))
18665 return false;
18667 *base = addr_rtx;
18668 return true;
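/* Example: for (mem:SI (plus:DI (reg r9) (const_int 8))) with a known
   size, this returns base = r9, offset = 8, size = 4; an indexed
   reg+reg address fails the final REG_P test and returns false.  */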
18671 /* If the target storage locations of arguments MEM1 and MEM2 are
18672 adjacent, then return the argument that has the lower address.
18673 Otherwise, return NULL_RTX. */
18675 static rtx
18676 adjacent_mem_locations (rtx mem1, rtx mem2)
18678 rtx reg1, reg2;
18679 HOST_WIDE_INT off1, size1, off2, size2;
18681 if (MEM_P (mem1)
18682 && MEM_P (mem2)
18683 && get_memref_parts (mem1, &reg1, &off1, &size1)
18684 && get_memref_parts (mem2, &reg2, &off2, &size2)
18685 && REGNO (reg1) == REGNO (reg2))
18687 if (off1 + size1 == off2)
18688 return mem1;
18689 else if (off2 + size2 == off1)
18690 return mem2;
18693 return NULL_RTX;
18696 /* This function returns true if it can be determined that the two MEM
18697 locations overlap by at least 1 byte based on base reg/offset/size. */
18699 static bool
18700 mem_locations_overlap (rtx mem1, rtx mem2)
18702 rtx reg1, reg2;
18703 HOST_WIDE_INT off1, size1, off2, size2;
18705 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18706 && get_memref_parts (mem2, &reg2, &off2, &size2))
18707 return ((REGNO (reg1) == REGNO (reg2))
18708 && (((off1 <= off2) && (off1 + size1 > off2))
18709 || ((off2 <= off1) && (off2 + size2 > off1))));
18711 return false;
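/* E.g. a 4-byte store at r9+4 overlaps an 8-byte load at r9+0,
   since 0 <= 4 and 0 + 8 > 4.  */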
18714 /* A C statement (sans semicolon) to update the integer scheduling
18715 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18716 INSN earlier, reduce the priority to execute INSN later. Do not
18717 define this macro if you do not need to adjust the scheduling
18718 priorities of insns. */
18720 static int
18721 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18723 rtx load_mem, str_mem;
18724 /* On machines (like the 750) which have asymmetric integer units,
18725 where one integer unit can do multiply and divides and the other
18726 can't, reduce the priority of multiply/divide so it is scheduled
18727 before other integer operations. */
18729 #if 0
18730 if (! INSN_P (insn))
18731 return priority;
18733 if (GET_CODE (PATTERN (insn)) == USE)
18734 return priority;
18736 switch (rs6000_tune) {
18737 case PROCESSOR_PPC750:
18738 switch (get_attr_type (insn))
18740 default:
18741 break;
18743 case TYPE_MUL:
18744 case TYPE_DIV:
18745 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18746 priority, priority);
18747 if (priority >= 0 && priority < 0x01000000)
18748 priority >>= 3;
18749 break;
18752 #endif
18754 if (insn_must_be_first_in_group (insn)
18755 && reload_completed
18756 && current_sched_info->sched_max_insns_priority
18757 && rs6000_sched_restricted_insns_priority)
18760 /* Prioritize insns that can be dispatched only in the first
18761 dispatch slot. */
18762 if (rs6000_sched_restricted_insns_priority == 1)
18763 /* Attach highest priority to insn. This means that in
18764 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18765 precede 'priority' (critical path) considerations. */
18766 return current_sched_info->sched_max_insns_priority;
18767 else if (rs6000_sched_restricted_insns_priority == 2)
18768 /* Increase priority of insn by a minimal amount. This means that in
18769 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18770 considerations precede dispatch-slot restriction considerations. */
18771 return (priority + 1);
18774 if (rs6000_tune == PROCESSOR_POWER6
18775 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18776 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18777 /* Attach highest priority to insn if the scheduler has just issued two
18778 stores and this instruction is a load, or two loads and this instruction
18779 is a store. Power6 wants loads and stores scheduled alternately
18780 when possible. */
18781 return current_sched_info->sched_max_insns_priority;
18783 return priority;
18786 /* Return true if the instruction is nonpipelined on the Cell. */
18787 static bool
18788 is_nonpipeline_insn (rtx_insn *insn)
18790 enum attr_type type;
18791 if (!insn || !NONDEBUG_INSN_P (insn)
18792 || GET_CODE (PATTERN (insn)) == USE
18793 || GET_CODE (PATTERN (insn)) == CLOBBER)
18794 return false;
18796 type = get_attr_type (insn);
18797 if (type == TYPE_MUL
18798 || type == TYPE_DIV
18799 || type == TYPE_SDIV
18800 || type == TYPE_DDIV
18801 || type == TYPE_SSQRT
18802 || type == TYPE_DSQRT
18803 || type == TYPE_MFCR
18804 || type == TYPE_MFCRF
18805 || type == TYPE_MFJMPR)
18807 return true;
18809 return false;
18813 /* Return how many instructions the machine can issue per cycle. */
18815 static int
18816 rs6000_issue_rate (void)
18818 /* Unless scheduling for register pressure, use issue rate of 1 for
18819 first scheduling pass to decrease degradation. */
18820 if (!reload_completed && !flag_sched_pressure)
18821 return 1;
18823 switch (rs6000_tune) {
18824 case PROCESSOR_RS64A:
18825 case PROCESSOR_PPC601: /* ? */
18826 case PROCESSOR_PPC7450:
18827 return 3;
18828 case PROCESSOR_PPC440:
18829 case PROCESSOR_PPC603:
18830 case PROCESSOR_PPC750:
18831 case PROCESSOR_PPC7400:
18832 case PROCESSOR_PPC8540:
18833 case PROCESSOR_PPC8548:
18834 case PROCESSOR_CELL:
18835 case PROCESSOR_PPCE300C2:
18836 case PROCESSOR_PPCE300C3:
18837 case PROCESSOR_PPCE500MC:
18838 case PROCESSOR_PPCE500MC64:
18839 case PROCESSOR_PPCE5500:
18840 case PROCESSOR_PPCE6500:
18841 case PROCESSOR_TITAN:
18842 return 2;
18843 case PROCESSOR_PPC476:
18844 case PROCESSOR_PPC604:
18845 case PROCESSOR_PPC604e:
18846 case PROCESSOR_PPC620:
18847 case PROCESSOR_PPC630:
18848 return 4;
18849 case PROCESSOR_POWER4:
18850 case PROCESSOR_POWER5:
18851 case PROCESSOR_POWER6:
18852 case PROCESSOR_POWER7:
18853 return 5;
18854 case PROCESSOR_POWER8:
18855 return 7;
18856 case PROCESSOR_POWER9:
18857 return 6;
18858 case PROCESSOR_POWER10:
18859 case PROCESSOR_POWER11:
18860 return 8;
18861 default:
18862 return 1;
18866 /* Return how many instructions to look ahead for better insn
18867 scheduling. */
18869 static int
18870 rs6000_use_sched_lookahead (void)
18872 switch (rs6000_tune)
18874 case PROCESSOR_PPC8540:
18875 case PROCESSOR_PPC8548:
18876 return 4;
18878 case PROCESSOR_CELL:
18879 return (reload_completed ? 8 : 0);
18881 default:
18882 return 0;
18886 /* We are choosing insn from the ready queue. Return zero if INSN can be
18887 chosen. */
18888 static int
18889 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18891 if (ready_index == 0)
18892 return 0;
18894 if (rs6000_tune != PROCESSOR_CELL)
18895 return 0;
18897 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18899 if (!reload_completed
18900 || is_nonpipeline_insn (insn)
18901 || is_microcoded_insn (insn))
18902 return 1;
18904 return 0;
18907 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18908 and return true. */
18910 static bool
18911 find_mem_ref (rtx pat, rtx *mem_ref)
18913 const char * fmt;
18914 int i, j;
18916 /* stack_tie does not produce any real memory traffic. */
18917 if (tie_operand (pat, VOIDmode))
18918 return false;
18920 if (MEM_P (pat))
18922 *mem_ref = pat;
18923 return true;
18926 /* Recursively process the pattern. */
18927 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18929 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18931 if (fmt[i] == 'e')
18933 if (find_mem_ref (XEXP (pat, i), mem_ref))
18934 return true;
18936 else if (fmt[i] == 'E')
18937 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18939 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18940 return true;
18944 return false;
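/* Illustrative only: called on the SET_SRC of a sign-extending load,
   e.g. (sign_extend:DI (mem:SI (reg:DI 4))), the walk recurses through
   the 'e'/'E' operands of each rtx and sets *MEM_REF to the inner MEM,
   while a stack_tie pattern is rejected up front because it produces
   no real memory traffic.  */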
18947 /* Determine if PAT is a PATTERN of a load insn. */
18949 static bool
18950 is_load_insn1 (rtx pat, rtx *load_mem)
18952 if (!pat || pat == NULL_RTX)
18953 return false;
18955 if (GET_CODE (pat) == SET)
18957 if (REG_P (SET_DEST (pat)))
18958 return find_mem_ref (SET_SRC (pat), load_mem);
18959 else
18960 return false;
18963 if (GET_CODE (pat) == PARALLEL)
18965 int i;
18967 for (i = 0; i < XVECLEN (pat, 0); i++)
18968 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18969 return true;
18972 return false;
18975 /* Determine if INSN loads from memory. */
18977 static bool
18978 is_load_insn (rtx insn, rtx *load_mem)
18980 if (!insn || !INSN_P (insn))
18981 return false;
18983 if (CALL_P (insn))
18984 return false;
18986 return is_load_insn1 (PATTERN (insn), load_mem);
18989 /* Determine if PAT is a PATTERN of a store insn. */
18991 static bool
18992 is_store_insn1 (rtx pat, rtx *str_mem)
18994 if (!pat || pat == NULL_RTX)
18995 return false;
18997 if (GET_CODE (pat) == SET)
18999 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
19000 return find_mem_ref (SET_DEST (pat), str_mem);
19001 else
19002 return false;
19005 if (GET_CODE (pat) == PARALLEL)
19007 int i;
19009 for (i = 0; i < XVECLEN (pat, 0); i++)
19010 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
19011 return true;
19014 return false;
19017 /* Determine if INSN stores to memory. */
19019 static bool
19020 is_store_insn (rtx insn, rtx *str_mem)
19022 if (!insn || !INSN_P (insn))
19023 return false;
19025 return is_store_insn1 (PATTERN (insn), str_mem);
19028 /* Return whether TYPE is a Power9 pairable vector instruction type. */
19030 static bool
19031 is_power9_pairable_vec_type (enum attr_type type)
19033 switch (type)
19035 case TYPE_VECSIMPLE:
19036 case TYPE_VECCOMPLEX:
19037 case TYPE_VECDIV:
19038 case TYPE_VECCMP:
19039 case TYPE_VECPERM:
19040 case TYPE_VECFLOAT:
19041 case TYPE_VECFDIV:
19042 case TYPE_VECDOUBLE:
19043 return true;
19044 default:
19045 break;
19047 return false;
19050 /* Returns whether the dependence between INSN and NEXT is considered
19051 costly by the given target. */
19053 static bool
19054 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
19056 rtx insn;
19057 rtx next;
19058 rtx load_mem, str_mem;
19060 /* If the flag is not enabled, no dependence is considered costly;
19061 allow all dependent insns in the same group.
19062 This is the most aggressive option. */
19063 if (rs6000_sched_costly_dep == no_dep_costly)
19064 return false;
19066 /* If the flag is set to 1, a dependence is always considered costly;
19067 do not allow dependent instructions in the same group.
19068 This is the most conservative option. */
19069 if (rs6000_sched_costly_dep == all_deps_costly)
19070 return true;
19072 insn = DEP_PRO (dep);
19073 next = DEP_CON (dep);
19075 if (rs6000_sched_costly_dep == store_to_load_dep_costly
19076 && is_load_insn (next, &load_mem)
19077 && is_store_insn (insn, &str_mem))
19078 /* Prevent load after store in the same group. */
19079 return true;
19081 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
19082 && is_load_insn (next, &load_mem)
19083 && is_store_insn (insn, &str_mem)
19084 && DEP_TYPE (dep) == REG_DEP_TRUE
19085 && mem_locations_overlap(str_mem, load_mem))
19086 /* Prevent load after store in the same group if it is a true
19087 dependence. */
19088 return true;
19090 /* The flag is set to X; dependences with latency >= X are considered costly,
19091 and will not be scheduled in the same group. */
19092 if (rs6000_sched_costly_dep <= max_dep_latency
19093 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
19094 return true;
19096 return false;
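/* Illustrative only, assuming the option spelling -msched-costly-dep:
   with -msched-costly-dep=3, a dependence whose remaining latency
   (cost - distance) is at least 3 keeps the consumer out of the
   producer's dispatch group, while cheaper dependences may share a
   group.  */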
19099 /* Return the next insn after INSN that is found before TAIL is reached,
19100 skipping any "non-active" insns - insns that will not actually occupy
19101 an issue slot. Return NULL_RTX if such an insn is not found. */
19103 static rtx_insn *
19104 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
19106 if (insn == NULL_RTX || insn == tail)
19107 return NULL;
19109 while (1)
19111 insn = NEXT_INSN (insn);
19112 if (insn == NULL_RTX || insn == tail)
19113 return NULL;
19115 if (CALL_P (insn)
19116 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
19117 || (NONJUMP_INSN_P (insn)
19118 && GET_CODE (PATTERN (insn)) != USE
19119 && GET_CODE (PATTERN (insn)) != CLOBBER
19120 && INSN_CODE (insn) != CODE_FOR_stack_tie))
19121 break;
19123 return insn;
19126 /* Move instruction at POS to the end of the READY list. */
19128 static void
19129 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
19131 rtx_insn *tmp;
19132 int i;
19134 tmp = ready[pos];
19135 for (i = pos; i < lastpos; i++)
19136 ready[i] = ready[i + 1];
19137 ready[lastpos] = tmp;
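/* Note (illustrative): the scheduler picks the next insn from the end
   of the ready list, so moving an insn to the end makes it the next
   candidate for issue.  With ready = {A, B, C, D} and pos = 1, the
   rotation above yields {A, C, D, B}.  */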
19140 /* Do Power6 specific sched_reorder2 reordering of ready list. */
19142 static int
19143 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
19145 /* For Power6, we need to handle some special cases to try and keep the
19146 store queue from overflowing and triggering expensive flushes.
19148 This code monitors how load and store instructions are being issued
19149 and skews the ready list one way or the other to increase the likelihood
19150 that a desired instruction is issued at the proper time.
19152 A couple of things are done. First, we maintain a "load_store_pendulum"
19153 to track the current state of load/store issue.
19155 - If the pendulum is at zero, then no loads or stores have been
19156 issued in the current cycle so we do nothing.
19158 - If the pendulum is 1, then a single load has been issued in this
19159 cycle and we attempt to locate another load in the ready list to
19160 issue with it.
19162 - If the pendulum is -2, then two stores have already been
19163 issued in this cycle, so we increase the priority of the first load
19164 in the ready list to increase its likelihood of being chosen first
19165 in the next cycle.
19167 - If the pendulum is -1, then a single store has been issued in this
19168 cycle and we attempt to locate another store in the ready list to
19169 issue with it, preferring a store to an adjacent memory location to
19170 facilitate store pairing in the store queue.
19172 - If the pendulum is 2, then two loads have already been
19173 issued in this cycle, so we increase the priority of the first store
19174 in the ready list to increase its likelihood of being chosen first
19175 in the next cycle.
19177 - If the pendulum < -2 or > 2, then do nothing.
19179 Note: This code covers the most common scenarios. There exist
19180 non-load/store instructions which make use of the LSU and which
19181 would need to be accounted for to strictly model the behavior
19182 of the machine. Those instructions are currently unaccounted
19183 for to help minimize compile time overhead of this code.
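As an illustrative trace (not taken from the source): issuing two
stores in one cycle moves the pendulum 0 -> -1 -> -2; at -2 the
first load found on the ready list gets a priority boost and the
pendulum is nudged to -3 so that the boost is applied only once.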
19185 int pos;
19186 rtx load_mem, str_mem;
19188 if (is_store_insn (last_scheduled_insn, &str_mem))
19189 /* Issuing a store, swing the load_store_pendulum to the left. */
19190 load_store_pendulum--;
19191 else if (is_load_insn (last_scheduled_insn, &load_mem))
19192 /* Issuing a load, swing the load_store_pendulum to the right. */
19193 load_store_pendulum++;
19194 else
19195 return cached_can_issue_more;
19197 /* If the pendulum is balanced, or there is only one instruction on
19198 the ready list, then all is well, so return. */
19199 if ((load_store_pendulum == 0) || (lastpos <= 0))
19200 return cached_can_issue_more;
19202 if (load_store_pendulum == 1)
19204 /* A load has been issued in this cycle. Scan the ready list
19205 for another load to issue with it. */
19206 pos = lastpos;
19208 while (pos >= 0)
19210 if (is_load_insn (ready[pos], &load_mem))
19212 /* Found a load. Move it to the head of the ready list,
19213 and adjust its priority so that it is more likely to
19214 stay there. */
19215 move_to_end_of_ready (ready, pos, lastpos);
19217 if (!sel_sched_p ()
19218 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19219 INSN_PRIORITY (ready[lastpos])++;
19220 break;
19222 pos--;
19225 else if (load_store_pendulum == -2)
19227 /* Two stores have been issued in this cycle. Increase the
19228 priority of the first load in the ready list to favor it for
19229 issuing in the next cycle. */
19230 pos = lastpos;
19232 while (pos >= 0)
19234 if (is_load_insn (ready[pos], &load_mem)
19235 && !sel_sched_p ()
19236 && INSN_PRIORITY_KNOWN (ready[pos]))
19238 INSN_PRIORITY (ready[pos])++;
19240 /* Adjust the pendulum to account for the fact that a load
19241 was found and increased in priority. This is to prevent
19242 increasing the priority of multiple loads. */
19243 load_store_pendulum--;
19245 break;
19247 pos--;
19250 else if (load_store_pendulum == -1)
19252 /* A store has been issued in this cycle. Scan the ready list for
19253 another store to issue with it, preferring a store to an adjacent
19254 memory location. */
19255 int first_store_pos = -1;
19257 pos = lastpos;
19259 while (pos >= 0)
19261 if (is_store_insn (ready[pos], &str_mem))
19263 rtx str_mem2;
19264 /* Maintain the index of the first store found on the
19265 list. */
19266 if (first_store_pos == -1)
19267 first_store_pos = pos;
19269 if (is_store_insn (last_scheduled_insn, &str_mem2)
19270 && adjacent_mem_locations (str_mem, str_mem2))
19272 /* Found an adjacent store. Move it to the head of the
19273 ready list, and adjust its priority so that it is
19274 more likely to stay there. */
19275 move_to_end_of_ready (ready, pos, lastpos);
19277 if (!sel_sched_p ()
19278 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19279 INSN_PRIORITY (ready[lastpos])++;
19281 first_store_pos = -1;
19283 break;
19286 pos--;
19289 if (first_store_pos >= 0)
19291 /* An adjacent store wasn't found, but a non-adjacent store was,
19292 so move the non-adjacent store to the front of the ready
19293 list, and adjust its priority so that it is more likely to
19294 stay there. */
19295 move_to_end_of_ready (ready, first_store_pos, lastpos);
19296 if (!sel_sched_p ()
19297 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19298 INSN_PRIORITY (ready[lastpos])++;
19301 else if (load_store_pendulum == 2)
19303 /* Two loads have been issued in this cycle. Increase the priority
19304 of the first store in the ready list to favor it for issuing in
19305 the next cycle. */
19306 pos = lastpos;
19308 while (pos >= 0)
19310 if (is_store_insn (ready[pos], &str_mem)
19311 && !sel_sched_p ()
19312 && INSN_PRIORITY_KNOWN (ready[pos]))
19314 INSN_PRIORITY (ready[pos])++;
19316 /* Adjust the pendulum to account for the fact that a store
19317 was found and increased in priority. This is to prevent
19318 increasing the priority of multiple stores. */
19319 load_store_pendulum++;
19321 break;
19323 pos--;
19327 return cached_can_issue_more;
19330 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19332 static int
19333 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19335 int pos;
19336 enum attr_type type, type2;
19338 type = get_attr_type (last_scheduled_insn);
19340 /* Try to issue fixed point divides back-to-back in pairs so they will be
19341 routed to separate execution units and execute in parallel. */
19342 if (type == TYPE_DIV && divide_cnt == 0)
19344 /* First divide has been scheduled. */
19345 divide_cnt = 1;
19347 /* Scan the ready list looking for another divide, if found move it
19348 to the end of the list so it is chosen next. */
19349 pos = lastpos;
19350 while (pos >= 0)
19352 if (recog_memoized (ready[pos]) >= 0
19353 && get_attr_type (ready[pos]) == TYPE_DIV)
19355 move_to_end_of_ready (ready, pos, lastpos);
19356 break;
19358 pos--;
19361 else
19363 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19364 divide_cnt = 0;
19366 /* The best dispatch throughput for vector and vector load insns can be
19367 achieved by interleaving a vector and vector load such that they'll
19368 dispatch to the same superslice. If this pairing cannot be achieved
19369 then it is best to pair vector insns together and vector load insns
19370 together.
19372 To aid in this pairing, vec_pairing maintains the current state with
19373 the following values:
19375 0 : Initial state, no vecload/vector pairing has been started.
19377 1 : A vecload or vector insn has been issued and a candidate for
19378 pairing has been found and moved to the end of the ready
19379 list. */
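/* Illustrative state walk (not from the source): after a vecload is
   issued with vec_pairing == 0, the ready list is scanned for a
   pairable vector insn first, falling back to another vecload; the
   candidate found is moved to the end of the ready list and
   vec_pairing is set to 1.  The next call then resets vec_pairing
   to 0, whether or not the candidate was actually issued.  */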
19380 if (type == TYPE_VECLOAD)
19382 /* Issued a vecload. */
19383 if (vec_pairing == 0)
19385 int vecload_pos = -1;
19386 /* We issued a single vecload, look for a vector insn to pair it
19387 with. If one isn't found, try to pair another vecload. */
19388 pos = lastpos;
19389 while (pos >= 0)
19391 if (recog_memoized (ready[pos]) >= 0)
19393 type2 = get_attr_type (ready[pos]);
19394 if (is_power9_pairable_vec_type (type2))
19396 /* Found a vector insn to pair with, move it to the
19397 end of the ready list so it is scheduled next. */
19398 move_to_end_of_ready (ready, pos, lastpos);
19399 vec_pairing = 1;
19400 return cached_can_issue_more;
19402 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19403 /* Remember position of first vecload seen. */
19404 vecload_pos = pos;
19406 pos--;
19408 if (vecload_pos >= 0)
19410 /* Didn't find a vector to pair with but did find a vecload,
19411 move it to the end of the ready list. */
19412 move_to_end_of_ready (ready, vecload_pos, lastpos);
19413 vec_pairing = 1;
19414 return cached_can_issue_more;
19418 else if (is_power9_pairable_vec_type (type))
19420 /* Issued a vector operation. */
19421 if (vec_pairing == 0)
19423 int vec_pos = -1;
19424 /* We issued a single vector insn, look for a vecload to pair it
19425 with. If one isn't found, try to pair another vector. */
19426 pos = lastpos;
19427 while (pos >= 0)
19429 if (recog_memoized (ready[pos]) >= 0)
19431 type2 = get_attr_type (ready[pos]);
19432 if (type2 == TYPE_VECLOAD)
19434 /* Found a vecload insn to pair with, move it to the
19435 end of the ready list so it is scheduled next. */
19436 move_to_end_of_ready (ready, pos, lastpos);
19437 vec_pairing = 1;
19438 return cached_can_issue_more;
19440 else if (is_power9_pairable_vec_type (type2)
19441 && vec_pos == -1)
19442 /* Remember position of first vector insn seen. */
19443 vec_pos = pos;
19445 pos--;
19447 if (vec_pos >= 0)
19449 /* Didn't find a vecload to pair with but did find a vector
19450 insn, move it to the end of the ready list. */
19451 move_to_end_of_ready (ready, vec_pos, lastpos);
19452 vec_pairing = 1;
19453 return cached_can_issue_more;
19458 /* We've either finished a vec/vecload pair, couldn't find an insn to
19459 continue the current pair, or the last insn had nothing to do
19460 with pairing. In any case, reset the state. */
19461 vec_pairing = 0;
19464 return cached_can_issue_more;
19467 /* Determine if INSN is a store to memory that can be fused with a similar
19468 adjacent store. */
19470 static bool
19471 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19473 /* Insn must be a non-prefixed base+disp form store. */
19474 if (is_store_insn (insn, str_mem)
19475 && get_attr_prefixed (insn) == PREFIXED_NO
19476 && get_attr_update (insn) == UPDATE_NO
19477 && get_attr_indexed (insn) == INDEXED_NO)
19479 /* Further restrictions by mode and size. */
19480 if (!MEM_SIZE_KNOWN_P (*str_mem))
19481 return false;
19483 machine_mode mode = GET_MODE (*str_mem);
19484 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19486 if (INTEGRAL_MODE_P (mode))
19487 /* Must be word or dword size. */
19488 return (size == 4 || size == 8);
19489 else if (FLOAT_MODE_P (mode))
19490 /* Must be dword size. */
19491 return (size == 8);
19494 return false;
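/* Illustrative examples (assembly forms assumed, not from the source):
   a plain "std 4,8(3)" qualifies (8-byte integral, base+disp, no
   update/index/prefix), as does "stw 4,4(3)"; update forms such as
   "stdu 4,8(1)", indexed forms such as "stdx 4,3,5", prefixed stores,
   and 16-byte vector stores are all rejected.  */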
19497 /* Do Power10 specific reordering of the ready list. */
19499 static int
19500 power10_sched_reorder (rtx_insn **ready, int lastpos)
19502 rtx mem1;
19504 /* Do store fusion during sched2 only. */
19505 if (!reload_completed)
19506 return cached_can_issue_more;
19508 /* If the prior insn finished off a store fusion pair then simply
19509 reset the counter and return, nothing more to do. */
19510 if (load_store_pendulum != 0)
19512 load_store_pendulum = 0;
19513 return cached_can_issue_more;
19516 /* Try to pair certain store insns to adjacent memory locations
19517 so that the hardware will fuse them to a single operation. */
19518 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19521 /* A fusable store was just scheduled. Scan the ready list for another
19522 store that it can fuse with. */
19523 int pos = lastpos;
19524 while (pos >= 0)
19526 rtx mem2;
19527 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19528 must be ascending only. */
19529 if (is_fusable_store (ready[pos], &mem2)
19530 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19531 && adjacent_mem_locations (mem1, mem2))
19532 || (FLOAT_MODE_P (GET_MODE (mem1))
19533 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19535 /* Found a fusable store. Move it to the end of the ready list
19536 so it is scheduled next. */
19537 move_to_end_of_ready (ready, pos, lastpos);
19539 load_store_pendulum = -1;
19540 break;
19542 pos--;
19546 return cached_can_issue_more;
19549 /* We are about to begin issuing insns for this clock cycle. */
19551 static int
19552 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19553 rtx_insn **ready ATTRIBUTE_UNUSED,
19554 int *pn_ready ATTRIBUTE_UNUSED,
19555 int clock_var ATTRIBUTE_UNUSED)
19557 int n_ready = *pn_ready;
19559 if (sched_verbose)
19560 fprintf (dump, "// rs6000_sched_reorder :\n");
19562 /* Reorder the ready list, if the second to last ready insn
19563 is a nonpipelined insn. */
19564 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19566 if (is_nonpipeline_insn (ready[n_ready - 1])
19567 && (recog_memoized (ready[n_ready - 2]) > 0))
19568 /* Simply swap first two insns. */
19569 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19572 if (rs6000_tune == PROCESSOR_POWER6)
19573 load_store_pendulum = 0;
19575 /* Do Power10/Power11 dependent reordering. */
19576 if (last_scheduled_insn
19577 && (rs6000_tune == PROCESSOR_POWER10
19578 || rs6000_tune == PROCESSOR_POWER11))
19579 power10_sched_reorder (ready, n_ready - 1);
19581 return rs6000_issue_rate ();
19584 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19586 static int
19587 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19588 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19590 if (sched_verbose)
19591 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19593 /* Do Power6 dependent reordering if necessary. */
19594 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19595 return power6_sched_reorder2 (ready, *pn_ready - 1);
19597 /* Do Power9 dependent reordering if necessary. */
19598 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19599 && recog_memoized (last_scheduled_insn) >= 0)
19600 return power9_sched_reorder2 (ready, *pn_ready - 1);
19602 /* Do Power10/Power11 dependent reordering. */
19603 if (last_scheduled_insn
19604 && (rs6000_tune == PROCESSOR_POWER10
19605 || rs6000_tune == PROCESSOR_POWER11))
19606 return power10_sched_reorder (ready, *pn_ready - 1);
19608 return cached_can_issue_more;
19611 /* Return whether the presence of INSN causes a dispatch group termination
19612 of group WHICH_GROUP.
19614 If WHICH_GROUP == current_group, this function will return true if INSN
19615 causes the termination of the current group (i.e, the dispatch group to
19616 which INSN belongs). This means that INSN will be the last insn in the
19617 group it belongs to.
19619 If WHICH_GROUP == previous_group, this function will return true if INSN
19620 causes the termination of the previous group (i.e, the dispatch group that
19621 precedes the group to which INSN belongs). This means that INSN will be
19622 the first insn in the group it belongs to). */
19624 static bool
19625 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19627 bool first, last;
19629 if (! insn)
19630 return false;
19632 first = insn_must_be_first_in_group (insn);
19633 last = insn_must_be_last_in_group (insn);
19635 if (first && last)
19636 return true;
19638 if (which_group == current_group)
19639 return last;
19640 else if (which_group == previous_group)
19641 return first;
19643 return false;
19647 static bool
19648 insn_must_be_first_in_group (rtx_insn *insn)
19650 enum attr_type type;
19652 if (!insn
19653 || NOTE_P (insn)
19654 || DEBUG_INSN_P (insn)
19655 || GET_CODE (PATTERN (insn)) == USE
19656 || GET_CODE (PATTERN (insn)) == CLOBBER)
19657 return false;
19659 switch (rs6000_tune)
19661 case PROCESSOR_POWER5:
19662 if (is_cracked_insn (insn))
19663 return true;
19664 /* FALLTHRU */
19665 case PROCESSOR_POWER4:
19666 if (is_microcoded_insn (insn))
19667 return true;
19669 if (!rs6000_sched_groups)
19670 return false;
19672 type = get_attr_type (insn);
19674 switch (type)
19676 case TYPE_MFCR:
19677 case TYPE_MFCRF:
19678 case TYPE_MTCR:
19679 case TYPE_CR_LOGICAL:
19680 case TYPE_MTJMPR:
19681 case TYPE_MFJMPR:
19682 case TYPE_DIV:
19683 case TYPE_LOAD_L:
19684 case TYPE_STORE_C:
19685 case TYPE_ISYNC:
19686 case TYPE_SYNC:
19687 return true;
19688 default:
19689 break;
19691 break;
19692 case PROCESSOR_POWER6:
19693 type = get_attr_type (insn);
19695 switch (type)
19697 case TYPE_EXTS:
19698 case TYPE_CNTLZ:
19699 case TYPE_TRAP:
19700 case TYPE_MUL:
19701 case TYPE_INSERT:
19702 case TYPE_FPCOMPARE:
19703 case TYPE_MFCR:
19704 case TYPE_MTCR:
19705 case TYPE_MFJMPR:
19706 case TYPE_MTJMPR:
19707 case TYPE_ISYNC:
19708 case TYPE_SYNC:
19709 case TYPE_LOAD_L:
19710 case TYPE_STORE_C:
19711 return true;
19712 case TYPE_SHIFT:
19713 if (get_attr_dot (insn) == DOT_NO
19714 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19715 return true;
19716 else
19717 break;
19718 case TYPE_DIV:
19719 if (get_attr_size (insn) == SIZE_32)
19720 return true;
19721 else
19722 break;
19723 case TYPE_LOAD:
19724 case TYPE_STORE:
19725 case TYPE_FPLOAD:
19726 case TYPE_FPSTORE:
19727 if (get_attr_update (insn) == UPDATE_YES)
19728 return true;
19729 else
19730 break;
19731 default:
19732 break;
19734 break;
19735 case PROCESSOR_POWER7:
19736 type = get_attr_type (insn);
19738 switch (type)
19740 case TYPE_CR_LOGICAL:
19741 case TYPE_MFCR:
19742 case TYPE_MFCRF:
19743 case TYPE_MTCR:
19744 case TYPE_DIV:
19745 case TYPE_ISYNC:
19746 case TYPE_LOAD_L:
19747 case TYPE_STORE_C:
19748 case TYPE_MFJMPR:
19749 case TYPE_MTJMPR:
19750 return true;
19751 case TYPE_MUL:
19752 case TYPE_SHIFT:
19753 case TYPE_EXTS:
19754 if (get_attr_dot (insn) == DOT_YES)
19755 return true;
19756 else
19757 break;
19758 case TYPE_LOAD:
19759 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19760 || get_attr_update (insn) == UPDATE_YES)
19761 return true;
19762 else
19763 break;
19764 case TYPE_STORE:
19765 case TYPE_FPLOAD:
19766 case TYPE_FPSTORE:
19767 if (get_attr_update (insn) == UPDATE_YES)
19768 return true;
19769 else
19770 break;
19771 default:
19772 break;
19774 break;
19775 case PROCESSOR_POWER8:
19776 type = get_attr_type (insn);
19778 switch (type)
19780 case TYPE_CR_LOGICAL:
19781 case TYPE_MFCR:
19782 case TYPE_MFCRF:
19783 case TYPE_MTCR:
19784 case TYPE_SYNC:
19785 case TYPE_ISYNC:
19786 case TYPE_LOAD_L:
19787 case TYPE_STORE_C:
19788 case TYPE_VECSTORE:
19789 case TYPE_MFJMPR:
19790 case TYPE_MTJMPR:
19791 return true;
19792 case TYPE_SHIFT:
19793 case TYPE_EXTS:
19794 case TYPE_MUL:
19795 if (get_attr_dot (insn) == DOT_YES)
19796 return true;
19797 else
19798 break;
19799 case TYPE_LOAD:
19800 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19801 || get_attr_update (insn) == UPDATE_YES)
19802 return true;
19803 else
19804 break;
19805 case TYPE_STORE:
19806 if (get_attr_update (insn) == UPDATE_YES
19807 && get_attr_indexed (insn) == INDEXED_YES)
19808 return true;
19809 else
19810 break;
19811 default:
19812 break;
19814 break;
19815 default:
19816 break;
19819 return false;
19822 static bool
19823 insn_must_be_last_in_group (rtx_insn *insn)
19825 enum attr_type type;
19827 if (!insn
19828 || NOTE_P (insn)
19829 || DEBUG_INSN_P (insn)
19830 || GET_CODE (PATTERN (insn)) == USE
19831 || GET_CODE (PATTERN (insn)) == CLOBBER)
19832 return false;
19834 switch (rs6000_tune) {
19835 case PROCESSOR_POWER4:
19836 case PROCESSOR_POWER5:
19837 if (is_microcoded_insn (insn))
19838 return true;
19840 if (is_branch_slot_insn (insn))
19841 return true;
19843 break;
19844 case PROCESSOR_POWER6:
19845 type = get_attr_type (insn);
19847 switch (type)
19849 case TYPE_EXTS:
19850 case TYPE_CNTLZ:
19851 case TYPE_TRAP:
19852 case TYPE_MUL:
19853 case TYPE_FPCOMPARE:
19854 case TYPE_MFCR:
19855 case TYPE_MTCR:
19856 case TYPE_MFJMPR:
19857 case TYPE_MTJMPR:
19858 case TYPE_ISYNC:
19859 case TYPE_SYNC:
19860 case TYPE_LOAD_L:
19861 case TYPE_STORE_C:
19862 return true;
19863 case TYPE_SHIFT:
19864 if (get_attr_dot (insn) == DOT_NO
19865 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19866 return true;
19867 else
19868 break;
19869 case TYPE_DIV:
19870 if (get_attr_size (insn) == SIZE_32)
19871 return true;
19872 else
19873 break;
19874 default:
19875 break;
19877 break;
19878 case PROCESSOR_POWER7:
19879 type = get_attr_type (insn);
19881 switch (type)
19883 case TYPE_ISYNC:
19884 case TYPE_SYNC:
19885 case TYPE_LOAD_L:
19886 case TYPE_STORE_C:
19887 return true;
19888 case TYPE_LOAD:
19889 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19890 && get_attr_update (insn) == UPDATE_YES)
19891 return true;
19892 else
19893 break;
19894 case TYPE_STORE:
19895 if (get_attr_update (insn) == UPDATE_YES
19896 && get_attr_indexed (insn) == INDEXED_YES)
19897 return true;
19898 else
19899 break;
19900 default:
19901 break;
19903 break;
19904 case PROCESSOR_POWER8:
19905 type = get_attr_type (insn);
19907 switch (type)
19909 case TYPE_MFCR:
19910 case TYPE_MTCR:
19911 case TYPE_ISYNC:
19912 case TYPE_SYNC:
19913 case TYPE_LOAD_L:
19914 case TYPE_STORE_C:
19915 return true;
19916 case TYPE_LOAD:
19917 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19918 && get_attr_update (insn) == UPDATE_YES)
19919 return true;
19920 else
19921 break;
19922 case TYPE_STORE:
19923 if (get_attr_update (insn) == UPDATE_YES
19924 && get_attr_indexed (insn) == INDEXED_YES)
19925 return true;
19926 else
19927 break;
19928 default:
19929 break;
19931 break;
19932 default:
19933 break;
19936 return false;
19939 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19940 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19942 static bool
19943 is_costly_group (rtx *group_insns, rtx next_insn)
19945 int i;
19946 int issue_rate = rs6000_issue_rate ();
19948 for (i = 0; i < issue_rate; i++)
19950 sd_iterator_def sd_it;
19951 dep_t dep;
19952 rtx insn = group_insns[i];
19954 if (!insn)
19955 continue;
19957 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19959 rtx next = DEP_CON (dep);
19961 if (next == next_insn
19962 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19963 return true;
19967 return false;
19970 /* Utility of the function redefine_groups.
19971 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19972 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19973 to keep it "far" (in a separate group) from GROUP_INSNS, following
19974 one of the following schemes, depending on the value of the flag
19975 -minsert-sched-nops=X:
19976 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19977 in order to force NEXT_INSN into a separate group.
19978 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19979 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19980 insertion (has a group just ended, how many vacant issue slots remain in the
19981 last group, and how many dispatch groups were encountered so far). */
19983 static int
19984 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19985 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19986 int *group_count)
19988 rtx nop;
19989 bool force;
19990 int issue_rate = rs6000_issue_rate ();
19991 bool end = *group_end;
19992 int i;
19994 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19995 return can_issue_more;
19997 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19998 return can_issue_more;
20000 force = is_costly_group (group_insns, next_insn);
20001 if (!force)
20002 return can_issue_more;
20004 if (sched_verbose > 6)
20005 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
20006 *group_count ,can_issue_more);
20008 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
20010 if (*group_end)
20011 can_issue_more = 0;
20013 /* Since only a branch can be issued in the last issue_slot, it is
20014 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
20015 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
20016 in this case the last nop will start a new group and the branch
20017 will be forced to the new group. */
20018 if (can_issue_more && !is_branch_slot_insn (next_insn))
20019 can_issue_more--;
20021 /* Do we have a special group ending nop? */
20022 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
20023 || rs6000_tune == PROCESSOR_POWER8)
20025 nop = gen_group_ending_nop ();
20026 emit_insn_before (nop, next_insn);
20027 can_issue_more = 0;
20029 else
20030 while (can_issue_more > 0)
20032 nop = gen_nop ();
20033 emit_insn_before (nop, next_insn);
20034 can_issue_more--;
20037 *group_end = true;
20038 return 0;
20041 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
20043 int n_nops = rs6000_sched_insert_nops;
20045 /* Nops can't be issued from the branch slot, so the effective
20046 issue_rate for nops is 'issue_rate - 1'. */
20047 if (can_issue_more == 0)
20048 can_issue_more = issue_rate;
20049 can_issue_more--;
20050 if (can_issue_more == 0)
20052 can_issue_more = issue_rate - 1;
20053 (*group_count)++;
20054 end = true;
20055 for (i = 0; i < issue_rate; i++)
20057 group_insns[i] = 0;
20061 while (n_nops > 0)
20063 nop = gen_nop ();
20064 emit_insn_before (nop, next_insn);
20065 if (can_issue_more == issue_rate - 1) /* new group begins */
20066 end = false;
20067 can_issue_more--;
20068 if (can_issue_more == 0)
20070 can_issue_more = issue_rate - 1;
20071 (*group_count)++;
20072 end = true;
20073 for (i = 0; i < issue_rate; i++)
20075 group_insns[i] = 0;
20078 n_nops--;
20081 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
20082 can_issue_more++;
20084 /* Is next_insn going to start a new group? */
20085 *group_end
20086 = (end
20087 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20088 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20089 || (can_issue_more < issue_rate &&
20090 insn_terminates_group_p (next_insn, previous_group)));
20091 if (*group_end && end)
20092 (*group_count)--;
20094 if (sched_verbose > 6)
20095 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
20096 *group_count, can_issue_more);
20097 return can_issue_more;
20100 return can_issue_more;
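/* Illustrative only: with -minsert-sched-nops=2, exactly two nops are
   emitted before a costly NEXT_INSN; with
   -minsert-sched-nops=sched_finish_regroup_exact on Power6/7/8, a
   single special group-ending nop is emitted instead, because those
   cores provide a nop form the dispatcher always treats as ending the
   current group.  */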
20103 /* This function tries to synch the dispatch groups that the compiler "sees"
20104 with the dispatch groups that the processor dispatcher is expected to
20105 form in practice. It tries to achieve this synchronization by forcing the
20106 estimated processor grouping on the compiler (as opposed to the function
20107 'pad_groups' which tries to force the scheduler's grouping on the processor).
20109 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
20110 examines the (estimated) dispatch groups that will be formed by the processor
20111 dispatcher. It marks these group boundaries to reflect the estimated
20112 processor grouping, overriding the grouping that the scheduler had marked.
20113 Depending on the value of the flag '-minsert-sched-nops' this function can
20114 force certain insns into separate groups or force a certain distance between
20115 them by inserting nops, for example, if there exists a "costly dependence"
20116 between the insns.
20118 The function estimates the group boundaries that the processor will form as
20119 follows: It keeps track of how many vacant issue slots are available after
20120 each insn. A subsequent insn will start a new group if one of the following
20121 4 cases applies:
20122 - no more vacant issue slots remain in the current dispatch group.
20123 - only the last issue slot, which is the branch slot, is vacant, but the next
20124 insn is not a branch.
20125 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
20126 which means that a cracked insn (which occupies two issue slots) can't be
20127 issued in this group.
20128 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
20129 start a new group. */
20131 static int
20132 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20133 rtx_insn *tail)
20135 rtx_insn *insn, *next_insn;
20136 int issue_rate;
20137 int can_issue_more;
20138 int slot, i;
20139 bool group_end;
20140 int group_count = 0;
20141 rtx *group_insns;
20143 /* Initialize. */
20144 issue_rate = rs6000_issue_rate ();
20145 group_insns = XALLOCAVEC (rtx, issue_rate);
20146 for (i = 0; i < issue_rate; i++)
20148 group_insns[i] = 0;
20150 can_issue_more = issue_rate;
20151 slot = 0;
20152 insn = get_next_active_insn (prev_head_insn, tail);
20153 group_end = false;
20155 while (insn != NULL_RTX)
20157 slot = (issue_rate - can_issue_more);
20158 group_insns[slot] = insn;
20159 can_issue_more =
20160 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20161 if (insn_terminates_group_p (insn, current_group))
20162 can_issue_more = 0;
20164 next_insn = get_next_active_insn (insn, tail);
20165 if (next_insn == NULL_RTX)
20166 return group_count + 1;
20168 /* Is next_insn going to start a new group? */
20169 group_end
20170 = (can_issue_more == 0
20171 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20172 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20173 || (can_issue_more < issue_rate &&
20174 insn_terminates_group_p (next_insn, previous_group)));
20176 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
20177 next_insn, &group_end, can_issue_more,
20178 &group_count);
20180 if (group_end)
20182 group_count++;
20183 can_issue_more = 0;
20184 for (i = 0; i < issue_rate; i++)
20186 group_insns[i] = 0;
20190 if (GET_MODE (next_insn) == TImode && can_issue_more)
20191 PUT_MODE (next_insn, VOIDmode);
20192 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
20193 PUT_MODE (next_insn, TImode);
20195 insn = next_insn;
20196 if (can_issue_more == 0)
20197 can_issue_more = issue_rate;
20198 } /* while */
20200 return group_count;
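/* Illustrative note: a TImode marking on an insn is how the scheduler
   records "this insn starts a new dispatch group", so the PUT_MODE
   calls above rewrite the scheduler's group boundaries to match the
   estimate computed here.  */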
20203 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20204 dispatch group boundaries that the scheduler had marked. Pad with nops
20205 any dispatch groups which have vacant issue slots, in order to force the
20206 scheduler's grouping on the processor dispatcher. The function
20207 returns the number of dispatch groups found. */
20209 static int
20210 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20211 rtx_insn *tail)
20213 rtx_insn *insn, *next_insn;
20214 rtx nop;
20215 int issue_rate;
20216 int can_issue_more;
20217 int group_end;
20218 int group_count = 0;
20220 /* Initialize issue_rate. */
20221 issue_rate = rs6000_issue_rate ();
20222 can_issue_more = issue_rate;
20224 insn = get_next_active_insn (prev_head_insn, tail);
20225 next_insn = get_next_active_insn (insn, tail);
20227 while (insn != NULL_RTX)
20229 can_issue_more =
20230 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20232 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
20234 if (next_insn == NULL_RTX)
20235 break;
20237 if (group_end)
20239 /* If the scheduler had marked group termination at this location
20240 (between insn and next_insn), and neither insn nor next_insn will
20241 force group termination, pad the group with nops to force group
20242 termination. */
20243 if (can_issue_more
20244 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
20245 && !insn_terminates_group_p (insn, current_group)
20246 && !insn_terminates_group_p (next_insn, previous_group))
20248 if (!is_branch_slot_insn (next_insn))
20249 can_issue_more--;
20251 while (can_issue_more)
20253 nop = gen_nop ();
20254 emit_insn_before (nop, next_insn);
20255 can_issue_more--;
20259 can_issue_more = issue_rate;
20260 group_count++;
20263 insn = next_insn;
20264 next_insn = get_next_active_insn (insn, tail);
20267 return group_count;
20270 /* We're beginning a new block. Initialize data structures as necessary. */
20272 static void
20273 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
20274 int sched_verbose ATTRIBUTE_UNUSED,
20275 int max_ready ATTRIBUTE_UNUSED)
20277 last_scheduled_insn = NULL;
20278 load_store_pendulum = 0;
20279 divide_cnt = 0;
20280 vec_pairing = 0;
20283 /* The following function is called at the end of scheduling BB.
20284 After reload, it inserts nops to enforce insn group bundling. */
20286 static void
20287 rs6000_sched_finish (FILE *dump, int sched_verbose)
20289 int n_groups;
20291 if (sched_verbose)
20292 fprintf (dump, "=== Finishing schedule.\n");
20294 if (reload_completed && rs6000_sched_groups)
20296 /* Do not run sched_finish hook when selective scheduling enabled. */
20297 if (sel_sched_p ())
20298 return;
20300 if (rs6000_sched_insert_nops == sched_finish_none)
20301 return;
20303 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20304 n_groups = pad_groups (dump, sched_verbose,
20305 current_sched_info->prev_head,
20306 current_sched_info->next_tail);
20307 else
20308 n_groups = redefine_groups (dump, sched_verbose,
20309 current_sched_info->prev_head,
20310 current_sched_info->next_tail);
20312 if (sched_verbose >= 6)
20314 fprintf (dump, "ngroups = %d\n", n_groups);
20315 print_rtl (dump, current_sched_info->prev_head);
20316 fprintf (dump, "Done finish_sched\n");
20321 struct rs6000_sched_context
20323 short cached_can_issue_more;
20324 rtx_insn *last_scheduled_insn;
20325 int load_store_pendulum;
20326 int divide_cnt;
20327 int vec_pairing;
20330 typedef struct rs6000_sched_context rs6000_sched_context_def;
20331 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20333 /* Allocate store for new scheduling context. */
20334 static void *
20335 rs6000_alloc_sched_context (void)
20337 return xmalloc (sizeof (rs6000_sched_context_def));
20340 /* If CLEAN_P is true, initialize _SC with clean data;
20341 otherwise, initialize it from the global context. */
20342 static void
20343 rs6000_init_sched_context (void *_sc, bool clean_p)
20345 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20347 if (clean_p)
20349 sc->cached_can_issue_more = 0;
20350 sc->last_scheduled_insn = NULL;
20351 sc->load_store_pendulum = 0;
20352 sc->divide_cnt = 0;
20353 sc->vec_pairing = 0;
20355 else
20357 sc->cached_can_issue_more = cached_can_issue_more;
20358 sc->last_scheduled_insn = last_scheduled_insn;
20359 sc->load_store_pendulum = load_store_pendulum;
20360 sc->divide_cnt = divide_cnt;
20361 sc->vec_pairing = vec_pairing;
20365 /* Sets the global scheduling context to the one pointed to by _SC. */
20366 static void
20367 rs6000_set_sched_context (void *_sc)
20369 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20371 gcc_assert (sc != NULL);
20373 cached_can_issue_more = sc->cached_can_issue_more;
20374 last_scheduled_insn = sc->last_scheduled_insn;
20375 load_store_pendulum = sc->load_store_pendulum;
20376 divide_cnt = sc->divide_cnt;
20377 vec_pairing = sc->vec_pairing;
20380 /* Free _SC. */
20381 static void
20382 rs6000_free_sched_context (void *_sc)
20384 gcc_assert (_sc != NULL);
20386 free (_sc);
20389 static bool
20390 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20392 switch (get_attr_type (insn))
20394 case TYPE_DIV:
20395 case TYPE_SDIV:
20396 case TYPE_DDIV:
20397 case TYPE_VECDIV:
20398 case TYPE_SSQRT:
20399 case TYPE_DSQRT:
20400 return false;
20402 default:
20403 return true;
20407 /* Length in bytes of the trampoline for entering a nested function. */
20410 rs6000_trampoline_size (void)
20412 int ret = 0;
20414 switch (DEFAULT_ABI)
20416 default:
20417 gcc_unreachable ();
20419 case ABI_AIX:
20420 ret = (TARGET_32BIT) ? 12 : 24;
20421 break;
20423 case ABI_ELFv2:
20424 gcc_assert (!TARGET_32BIT);
20425 ret = 32;
20426 break;
20428 case ABI_DARWIN:
20429 case ABI_V4:
20430 ret = (TARGET_32BIT) ? 40 : 48;
20431 break;
20434 return ret;
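/* Illustrative arithmetic: under AIX the trampoline is a three-pointer
   function descriptor, hence 3*4 = 12 bytes for 32-bit and 3*8 = 24
   bytes for 64-bit code; the V.4 and Darwin trampolines also contain
   instructions, which accounts for their larger sizes.  */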
20437 /* Emit RTL insns to initialize the variable parts of a trampoline.
20438 FNADDR is an RTX for the address of the function's pure code.
20439 CXT is an RTX for the static chain value for the function. */
20441 static void
20442 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20444 int regsize = (TARGET_32BIT) ? 4 : 8;
20445 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20446 rtx ctx_reg = force_reg (Pmode, cxt);
20447 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20449 switch (DEFAULT_ABI)
20451 default:
20452 gcc_unreachable ();
20454 /* Under AIX, just build the 3-word function descriptor. */
20455 case ABI_AIX:
20457 rtx fnmem, fn_reg, toc_reg;
20459 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20460 error ("you cannot take the address of a nested function if you use "
20461 "the %qs option", "-mno-pointers-to-nested-functions");
20463 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20464 fn_reg = gen_reg_rtx (Pmode);
20465 toc_reg = gen_reg_rtx (Pmode);
20467 /* Macro to shorten the code expansions below. */
20468 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20470 m_tramp = replace_equiv_address (m_tramp, addr);
20472 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20473 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20474 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20475 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20476 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20478 # undef MEM_PLUS
20480 break;
20482 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20483 case ABI_ELFv2:
20484 case ABI_DARWIN:
20485 case ABI_V4:
20486 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20487 LCT_NORMAL, VOIDmode,
20488 addr, Pmode,
20489 GEN_INT (rs6000_trampoline_size ()), SImode,
20490 fnaddr, Pmode,
20491 ctx_reg, Pmode);
20492 break;
20497 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20498 identifier as an argument, so the front end shouldn't look it up. */
20500 static bool
20501 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20503 return is_attribute_p ("altivec", attr_id);
20506 /* Handle the "altivec" attribute. The attribute may have
20507 arguments as follows:
20509 __attribute__((altivec(vector__)))
20510 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20511 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20513 and may appear more than once (e.g., 'vector bool char') in a
20514 given declaration. */
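/* For example (illustrative), "vector unsigned int" reaches this
   handler as unsigned int carrying altivec(vector__) and is mapped to
   unsigned_V4SI_type_node, while "vector bool int" arrives as unsigned
   int carrying altivec(bool__) and is mapped to bool_V4SI_type_node.  */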
20516 static tree
20517 rs6000_handle_altivec_attribute (tree *node,
20518 tree name ATTRIBUTE_UNUSED,
20519 tree args,
20520 int flags ATTRIBUTE_UNUSED,
20521 bool *no_add_attrs)
20523 tree type = *node, result = NULL_TREE;
20524 machine_mode mode;
20525 int unsigned_p;
20526 char altivec_type
20527 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20528 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20529 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20530 : '?');
20532 while (POINTER_TYPE_P (type)
20533 || TREE_CODE (type) == FUNCTION_TYPE
20534 || TREE_CODE (type) == METHOD_TYPE
20535 || TREE_CODE (type) == ARRAY_TYPE)
20536 type = TREE_TYPE (type);
20538 mode = TYPE_MODE (type);
20540 /* Check for invalid AltiVec type qualifiers. */
20541 if (type == long_double_type_node)
20542 error ("use of %<long double%> in AltiVec types is invalid");
20543 else if (type == boolean_type_node)
20544 error ("use of boolean types in AltiVec types is invalid");
20545 else if (TREE_CODE (type) == COMPLEX_TYPE)
20546 error ("use of %<complex%> in AltiVec types is invalid");
20547 else if (DECIMAL_FLOAT_MODE_P (mode))
20548 error ("use of decimal floating-point types in AltiVec types is invalid");
20549 else if (!TARGET_VSX)
20551 if (type == long_unsigned_type_node || type == long_integer_type_node)
20553 if (TARGET_64BIT)
20554 error ("use of %<long%> in AltiVec types is invalid for "
20555 "64-bit code without %qs", "-mvsx");
20556 else if (rs6000_warn_altivec_long)
20557 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20558 "use %<int%>");
20560 else if (type == long_long_unsigned_type_node
20561 || type == long_long_integer_type_node)
20562 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20563 "-mvsx");
20564 else if (type == double_type_node)
20565 error ("use of %<double%> in AltiVec types is invalid without %qs",
20566 "-mvsx");
20569 switch (altivec_type)
20571 case 'v':
20572 unsigned_p = TYPE_UNSIGNED (type);
20573 switch (mode)
20575 case E_TImode:
20576 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20577 break;
20578 case E_DImode:
20579 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20580 break;
20581 case E_SImode:
20582 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20583 break;
20584 case E_HImode:
20585 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20586 break;
20587 case E_QImode:
20588 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20589 break;
20590 case E_SFmode: result = V4SF_type_node; break;
20591 case E_DFmode: result = V2DF_type_node; break;
20592 /* If the user says 'vector int bool', we may be handed the 'bool'
20593 attribute _before_ the 'vector' attribute, and so select the
20594 proper type in the 'b' case below. */
20595 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20596 case E_V2DImode: case E_V2DFmode:
20597 result = type;
20598 default: break;
20600 break;
20601 case 'b':
20602 switch (mode)
20604 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20605 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20606 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20607 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20608 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20609 default: break;
20611 break;
20612 case 'p':
20613 switch (mode)
20615 case E_V8HImode: result = pixel_V8HI_type_node;
20616 default: break;
20618 default: break;
20621 /* Propagate qualifiers attached to the element type
20622 onto the vector type. */
20623 if (result && result != type && TYPE_QUALS (type))
20624 result = build_qualified_type (result, TYPE_QUALS (type));
20626 *no_add_attrs = true; /* No need to hang on to the attribute. */
20628 if (result)
20629 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20631 return NULL_TREE;
20634 /* AltiVec defines five built-in scalar types that serve as vector
20635 elements; we must teach the compiler how to mangle them. The 128-bit
20636 floating point mangling is target-specific as well. MMA defines
20637 two built-in types to be used as opaque vector types. */
20639 static const char *
20640 rs6000_mangle_type (const_tree type)
20642 type = TYPE_MAIN_VARIANT (type);
20644 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20645 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20646 && TREE_CODE (type) != OPAQUE_TYPE)
20647 return NULL;
20649 if (type == bool_char_type_node) return "U6__boolc";
20650 if (type == bool_short_type_node) return "U6__bools";
20651 if (type == pixel_type_node) return "u7__pixel";
20652 if (type == bool_int_type_node) return "U6__booli";
20653 if (type == bool_long_long_type_node) return "U6__boolx";
20655 if (type == float128_type_node || type == float64x_type_node)
20656 return NULL;
20658 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20659 return "g";
20660 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20661 return "u9__ieee128";
20663 if (type == vector_pair_type_node)
20664 return "u13__vector_pair";
20665 if (type == vector_quad_type_node)
20666 return "u13__vector_quad";
20668 /* For all other types, use the default mangling. */
20669 return NULL;
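/* Illustrative manglings (derived from the table above): the element
   type of "vector bool int" mangles as U6__booli, the "vector __pixel"
   element type as u7__pixel, IEEE 128-bit floating point as
   u9__ieee128, and the IBM double-double format as "g".  */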
20672 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20673 struct attribute_spec.handler. */
20675 static tree
20676 rs6000_handle_longcall_attribute (tree *node, tree name,
20677 tree args ATTRIBUTE_UNUSED,
20678 int flags ATTRIBUTE_UNUSED,
20679 bool *no_add_attrs)
20681 if (TREE_CODE (*node) != FUNCTION_TYPE
20682 && TREE_CODE (*node) != FIELD_DECL
20683 && TREE_CODE (*node) != TYPE_DECL)
20685 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20686 name);
20687 *no_add_attrs = true;
20690 return NULL_TREE;
20693 /* Set longcall attributes on all functions declared when
20694 rs6000_default_long_calls is true. */
20695 static void
20696 rs6000_set_default_type_attributes (tree type)
20698 if (rs6000_default_long_calls
20699 && FUNC_OR_METHOD_TYPE_P (type))
20700 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20701 NULL_TREE,
20702 TYPE_ATTRIBUTES (type));
20704 #if TARGET_MACHO
20705 darwin_set_default_type_attributes (type);
20706 #endif
20709 /* Return a reference suitable for calling a function with the
20710 longcall attribute. */
20712 static rtx
20713 rs6000_longcall_ref (rtx call_ref, rtx arg)
20715 /* System V adds '.' to the internal name, so skip them. */
20716 const char *call_name = XSTR (call_ref, 0);
20717 if (*call_name == '.')
20719 while (*call_name == '.')
20720 call_name++;
20722 tree node = get_identifier (call_name);
20723 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20726 if (TARGET_PLTSEQ)
20728 rtx base = const0_rtx;
20729 int regno = 12;
20730 if (rs6000_pcrel_p ())
20732 rtx reg = gen_rtx_REG (Pmode, regno);
20733 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20734 gen_rtvec (3, base, call_ref, arg),
20735 UNSPECV_PLT_PCREL);
20736 emit_insn (gen_rtx_SET (reg, u));
20737 return reg;
20740 if (DEFAULT_ABI == ABI_ELFv2)
20741 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20742 else
20744 if (flag_pic)
20745 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20746 regno = 11;
20748 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20749 may be used by a function global entry point. For SysV4, r11
20750 is used by __glink_PLTresolve lazy resolver entry. */
20751 rtx reg = gen_rtx_REG (Pmode, regno);
20752 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20753 UNSPEC_PLT16_HA);
20754 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20755 gen_rtvec (3, reg, call_ref, arg),
20756 UNSPECV_PLT16_LO);
20757 emit_insn (gen_rtx_SET (reg, hi));
20758 emit_insn (gen_rtx_SET (reg, lo));
20759 return reg;
20762 return force_reg (Pmode, call_ref);
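/* Illustrative only: for a non-pcrel ELFv2 longcall the two SETs above
   become a high-adjusted add against the TOC pointer followed by a
   low-part load of the PLT entry address, leaving the target address
   in r12 as the linker's PLT stubs and the global entry point
   convention require.  */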
20765 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20766 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20767 #endif
20769 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20770 struct attribute_spec.handler. */
20771 static tree
20772 rs6000_handle_struct_attribute (tree *node, tree name,
20773 tree args ATTRIBUTE_UNUSED,
20774 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20776 tree *type = NULL;
20777 if (DECL_P (*node))
20779 if (TREE_CODE (*node) == TYPE_DECL)
20780 type = &TREE_TYPE (*node);
20782 else
20783 type = node;
20785 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20786 || TREE_CODE (*type) == UNION_TYPE)))
20788 warning (OPT_Wattributes, "%qE attribute ignored", name);
20789 *no_add_attrs = true;
20792 else if ((is_attribute_p ("ms_struct", name)
20793 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20794 || ((is_attribute_p ("gcc_struct", name)
20795 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20797 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20798 name);
20799 *no_add_attrs = true;
20802 return NULL_TREE;
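/* Usage sketch:

     struct S { char c; int bits : 8; } __attribute__ ((ms_struct));

   selects the Microsoft bit-field layout for S, while gcc_struct
   selects the native layout; as enforced above, the two attributes
   are mutually exclusive on a given type.  */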
20805 static bool
20806 rs6000_ms_bitfield_layout_p (const_tree record_type)
20808 return (TARGET_USE_MS_BITFIELD_LAYOUT
20809 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20810 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20813 #ifdef USING_ELFOS_H
20815 /* A get_unnamed_section callback, used for switching to toc_section. */
20817 static void
20818 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20820 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20821 && TARGET_MINIMAL_TOC)
20823 if (!toc_initialized)
20825 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20826 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20827 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20828 fprintf (asm_out_file, "\t.tc ");
20829 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20830 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20831 fprintf (asm_out_file, "\n");
20833 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20834 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20835 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20836 fprintf (asm_out_file, " = .+32768\n");
20837 toc_initialized = 1;
20839 else
20840 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20842 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20844 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20845 if (!toc_initialized)
20847 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20848 toc_initialized = 1;
20851 else
20853 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20854 if (!toc_initialized)
20856 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20857 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20858 fprintf (asm_out_file, " = .+32768\n");
20859 toc_initialized = 1;
20864 /* Implement TARGET_ASM_INIT_SECTIONS. */
20866 static void
20867 rs6000_elf_asm_init_sections (void)
20869 toc_section
20870 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20872 sdata2_section
20873 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20874 SDATA2_SECTION_ASM_OP);
20877 /* Implement TARGET_SELECT_RTX_SECTION. */
20879 static section *
20880 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20881 unsigned HOST_WIDE_INT align)
20883 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20884 return toc_section;
20885 else
20886 return default_elf_select_rtx_section (mode, x, align);
20889 /* For a SYMBOL_REF, set generic flags and then perform some
20890 target-specific processing.
20892 When the AIX ABI is requested on a non-AIX system, replace the
20893 function name with the real name (with a leading .) rather than the
20894 function descriptor name.  This saves a lot of overriding code
20895 that would otherwise be needed to handle the prefixes. */
20897 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20898 static void
20899 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20901 default_encode_section_info (decl, rtl, first);
20903 if (first
20904 && TREE_CODE (decl) == FUNCTION_DECL
20905 && !TARGET_AIX
20906 && DEFAULT_ABI == ABI_AIX)
20908 rtx sym_ref = XEXP (rtl, 0);
20909 size_t len = strlen (XSTR (sym_ref, 0));
20910 char *str = XALLOCAVEC (char, len + 2);
20911 str[0] = '.';
20912 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20913 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20917 static inline bool
20918 compare_section_name (const char *section, const char *templ)
20920 int len;
20922 len = strlen (templ);
20923 return (strncmp (section, templ, len) == 0
20924 && (section[len] == 0 || section[len] == '.'));
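/* E.g. compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") hold, while
   compare_section_name (".sdata2", ".sdata") does not -- which is
   why ".sdata2" is tested separately below.  */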
20927 bool
20928 rs6000_elf_in_small_data_p (const_tree decl)
20930 if (rs6000_sdata == SDATA_NONE)
20931 return false;
20933 /* We want to merge strings, so we never consider them small data. */
20934 if (TREE_CODE (decl) == STRING_CST)
20935 return false;
20937 /* Functions are never in the small data area. */
20938 if (TREE_CODE (decl) == FUNCTION_DECL)
20939 return false;
20941 if (VAR_P (decl) && DECL_SECTION_NAME (decl))
20943 const char *section = DECL_SECTION_NAME (decl);
20944 if (compare_section_name (section, ".sdata")
20945 || compare_section_name (section, ".sdata2")
20946 || compare_section_name (section, ".gnu.linkonce.s")
20947 || compare_section_name (section, ".sbss")
20948 || compare_section_name (section, ".sbss2")
20949 || compare_section_name (section, ".gnu.linkonce.sb")
20950 || strcmp (section, ".PPC.EMB.sdata0") == 0
20951 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20952 return true;
20954 else
20956 /* If we are told not to put readonly data in sdata, then don't. */
20957 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20958 && !rs6000_readonly_in_sdata)
20959 return false;
20961 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20963 if (size > 0
20964 && size <= g_switch_value
20965 /* If it's not public, and we're not going to reference it there,
20966 there's no need to put it in the small data section. */
20967 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20968 return true;
20971 return false;
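/* For instance, with -G 8 (so g_switch_value == 8) and -msdata=data,
   a public 8-byte variable with no explicit section is placed in
   small data while a 16-byte one is not -- a sketch; the outcome
   also depends on the other checks above.  */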
20974 #endif /* USING_ELFOS_H */
20976 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20978 static bool
20979 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20981 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20984 /* Do not place thread-local symbol refs in the object blocks. */
20986 static bool
20987 rs6000_use_blocks_for_decl_p (const_tree decl)
20989 return !DECL_THREAD_LOCAL_P (decl);
20992 /* Return a REG that occurs in ADDR with coefficient 1.
20993 ADDR can be effectively incremented by incrementing REG.
20995 r0 is special and we must not select it as an address
20996 register by this routine since our caller will try to
20997 increment the returned register via an "la" instruction. */
20999 rtx
21000 find_addr_reg (rtx addr)
21002 while (GET_CODE (addr) == PLUS)
21004 if (REG_P (XEXP (addr, 0))
21005 && REGNO (XEXP (addr, 0)) != 0)
21006 addr = XEXP (addr, 0);
21007 else if (REG_P (XEXP (addr, 1))
21008 && REGNO (XEXP (addr, 1)) != 0)
21009 addr = XEXP (addr, 1);
21010 else if (CONSTANT_P (XEXP (addr, 0)))
21011 addr = XEXP (addr, 1);
21012 else if (CONSTANT_P (XEXP (addr, 1)))
21013 addr = XEXP (addr, 0);
21014 else
21015 gcc_unreachable ();
21017 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
21018 return addr;
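/* For example, for ADDR of the form (plus (reg 9) (const_int 8)) the
   loop above peels off the constant and returns register 9.  */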
21021 void
21022 rs6000_fatal_bad_address (rtx op)
21024 fatal_insn ("bad address", op);
21027 #if TARGET_MACHO
21029 vec<branch_island, va_gc> *branch_islands;
21031 /* Remember to generate a branch island for far calls to the given
21032 function. */
21034 static void
21035 add_compiler_branch_island (tree label_name, tree function_name,
21036 int line_number)
21038 branch_island bi = {function_name, label_name, line_number};
21039 vec_safe_push (branch_islands, bi);
21042 /* NO_PREVIOUS_DEF checks the linked list of branch islands to see
21043 whether the function name is already there. */
21045 static int
21046 no_previous_def (tree function_name)
21048 branch_island *bi;
21049 unsigned ix;
21051 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21052 if (function_name == bi->function_name)
21053 return 0;
21054 return 1;
21057 /* GET_PREV_LABEL gets the label name from the previous definition of
21058 the function. */
21060 static tree
21061 get_prev_label (tree function_name)
21063 branch_island *bi;
21064 unsigned ix;
21066 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21067 if (function_name == bi->function_name)
21068 return bi->label_name;
21069 return NULL_TREE;
21072 /* Generate external symbol indirection stubs (PIC and non-PIC). */
21074 void
21075 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21077 unsigned int length;
21078 char *symbol_name, *lazy_ptr_name;
21079 char *local_label_0;
21080 static unsigned label = 0;
21082 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21083 symb = (*targetm.strip_name_encoding) (symb);
21085 length = strlen (symb);
21086 symbol_name = XALLOCAVEC (char, length + 32);
21087 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21089 lazy_ptr_name = XALLOCAVEC (char, length + 32);
21090 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
21092 if (MACHOPIC_PURE)
21094 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
21095 fprintf (file, "\t.align 5\n");
21097 fprintf (file, "%s:\n", stub);
21098 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21100 label++;
21101 local_label_0 = XALLOCAVEC (char, 16);
21102 sprintf (local_label_0, "L%u$spb", label);
21104 fprintf (file, "\tmflr r0\n");
21105 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
21106 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
21107 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
21108 lazy_ptr_name, local_label_0);
21109 fprintf (file, "\tmtlr r0\n");
21110 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
21111 (TARGET_64BIT ? "ldu" : "lwzu"),
21112 lazy_ptr_name, local_label_0);
21113 fprintf (file, "\tmtctr r12\n");
21114 fprintf (file, "\tbctr\n");
21116 else /* mdynamic-no-pic or mkernel. */
21118 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
21119 fprintf (file, "\t.align 4\n");
21121 fprintf (file, "%s:\n", stub);
21122 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21124 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
21125 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
21126 (TARGET_64BIT ? "ldu" : "lwzu"),
21127 lazy_ptr_name);
21128 fprintf (file, "\tmtctr r12\n");
21129 fprintf (file, "\tbctr\n");
21132 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21133 fprintf (file, "%s:\n", lazy_ptr_name);
21134 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21135 fprintf (file, "%sdyld_stub_binding_helper\n",
21136 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
21139 /* Legitimize PIC addresses. If the address is already
21140 position-independent, we return ORIG. Newly generated
21141 position-independent addresses go into a reg.  This is REG if
21142 nonzero; otherwise we allocate register(s) as necessary. */
21144 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
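/* SMALL_INT tests for a signed 16-bit value: adding 0x8000 maps the
   range [-0x8000, 0x7fff] onto [0, 0xffff], so SMALL_INT holds for
   0x7fff but not for 0x8000.  */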
21146 rtx
21147 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
21148 rtx reg)
21150 rtx base, offset;
21152 if (reg == NULL && !reload_completed)
21153 reg = gen_reg_rtx (Pmode);
21155 if (GET_CODE (orig) == CONST)
21157 rtx reg_temp;
21159 if (GET_CODE (XEXP (orig, 0)) == PLUS
21160 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
21161 return orig;
21163 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
21165 /* Use a different reg for the intermediate value, as
21166 it will be marked UNCHANGING. */
21167 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
21168 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
21169 Pmode, reg_temp);
21170 offset =
21171 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
21172 Pmode, reg);
21174 if (CONST_INT_P (offset))
21176 if (SMALL_INT (offset))
21177 return plus_constant (Pmode, base, INTVAL (offset));
21178 else if (!reload_completed)
21179 offset = force_reg (Pmode, offset);
21180 else
21182 rtx mem = force_const_mem (Pmode, orig);
21183 return machopic_legitimize_pic_address (mem, Pmode, reg);
21186 return gen_rtx_PLUS (Pmode, base, offset);
21189 /* Fall back on generic machopic code. */
21190 return machopic_legitimize_pic_address (orig, mode, reg);
21193 /* Output a .machine directive for the Darwin assembler, and call
21194 the generic start_file routine. */
21196 static void
21197 rs6000_darwin_file_start (void)
21199 static const struct
21201 const char *arg;
21202 const char *name;
21203 HOST_WIDE_INT if_set;
21204 } mapping[] = {
21205 { "ppc64", "ppc64", MASK_64BIT },
21206 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
21207 | MASK_POWERPC64 },
21208 { "power4", "ppc970", 0 },
21209 { "G5", "ppc970", 0 },
21210 { "7450", "ppc7450", 0 },
21211 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
21212 { "G4", "ppc7400", 0 },
21213 { "750", "ppc750", 0 },
21214 { "740", "ppc750", 0 },
21215 { "G3", "ppc750", 0 },
21216 { "604e", "ppc604e", 0 },
21217 { "604", "ppc604", 0 },
21218 { "603e", "ppc603", 0 },
21219 { "603", "ppc603", 0 },
21220 { "601", "ppc601", 0 },
21221 { NULL, "ppc", 0 } };
21222 const char *cpu_id = "";
21223 size_t i;
21225 rs6000_file_start ();
21226 darwin_file_start ();
21228 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21230 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
21231 cpu_id = rs6000_default_cpu;
21233 if (OPTION_SET_P (rs6000_cpu_index))
21234 cpu_id = processor_target_table[rs6000_cpu_index].name;
21236 /* Look through the mapping array. Pick the first name that either
21237 matches the argument, has a bit set in IF_SET that is also set
21238 in the target flags, or has a NULL name. */
21240 i = 0;
21241 while (mapping[i].arg != NULL
21242 && strcmp (mapping[i].arg, cpu_id) != 0
21243 && (mapping[i].if_set & rs6000_isa_flags) == 0)
21244 i++;
21246 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
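/* For example, -mcpu=G4 walks the table to the "ppc7400" entry and
   emits ".machine ppc7400"; with no -mcpu and no matching ISA flag
   bits, the sentinel entry emits ".machine ppc".  */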
21249 #endif /* TARGET_MACHO */
21251 #if TARGET_ELF
21252 static int
21253 rs6000_elf_reloc_rw_mask (void)
21255 if (flag_pic)
21256 return 3;
21257 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21258 return 2;
21259 else
21260 return 0;
21263 /* Record an element in the table of global constructors. SYMBOL is
21264 a SYMBOL_REF of the function to be called; PRIORITY is a number
21265 between 0 and MAX_INIT_PRIORITY.
21267 This differs from default_named_section_asm_out_constructor in
21268 that we have special handling for -mrelocatable. */
21270 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
21271 static void
21272 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
21274 const char *section = ".ctors";
21275 char buf[18];
21277 if (priority != DEFAULT_INIT_PRIORITY)
21279 sprintf (buf, ".ctors.%.5u",
21280 /* Invert the numbering so the linker puts us in the proper
21281 order; constructors are run from right to left, and the
21282 linker sorts in increasing order. */
21283 MAX_INIT_PRIORITY - priority);
21284 section = buf;
21287 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21288 assemble_align (POINTER_SIZE);
21290 if (DEFAULT_ABI == ABI_V4
21291 && (TARGET_RELOCATABLE || flag_pic > 1))
21293 fputs ("\t.long (", asm_out_file);
21294 output_addr_const (asm_out_file, symbol);
21295 fputs (")@fixup\n", asm_out_file);
21297 else
21298 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
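/* E.g. a constructor with priority 65435 lands in ".ctors.00100"
   (assuming MAX_INIT_PRIORITY is 65535), so the linker's ascending
   sort produces the required descending run order.  */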
21301 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21302 static void
21303 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21305 const char *section = ".dtors";
21306 char buf[18];
21308 if (priority != DEFAULT_INIT_PRIORITY)
21310 sprintf (buf, ".dtors.%.5u",
21311 /* Invert the numbering so the linker puts us in the proper
21312 order; constructors are run from right to left, and the
21313 linker sorts in increasing order. */
21314 MAX_INIT_PRIORITY - priority);
21315 section = buf;
21318 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21319 assemble_align (POINTER_SIZE);
21321 if (DEFAULT_ABI == ABI_V4
21322 && (TARGET_RELOCATABLE || flag_pic > 1))
21324 fputs ("\t.long (", asm_out_file);
21325 output_addr_const (asm_out_file, symbol);
21326 fputs (")@fixup\n", asm_out_file);
21328 else
21329 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21332 void
21333 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21335 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21337 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21338 ASM_OUTPUT_LABEL (file, name);
21339 fputs (DOUBLE_INT_ASM_OP, file);
21340 rs6000_output_function_entry (file, name);
21341 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21342 if (DOT_SYMBOLS)
21344 fputs ("\t.size\t", file);
21345 assemble_name (file, name);
21346 fputs (",24\n\t.type\t.", file);
21347 assemble_name (file, name);
21348 fputs (",@function\n", file);
21349 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21351 fputs ("\t.globl\t.", file);
21352 assemble_name (file, name);
21353 putc ('\n', file);
21356 else
21357 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21358 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21359 rs6000_output_function_entry (file, name);
21360 fputs (":\n", file);
21361 assemble_function_label_final ();
21362 return;
21365 int uses_toc;
21366 if (DEFAULT_ABI == ABI_V4
21367 && (TARGET_RELOCATABLE || flag_pic > 1)
21368 && !TARGET_SECURE_PLT
21369 && (!constant_pool_empty_p () || crtl->profile)
21370 && (uses_toc = uses_TOC ()))
21372 char buf[256];
21374 if (uses_toc == 2)
21375 switch_to_other_text_partition ();
21376 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21378 fprintf (file, "\t.long ");
21379 assemble_name (file, toc_label_name);
21380 need_toc_init = 1;
21381 putc ('-', file);
21382 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21383 assemble_name (file, buf);
21384 putc ('\n', file);
21385 if (uses_toc == 2)
21386 switch_to_other_text_partition ();
21389 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21390 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21392 if (TARGET_CMODEL == CMODEL_LARGE
21393 && rs6000_global_entry_point_prologue_needed_p ())
21395 char buf[256];
21397 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21399 fprintf (file, "\t.quad .TOC.-");
21400 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21401 assemble_name (file, buf);
21402 putc ('\n', file);
21405 if (DEFAULT_ABI == ABI_AIX)
21407 const char *desc_name, *orig_name;
21409 orig_name = (*targetm.strip_name_encoding) (name);
21410 desc_name = orig_name;
21411 while (*desc_name == '.')
21412 desc_name++;
21414 if (TREE_PUBLIC (decl))
21415 fprintf (file, "\t.globl %s\n", desc_name);
21417 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21418 fprintf (file, "%s:\n", desc_name);
21419 fprintf (file, "\t.long %s\n", orig_name);
21420 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21421 fputs ("\t.long 0\n", file);
21422 fprintf (file, "\t.previous\n");
21424 ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
21427 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21428 static void
21429 rs6000_elf_file_end (void)
21431 #ifdef HAVE_AS_GNU_ATTRIBUTE
21432 /* ??? The value emitted depends on options active at file end.
21433 Assume anyone using #pragma or attributes that might change
21434 options knows what they are doing. */
21435 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21436 && rs6000_passes_float)
21438 int fp;
21440 if (TARGET_HARD_FLOAT)
21441 fp = 1;
21442 else
21443 fp = 2;
21444 if (rs6000_passes_long_double)
21446 if (!TARGET_LONG_DOUBLE_128)
21447 fp |= 2 * 4;
21448 else if (TARGET_IEEEQUAD)
21449 fp |= 3 * 4;
21450 else
21451 fp |= 1 * 4;
21453 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
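/* Worked example: a hard-float compile whose calls pass 128-bit IBM
   long double yields fp == 1 | (1 * 4) == 5, i.e. the line
   ".gnu_attribute 4, 5".  */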
21455 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21457 if (rs6000_passes_vector)
21458 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21459 (TARGET_ALTIVEC_ABI ? 2 : 1));
21460 if (rs6000_returns_struct)
21461 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21462 aix_struct_return ? 2 : 1);
21464 #endif
21465 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21466 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21467 file_end_indicate_exec_stack ();
21468 #endif
21470 if (flag_split_stack)
21471 file_end_indicate_split_stack ();
21473 if (cpu_builtin_p)
21475 /* We have expanded a CPU builtin, so we need to emit a reference to
21476 the special symbol that LIBC uses to declare it supports the
21477 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21478 switch_to_section (data_section);
21479 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21480 fprintf (asm_out_file, "\t%s %s\n",
21481 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21484 #endif
21486 #if TARGET_XCOFF
21488 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21489 #define HAVE_XCOFF_DWARF_EXTRAS 0
21490 #endif
21493 /* Names of bss and data sections. These should be unique names for each
21494 compilation unit. */
21496 char *xcoff_bss_section_name;
21497 char *xcoff_private_data_section_name;
21498 char *xcoff_private_rodata_section_name;
21499 char *xcoff_tls_data_section_name;
21500 char *xcoff_read_only_section_name;
21502 static enum unwind_info_type
21503 rs6000_xcoff_debug_unwind_info (void)
21505 return UI_NONE;
21508 static void
21509 rs6000_xcoff_asm_output_anchor (rtx symbol)
21511 char buffer[100];
21513 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21514 SYMBOL_REF_BLOCK_OFFSET (symbol));
21515 fprintf (asm_out_file, "%s", SET_ASM_OP);
21516 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21517 fprintf (asm_out_file, ",");
21518 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21519 fprintf (asm_out_file, "\n");
21522 static void
21523 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21525 fputs (GLOBAL_ASM_OP, stream);
21526 RS6000_OUTPUT_BASENAME (stream, name);
21527 putc ('\n', stream);
21530 /* A get_unnamed_section callback, used for read-only sections.
21531 DIRECTIVE non-null selects the private rodata section. */
21533 static void
21534 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21536 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21537 directive
21538 ? xcoff_private_rodata_section_name
21539 : xcoff_read_only_section_name,
21540 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21543 /* Likewise for read-write sections. */
21545 static void
21546 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21548 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21549 xcoff_private_data_section_name,
21550 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21553 static void
21554 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21556 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21557 directive
21558 ? xcoff_private_data_section_name
21559 : xcoff_tls_data_section_name,
21560 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21563 /* A get_unnamed_section callback, used for switching to toc_section. */
21565 static void
21566 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21568 if (TARGET_MINIMAL_TOC)
21570 /* toc_section is always selected at least once from
21571 rs6000_xcoff_file_start, so this is guaranteed to
21572 always be defined once and only once in each file. */
21573 if (!toc_initialized)
21575 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21576 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21577 toc_initialized = 1;
21579 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21580 (TARGET_32BIT ? "" : ",3"));
21582 else
21583 fputs ("\t.toc\n", asm_out_file);
21586 /* Implement TARGET_ASM_INIT_SECTIONS. */
21588 static void
21589 rs6000_xcoff_asm_init_sections (void)
21591 read_only_data_section
21592 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21593 NULL);
21595 private_data_section
21596 = get_unnamed_section (SECTION_WRITE,
21597 rs6000_xcoff_output_readwrite_section_asm_op,
21598 NULL);
21600 read_only_private_data_section
21601 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21602 "");
21604 tls_data_section
21605 = get_unnamed_section (SECTION_TLS,
21606 rs6000_xcoff_output_tls_section_asm_op,
21607 NULL);
21609 tls_private_data_section
21610 = get_unnamed_section (SECTION_TLS,
21611 rs6000_xcoff_output_tls_section_asm_op,
21612 "");
21614 toc_section
21615 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21617 readonly_data_section = read_only_data_section;
21620 static int
21621 rs6000_xcoff_reloc_rw_mask (void)
21623 return 3;
21626 static void
21627 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21628 tree decl ATTRIBUTE_UNUSED)
21630 int smclass;
21631 static const char * const suffix[7]
21632 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21634 if (flags & SECTION_EXCLUDE)
21635 smclass = 6;
21636 else if (flags & SECTION_DEBUG)
21638 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21639 return;
21641 else if (flags & SECTION_CODE)
21642 smclass = 0;
21643 else if (flags & SECTION_TLS)
21645 if (flags & SECTION_BSS)
21646 smclass = 5;
21647 else
21648 smclass = 4;
21650 else if (flags & SECTION_WRITE)
21652 if (flags & SECTION_BSS)
21653 smclass = 3;
21654 else
21655 smclass = 2;
21657 else
21658 smclass = 1;
21660 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21661 (flags & SECTION_CODE) ? "." : "",
21662 name, suffix[smclass], flags & SECTION_ENTSIZE);
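/* For instance, a writable 8-byte-aligned named section "foo" comes
   out as ".csect foo[RW],3", while a code section "bar" becomes
   ".csect .bar[PR],<align>" -- a sketch; the low SECTION_ENTSIZE
   bits carry log2 of the alignment, as computed by
   rs6000_xcoff_section_type_flags below.  */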
21665 #define IN_NAMED_SECTION(DECL) \
21666 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21667 && DECL_SECTION_NAME (DECL) != NULL)
21669 static section *
21670 rs6000_xcoff_select_section (tree decl, int reloc,
21671 unsigned HOST_WIDE_INT align)
21673 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21674 named section. */
21675 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21677 resolve_unique_section (decl, reloc, true);
21678 if (IN_NAMED_SECTION (decl))
21679 return get_named_section (decl, NULL, reloc);
21682 if (decl_readonly_section (decl, reloc))
21684 if (TREE_PUBLIC (decl))
21685 return read_only_data_section;
21686 else
21687 return read_only_private_data_section;
21689 else
21691 #if HAVE_AS_TLS
21692 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21694 if (bss_initializer_p (decl))
21695 return tls_comm_section;
21696 else if (TREE_PUBLIC (decl))
21697 return tls_data_section;
21698 else
21699 return tls_private_data_section;
21701 else
21702 #endif
21703 if (TREE_PUBLIC (decl))
21704 return data_section;
21705 else
21706 return private_data_section;
21710 static void
21711 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21713 const char *name;
21715 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21716 name = (*targetm.strip_name_encoding) (name);
21717 set_decl_section_name (decl, name);
21720 /* Select section for constant in constant pool.
21722 On RS/6000, all constants are in the private read-only data area.
21723 However, if this is being placed in the TOC it must be output as a
21724 toc entry. */
21726 static section *
21727 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21728 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21730 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21731 return toc_section;
21732 else
21733 return read_only_private_data_section;
21736 /* Remove any trailing [DS] or the like from the symbol name. */
21738 static const char *
21739 rs6000_xcoff_strip_name_encoding (const char *name)
21741 size_t len;
21742 if (*name == '*')
21743 name++;
21744 len = strlen (name);
21745 if (name[len - 1] == ']')
21746 return ggc_alloc_string (name, len - 4);
21747 else
21748 return name;
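/* E.g. "*foo[DS]" strips to "foo" and "bar[RW]" to "bar"; names
   without a trailing mapping class are returned unchanged.  Note the
   bracketed suffix is assumed to be exactly four characters.  */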
21751 /* Section attributes. AIX is always PIC. */
21753 static unsigned int
21754 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21756 unsigned int align;
21757 unsigned int flags = default_section_type_flags (decl, name, reloc);
21759 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21760 flags |= SECTION_BSS;
21762 /* Align to at least UNIT size. */
21763 if (!decl || !DECL_P (decl))
21764 align = MIN_UNITS_PER_WORD;
21765 /* Align code CSECT to at least 32 bytes. */
21766 else if ((flags & SECTION_CODE) != 0)
21767 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21768 else
21769 /* Increase alignment of large objects if not already stricter. */
21770 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21771 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21772 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21774 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21777 /* Output at beginning of assembler file.
21779 Initialize the section names for the RS/6000 at this point.
21781 Specify filename, including full path, to assembler.
21783 We want to go into the TOC section so at least one .toc will be emitted.
21784 Also, in order to output proper .bs/.es pairs, we need at least one static
21785 [RW] section emitted.
21787 Finally, declare mcount when profiling to make the assembler happy. */
21789 static void
21790 rs6000_xcoff_file_start (void)
21792 rs6000_gen_section_name (&xcoff_bss_section_name,
21793 main_input_filename, ".bss_");
21794 rs6000_gen_section_name (&xcoff_private_data_section_name,
21795 main_input_filename, ".rw_");
21796 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21797 main_input_filename, ".rop_");
21798 rs6000_gen_section_name (&xcoff_read_only_section_name,
21799 main_input_filename, ".ro_");
21800 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21801 main_input_filename, ".tls_");
21803 fputs ("\t.file\t", asm_out_file);
21804 output_quoted_string (asm_out_file, main_input_filename);
21805 fputc ('\n', asm_out_file);
21806 if (write_symbols != NO_DEBUG)
21807 switch_to_section (private_data_section);
21808 switch_to_section (toc_section);
21809 switch_to_section (text_section);
21810 if (profile_flag)
21811 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21812 rs6000_file_start ();
21815 /* Output at end of assembler file.
21816 On the RS/6000, referencing data should automatically pull in text. */
21818 static void
21819 rs6000_xcoff_file_end (void)
21821 switch_to_section (text_section);
21822 if (xcoff_tls_exec_model_detected)
21824 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21825 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21827 fputs ("_section_.text:\n", asm_out_file);
21828 switch_to_section (data_section);
21829 fputs (TARGET_32BIT
21830 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21831 asm_out_file);
21835 struct declare_alias_data
21837 FILE *file;
21838 bool function_descriptor;
21841 /* Declare alias N. A helper for symtab_node::call_for_symbol_and_aliases. */
21843 static bool
21844 rs6000_declare_alias (struct symtab_node *n, void *d)
21846 struct declare_alias_data *data = (struct declare_alias_data *)d;
21847 /* The main symbol is output specially, because the varasm machinery
21848 does part of the job for us - we need not emit .globl/.lglobl and such. */
21849 if (!n->alias || n->weakref)
21850 return false;
21852 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21853 return false;
21855 /* Prevent assemble_alias from trying to use .set pseudo operation
21856 that does not behave as expected by the middle-end. */
21857 TREE_ASM_WRITTEN (n->decl) = true;
21859 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21860 char *buffer = (char *) alloca (strlen (name) + 2);
21861 char *p;
21862 int dollar_inside = 0;
21864 strcpy (buffer, name);
21865 p = strchr (buffer, '$');
21866 while (p) {
21867 *p = '_';
21868 dollar_inside++;
21869 p = strchr (p + 1, '$');
21871 if (TREE_PUBLIC (n->decl))
21873 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21875 if (dollar_inside) {
21876 if (data->function_descriptor)
21877 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21878 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21880 if (data->function_descriptor)
21882 fputs ("\t.globl .", data->file);
21883 RS6000_OUTPUT_BASENAME (data->file, buffer);
21884 putc ('\n', data->file);
21886 fputs ("\t.globl ", data->file);
21887 assemble_name (data->file, buffer);
21888 putc ('\n', data->file);
21890 #ifdef ASM_WEAKEN_DECL
21891 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21892 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21893 #endif
21895 else
21897 if (dollar_inside)
21899 if (data->function_descriptor)
21900 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21901 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21903 if (data->function_descriptor)
21905 fputs ("\t.lglobl .", data->file);
21906 RS6000_OUTPUT_BASENAME (data->file, buffer);
21907 putc ('\n', data->file);
21909 fputs ("\t.lglobl ", data->file);
21910 assemble_name (data->file, buffer);
21911 putc ('\n', data->file);
21913 if (data->function_descriptor)
21914 putc ('.', data->file);
21915 ASM_OUTPUT_LABEL (data->file, buffer);
21916 return false;
21920 #ifdef HAVE_GAS_HIDDEN
21921 /* Helper function to compute the visibility of a DECL
21922 and return the value as a const string. */
21924 static const char *
21925 rs6000_xcoff_visibility (tree decl)
21927 static const char * const visibility_types[] = {
21928 "", ",protected", ",hidden", ",internal"
21931 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21932 return visibility_types[vis];
21934 #endif
21937 /* This macro produces the initial definition of a function name.
21938 On the RS/6000, we need to place an extra '.' in the function name and
21939 output the function descriptor.
21940 Dollar signs are converted to underscores.
21942 The csect for the function will have already been created when
21943 text_section was selected. We do have to go back to that csect, however.
21945 The third and fourth parameters to the .function pseudo-op (16 and 044)
21946 are placeholders which no longer have any use.
21948 Because AIX assembler's .set command has unexpected semantics, we output
21949 all aliases as alternative labels in front of the definition. */
21951 void
21952 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21954 char *buffer = (char *) alloca (strlen (name) + 1);
21955 char *p;
21956 int dollar_inside = 0;
21957 struct declare_alias_data data = {file, false};
21959 strcpy (buffer, name);
21960 p = strchr (buffer, '$');
21961 while (p) {
21962 *p = '_';
21963 dollar_inside++;
21964 p = strchr (p + 1, '$');
21966 if (TREE_PUBLIC (decl))
21968 if (!RS6000_WEAK || !DECL_WEAK (decl))
21970 if (dollar_inside) {
21971 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21972 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21974 fputs ("\t.globl .", file);
21975 RS6000_OUTPUT_BASENAME (file, buffer);
21976 #ifdef HAVE_GAS_HIDDEN
21977 fputs (rs6000_xcoff_visibility (decl), file);
21978 #endif
21979 putc ('\n', file);
21982 else
21984 if (dollar_inside) {
21985 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21986 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21988 fputs ("\t.lglobl .", file);
21989 RS6000_OUTPUT_BASENAME (file, buffer);
21990 putc ('\n', file);
21993 fputs ("\t.csect ", file);
21994 assemble_name (file, buffer);
21995 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21997 ASM_OUTPUT_FUNCTION_LABEL (file, buffer, decl);
21999 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22000 &data, true);
22001 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
22002 RS6000_OUTPUT_BASENAME (file, buffer);
22003 fputs (", TOC[tc0], 0\n", file);
22005 in_section = NULL;
22006 switch_to_section (function_section (decl));
22007 putc ('.', file);
22008 ASM_OUTPUT_LABEL (file, buffer);
22010 data.function_descriptor = true;
22011 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22012 &data, true);
22013 if (!DECL_IGNORED_P (decl))
22015 if (dwarf_debuginfo_p ())
22017 name = (*targetm.strip_name_encoding) (name);
22018 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
22021 return;
22025 /* Output assembly language to globalize a symbol from a DECL,
22026 possibly with visibility. */
22028 void
22029 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
22031 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
22032 fputs (GLOBAL_ASM_OP, stream);
22033 assemble_name (stream, name);
22034 #ifdef HAVE_GAS_HIDDEN
22035 fputs (rs6000_xcoff_visibility (decl), stream);
22036 #endif
22037 putc ('\n', stream);
22040 /* Output assembly language to define a symbol as COMMON from a DECL,
22041 possibly with visibility. */
22043 void
22044 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
22045 tree decl,
22046 const char *name,
22047 unsigned HOST_WIDE_INT size,
22048 unsigned int align)
22050 unsigned int align2 = 2;
22052 if (align == 0)
22053 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
22055 if (align > 32)
22056 align2 = floor_log2 (align / BITS_PER_UNIT);
22057 else if (size > 4)
22058 align2 = 3;
22060 if (! DECL_COMMON (decl))
22062 /* Forget section. */
22063 in_section = NULL;
22065 /* Globalize TLS BSS. */
22066 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
22068 fputs (GLOBAL_ASM_OP, stream);
22069 assemble_name (stream, name);
22070 fputc ('\n', stream);
22073 /* Switch to section and skip space. */
22074 fputs ("\t.csect ", stream);
22075 assemble_name (stream, name);
22076 fprintf (stream, ",%u\n", align2);
22077 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
22078 ASM_OUTPUT_SKIP (stream, size ? size : 1);
22079 return;
22082 if (TREE_PUBLIC (decl))
22084 fprintf (stream,
22085 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
22086 name, size, align2);
22088 #ifdef HAVE_GAS_HIDDEN
22089 if (decl != NULL)
22090 fputs (rs6000_xcoff_visibility (decl), stream);
22091 #endif
22092 putc ('\n', stream);
22094 else
22095 fprintf (stream,
22096 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
22097 (*targetm.strip_name_encoding) (name), size, name, align2);
22100 /* This macro produces the initial definition of an object (variable) name.
22101 Because AIX assembler's .set command has unexpected semantics, we output
22102 all aliases as alternative labels in front of the definition. */
22104 void
22105 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
22107 struct declare_alias_data data = {file, false};
22108 ASM_OUTPUT_LABEL (file, name);
22109 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22110 &data, true);
22113 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
22115 void
22116 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
22118 fputs (integer_asm_op (size, FALSE), file);
22119 assemble_name (file, label);
22120 fputs ("-$", file);
22123 /* Output a symbol offset relative to the dbase for the current object.
22124 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
22125 signed offsets.
22127 __gcc_unwind_dbase is embedded in all executables/libraries through
22128 libgcc/config/rs6000/crtdbase.S. */
22130 void
22131 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
22133 fputs (integer_asm_op (size, FALSE), file);
22134 assemble_name (file, label);
22135 fputs("-__gcc_unwind_dbase", file);
22138 #ifdef HAVE_AS_TLS
22139 static void
22140 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
22142 rtx symbol;
22143 int flags;
22144 const char *symname;
22146 default_encode_section_info (decl, rtl, first);
22148 /* Careful not to prod global register variables. */
22149 if (!MEM_P (rtl))
22150 return;
22151 symbol = XEXP (rtl, 0);
22152 if (!SYMBOL_REF_P (symbol))
22153 return;
22155 flags = SYMBOL_REF_FLAGS (symbol);
22157 if (VAR_P (decl) && DECL_THREAD_LOCAL_P (decl))
22158 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
22160 SYMBOL_REF_FLAGS (symbol) = flags;
22162 symname = XSTR (symbol, 0);
22164 /* Append the CSECT mapping class, unless the symbol is already
22165 qualified.  Aliases are implemented as labels, so the symbol name
22166 should not add a mapping class. */
22167 if (decl
22168 && DECL_P (decl)
22169 && VAR_OR_FUNCTION_DECL_P (decl)
22170 && (symtab_node::get (decl) == NULL
22171 || symtab_node::get (decl)->alias == 0)
22172 && symname[strlen (symname) - 1] != ']')
22174 const char *smclass = NULL;
22176 if (TREE_CODE (decl) == FUNCTION_DECL)
22177 smclass = "[DS]";
22178 else if (DECL_THREAD_LOCAL_P (decl))
22180 if (bss_initializer_p (decl))
22181 smclass = "[UL]";
22182 else if (flag_data_sections)
22183 smclass = "[TL]";
22185 else if (DECL_EXTERNAL (decl))
22186 smclass = "[UA]";
22187 else if (bss_initializer_p (decl))
22188 smclass = "[BS]";
22189 else if (flag_data_sections)
22191 /* This must exactly match the logic of select section. */
22192 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
22193 smclass = "[RO]";
22194 else
22195 smclass = "[RW]";
22198 if (smclass != NULL)
22200 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
22202 strcpy (newname, symname);
22203 strcat (newname, smclass);
22204 XSTR (symbol, 0) = ggc_strdup (newname);
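/* E.g. an uninitialized TLS variable "v" becomes "v[UL]" here, an
   external object "w" becomes "w[UA]", and a function "f" gets the
   descriptor class, "f[DS]".  */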
22208 #endif /* HAVE_AS_TLS */
22209 #endif /* TARGET_XCOFF */
22211 void
22212 rs6000_asm_weaken_decl (FILE *stream, tree decl,
22213 const char *name, const char *val)
22215 fputs ("\t.weak\t", stream);
22216 assemble_name (stream, name);
22217 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22218 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22220 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22221 if (TARGET_XCOFF)
22222 fputs (rs6000_xcoff_visibility (decl), stream);
22223 #endif
22224 fputs ("\n\t.weak\t.", stream);
22225 RS6000_OUTPUT_BASENAME (stream, name);
22227 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22228 if (TARGET_XCOFF)
22229 fputs (rs6000_xcoff_visibility (decl), stream);
22230 #endif
22231 fputc ('\n', stream);
22233 if (val)
22235 #ifdef ASM_OUTPUT_DEF
22236 ASM_OUTPUT_DEF (stream, name, val);
22237 #endif
22238 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22239 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22241 fputs ("\t.set\t.", stream);
22242 RS6000_OUTPUT_BASENAME (stream, name);
22243 fputs (",.", stream);
22244 RS6000_OUTPUT_BASENAME (stream, val);
22245 fputc ('\n', stream);
22251 /* Return true if INSN should not be copied. */
22253 static bool
22254 rs6000_cannot_copy_insn_p (rtx_insn *insn)
22256 return recog_memoized (insn) >= 0
22257 && get_attr_cannot_copy (insn);
22260 /* Compute a (partial) cost for rtx X. Return true if the complete
22261 cost has been computed, and false if subexpressions should be
22262 scanned. In either case, *TOTAL contains the cost result. */
22264 static bool
22265 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
22266 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
22268 int code = GET_CODE (x);
22270 switch (code)
22272 /* On the RS/6000, if it is valid in the insn, it is free. */
22273 case CONST_INT:
22274 if (((outer_code == SET
22275 || outer_code == PLUS
22276 || outer_code == MINUS)
22277 && (satisfies_constraint_I (x)
22278 || satisfies_constraint_L (x)))
22279 || (outer_code == AND
22280 && (satisfies_constraint_K (x)
22281 || (mode == SImode
22282 ? satisfies_constraint_L (x)
22283 : satisfies_constraint_J (x))))
22284 || ((outer_code == IOR || outer_code == XOR)
22285 && (satisfies_constraint_K (x)
22286 || (mode == SImode
22287 ? satisfies_constraint_L (x)
22288 : satisfies_constraint_J (x))))
22289 || outer_code == ASHIFT
22290 || outer_code == ASHIFTRT
22291 || outer_code == LSHIFTRT
22292 || outer_code == ROTATE
22293 || outer_code == ROTATERT
22294 || outer_code == ZERO_EXTRACT
22295 || (outer_code == MULT
22296 && satisfies_constraint_I (x))
22297 || ((outer_code == DIV || outer_code == UDIV
22298 || outer_code == MOD || outer_code == UMOD)
22299 && exact_log2 (INTVAL (x)) >= 0)
22300 || (outer_code == COMPARE
22301 && (satisfies_constraint_I (x)
22302 || satisfies_constraint_K (x)))
22303 || ((outer_code == EQ || outer_code == NE)
22304 && (satisfies_constraint_I (x)
22305 || satisfies_constraint_K (x)
22306 || (mode == SImode
22307 ? satisfies_constraint_L (x)
22308 : satisfies_constraint_J (x))))
22309 || (outer_code == GTU
22310 && satisfies_constraint_I (x))
22311 || (outer_code == LTU
22312 && satisfies_constraint_P (x)))
22314 *total = 0;
22315 return true;
22317 else if ((outer_code == PLUS
22318 && reg_or_add_cint_operand (x, mode))
22319 || (outer_code == MINUS
22320 && reg_or_sub_cint_operand (x, mode))
22321 || ((outer_code == SET
22322 || outer_code == IOR
22323 || outer_code == XOR)
22324 && (INTVAL (x)
22325 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22327 *total = COSTS_N_INSNS (1);
22328 return true;
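/* E.g. the 100 in (plus (reg) (const_int 100)) satisfies constraint
   "I" (a signed 16-bit constant), folds into a single addi, and is
   therefore costed as free above.  */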
22330 /* FALLTHRU */
22332 case CONST_DOUBLE:
22333 case CONST_WIDE_INT:
22334 case CONST:
22335 case HIGH:
22336 case SYMBOL_REF:
22337 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22338 return true;
22340 case MEM:
22341 /* When optimizing for size, MEM should be slightly more expensive
22342 than generating the address, e.g., (plus (reg) (const)).
22343 L1 cache latency is about two instructions. */
22344 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22345 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22346 *total += COSTS_N_INSNS (100);
22347 return true;
22349 case LABEL_REF:
22350 *total = 0;
22351 return true;
22353 case PLUS:
22354 case MINUS:
22355 if (FLOAT_MODE_P (mode))
22356 *total = rs6000_cost->fp;
22357 else
22358 *total = COSTS_N_INSNS (1);
22359 return false;
22361 case MULT:
22362 if (CONST_INT_P (XEXP (x, 1))
22363 && satisfies_constraint_I (XEXP (x, 1)))
22365 if (INTVAL (XEXP (x, 1)) >= -256
22366 && INTVAL (XEXP (x, 1)) <= 255)
22367 *total = rs6000_cost->mulsi_const9;
22368 else
22369 *total = rs6000_cost->mulsi_const;
22371 else if (mode == SFmode)
22372 *total = rs6000_cost->fp;
22373 else if (FLOAT_MODE_P (mode))
22374 *total = rs6000_cost->dmul;
22375 else if (mode == DImode)
22376 *total = rs6000_cost->muldi;
22377 else
22378 *total = rs6000_cost->mulsi;
22379 return false;
22381 case FMA:
22382 if (mode == SFmode)
22383 *total = rs6000_cost->fp;
22384 else
22385 *total = rs6000_cost->dmul;
22386 break;
22388 case DIV:
22389 case MOD:
22390 if (FLOAT_MODE_P (mode))
22392 *total = mode == DFmode ? rs6000_cost->ddiv
22393 : rs6000_cost->sdiv;
22394 return false;
22396 /* FALLTHRU */
22398 case UDIV:
22399 case UMOD:
22400 if (CONST_INT_P (XEXP (x, 1))
22401 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22403 if (code == DIV || code == MOD)
22404 /* Shift, addze */
22405 *total = COSTS_N_INSNS (2);
22406 else
22407 /* Shift */
22408 *total = COSTS_N_INSNS (1);
22410 else
22412 if (GET_MODE (XEXP (x, 1)) == DImode)
22413 *total = rs6000_cost->divdi;
22414 else
22415 *total = rs6000_cost->divsi;
22417 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22418 if ((!TARGET_MODULO
22419 || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
22420 && (code == MOD || code == UMOD))
22421 *total += COSTS_N_INSNS (2);
22422 return false;
22424 case CTZ:
22425 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22426 return false;
22428 case FFS:
22429 *total = COSTS_N_INSNS (4);
22430 return false;
22432 case POPCOUNT:
22433 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22434 return false;
22436 case PARITY:
22437 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22438 return false;
22440 case NOT:
22441 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22442 *total = 0;
22443 else
22444 *total = COSTS_N_INSNS (1);
22445 return false;
22447 case AND:
22448 if (CONST_INT_P (XEXP (x, 1)))
22450 rtx left = XEXP (x, 0);
22451 rtx_code left_code = GET_CODE (left);
22453 /* rotate-and-mask: 1 insn. */
22454 if ((left_code == ROTATE
22455 || left_code == ASHIFT
22456 || left_code == LSHIFTRT)
22457 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22459 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22460 if (!CONST_INT_P (XEXP (left, 1)))
22461 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22462 *total += COSTS_N_INSNS (1);
22463 return true;
22466 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22467 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22468 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22469 || (val & 0xffff) == val
22470 || (val & 0xffff0000) == val
22471 || ((val & 0xffff) == 0 && mode == SImode))
22473 *total = rtx_cost (left, mode, AND, 0, speed);
22474 *total += COSTS_N_INSNS (1);
22475 return true;
22478 /* 2 insns. */
22479 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22481 *total = rtx_cost (left, mode, AND, 0, speed);
22482 *total += COSTS_N_INSNS (2);
22483 return true;
22487 *total = COSTS_N_INSNS (1);
22488 return false;
22490 case IOR:
22491 /* FIXME */
22492 *total = COSTS_N_INSNS (1);
22493 return true;
22495 case CLZ:
22496 case XOR:
22497 case ZERO_EXTRACT:
22498 *total = COSTS_N_INSNS (1);
22499 return false;
22501 case ASHIFT:
22502 /* The EXTSWSLI instruction combines a sign extension with a shift.
22503 Don't count the sign extend and the shift separately within the insn. */
22504 if (TARGET_EXTSWSLI && mode == DImode
22505 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22506 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22508 *total = 0;
22509 return false;
22511 /* fall through */
22513 case ASHIFTRT:
22514 case LSHIFTRT:
22515 case ROTATE:
22516 case ROTATERT:
22517 /* Handle mul_highpart. */
22518 if (outer_code == TRUNCATE
22519 && GET_CODE (XEXP (x, 0)) == MULT)
22521 if (mode == DImode)
22522 *total = rs6000_cost->muldi;
22523 else
22524 *total = rs6000_cost->mulsi;
22525 return true;
22527 else if (outer_code == AND)
22528 *total = 0;
22529 else
22530 *total = COSTS_N_INSNS (1);
22531 return false;
22533 case SIGN_EXTEND:
22534 case ZERO_EXTEND:
22535 if (MEM_P (XEXP (x, 0)))
22536 *total = 0;
22537 else
22538 *total = COSTS_N_INSNS (1);
22539 return false;
22541 case COMPARE:
22542 case NEG:
22543 case ABS:
22544 if (!FLOAT_MODE_P (mode))
22546 *total = COSTS_N_INSNS (1);
22547 return false;
22549 /* FALLTHRU */
22551 case FLOAT:
22552 case UNSIGNED_FLOAT:
22553 case FIX:
22554 case UNSIGNED_FIX:
22555 case FLOAT_TRUNCATE:
22556 *total = rs6000_cost->fp;
22557 return false;
22559 case FLOAT_EXTEND:
22560 if (mode == DFmode)
22561 *total = rs6000_cost->sfdf_convert;
22562 else
22563 *total = rs6000_cost->fp;
22564 return false;
22566 case CALL:
22567 case IF_THEN_ELSE:
22568 if (!speed)
22570 *total = COSTS_N_INSNS (1);
22571 return true;
22573 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22575 *total = rs6000_cost->fp;
22576 return false;
22578 break;
22580 case NE:
22581 case EQ:
22582 case GTU:
22583 case LTU:
22584 /* Carry bit requires mode == Pmode.
22585 NEG or PLUS already counted so only add one. */
22586 if (mode == Pmode
22587 && (outer_code == NEG || outer_code == PLUS))
22589 *total = COSTS_N_INSNS (1);
22590 return true;
22592 /* FALLTHRU */
22594 case GT:
22595 case LT:
22596 case UNORDERED:
22597 if (outer_code == SET)
22599 if (XEXP (x, 1) == const0_rtx)
22601 *total = COSTS_N_INSNS (2);
22602 return true;
22604 else
22606 *total = COSTS_N_INSNS (3);
22607 return false;
22610 /* CC COMPARE. */
22611 if (outer_code == COMPARE)
22613 *total = 0;
22614 return true;
22616 break;
22618 case UNSPEC:
22619 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22621 *total = 0;
22622 return true;
22624 break;
22626 default:
22627 break;
22630 return false;
22633 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22635 static bool
22636 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22637 int opno, int *total, bool speed)
22639 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22641 fprintf (stderr,
22642 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22643 "opno = %d, total = %d, speed = %s, x:\n",
22644 ret ? "complete" : "scan inner",
22645 GET_MODE_NAME (mode),
22646 GET_RTX_NAME (outer_code),
22647 opno,
22648 *total,
22649 speed ? "true" : "false");
22651 debug_rtx (x);
22653 return ret;
22656 static int
22657 rs6000_insn_cost (rtx_insn *insn, bool speed)
22659 if (recog_memoized (insn) < 0)
22660 return 0;
22662 /* If we are optimizing for size, just use the length. */
22663 if (!speed)
22664 return get_attr_length (insn);
22666 /* Use the cost if provided. */
22667 int cost = get_attr_cost (insn);
22668 if (cost > 0)
22669 return cost;
22671 /* If the insn tells us how many insns there are, use that. Otherwise use
22672 the length/4. Adjust the insn length to remove the extra size that
22673 prefixed instructions take. */
22674 int n = get_attr_num_insns (insn);
22675 if (n == 0)
22677 int length = get_attr_length (insn);
22678 if (get_attr_prefixed (insn) == PREFIXED_YES)
22680 int adjust = 0;
22681 ADJUST_INSN_LENGTH (insn, adjust);
22682 length -= adjust;
22685 n = length / 4;
22688 enum attr_type type = get_attr_type (insn);
22690 switch (type)
22692 case TYPE_LOAD:
22693 case TYPE_FPLOAD:
22694 case TYPE_VECLOAD:
22695 cost = COSTS_N_INSNS (n + 1);
22696 break;
22698 case TYPE_MUL:
22699 switch (get_attr_size (insn))
22701 case SIZE_8:
22702 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22703 break;
22704 case SIZE_16:
22705 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22706 break;
22707 case SIZE_32:
22708 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22709 break;
22710 case SIZE_64:
22711 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22712 break;
22713 default:
22714 gcc_unreachable ();
22716 break;
22717 case TYPE_DIV:
22718 switch (get_attr_size (insn))
22720 case SIZE_32:
22721 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22722 break;
22723 case SIZE_64:
22724 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22725 break;
22726 default:
22727 gcc_unreachable ();
22729 break;
22731 case TYPE_FP:
22732 cost = n * rs6000_cost->fp;
22733 break;
22734 case TYPE_DMUL:
22735 cost = n * rs6000_cost->dmul;
22736 break;
22737 case TYPE_SDIV:
22738 cost = n * rs6000_cost->sdiv;
22739 break;
22740 case TYPE_DDIV:
22741 cost = n * rs6000_cost->ddiv;
22742 break;
22744 case TYPE_SYNC:
22745 case TYPE_LOAD_L:
22746 case TYPE_MFCR:
22747 case TYPE_MFCRF:
22748 cost = COSTS_N_INSNS (n + 2);
22749 break;
22751 default:
22752 cost = COSTS_N_INSNS (n);
22755 return cost;
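/* Worked example: a simple load (TYPE_LOAD with n == 1) is costed
   COSTS_N_INSNS (2), matching the roughly two-instruction L1 latency
   assumed in rs6000_rtx_costs above.  */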
22758 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22760 static int
22761 rs6000_debug_address_cost (rtx x, machine_mode mode,
22762 addr_space_t as, bool speed)
22764 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22766 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22767 ret, speed ? "true" : "false");
22768 debug_rtx (x);
22770 return ret;
22774 /* A C expression returning the cost of moving data from a register
22775 of class FROM to one of class TO. */
22777 static int
22778 rs6000_register_move_cost (machine_mode mode,
22779 reg_class_t from, reg_class_t to)
22781 int ret;
22782 reg_class_t rclass;
22784 if (TARGET_DEBUG_COST)
22785 dbg_cost_ctrl++;
22787 /* If we have VSX, we can easily move between FPR or Altivec registers,
22788 otherwise we can only easily move within classes.
22789 Do this first so we give best-case answers for union classes
22790 containing both gprs and vsx regs. */
22791 HARD_REG_SET to_vsx, from_vsx;
22792 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22793 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22794 if (!hard_reg_set_empty_p (to_vsx)
22795 && !hard_reg_set_empty_p (from_vsx)
22796 && (TARGET_VSX
22797 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22799 int reg = FIRST_FPR_REGNO;
22800 if (TARGET_VSX
22801 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22802 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22803 reg = FIRST_ALTIVEC_REGNO;
22804 ret = 2 * hard_regno_nregs (reg, mode);
22807 /* Moves from/to GENERAL_REGS. */
22808 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22809 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22811 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22813 if (TARGET_DIRECT_MOVE)
22815 /* Keep the cost for direct moves above that for within
22816 a register class even if the actual processor cost is
22817 comparable. We do this because a direct move insn
22818 can't be a nop, whereas with ideal register
22819 allocation a move within the same class might turn
22820 out to be a nop. */
22821 if (rs6000_tune == PROCESSOR_POWER9
22822 || rs6000_tune == PROCESSOR_POWER10
22823 || rs6000_tune == PROCESSOR_POWER11)
22824 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22825 else
22826 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22827 /* SFmode requires a conversion when moving between gprs
22828 and vsx. */
22829 if (mode == SFmode)
22830 ret += 2;
22832 else
22833 ret = (rs6000_memory_move_cost (mode, rclass, false)
22834 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22837 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22838 shift. */
22839 else if (rclass == CR_REGS)
22840 ret = 4;
22842 /* For those processors that have slow LR/CTR moves, make them more
22843 expensive than memory in order to bias spills to memory.  */
22844 else if ((rs6000_tune == PROCESSOR_POWER6
22845 || rs6000_tune == PROCESSOR_POWER7
22846 || rs6000_tune == PROCESSOR_POWER8
22847 || rs6000_tune == PROCESSOR_POWER9)
22848 && reg_class_subset_p (rclass, SPECIAL_REGS))
22849 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22851 else
22852 /* A move will cost one instruction per GPR moved. */
22853 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22856 /* Everything else has to go through GENERAL_REGS. */
22857 else
22858 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22859 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22861 if (TARGET_DEBUG_COST)
22863 if (dbg_cost_ctrl == 1)
22864 fprintf (stderr,
22865 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22866 ret, GET_MODE_NAME (mode), reg_class_names[from],
22867 reg_class_names[to]);
22868 dbg_cost_ctrl--;
22871 return ret;
22874 /* A C expression returning the cost of moving data of MODE from a register to
22875 or from memory. */
22877 static int
22878 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22879 bool in ATTRIBUTE_UNUSED)
22881 int ret;
22883 if (TARGET_DEBUG_COST)
22884 dbg_cost_ctrl++;
22886 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22887 ret = 4 * hard_regno_nregs (0, mode);
22888 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22889 || reg_classes_intersect_p (rclass, VSX_REGS)))
22890 ret = 4 * hard_regno_nregs (32, mode);
22891 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22892 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22893 else
22894 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22896 if (TARGET_DEBUG_COST)
22898 if (dbg_cost_ctrl == 1)
22899 fprintf (stderr,
22900 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22901 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22902 dbg_cost_ctrl--;
22905 return ret;
22908 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22910 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22911 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22912 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22913 move cost between GENERAL_REGS and VSX_REGS low.
22915 It might seem reasonable to use a union class. After all, if usage
22916 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22917 rather than memory. However, in cases where register pressure of
22918 both is high, like the cactus_adm spec test, allowing
22919 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22920 the first scheduling pass. This is partly due to an allocno of
22921 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22922 class, which gives too high a pressure for GENERAL_REGS and too low
22923 for VSX_REGS. So, force a choice of the subclass here.
22925 The best class is also the union if GENERAL_REGS and VSX_REGS have
22926 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22927 allocno class, since trying to narrow down the class by regno mode
22928 is prone to error. For example, SImode is allowed in VSX regs and
22929 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22930 it would be wrong to choose an allocno of GENERAL_REGS based on
22931 SImode. */
22933 static reg_class_t
22934 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22935 reg_class_t allocno_class,
22936 reg_class_t best_class)
22938 switch (allocno_class)
22940 case GEN_OR_VSX_REGS:
22941 /* best_class must be a subset of allocno_class. */
22942 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22943 || best_class == GEN_OR_FLOAT_REGS
22944 || best_class == VSX_REGS
22945 || best_class == ALTIVEC_REGS
22946 || best_class == FLOAT_REGS
22947 || best_class == GENERAL_REGS
22948 || best_class == BASE_REGS);
22949 /* Use best_class but choose wider classes when copying from the
22950 wider class to best_class is cheap. This mimics IRA choice
22951 of allocno class. */
22952 if (best_class == BASE_REGS)
22953 return GENERAL_REGS;
22954 if (TARGET_VSX && best_class == FLOAT_REGS)
22955 return VSX_REGS;
22956 return best_class;
22958 case VSX_REGS:
22959 if (best_class == ALTIVEC_REGS)
22960 return ALTIVEC_REGS;
22962 default:
22963 break;
22966 return allocno_class;
22969 /* Load up a constant. If the mode is a vector mode, splat the value across
22970 all of the vector elements. */
22972 static rtx
22973 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22975 rtx reg;
22977 if (mode == SFmode || mode == DFmode)
22979 rtx d = const_double_from_real_value (dconst, mode);
22980 reg = force_reg (mode, d);
22982 else if (mode == V4SFmode)
22984 rtx d = const_double_from_real_value (dconst, SFmode);
22985 rtvec v = gen_rtvec (4, d, d, d, d);
22986 reg = gen_reg_rtx (mode);
22987 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22989 else if (mode == V2DFmode)
22991 rtx d = const_double_from_real_value (dconst, DFmode);
22992 rtvec v = gen_rtvec (2, d, d);
22993 reg = gen_reg_rtx (mode);
22994 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22996 else
22997 gcc_unreachable ();
22999 return reg;
23002 /* Generate an FMA instruction. */
23004 static void
23005 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
23007 machine_mode mode = GET_MODE (target);
23008 rtx dst;
23010 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
23011 gcc_assert (dst != NULL);
23013 if (dst != target)
23014 emit_move_insn (target, dst);
23017 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
23019 static void
23020 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
23022 machine_mode mode = GET_MODE (dst);
23023 rtx r;
23025 /* This is a tad more complicated, since the fnma_optab is for
23026 a different expression: fma(-m1, m2, a), which is the same
23027 thing except in the case of signed zeros.
23029 Fortunately we know that if FMA is supported, then FNMSUB is
23030 also supported in the ISA.  Just expand it directly. */
23032 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
23034 r = gen_rtx_NEG (mode, a);
23035 r = gen_rtx_FMA (mode, m1, m2, r);
23036 r = gen_rtx_NEG (mode, r);
23037 emit_insn (gen_rtx_SET (dst, r));
23040 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23041 add a reg_note saying that this was a division. Support both scalar and
23042 vector divide. Assumes no trapping math and finite arguments. */
23044 void
23045 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
23047 machine_mode mode = GET_MODE (dst);
23048 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
23049 int i;
23051 /* Low precision estimates guarantee 5 bits of accuracy. High
23052 precision estimates guarantee 14 bits of accuracy. SFmode
23053 requires 23 bits of accuracy. DFmode requires 52 bits of
23054 accuracy. Each pass at least doubles the accuracy, leading
23055 to the following. */
23056 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23057 if (mode == DFmode || mode == V2DFmode)
23058 passes++;
23060 enum insn_code code = optab_handler (smul_optab, mode);
23061 insn_gen_fn gen_mul = GEN_FCN (code);
23063 gcc_assert (code != CODE_FOR_nothing);
23065 one = rs6000_load_constant_and_splat (mode, dconst1);
23067 /* x0 = 1./d estimate */
23068 x0 = gen_reg_rtx (mode);
23069 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
23070 UNSPEC_FRES)));
23072 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
23073 if (passes > 1) {
23075 /* e0 = 1. - d * x0 */
23076 e0 = gen_reg_rtx (mode);
23077 rs6000_emit_nmsub (e0, d, x0, one);
23079 /* x1 = x0 + e0 * x0 */
23080 x1 = gen_reg_rtx (mode);
23081 rs6000_emit_madd (x1, e0, x0, x0);
23083 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
23084 ++i, xprev = xnext, eprev = enext) {
23086 /* enext = eprev * eprev */
23087 enext = gen_reg_rtx (mode);
23088 emit_insn (gen_mul (enext, eprev, eprev));
23090 /* xnext = xprev + enext * xprev */
23091 xnext = gen_reg_rtx (mode);
23092 rs6000_emit_madd (xnext, enext, xprev, xprev);
23095 } else
23096 xprev = x0;
23098 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
23100 /* u = n * xprev */
23101 u = gen_reg_rtx (mode);
23102 emit_insn (gen_mul (u, n, xprev));
23104 /* v = n - (d * u) */
23105 v = gen_reg_rtx (mode);
23106 rs6000_emit_nmsub (v, d, u, n);
23108 /* dst = (v * xprev) + u */
23109 rs6000_emit_madd (dst, v, xprev, u);
23111 if (note_p)
23112 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
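/* Illustrative aside, not part of GCC proper: the same Newton-Raphson
   refinement in plain C for scalar double, assuming d > 0 and
   passes >= 2 (the passes == 1 case skips straight to the final step).
   The seed models the ~5-bit fres estimate with the classic linear
   approximation 48/17 - 32/17 * m; on hardware the UNSPEC_FRES insn
   supplies it instead.  Requires <math.h> for frexp/ldexp.  With
   passes == 4, swdiv_model (1.0, 3.0, 4) agrees with 1.0 / 3.0 to
   full double precision.  */
static double
swdiv_model (double n, double d, int passes)
{
  int k;
  double m = frexp (d, &k);	/* d = m * 2^k, 0.5 <= m < 1 */
  double x = ldexp (48.0 / 17.0 - (32.0 / 17.0) * m, -k); /* ~5-bit 1/d */
  double e = 1.0 - d * x;	/* e0 = 1 - d*x0    (fnmsub) */
  x = x + e * x;		/* x1 = x0 + e0*x0  (fmadd) */
  for (int i = 0; i < passes - 2; i++)
    {
      e = e * e;		/* enext = eprev * eprev */
      x = x + e * x;		/* xnext = xprev + enext*xprev */
    }
  double u = n * x;		/* u = n * xprev */
  double v = n - d * u;		/* v = n - d*u      (fnmsub) */
  return v * x + u;		/* dst = v*xprev + u (fmadd) */
}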
23115 /* Goldschmidt's Algorithm for single/double-precision floating point
23116 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
23118 void
23119 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
23121 machine_mode mode = GET_MODE (src);
23122 rtx e = gen_reg_rtx (mode);
23123 rtx g = gen_reg_rtx (mode);
23124 rtx h = gen_reg_rtx (mode);
23126 /* Low precision estimates guarantee 5 bits of accuracy. High
23127 precision estimates guarantee 14 bits of accuracy. SFmode
23128 requires 23 bits of accuracy. DFmode requires 52 bits of
23129 accuracy. Each pass at least doubles the accuracy, leading
23130 to the following. */
23131 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23132 if (mode == DFmode || mode == V2DFmode)
23133 passes++;
23135 int i;
23136 rtx mhalf;
23137 enum insn_code code = optab_handler (smul_optab, mode);
23138 insn_gen_fn gen_mul = GEN_FCN (code);
23140 gcc_assert (code != CODE_FOR_nothing);
23142 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
23144 /* e = rsqrt estimate */
23145 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
23146 UNSPEC_RSQRT)));
23148 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
23149 if (!recip)
23151 rtx zero = force_reg (mode, CONST0_RTX (mode));
23153 if (mode == SFmode)
23155 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
23156 e, zero, mode, 0);
23157 if (target != e)
23158 emit_move_insn (e, target);
23160 else
23162 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
23163 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
23167 /* g = sqrt estimate. */
23168 emit_insn (gen_mul (g, e, src));
23169 /* h = 1/(2*sqrt) estimate. */
23170 emit_insn (gen_mul (h, e, mhalf));
23172 if (recip)
23174 if (passes == 1)
23176 rtx t = gen_reg_rtx (mode);
23177 rs6000_emit_nmsub (t, g, h, mhalf);
23178 /* Apply the correction directly to the rsqrt estimate. */
23179 rs6000_emit_madd (dst, e, t, e);
23181 else
23183 for (i = 0; i < passes; i++)
23185 rtx t1 = gen_reg_rtx (mode);
23186 rtx g1 = gen_reg_rtx (mode);
23187 rtx h1 = gen_reg_rtx (mode);
23189 rs6000_emit_nmsub (t1, g, h, mhalf);
23190 rs6000_emit_madd (g1, g, t1, g);
23191 rs6000_emit_madd (h1, h, t1, h);
23193 g = g1;
23194 h = h1;
23196 /* Multiply by 2: h approximates 1/(2*sqrt), so 2*h is the rsqrt. */
23197 emit_insn (gen_add3_insn (dst, h, h));
23200 else
23202 rtx t = gen_reg_rtx (mode);
23203 rs6000_emit_nmsub (t, g, h, mhalf);
23204 rs6000_emit_madd (dst, g, t, g);
23207 return;
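/* Illustrative aside, not part of GCC proper: the Goldschmidt iteration
   above in plain C for scalar double.  A float-precision seed stands in
   for the hardware UNSPEC_RSQRT estimate (requires <math.h> for sqrt),
   and the passes == 1 shortcut above is folded into the same loop here;
   recip selects 1/sqrt(src) instead of sqrt(src), as above.  */
static double
swsqrt_model (double src, int passes, int recip)
{
  double e = (double) (float) (1.0 / sqrt (src)); /* rsqrt estimate */
  double g = e * src;		/* sqrt estimate */
  double h = e * 0.5;		/* 1/(2*sqrt) estimate */
  if (recip)
    {
      for (int i = 0; i < passes; i++)
	{
	  double t = 0.5 - g * h;	/* fnmsub */
	  g = g + g * t;		/* refine sqrt estimate */
	  h = h + h * t;		/* refine 1/(2*sqrt) estimate */
	}
      return h + h;			/* 2*h = 1/sqrt(src) */
    }
  double t = 0.5 - g * h;
  return g + g * t;			/* single corrected sqrt */
}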
23210 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23211 (Power7) targets. DST is the target, and SRC is the argument operand. */
23213 void
23214 rs6000_emit_popcount (rtx dst, rtx src)
23216 machine_mode mode = GET_MODE (dst);
23217 rtx tmp1, tmp2;
23219 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23220 if (TARGET_POPCNTD)
23222 if (mode == SImode)
23223 emit_insn (gen_popcntdsi2 (dst, src));
23224 else
23225 emit_insn (gen_popcntddi2 (dst, src));
23226 return;
23229 tmp1 = gen_reg_rtx (mode);
23231 if (mode == SImode)
23233 emit_insn (gen_popcntbsi2 (tmp1, src));
23234 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
23235 NULL_RTX, 0);
23236 tmp2 = force_reg (SImode, tmp2);
23237 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
23239 else
23241 emit_insn (gen_popcntbdi2 (tmp1, src));
23242 tmp2 = expand_mult (DImode, tmp1,
23243 GEN_INT ((HOST_WIDE_INT)
23244 0x01010101 << 32 | 0x01010101),
23245 NULL_RTX, 0);
23246 tmp2 = force_reg (DImode, tmp2);
23247 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
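/* Illustrative aside, not part of GCC proper: the fallback sequence
   above in plain C for 32 bits.  The first three SWAR steps stand in
   for the popcntb instruction, which leaves each byte's population
   count in that byte; the multiply by 0x01010101 then accumulates all
   four byte counts into the top byte (no carries, since each count is
   at most 8), which the shift by 24 extracts.  */
static unsigned int
popcount_model (unsigned int x)
{
  x = x - ((x >> 1) & 0x55555555u);			/* 2-bit counts */
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);	/* 4-bit counts */
  x = (x + (x >> 4)) & 0x0f0f0f0fu;	/* byte counts, as popcntb */
  return (x * 0x01010101u) >> 24;	/* sum bytes, take top byte */
}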
23252 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23253 target, and SRC is the argument operand. */
23255 void
23256 rs6000_emit_parity (rtx dst, rtx src)
23258 machine_mode mode = GET_MODE (dst);
23259 rtx tmp;
23261 tmp = gen_reg_rtx (mode);
23263 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23264 if (TARGET_CMPB)
23266 if (mode == SImode)
23268 emit_insn (gen_popcntbsi2 (tmp, src));
23269 emit_insn (gen_paritysi2_cmpb (dst, tmp));
23271 else
23273 emit_insn (gen_popcntbdi2 (tmp, src));
23274 emit_insn (gen_paritydi2_cmpb (dst, tmp));
23276 return;
23279 if (mode == SImode)
23281 /* Is mult+shift >= shift+xor+shift+xor? */
23282 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
23284 rtx tmp1, tmp2, tmp3, tmp4;
23286 tmp1 = gen_reg_rtx (SImode);
23287 emit_insn (gen_popcntbsi2 (tmp1, src));
23289 tmp2 = gen_reg_rtx (SImode);
23290 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
23291 tmp3 = gen_reg_rtx (SImode);
23292 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
23294 tmp4 = gen_reg_rtx (SImode);
23295 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
23296 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
23298 else
23299 rs6000_emit_popcount (tmp, src);
23300 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
23302 else
23304 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23305 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23307 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23309 tmp1 = gen_reg_rtx (DImode);
23310 emit_insn (gen_popcntbdi2 (tmp1, src));
23312 tmp2 = gen_reg_rtx (DImode);
23313 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23314 tmp3 = gen_reg_rtx (DImode);
23315 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23317 tmp4 = gen_reg_rtx (DImode);
23318 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23319 tmp5 = gen_reg_rtx (DImode);
23320 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23322 tmp6 = gen_reg_rtx (DImode);
23323 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23324 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23326 else
23327 rs6000_emit_popcount (tmp, src);
23328 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
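/* Illustrative aside, not part of GCC proper: the shift/xor fold chosen
   above when the multiply is too slow, in plain C for 64 bits.  XORing
   the two halves preserves the parity of the byte-count sum, so after
   three folds bit 0 holds the parity of the original value, which the
   final AND above extracts.  */
static unsigned long long
parity_model (unsigned long long x)
{
  /* Byte-wise population counts, standing in for popcntb.  */
  x = x - ((x >> 1) & 0x5555555555555555ull);
  x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull);
  x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0full;
  x ^= x >> 32;		/* fold eight byte counts to four */
  x ^= x >> 16;		/* ... to two */
  x ^= x >> 8;		/* ... to one */
  return x & 1;		/* parity of the original value */
}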
23332 /* Expand an Altivec constant permutation for little endian mode.
23333 OP0 and OP1 are the input vectors and TARGET is the output vector.
23334 SEL specifies the constant permutation vector.
23336 There are two issues: First, the two input operands must be
23337 swapped so that together they form a double-wide array in LE
23338 order. Second, the vperm instruction has surprising behavior
23339 in LE mode: it interprets the elements of the source vectors
23340 in BE mode ("left to right") and interprets the elements of
23341 the destination vector in LE mode ("right to left"). To
23342 correct for this, we must subtract each element of the permute
23343 control vector from 31.
23345 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23346 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23347 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23348 serve as the permute control vector. Then, in BE mode,
23350 vperm 9,10,11,12
23352 places the desired result in vr9. However, in LE mode the
23353 vector contents will be
23355 vr10 = 00000003 00000002 00000001 00000000
23356 vr11 = 00000007 00000006 00000005 00000004
23358 The result of the vperm using the same permute control vector is
23360 vr9 = 05000000 07000000 01000000 03000000
23362 That is, the leftmost 4 bytes of vr10 are interpreted as the
23363 source for the rightmost 4 bytes of vr9, and so on.
23365 If we change the permute control vector to
23367 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23369 and issue
23371 vperm 9,11,10,12
23373 we get the desired
23375 vr9 = 00000006 00000004 00000002 00000000. */
23377 static void
23378 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23379 const vec_perm_indices &sel)
23381 unsigned int i;
23382 rtx perm[16];
23383 rtx constv, unspec;
23385 /* Unpack and adjust the constant selector. */
23386 for (i = 0; i < 16; ++i)
23388 unsigned int elt = 31 - (sel[i] & 31);
23389 perm[i] = GEN_INT (elt);
23392 /* Expand to a permute, swapping the inputs and using the
23393 adjusted selector. */
23394 if (!REG_P (op0))
23395 op0 = force_reg (V16QImode, op0);
23396 if (!REG_P (op1))
23397 op1 = force_reg (V16QImode, op1);
23399 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23400 constv = force_reg (V16QImode, constv);
23401 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23402 UNSPEC_VPERM);
23403 if (!REG_P (target))
23405 rtx tmp = gen_reg_rtx (V16QImode);
23406 emit_move_insn (tmp, unspec);
23407 unspec = tmp;
23410 emit_move_insn (target, unspec);
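/* Illustrative aside, not part of GCC proper: the selector adjustment
   performed by the loop above, in plain C.  Subtracting each element
   (mod 32) from 31 both swaps the two source vectors and reverses the
   byte numbering; e.g. the BE control vector {0,1,2,3,8,9,10,11,...}
   becomes {31,30,29,28,23,22,21,20,...} as in the comment above.  */
static void
vperm_le_adjust (const unsigned char sel[16], unsigned char out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = 31 - (sel[i] & 31);
}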
23413 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23414 permute control vector. But here it's not a constant, so we must
23415 generate a vector NAND or NOR to do the adjustment. */
23417 void
23418 altivec_expand_vec_perm_le (rtx operands[4])
23420 rtx notx, iorx, unspec;
23421 rtx target = operands[0];
23422 rtx op0 = operands[1];
23423 rtx op1 = operands[2];
23424 rtx sel = operands[3];
23425 rtx tmp = target;
23426 rtx norreg = gen_reg_rtx (V16QImode);
23427 machine_mode mode = GET_MODE (target);
23429 /* Get everything in regs so the pattern matches. */
23430 if (!REG_P (op0))
23431 op0 = force_reg (mode, op0);
23432 if (!REG_P (op1))
23433 op1 = force_reg (mode, op1);
23434 if (!REG_P (sel))
23435 sel = force_reg (V16QImode, sel);
23436 if (!REG_P (target))
23437 tmp = gen_reg_rtx (mode);
23439 if (TARGET_P9_VECTOR)
23441 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23442 UNSPEC_VPERMR);
23444 else
23446 /* Invert the selector with a VNAND if available, else a VNOR.
23447 The VNAND is preferred for future fusion opportunities. */
23448 notx = gen_rtx_NOT (V16QImode, sel);
23449 iorx = (TARGET_P8_VECTOR
23450 ? gen_rtx_IOR (V16QImode, notx, notx)
23451 : gen_rtx_AND (V16QImode, notx, notx));
23452 emit_insn (gen_rtx_SET (norreg, iorx));
23454 /* Permute with operands reversed and adjusted selector. */
23455 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23456 UNSPEC_VPERM);
23459 /* Copy into target, possibly by way of a register. */
23460 if (!REG_P (target))
23462 emit_move_insn (tmp, unspec);
23463 unspec = tmp;
23466 emit_move_insn (target, unspec);
23469 /* Expand an Altivec constant permutation. Return true if we match
23470 an efficient implementation; false to fall back to VPERM.
23472 OP0 and OP1 are the input vectors and TARGET is the output vector.
23473 SEL specifies the constant permutation vector. */
23475 static bool
23476 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23477 const vec_perm_indices &sel)
23479 struct altivec_perm_insn {
23480 HOST_WIDE_INT mask;
23481 enum insn_code impl;
23482 unsigned char perm[16];
23484 static const struct altivec_perm_insn patterns[] = {
23485 {OPTION_MASK_ALTIVEC,
23486 CODE_FOR_altivec_vpkuhum_direct,
23487 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23488 {OPTION_MASK_ALTIVEC,
23489 CODE_FOR_altivec_vpkuwum_direct,
23490 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23491 {OPTION_MASK_ALTIVEC,
23492 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct_be
23493 : CODE_FOR_altivec_vmrglb_direct_le,
23494 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23495 {OPTION_MASK_ALTIVEC,
23496 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct_be
23497 : CODE_FOR_altivec_vmrglh_direct_le,
23498 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23499 {OPTION_MASK_ALTIVEC,
23500 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be
23501 : CODE_FOR_altivec_vmrglw_direct_v4si_le,
23502 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23503 {OPTION_MASK_ALTIVEC,
23504 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct_be
23505 : CODE_FOR_altivec_vmrghb_direct_le,
23506 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23507 {OPTION_MASK_ALTIVEC,
23508 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct_be
23509 : CODE_FOR_altivec_vmrghh_direct_le,
23510 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23511 {OPTION_MASK_ALTIVEC,
23512 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be
23513 : CODE_FOR_altivec_vmrghw_direct_v4si_le,
23514 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23515 {OPTION_MASK_P8_VECTOR,
23516 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23517 : CODE_FOR_p8_vmrgow_v4sf_direct,
23518 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23519 {OPTION_MASK_P8_VECTOR,
23520 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23521 : CODE_FOR_p8_vmrgew_v4sf_direct,
23522 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23523 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23524 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23525 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23526 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23527 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23528 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23529 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23530 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23532 unsigned int i, j, elt, which;
23533 unsigned char perm[16];
23534 rtx x;
23535 bool one_vec;
23537 /* Unpack the constant selector. */
23538 for (i = which = 0; i < 16; ++i)
23540 elt = sel[i] & 31;
23541 which |= (elt < 16 ? 1 : 2);
23542 perm[i] = elt;
23545 /* Simplify the constant selector based on operands. */
23546 switch (which)
23548 default:
23549 gcc_unreachable ();
23551 case 3:
23552 one_vec = false;
23553 if (!rtx_equal_p (op0, op1))
23554 break;
23555 /* FALLTHRU */
23557 case 2:
23558 for (i = 0; i < 16; ++i)
23559 perm[i] &= 15;
23560 op0 = op1;
23561 one_vec = true;
23562 break;
23564 case 1:
23565 op1 = op0;
23566 one_vec = true;
23567 break;
23570 /* Look for splat patterns. */
23571 if (one_vec)
23573 elt = perm[0];
23575 for (i = 0; i < 16; ++i)
23576 if (perm[i] != elt)
23577 break;
23578 if (i == 16)
23580 if (!BYTES_BIG_ENDIAN)
23581 elt = 15 - elt;
23582 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23583 return true;
23586 if (elt % 2 == 0)
23588 for (i = 0; i < 16; i += 2)
23589 if (perm[i] != elt || perm[i + 1] != elt + 1)
23590 break;
23591 if (i == 16)
23593 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23594 x = gen_reg_rtx (V8HImode);
23595 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23596 GEN_INT (field)));
23597 emit_move_insn (target, gen_lowpart (V16QImode, x));
23598 return true;
23602 if (elt % 4 == 0)
23604 for (i = 0; i < 16; i += 4)
23605 if (perm[i] != elt
23606 || perm[i + 1] != elt + 1
23607 || perm[i + 2] != elt + 2
23608 || perm[i + 3] != elt + 3)
23609 break;
23610 if (i == 16)
23612 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23613 x = gen_reg_rtx (V4SImode);
23614 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23615 GEN_INT (field)));
23616 emit_move_insn (target, gen_lowpart (V16QImode, x));
23617 return true;
23622 /* Look for merge and pack patterns. */
23623 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23625 bool swapped;
23627 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23628 continue;
23630 elt = patterns[j].perm[0];
23631 if (perm[0] == elt)
23632 swapped = false;
23633 else if (perm[0] == elt + 16)
23634 swapped = true;
23635 else
23636 continue;
23637 for (i = 1; i < 16; ++i)
23639 elt = patterns[j].perm[i];
23640 if (swapped)
23641 elt = (elt >= 16 ? elt - 16 : elt + 16);
23642 else if (one_vec && elt >= 16)
23643 elt -= 16;
23644 if (perm[i] != elt)
23645 break;
23647 if (i == 16)
23649 enum insn_code icode = patterns[j].impl;
23650 machine_mode omode = insn_data[icode].operand[0].mode;
23651 machine_mode imode = insn_data[icode].operand[1].mode;
23653 rtx perm_idx = GEN_INT (0);
23654 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23656 int perm_val = 0;
23657 if (one_vec)
23659 if (perm[0] == 8)
23660 perm_val |= 2;
23661 if (perm[8] == 8)
23662 perm_val |= 1;
23664 else
23666 if (perm[0] != 0)
23667 perm_val |= 2;
23668 if (perm[8] != 16)
23669 perm_val |= 1;
23671 perm_idx = GEN_INT (perm_val);
23674 /* For little-endian, don't use vpkuwum and vpkuhum if the
23675 underlying vector type is not V4SI and V8HI, respectively.
23676 For example, using vpkuwum with a V8HI picks up the even
23677 halfwords (BE numbering) when the even halfwords (LE
23678 numbering) are what we need. */
23679 if (!BYTES_BIG_ENDIAN
23680 && icode == CODE_FOR_altivec_vpkuwum_direct
23681 && ((REG_P (op0)
23682 && GET_MODE (op0) != V4SImode)
23683 || (SUBREG_P (op0)
23684 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23685 continue;
23686 if (!BYTES_BIG_ENDIAN
23687 && icode == CODE_FOR_altivec_vpkuhum_direct
23688 && ((REG_P (op0)
23689 && GET_MODE (op0) != V8HImode)
23690 || (SUBREG_P (op0)
23691 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23692 continue;
23694 /* For little-endian, the two input operands must be swapped
23695 (or swapped back) to ensure proper right-to-left numbering
23696 from 0 to 2N-1. */
23697 if (swapped == BYTES_BIG_ENDIAN
23698 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23699 std::swap (op0, op1);
23700 if (imode != V16QImode)
23702 op0 = gen_lowpart (imode, op0);
23703 op1 = gen_lowpart (imode, op1);
23705 if (omode == V16QImode)
23706 x = target;
23707 else
23708 x = gen_reg_rtx (omode);
23709 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23710 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23711 else
23712 emit_insn (GEN_FCN (icode) (x, op0, op1));
23713 if (omode != V16QImode)
23714 emit_move_insn (target, gen_lowpart (V16QImode, x));
23715 return true;
23719 if (!BYTES_BIG_ENDIAN)
23721 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23722 return true;
23725 return false;
23728 /* Expand a VSX Permute Doubleword constant permutation.
23729 Return true if we match an efficient implementation. */
23731 static bool
23732 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23733 unsigned char perm0, unsigned char perm1)
23735 rtx x;
23737 /* If both selectors come from the same operand, fold to single op. */
23738 if ((perm0 & 2) == (perm1 & 2))
23740 if (perm0 & 2)
23741 op0 = op1;
23742 else
23743 op1 = op0;
23745 /* If both operands are equal, fold to simpler permutation. */
23746 if (rtx_equal_p (op0, op1))
23748 perm0 = perm0 & 1;
23749 perm1 = (perm1 & 1) + 2;
23751 /* If the first selector comes from the second operand, swap. */
23752 else if (perm0 & 2)
23754 if (perm1 & 2)
23755 return false;
23756 perm0 -= 2;
23757 perm1 += 2;
23758 std::swap (op0, op1);
23760 /* If the second selector does not come from the second operand, fail. */
23761 else if ((perm1 & 2) == 0)
23762 return false;
23764 /* Success! */
23765 if (target != NULL)
23767 machine_mode vmode, dmode;
23768 rtvec v;
23770 vmode = GET_MODE (target);
23771 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23772 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23773 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23774 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23775 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23776 emit_insn (gen_rtx_SET (target, x));
23778 return true;
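/* Illustrative aside, not part of GCC proper: the doubleword selection
   that the canonicalization above reduces to, in plain C.  After the
   folding and swapping, perm0 and perm1 each index one of the four
   doublewords of the concatenation {op0, op1}, which is exactly what a
   single xxpermdi performs.  */
static void
xxpermdi_model (const unsigned long long op0[2],
		const unsigned long long op1[2],
		unsigned int perm0, unsigned int perm1,
		unsigned long long out[2])
{
  unsigned long long cat[4] = { op0[0], op0[1], op1[0], op1[1] };
  out[0] = cat[perm0 & 3];
  out[1] = cat[perm1 & 3];
}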
23781 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23783 static bool
23784 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23785 rtx target, rtx op0, rtx op1,
23786 const vec_perm_indices &sel)
23788 if (vmode != op_mode)
23789 return false;
23791 bool testing_p = !target;
23793 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23794 if (TARGET_ALTIVEC && testing_p)
23795 return true;
23797 if (op0)
23799 rtx nop0 = force_reg (vmode, op0);
23800 if (op0 == op1)
23801 op1 = nop0;
23802 op0 = nop0;
23804 if (op1)
23805 op1 = force_reg (vmode, op1);
23807 /* Check for ps_merge* or xxpermdi insns. */
23808 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23810 if (testing_p)
23812 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23813 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23815 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23816 return true;
23819 if (TARGET_ALTIVEC)
23821 /* Force the target-independent code to lower to V16QImode. */
23822 if (vmode != V16QImode)
23823 return false;
23824 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23825 return true;
23828 return false;
23831 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23832 OP0 and OP1 are the input vectors and TARGET is the output vector.
23833 PERM specifies the constant permutation vector. */
23835 static void
23836 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23837 machine_mode vmode, const vec_perm_builder &perm)
23839 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23840 if (x != target)
23841 emit_move_insn (target, x);
23844 /* Expand an extract even operation. */
23846 void
23847 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23849 machine_mode vmode = GET_MODE (target);
23850 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23851 vec_perm_builder perm (nelt, nelt, 1);
23853 for (i = 0; i < nelt; i++)
23854 perm.quick_push (i * 2);
23856 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23859 /* Expand a vector interleave operation. */
23861 void
23862 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23864 machine_mode vmode = GET_MODE (target);
23865 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23866 vec_perm_builder perm (nelt, nelt, 1);
23868 high = (highp ? 0 : nelt / 2);
23869 for (i = 0; i < nelt / 2; i++)
23871 perm.quick_push (i + high);
23872 perm.quick_push (i + nelt + high);
23875 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
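/* Illustrative aside, not part of GCC proper: the selector built by
   rs6000_expand_interleave, in plain C.  For NELT = 4 and highp this
   yields {0, 4, 1, 5}, i.e. the high halves of op0 and op1 zipped
   together; with !highp it yields {2, 6, 3, 7} for the low halves.  */
static void
interleave_selector_model (unsigned int nelt, int highp, unsigned int *sel)
{
  unsigned int high = highp ? 0 : nelt / 2;
  for (unsigned int i = 0; i < nelt / 2; i++)
    {
      sel[2 * i] = i + high;		/* element of op0 */
      sel[2 * i + 1] = i + nelt + high;	/* matching element of op1 */
    }
}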
23878 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23879 void
23880 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23882 HOST_WIDE_INT hwi_scale (scale);
23883 REAL_VALUE_TYPE r_pow;
23884 rtvec v = rtvec_alloc (2);
23885 rtx elt;
23886 rtx scale_vec = gen_reg_rtx (V2DFmode);
23887 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23888 elt = const_double_from_real_value (r_pow, DFmode);
23889 RTVEC_ELT (v, 0) = elt;
23890 RTVEC_ELT (v, 1) = elt;
23891 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23892 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
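/* Illustrative aside, not part of GCC proper: the effect of
   rs6000_scale_v2df in plain C, multiplying each element by 2**scale.
   real_powi above builds that power of two as a compile-time constant;
   ldexp (from <math.h>) is the runtime equivalent used here.  */
static void
scale_v2df_model (double tgt[2], const double src[2], int scale)
{
  double factor = ldexp (1.0, scale);	/* 2**scale */
  tgt[0] = src[0] * factor;
  tgt[1] = src[1] * factor;
}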
23895 /* Return an RTX representing where to find the function value of a
23896 function returning MODE. */
23897 static rtx
23898 rs6000_complex_function_value (machine_mode mode)
23900 unsigned int regno;
23901 rtx r1, r2;
23902 machine_mode inner = GET_MODE_INNER (mode);
23903 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23905 if (TARGET_FLOAT128_TYPE
23906 && (mode == KCmode
23907 || (mode == TCmode && TARGET_IEEEQUAD)))
23908 regno = ALTIVEC_ARG_RETURN;
23910 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23911 regno = FP_ARG_RETURN;
23913 else
23915 regno = GP_ARG_RETURN;
23917 /* 32-bit is OK since it'll go in r3/r4. */
23918 if (TARGET_32BIT && inner_bytes >= 4)
23919 return gen_rtx_REG (mode, regno);
23922 if (inner_bytes >= 8)
23923 return gen_rtx_REG (mode, regno);
23925 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23926 const0_rtx);
23927 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23928 GEN_INT (inner_bytes));
23929 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23932 /* Return an rtx describing a return value of MODE as a PARALLEL
23933 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23934 stride REG_STRIDE. */
23936 static rtx
23937 rs6000_parallel_return (machine_mode mode,
23938 int n_elts, machine_mode elt_mode,
23939 unsigned int regno, unsigned int reg_stride)
23941 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23943 int i;
23944 for (i = 0; i < n_elts; i++)
23946 rtx r = gen_rtx_REG (elt_mode, regno);
23947 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23948 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23949 regno += reg_stride;
23952 return par;
23955 /* Target hook for TARGET_FUNCTION_VALUE.
23957 An integer value is in r3 and a floating-point value is in fp1,
23958 unless -msoft-float. */
23960 static rtx
23961 rs6000_function_value (const_tree valtype,
23962 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23963 bool outgoing ATTRIBUTE_UNUSED)
23965 machine_mode mode;
23966 unsigned int regno;
23967 machine_mode elt_mode;
23968 int n_elts;
23970 /* Special handling for structs in darwin64. */
23971 if (TARGET_MACHO
23972 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23974 CUMULATIVE_ARGS valcum;
23975 rtx valret;
23977 valcum.words = 0;
23978 valcum.fregno = FP_ARG_MIN_REG;
23979 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23980 /* Do a trial code generation as if this were going to be passed as
23981 an argument; if any part goes in memory, we return NULL. */
23982 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23983 if (valret)
23984 return valret;
23985 /* Otherwise fall through to standard ABI rules. */
23988 mode = TYPE_MODE (valtype);
23990 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23991 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23993 int first_reg, n_regs;
23995 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23997 /* _Decimal128 must use even/odd register pairs. */
23998 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23999 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
24001 else
24003 first_reg = ALTIVEC_ARG_RETURN;
24004 n_regs = 1;
24007 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
24010 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
24011 if (TARGET_32BIT && TARGET_POWERPC64)
24012 switch (mode)
24014 default:
24015 break;
24016 case E_DImode:
24017 case E_SCmode:
24018 case E_DCmode:
24019 case E_TCmode:
24020 int count = GET_MODE_SIZE (mode) / 4;
24021 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
24024 if ((INTEGRAL_TYPE_P (valtype)
24025 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
24026 || POINTER_TYPE_P (valtype))
24027 mode = TARGET_32BIT ? SImode : DImode;
24029 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24030 /* _Decimal128 must use an even/odd register pair. */
24031 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24032 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
24033 && !FLOAT128_VECTOR_P (mode))
24034 regno = FP_ARG_RETURN;
24035 else if (TREE_CODE (valtype) == COMPLEX_TYPE
24036 && targetm.calls.split_complex_arg)
24037 return rs6000_complex_function_value (mode);
24038 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24039 return register is used in both cases, and we won't see V2DImode/V2DFmode
24040 for pure altivec, combine the two cases. */
24041 else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
24042 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
24043 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24044 regno = ALTIVEC_ARG_RETURN;
24045 else
24046 regno = GP_ARG_RETURN;
24048 return gen_rtx_REG (mode, regno);
24051 /* Define how to find the value returned by a library function
24052 assuming the value has mode MODE. */
24053 static rtx
24054 rs6000_libcall_value (machine_mode mode)
24056 unsigned int regno;
24058 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
24059 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
24060 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
24062 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24063 /* _Decimal128 must use an even/odd register pair. */
24064 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24065 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
24066 regno = FP_ARG_RETURN;
24067 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24068 return register is used in both cases, and we won't see V2DImode/V2DFmode
24069 for pure altivec, combine the two cases. */
24070 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
24071 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
24072 regno = ALTIVEC_ARG_RETURN;
24073 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
24074 return rs6000_complex_function_value (mode);
24075 else
24076 regno = GP_ARG_RETURN;
24078 return gen_rtx_REG (mode, regno);
24081 /* Compute register pressure classes. We implement the target hook to avoid
24082 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
24083 lead to incorrect estimates of the number of available registers and therefore
24084 increased register pressure/spill. */
24085 static int
24086 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
24088 int n;
24090 n = 0;
24091 pressure_classes[n++] = GENERAL_REGS;
24092 if (TARGET_ALTIVEC)
24093 pressure_classes[n++] = ALTIVEC_REGS;
24094 if (TARGET_VSX)
24095 pressure_classes[n++] = VSX_REGS;
24096 else
24098 if (TARGET_HARD_FLOAT)
24099 pressure_classes[n++] = FLOAT_REGS;
24101 pressure_classes[n++] = CR_REGS;
24102 pressure_classes[n++] = SPECIAL_REGS;
24104 return n;
24107 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24108 Frame pointer elimination is automatically handled.
24110 For the RS/6000, if frame pointer elimination is being done, we would like
24111 to convert ap into fp, not sp.
24113 We need r30 if -mminimal-toc was specified, and there are constant pool
24114 references. */
24116 static bool
24117 rs6000_can_eliminate (const int from, const int to)
24119 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
24120 ? ! frame_pointer_needed
24121 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
24122 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
24123 || constant_pool_empty_p ()
24124 : true);
24127 /* Define the offset between two registers, FROM to be eliminated and its
24128 replacement TO, at the start of a routine. */
24129 HOST_WIDE_INT
24130 rs6000_initial_elimination_offset (int from, int to)
24132 rs6000_stack_t *info = rs6000_stack_info ();
24133 HOST_WIDE_INT offset;
24135 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24136 offset = info->push_p ? 0 : -info->total_size;
24137 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24139 offset = info->push_p ? 0 : -info->total_size;
24140 if (FRAME_GROWS_DOWNWARD)
24141 offset += info->fixed_size + info->vars_size + info->parm_size;
24143 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24144 offset = FRAME_GROWS_DOWNWARD
24145 ? info->fixed_size + info->vars_size + info->parm_size
24146 : 0;
24147 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24148 offset = info->total_size;
24149 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24150 offset = info->push_p ? info->total_size : 0;
24151 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
24152 offset = 0;
24153 else
24154 gcc_unreachable ();
24156 return offset;
24159 /* Fill in sizes of registers used by unwinder. */
24161 static void
24162 rs6000_init_dwarf_reg_sizes_extra (tree address)
24164 if (TARGET_MACHO && ! TARGET_ALTIVEC)
24166 int i;
24167 machine_mode mode = TYPE_MODE (char_type_node);
24168 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
24169 rtx mem = gen_rtx_MEM (BLKmode, addr);
24170 rtx value = gen_int_mode (16, mode);
24172 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24173 The unwinder still needs to know the size of Altivec registers. */
24175 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
24177 int column = DWARF_REG_TO_UNWIND_COLUMN
24178 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
24179 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
24181 emit_move_insn (adjust_address (mem, mode, offset), value);
24186 /* Map internal gcc register numbers to debug format register numbers.
24187 FORMAT specifies the type of debug register number to use:
24188 0 -- debug information, except for frame-related sections
24189 1 -- DWARF .debug_frame section
24190 2 -- DWARF .eh_frame section */
24192 unsigned int
24193 rs6000_debugger_regno (unsigned int regno, unsigned int format)
24195 /* On some platforms, we use the standard DWARF register
24196 numbering for .debug_info and .debug_frame. */
24197 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
24199 #ifdef RS6000_USE_DWARF_NUMBERING
24200 if (regno <= 31)
24201 return regno;
24202 if (FP_REGNO_P (regno))
24203 return regno - FIRST_FPR_REGNO + 32;
24204 if (ALTIVEC_REGNO_P (regno))
24205 return regno - FIRST_ALTIVEC_REGNO + 1124;
24206 if (regno == LR_REGNO)
24207 return 108;
24208 if (regno == CTR_REGNO)
24209 return 109;
24210 if (regno == CA_REGNO)
24211 return 101; /* XER */
24212 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24213 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24214 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24215 to the DWARF reg for CR. */
24216 if (format == 1 && regno == CR2_REGNO)
24217 return 64;
24218 if (CR_REGNO_P (regno))
24219 return regno - CR0_REGNO + 86;
24220 if (regno == VRSAVE_REGNO)
24221 return 356;
24222 if (regno == VSCR_REGNO)
24223 return 67;
24225 /* These do not make much sense. */
24226 if (regno == FRAME_POINTER_REGNUM)
24227 return 111;
24228 if (regno == ARG_POINTER_REGNUM)
24229 return 67;
24230 if (regno == 64)
24231 return 100;
24233 gcc_unreachable ();
24234 #endif
24237 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24238 information, and also for .eh_frame. */
24239 /* Translate the regnos to their numbers in GCC 7 (and before). */
24240 if (regno <= 31)
24241 return regno;
24242 if (FP_REGNO_P (regno))
24243 return regno - FIRST_FPR_REGNO + 32;
24244 if (ALTIVEC_REGNO_P (regno))
24245 return regno - FIRST_ALTIVEC_REGNO + 77;
24246 if (regno == LR_REGNO)
24247 return 65;
24248 if (regno == CTR_REGNO)
24249 return 66;
24250 if (regno == CA_REGNO)
24251 return 76; /* XER */
24252 if (CR_REGNO_P (regno))
24253 return regno - CR0_REGNO + 68;
24254 if (regno == VRSAVE_REGNO)
24255 return 109;
24256 if (regno == VSCR_REGNO)
24257 return 110;
24259 if (regno == FRAME_POINTER_REGNUM)
24260 return 111;
24261 if (regno == ARG_POINTER_REGNUM)
24262 return 67;
24263 if (regno == 64)
24264 return 64;
24266 gcc_unreachable ();
24269 /* Target hook for eh_return_filter_mode.  */
24270 static scalar_int_mode
24271 rs6000_eh_return_filter_mode (void)
24273 return TARGET_32BIT ? SImode : word_mode;
24276 /* Target hook for translate_mode_attribute. */
24277 static machine_mode
24278 rs6000_translate_mode_attribute (machine_mode mode)
24280 if ((FLOAT128_IEEE_P (mode)
24281 && ieee128_float_type_node == long_double_type_node)
24282 || (FLOAT128_IBM_P (mode)
24283 && ibm128_float_type_node == long_double_type_node))
24284 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
24285 return mode;
24288 /* Target hook for scalar_mode_supported_p. */
24289 static bool
24290 rs6000_scalar_mode_supported_p (scalar_mode mode)
24292 /* -m32 does not support TImode. This is the default, from
24293 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24294 same ABI as for -m32. But default_scalar_mode_supported_p allows
24295 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24296 for -mpowerpc64. */
24297 if (TARGET_32BIT && mode == TImode)
24298 return false;
24300 if (DECIMAL_FLOAT_MODE_P (mode))
24301 return default_decimal_float_supported_p ();
24302 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24303 return true;
24304 else
24305 return default_scalar_mode_supported_p (mode);
24308 /* Target hook for libgcc_floating_mode_supported_p. */
24310 static bool
24311 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24313 switch (mode)
24315 case E_SFmode:
24316 case E_DFmode:
24317 case E_TFmode:
24318 return true;
24320 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24321 if long double does not use the IEEE 128-bit format. If long double
24322 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24323 Because the code will not use KFmode in that case, there will be aborts
24324 because it can't find KFmode in the Floatn types. */
24325 case E_KFmode:
24326 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24328 default:
24329 return false;
24333 /* Target hook for vector_mode_supported_p. */
24334 static bool
24335 rs6000_vector_mode_supported_p (machine_mode mode)
24337 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24338 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24339 double-double. */
24340 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24341 return true;
24343 else
24344 return false;
24347 /* Target hook for floatn_mode. */
24348 static opt_scalar_float_mode
24349 rs6000_floatn_mode (int n, bool extended)
24351 if (extended)
24353 switch (n)
24355 case 32:
24356 return DFmode;
24358 case 64:
24359 if (TARGET_FLOAT128_TYPE)
24360 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24361 else
24362 return opt_scalar_float_mode ();
24364 case 128:
24365 return opt_scalar_float_mode ();
24367 default:
24368 /* Those are the only valid _FloatNx types. */
24369 gcc_unreachable ();
24372 else
24374 switch (n)
24376 case 32:
24377 return SFmode;
24379 case 64:
24380 return DFmode;
24382 case 128:
24383 if (TARGET_FLOAT128_TYPE)
24384 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24385 else
24386 return opt_scalar_float_mode ();
24388 default:
24389 return opt_scalar_float_mode ();
24395 /* Target hook for c_mode_for_suffix. */
24396 static machine_mode
24397 rs6000_c_mode_for_suffix (char suffix)
24399 if (TARGET_FLOAT128_TYPE)
24401 if (suffix == 'q' || suffix == 'Q')
24402 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24404 /* At the moment, we are not defining a suffix for IBM extended double.
24405 If/when the default for -mabi=ieeelongdouble is changed, and we want
24406 to support __ibm128 constants in legacy library code, we may need to
24407 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24408 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24409 __float80 constants. */
24412 return VOIDmode;
24415 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for
24416 TI_LONG_DOUBLE_TYPE which is for long double type, go with the default
24417 one for the others. */
24419 static machine_mode
24420 rs6000_c_mode_for_floating_type (enum tree_index ti)
24422 if (ti == TI_LONG_DOUBLE_TYPE)
24423 return rs6000_long_double_type_size == 128 ? TFmode : DFmode;
24424 return default_mode_for_floating_type (ti);
24427 /* Target hook for invalid_arg_for_unprototyped_fn. */
24428 static const char *
24429 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24431 return (!rs6000_darwin64_abi
24432 && typelist == 0
24433 && VECTOR_TYPE_P (TREE_TYPE (val))
24434 && (funcdecl == NULL_TREE
24435 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24436 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
24437 && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
24438 ? N_("AltiVec argument passed to unprototyped function")
24439 : NULL;
24442 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24443 setup by using __stack_chk_fail_local hidden function instead of
24444 calling __stack_chk_fail directly. Otherwise it is better to call
24445 __stack_chk_fail directly. */
24447 static tree ATTRIBUTE_UNUSED
24448 rs6000_stack_protect_fail (void)
24450 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24451 ? default_hidden_stack_protect_fail ()
24452 : default_external_stack_protect_fail ();
24455 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24457 #if TARGET_ELF
24458 static unsigned HOST_WIDE_INT
24459 rs6000_asan_shadow_offset (void)
24461 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24463 #endif
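/* Illustrative aside, not part of GCC proper: how the offset above is
   used.  AddressSanitizer maps every 8 bytes of application memory to
   one shadow byte at (addr >> 3) + offset, so the values returned here
   place the shadow region at 1 << 41 (64-bit) or 1 << 29 (32-bit).  */
static unsigned HOST_WIDE_INT
asan_shadow_addr_model (unsigned HOST_WIDE_INT addr,
			unsigned HOST_WIDE_INT shadow_offset)
{
  return (addr >> 3) + shadow_offset;
}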
24465 /* Mask options that we want to support inside of attribute((target)) and
24466 #pragma GCC target operations. Note, we do not include things like
24467 64/32-bit, endianness, hard/soft floating point, etc. that would have
24468 different calling sequences. */
24470 struct rs6000_opt_mask {
24471 const char *name; /* option name */
24472 HOST_WIDE_INT mask; /* mask to set */
24473 bool invert; /* invert sense of mask */
24474 bool valid_target; /* option is a target option */
24477 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24479 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24480 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24481 false, true },
24482 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24483 false, true },
24484 { "cmpb", OPTION_MASK_CMPB, false, true },
24485 { "crypto", OPTION_MASK_CRYPTO, false, true },
24486 { "direct-move", 0, false, true },
24487 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24488 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24489 false, true },
24490 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24491 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24492 { "fprnd", OPTION_MASK_FPRND, false, true },
24493 { "power10", OPTION_MASK_POWER10, false, true },
24494 { "power11", OPTION_MASK_POWER11, false, false },
24495 { "hard-dfp", OPTION_MASK_DFP, false, true },
24496 { "htm", OPTION_MASK_HTM, false, true },
24497 { "isel", OPTION_MASK_ISEL, false, true },
24498 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24499 { "mfpgpr", 0, false, true },
24500 { "mma", OPTION_MASK_MMA, false, true },
24501 { "modulo", OPTION_MASK_MODULO, false, true },
24502 { "mulhw", OPTION_MASK_MULHW, false, true },
24503 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24504 { "pcrel", OPTION_MASK_PCREL, false, true },
24505 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24506 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24507 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24508 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24509 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24510 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24511 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24512 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24513 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24514 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24515 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24516 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24517 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24518 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24519 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24520 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24521 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24522 { "string", 0, false, true },
24523 { "update", OPTION_MASK_NO_UPDATE, true , true },
24524 { "vsx", OPTION_MASK_VSX, false, true },
24525 #ifdef OPTION_MASK_64BIT
24526 #if TARGET_AIX_OS
24527 { "aix64", OPTION_MASK_64BIT, false, false },
24528 { "aix32", OPTION_MASK_64BIT, true, false },
24529 #else
24530 { "64", OPTION_MASK_64BIT, false, false },
24531 { "32", OPTION_MASK_64BIT, true, false },
24532 #endif
24533 #endif
24534 #ifdef OPTION_MASK_EABI
24535 { "eabi", OPTION_MASK_EABI, false, false },
24536 #endif
24537 #ifdef OPTION_MASK_LITTLE_ENDIAN
24538 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24539 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24540 #endif
24541 #ifdef OPTION_MASK_RELOCATABLE
24542 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24543 #endif
24544 #ifdef OPTION_MASK_STRICT_ALIGN
24545 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24546 #endif
24547 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24548 { "string", 0, false, false },
24551 /* Option variables that we want to support inside attribute((target)) and
24552 #pragma GCC target operations. */
24554 struct rs6000_opt_var {
24555 const char *name; /* option name */
24556 size_t global_offset; /* offset of the option in global_options. */
24557 size_t target_offset; /* offset of the option in target options. */
24560 static struct rs6000_opt_var const rs6000_opt_vars[] =
24562 { "friz",
24563 offsetof (struct gcc_options, x_TARGET_FRIZ),
24564 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24565 { "avoid-indexed-addresses",
24566 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24567 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24568 { "longcall",
24569 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24570 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24571 { "optimize-swaps",
24572 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24573 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24574 { "allow-movmisalign",
24575 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24576 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24577 { "sched-groups",
24578 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24579 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24580 { "always-hint",
24581 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24582 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24583 { "align-branch-targets",
24584 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24585 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24586 { "sched-prolog",
24587 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24588 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24589 { "sched-epilog",
24590 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24591 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24592 { "speculate-indirect-jumps",
24593 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24594 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24597 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24598 parsing. Return true if there were no errors. */
24600 static bool
24601 rs6000_inner_target_options (tree args, bool attr_p)
24603 bool ret = true;
24605 if (args == NULL_TREE)
24608 else if (TREE_CODE (args) == STRING_CST)
24610 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24611 char *q;
24613 while ((q = strtok (p, ",")) != NULL)
24615 bool error_p = false;
24616 bool not_valid_p = false;
24617 const char *cpu_opt = NULL;
24619 p = NULL;
24620 if (startswith (q, "cpu="))
24622 int cpu_index = rs6000_cpu_name_lookup (q+4);
24623 if (cpu_index >= 0)
24624 rs6000_cpu_index = cpu_index;
24625 else
24627 error_p = true;
24628 cpu_opt = q+4;
24631 else if (startswith (q, "tune="))
24633 int tune_index = rs6000_cpu_name_lookup (q+5);
24634 if (tune_index >= 0)
24635 rs6000_tune_index = tune_index;
24636 else
24638 error_p = true;
24639 cpu_opt = q+5;
24642 else
24644 size_t i;
24645 bool invert = false;
24646 char *r = q;
24648 error_p = true;
24649 if (startswith (r, "no-"))
24651 invert = true;
24652 r += 3;
24655 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24656 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24658 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24660 if (!rs6000_opt_masks[i].valid_target)
24661 not_valid_p = true;
24662 else
24664 error_p = false;
24665 rs6000_isa_flags_explicit |= mask;
24667 /* VSX needs altivec, so -mvsx automagically sets
24668 altivec and disables -mavoid-indexed-addresses. */
24669 if (!invert)
24671 if (mask == OPTION_MASK_VSX)
24673 if (!(rs6000_isa_flags_explicit
24674 & OPTION_MASK_ALTIVEC))
24675 mask |= OPTION_MASK_ALTIVEC;
24676 if (!OPTION_SET_P (TARGET_AVOID_XFORM))
24677 TARGET_AVOID_XFORM = 0;
24681 if (rs6000_opt_masks[i].invert)
24682 invert = !invert;
24684 if (invert)
24685 rs6000_isa_flags &= ~mask;
24686 else
24687 rs6000_isa_flags |= mask;
24689 break;
24692 if (error_p && !not_valid_p)
24694 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24695 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24697 size_t j = rs6000_opt_vars[i].global_offset;
24698 *((int *) ((char *) &global_options + j)) = !invert;
24699 *((int *) ((char *) &global_options_set + j)) = 1;
24700 error_p = false;
24701 not_valid_p = false;
24702 break;
24707 if (error_p)
24709 const char *eprefix, *esuffix;
24711 ret = false;
24712 if (attr_p)
24714 eprefix = "__attribute__((__target__(";
24715 esuffix = ")))";
24717 else
24719 eprefix = "#pragma GCC target ";
24720 esuffix = "";
24723 if (cpu_opt)
24724 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24725 q, esuffix);
24726 else if (not_valid_p)
24727 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24728 else
24729 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24734 else if (TREE_CODE (args) == TREE_LIST)
24738 tree value = TREE_VALUE (args);
24739 if (value)
24741 bool ret2 = rs6000_inner_target_options (value, attr_p);
24742 if (!ret2)
24743 ret = false;
24745 args = TREE_CHAIN (args);
24747 while (args != NULL_TREE);
24750 else
24752 error ("attribute %<target%> argument not a string");
24753 return false;
24756 return ret;
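/* Sketch of strings the parser above accepts (hypothetical user code; the
   exact set of mask names lives in rs6000_opt_masks):

     #pragma GCC target ("cpu=power9,vsx")      // cpu= plus a mask option
     #pragma GCC target ("no-altivec")          // inverted mask option
     __attribute__((target ("tune=power10")))   // tune= only

   An unrecognized token such as "frobnicate" falls through to the error
   path and is reported as, e.g.,
   '#pragma GCC target "frobnicate" is invalid'.  */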
24759 /* Print out the target options as a list for -mdebug=target. */
24761 static void
24762 rs6000_debug_target_options (tree args, const char *prefix)
24764 if (args == NULL_TREE)
24765 fprintf (stderr, "%s<NULL>", prefix);
24767 else if (TREE_CODE (args) == STRING_CST)
24769 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24770 char *q;
24772 while ((q = strtok (p, ",")) != NULL)
24774 p = NULL;
24775 fprintf (stderr, "%s\"%s\"", prefix, q);
24776 prefix = ", ";
24780 else if (TREE_CODE (args) == TREE_LIST)
24784 tree value = TREE_VALUE (args);
24785 if (value)
24787 rs6000_debug_target_options (value, prefix);
24788 prefix = ", ";
24790 args = TREE_CHAIN (args);
24792 while (args != NULL_TREE);
24795 else
24796 gcc_unreachable ();
24798 return;
24802 /* Hook to validate attribute((target("..."))). */
24804 static bool
24805 rs6000_valid_attribute_p (tree fndecl,
24806 tree ARG_UNUSED (name),
24807 tree args,
24808 int flags)
24810 struct cl_target_option cur_target;
24811 bool ret;
24812 tree old_optimize;
24813 tree new_target, new_optimize;
24814 tree func_optimize;
24816 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24818 if (TARGET_DEBUG_TARGET)
24820 tree tname = DECL_NAME (fndecl);
24821 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24822 if (tname)
24823 fprintf (stderr, "function: %.*s\n",
24824 (int) IDENTIFIER_LENGTH (tname),
24825 IDENTIFIER_POINTER (tname));
24826 else
24827 fprintf (stderr, "function: unknown\n");
24829 fprintf (stderr, "args:");
24830 rs6000_debug_target_options (args, " ");
24831 fprintf (stderr, "\n");
24833 if (flags)
24834 fprintf (stderr, "flags: 0x%x\n", flags);
24836 fprintf (stderr, "--------------------\n");
24839 /* attribute((target("default"))) does nothing, beyond
24840 affecting multi-versioning. */
24841 if (TREE_VALUE (args)
24842 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24843 && TREE_CHAIN (args) == NULL_TREE
24844 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24845 return true;
24847 old_optimize = build_optimization_node (&global_options,
24848 &global_options_set);
24849 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24851 /* If the function changed the optimization levels as well as setting target
24852 options, start with the optimizations specified. */
24853 if (func_optimize && func_optimize != old_optimize)
24854 cl_optimization_restore (&global_options, &global_options_set,
24855 TREE_OPTIMIZATION (func_optimize));
24857 /* The target attributes may also change some optimization flags, so update
24858 the optimization options if necessary. */
24859 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24860 rs6000_cpu_index = rs6000_tune_index = -1;
24861 ret = rs6000_inner_target_options (args, true);
24863 /* Set up any additional state. */
24864 if (ret)
24866 ret = rs6000_option_override_internal (false);
24867 new_target = build_target_option_node (&global_options,
24868 &global_options_set);
24870 else
24871 new_target = NULL;
24873 new_optimize = build_optimization_node (&global_options,
24874 &global_options_set);
24876 if (!new_target)
24877 ret = false;
24879 else if (fndecl)
24881 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24883 if (old_optimize != new_optimize)
24884 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24887 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24889 if (old_optimize != new_optimize)
24890 cl_optimization_restore (&global_options, &global_options_set,
24891 TREE_OPTIMIZATION (old_optimize));
24893 return ret;
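/* A sketch of how the hook above composes with the optimize attribute
   (hypothetical user code):

     __attribute__((target ("vsx"), optimize ("O2")))
     int hot_loop (int *p, int n);

   Any function-specific optimization node is restored first, the target
   string is parsed on top of it, and both the resulting target and
   optimization nodes are attached to FNDECL.  */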
24897 /* Hook to validate the current #pragma GCC target and set the state, and
24898 update the macros based on what was changed. If ARGS is NULL, then
24899 POP_TARGET is used to reset the options. */
24901 bool
24902 rs6000_pragma_target_parse (tree args, tree pop_target)
24904 tree prev_tree = build_target_option_node (&global_options,
24905 &global_options_set);
24906 tree cur_tree;
24907 struct cl_target_option *prev_opt, *cur_opt;
24908 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24910 if (TARGET_DEBUG_TARGET)
24912 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24913 fprintf (stderr, "args:");
24914 rs6000_debug_target_options (args, " ");
24915 fprintf (stderr, "\n");
24917 if (pop_target)
24919 fprintf (stderr, "pop_target:\n");
24920 debug_tree (pop_target);
24922 else
24923 fprintf (stderr, "pop_target: <NULL>\n");
24925 fprintf (stderr, "--------------------\n");
24928 if (! args)
24930 cur_tree = ((pop_target)
24931 ? pop_target
24932 : target_option_default_node);
24933 cl_target_option_restore (&global_options, &global_options_set,
24934 TREE_TARGET_OPTION (cur_tree));
24936 else
24938 rs6000_cpu_index = rs6000_tune_index = -1;
24939 if (!rs6000_inner_target_options (args, false)
24940 || !rs6000_option_override_internal (false)
24941 || (cur_tree = build_target_option_node (&global_options,
24942 &global_options_set))
24943 == NULL_TREE)
24945 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24946 fprintf (stderr, "invalid pragma\n");
24948 return false;
24952 target_option_current_node = cur_tree;
24953 rs6000_activate_target_options (target_option_current_node);
24955 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24956 change the macros that are defined. */
24957 if (rs6000_target_modify_macros_ptr)
24959 prev_opt = TREE_TARGET_OPTION (prev_tree);
24960 prev_flags = prev_opt->x_rs6000_isa_flags;
24962 cur_opt = TREE_TARGET_OPTION (cur_tree);
24963 cur_flags = cur_opt->x_rs6000_isa_flags;
24965 diff_flags = (prev_flags ^ cur_flags);
24967 if (diff_flags != 0)
24969 /* Delete old macros. */
24970 rs6000_target_modify_macros_ptr (false,
24971 prev_flags & diff_flags);
24973 /* Define new macros. */
24974 rs6000_target_modify_macros_ptr (true,
24975 cur_flags & diff_flags);
24979 return true;
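/* Illustrative pragma sequence driving the hook above (hypothetical user
   code):

     #pragma GCC push_options
     #pragma GCC target ("cpu=power10,mma")   // ARGS != NULL path
     vector double vd_fma (vector double, vector double);
     #pragma GCC pop_options                  // ARGS == NULL, POP_TARGET set

   On the pop, the saved option node is restored and the predefined macros
   (e.g. __MMA__) are re-synchronized via rs6000_target_modify_macros_ptr.  */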
24983 /* Remember the last target of rs6000_set_current_function. */
24984 static GTY(()) tree rs6000_previous_fndecl;
24986 /* Restore target's globals from NEW_TREE and invalidate the
24987 rs6000_previous_fndecl cache. */
24989 void
24990 rs6000_activate_target_options (tree new_tree)
24992 cl_target_option_restore (&global_options, &global_options_set,
24993 TREE_TARGET_OPTION (new_tree));
24994 if (TREE_TARGET_GLOBALS (new_tree))
24995 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24996 else if (new_tree == target_option_default_node)
24997 restore_target_globals (&default_target_globals);
24998 else
24999 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
25000 rs6000_previous_fndecl = NULL_TREE;
25003 /* Establish appropriate back-end context for processing the function
25004 FNDECL. The argument might be NULL to indicate processing at top
25005 level, outside of any function scope. */
25006 static void
25007 rs6000_set_current_function (tree fndecl)
25009 if (TARGET_DEBUG_TARGET)
25011 fprintf (stderr, "\n==================== rs6000_set_current_function");
25013 if (fndecl)
25014 fprintf (stderr, ", fndecl %s (%p)",
25015 (DECL_NAME (fndecl)
25016 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
25017 : "<unknown>"), (void *)fndecl);
25019 if (rs6000_previous_fndecl)
25020 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
25022 fprintf (stderr, "\n");
25025 /* Only change the context if the function changes. This hook is called
25026 several times in the course of compiling a function, and we don't want to
25027 slow things down too much or call target_reinit when it isn't safe. */
25028 if (fndecl == rs6000_previous_fndecl)
25029 return;
25031 tree old_tree;
25032 if (rs6000_previous_fndecl == NULL_TREE)
25033 old_tree = target_option_current_node;
25034 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
25035 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
25036 else
25037 old_tree = target_option_default_node;
25039 tree new_tree;
25040 if (fndecl == NULL_TREE)
25042 if (old_tree != target_option_current_node)
25043 new_tree = target_option_current_node;
25044 else
25045 new_tree = NULL_TREE;
25047 else
25049 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25050 if (new_tree == NULL_TREE)
25051 new_tree = target_option_default_node;
25054 if (TARGET_DEBUG_TARGET)
25056 if (new_tree)
25058 fprintf (stderr, "\nnew fndecl target specific options:\n");
25059 debug_tree (new_tree);
25062 if (old_tree)
25064 fprintf (stderr, "\nold fndecl target specific options:\n");
25065 debug_tree (old_tree);
25068 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
25069 fprintf (stderr, "--------------------\n");
25072 if (new_tree && old_tree != new_tree)
25073 rs6000_activate_target_options (new_tree);
25075 if (fndecl)
25076 rs6000_previous_fndecl = fndecl;
25080 /* Save the current options */
25082 static void
25083 rs6000_function_specific_save (struct cl_target_option *ptr,
25084 struct gcc_options *opts,
25085 struct gcc_options */* opts_set */)
25087 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
25088 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
25091 /* Restore the current options */
25093 static void
25094 rs6000_function_specific_restore (struct gcc_options *opts,
25095 struct gcc_options */* opts_set */,
25096 struct cl_target_option *ptr)
25099 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
25100 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
25101 (void) rs6000_option_override_internal (false);
25104 /* Print the current options */
25106 static void
25107 rs6000_function_specific_print (FILE *file, int indent,
25108 struct cl_target_option *ptr)
25110 rs6000_print_isa_options (file, indent, "Isa options set",
25111 ptr->x_rs6000_isa_flags);
25113 rs6000_print_isa_options (file, indent, "Isa options explicit",
25114 ptr->x_rs6000_isa_flags_explicit);
25117 /* Helper function to print the current isa or misc options on a line. */
25119 static void
25120 rs6000_print_options_internal (FILE *file,
25121 int indent,
25122 const char *string,
25123 HOST_WIDE_INT flags,
25124 const char *prefix,
25125 const struct rs6000_opt_mask *opts,
25126 size_t num_elements)
25128 size_t i;
25129 size_t start_column = 0;
25130 size_t cur_column;
25131 size_t max_column = 120;
25132 size_t prefix_len = strlen (prefix);
25133 size_t comma_len = 0;
25134 const char *comma = "";
25136 if (indent)
25137 start_column += fprintf (file, "%*s", indent, "");
25139 if (!flags)
25141         fprintf (file, DEBUG_FMT_S, string, "<none>");
25142 return;
25145       start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
25147 /* Print the various mask options. */
25148 cur_column = start_column;
25149 for (i = 0; i < num_elements; i++)
25151 bool invert = opts[i].invert;
25152 const char *name = opts[i].name;
25153 const char *no_str = "";
25154 HOST_WIDE_INT mask = opts[i].mask;
25155 size_t len = comma_len + prefix_len + strlen (name);
25157 if (!invert)
25159 if ((flags & mask) == 0)
25161 no_str = "no-";
25162 len += strlen ("no-");
25165 flags &= ~mask;
25168 else
25170 if ((flags & mask) != 0)
25172 no_str = "no-";
25173 len += strlen ("no-");
25176 flags |= mask;
25179 cur_column += len;
25180 if (cur_column > max_column)
25182             fprintf (file, ", \\\n%*s", (int)start_column, "");
25183 cur_column = start_column + len;
25184 comma = "";
25187 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
25188 comma = ", ";
25189 comma_len = strlen (", ");
25192 fputs ("\n", file);
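/* Illustrative shape of the output produced above (the field widths come
   from DEBUG_FMT_WX and the exact flags vary by configuration):

     Isa options set = 0x0000000240000001, -maltivec, -mvsx, -mno-htm, \
                       -mno-mma, ...

   A logical line that passes column 120 is wrapped with a trailing
   backslash and continues at the starting column.  */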
25195 /* Helper function to print the current isa options on a line. */
25197 static void
25198 rs6000_print_isa_options (FILE *file, int indent, const char *string,
25199 HOST_WIDE_INT flags)
25201 rs6000_print_options_internal (file, indent, string, flags, "-m",
25202 &rs6000_opt_masks[0],
25203 ARRAY_SIZE (rs6000_opt_masks));
25206 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
25207 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25208 -mupper-regs-df, etc.).
25210 This function does not handle explicit options such as the user specifying
25211 -mdirect-move. These are handled in rs6000_option_override_internal, and
25212 the appropriate error is given if needed.
25214 We return a mask of all of the implicit options that should not be enabled
25215 by default. */
25217 static HOST_WIDE_INT
25218 rs6000_disable_incompatible_switches (void)
25220 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
25221 size_t i, j;
25223 static const struct {
25224 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
25225 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
25226 const char *const name; /* name of the switch. */
25227 } flags[] = {
25228 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
25229 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
25232 for (i = 0; i < ARRAY_SIZE (flags); i++)
25234 HOST_WIDE_INT no_flag = flags[i].no_flag;
25236 if ((rs6000_isa_flags & no_flag) == 0
25237 && (rs6000_isa_flags_explicit & no_flag) != 0)
25239 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
25240 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
25241 & rs6000_isa_flags
25242 & dep_flags);
25244 if (set_flags)
25246 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
25247 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
25249 set_flags &= ~rs6000_opt_masks[j].mask;
25250 error ("%<-mno-%s%> turns off %<-m%s%>",
25251 flags[i].name,
25252 rs6000_opt_masks[j].name);
25255 gcc_assert (!set_flags);
25258 rs6000_isa_flags &= ~dep_flags;
25259 ignore_masks |= no_flag | dep_flags;
25263 return ignore_masks;
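/* Illustrative interaction (a sketch, assuming -mdirect-move is one of the
   OTHER_VSX_VECTOR_MASKS dependents as the comment above suggests):

     gcc -mno-vsx -mdirect-move ...
       error: '-mno-vsx' turns off '-mdirect-move'

   whereas plain -mno-vsx silently clears the implicit dependent flags and
   folds them into the returned ignore mask.  */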
25267 /* Helper function for printing the function name when debugging. */
25269 static const char *
25270 get_decl_name (tree fn)
25272 tree name;
25274 if (!fn)
25275 return "<null>";
25277 name = DECL_NAME (fn);
25278 if (!name)
25279 return "<no-name>";
25281 return IDENTIFIER_POINTER (name);
25284 /* Return the clone id of the target we are compiling code for in a target
25285 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25286 the priority list for the target clones (ordered from lowest to
25287 highest). */
25289 static int
25290 rs6000_clone_priority (tree fndecl)
25292 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25293 HOST_WIDE_INT isa_masks;
25294 int ret = CLONE_DEFAULT;
25295 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
25296 const char *attrs_str = NULL;
25298 attrs = TREE_VALUE (TREE_VALUE (attrs));
25299 attrs_str = TREE_STRING_POINTER (attrs);
25301 /* Return priority zero for default function. Return the ISA needed for the
25302 function if it is not the default. */
25303 if (strcmp (attrs_str, "default") != 0)
25305 if (fn_opts == NULL_TREE)
25306 fn_opts = target_option_default_node;
25308 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
25309 isa_masks = rs6000_isa_flags;
25310 else
25311 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25313 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25314 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25315 break;
25318 if (TARGET_DEBUG_TARGET)
25319     fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
25320 get_decl_name (fndecl), ret);
25322 return ret;
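/* Clone-priority sketch (hypothetical user code):

     __attribute__((target_clones ("cpu=power9,default")))
     long sum (long *p, long n);

   The "default" copy gets CLONE_DEFAULT (0); the power9 copy gets the
   highest CLONE_* index whose isa_mask intersects that clone's ISA flags.  */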
25325 /* This compares the priority of target features in function DECL1 and DECL2.
25326 It returns positive value if DECL1 is higher priority, negative value if
25327 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25328 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25330 static int
25331 rs6000_compare_version_priority (tree decl1, tree decl2)
25333 int priority1 = rs6000_clone_priority (decl1);
25334 int priority2 = rs6000_clone_priority (decl2);
25335 int ret = priority1 - priority2;
25337 if (TARGET_DEBUG_TARGET)
25338 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25339 get_decl_name (decl1), get_decl_name (decl2), ret);
25341 return ret;
25344 /* Make a dispatcher declaration for the multi-versioned function DECL.
25345 Calls to DECL function will be replaced with calls to the dispatcher
25346 by the front-end. Returns the decl of the dispatcher function. */
25348 static tree
25349 rs6000_get_function_versions_dispatcher (void *decl)
25351 tree fn = (tree) decl;
25352 struct cgraph_node *node = NULL;
25353 struct cgraph_node *default_node = NULL;
25354 struct cgraph_function_version_info *node_v = NULL;
25355 struct cgraph_function_version_info *first_v = NULL;
25357 tree dispatch_decl = NULL;
25359 struct cgraph_function_version_info *default_version_info = NULL;
25360 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25362 if (TARGET_DEBUG_TARGET)
25363 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25364 get_decl_name (fn));
25366 node = cgraph_node::get (fn);
25367 gcc_assert (node != NULL);
25369 node_v = node->function_version ();
25370 gcc_assert (node_v != NULL);
25372 if (node_v->dispatcher_resolver != NULL)
25373 return node_v->dispatcher_resolver;
25375 /* Find the default version and make it the first node. */
25376 first_v = node_v;
25377 /* Go to the beginning of the chain. */
25378 while (first_v->prev != NULL)
25379 first_v = first_v->prev;
25381 default_version_info = first_v;
25382 while (default_version_info != NULL)
25384 const tree decl2 = default_version_info->this_node->decl;
25385 if (is_function_default_version (decl2))
25386 break;
25387 default_version_info = default_version_info->next;
25390 /* If there is no default node, just return NULL. */
25391 if (default_version_info == NULL)
25392 return NULL;
25394 /* Make default info the first node. */
25395 if (first_v != default_version_info)
25397 default_version_info->prev->next = default_version_info->next;
25398 if (default_version_info->next)
25399 default_version_info->next->prev = default_version_info->prev;
25400 first_v->prev = default_version_info;
25401 default_version_info->next = first_v;
25402 default_version_info->prev = NULL;
25405 default_node = default_version_info->this_node;
25407 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25408 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25409 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25410 "exports hardware capability bits");
25411 #else
25413 if (targetm.has_ifunc_p ())
25415 struct cgraph_function_version_info *it_v = NULL;
25416 struct cgraph_node *dispatcher_node = NULL;
25417 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25419 /* Right now, the dispatching is done via ifunc. */
25420 dispatch_decl = make_dispatcher_decl (default_node->decl);
25421 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25423 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25424 gcc_assert (dispatcher_node != NULL);
25425 dispatcher_node->dispatcher_function = 1;
25426 dispatcher_version_info
25427 = dispatcher_node->insert_new_function_version ();
25428 dispatcher_version_info->next = default_version_info;
25429 dispatcher_node->definition = 1;
25431 /* Set the dispatcher for all the versions. */
25432 it_v = default_version_info;
25433 while (it_v != NULL)
25435 it_v->dispatcher_resolver = dispatch_decl;
25436 it_v = it_v->next;
25439 else
25441 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25442 "multiversioning needs %<ifunc%> which is not supported "
25443 "on this target");
25445 #endif
25447 return dispatch_decl;
25450 /* Make the resolver function decl to dispatch the versions of a multi-
25451 versioned function, DEFAULT_DECL. Create an empty basic block in the
25452 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25453 function. */
25455 static tree
25456 make_resolver_func (const tree default_decl,
25457 const tree dispatch_decl,
25458 basic_block *empty_bb)
25460 /* Make the resolver function static. The resolver function returns
25461 void *. */
25462 tree decl_name = clone_function_name (default_decl, "resolver");
25463 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25464 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25465 tree decl = build_fn_decl (resolver_name, type);
25466 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25468 DECL_NAME (decl) = decl_name;
25469 TREE_USED (decl) = 1;
25470 DECL_ARTIFICIAL (decl) = 1;
25471 DECL_IGNORED_P (decl) = 0;
25472 TREE_PUBLIC (decl) = 0;
25473 DECL_UNINLINABLE (decl) = 1;
25475 /* Resolver is not external, body is generated. */
25476 DECL_EXTERNAL (decl) = 0;
25477 DECL_EXTERNAL (dispatch_decl) = 0;
25479 DECL_CONTEXT (decl) = NULL_TREE;
25480 DECL_INITIAL (decl) = make_node (BLOCK);
25481 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25483 if (DECL_COMDAT_GROUP (default_decl)
25484 || TREE_PUBLIC (default_decl))
25486 /* In this case, each translation unit with a call to this
25487 versioned function will put out a resolver. Ensure it
25488 is comdat to keep just one copy. */
25489 DECL_COMDAT (decl) = 1;
25490 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25492 else
25493 TREE_PUBLIC (dispatch_decl) = 0;
25495 /* Build result decl and add to function_decl. */
25496 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25497 DECL_CONTEXT (t) = decl;
25498 DECL_ARTIFICIAL (t) = 1;
25499 DECL_IGNORED_P (t) = 1;
25500 DECL_RESULT (decl) = t;
25502 gimplify_function_tree (decl);
25503 push_cfun (DECL_STRUCT_FUNCTION (decl));
25504 *empty_bb = init_lowered_empty_function (decl, false,
25505 profile_count::uninitialized ());
25507 cgraph_node::add_new_function (decl, true);
25508 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25510 pop_cfun ();
25512 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25513 DECL_ATTRIBUTES (dispatch_decl)
25514 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25516 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25518 return decl;
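/* Rough source-level equivalent of the decls built above (a sketch; the
   resolver name is synthesized, e.g. "sum.resolver"):

     static void *sum_resolver (void);   // body generated later
     long sum (long *, long) __attribute__((ifunc ("sum.resolver")));

   i.e. the dispatcher becomes a GNU ifunc whose resolver returns the
   address of the clone selected at load time.  */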
25521 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25522 return a pointer to VERSION_DECL if we are running on a machine that
25523 supports the index CLONE_ISA hardware architecture bits. This function will
25524 be called during version dispatch to decide which function version to
25525 execute. It returns the basic block at the end, to which more conditions
25526 can be added. */
25528 static basic_block
25529 add_condition_to_bb (tree function_decl, tree version_decl,
25530 int clone_isa, basic_block new_bb)
25532 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25534 gcc_assert (new_bb != NULL);
25535 gimple_seq gseq = bb_seq (new_bb);
25538 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25539 build_fold_addr_expr (version_decl));
25540 tree result_var = create_tmp_var (ptr_type_node);
25541 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25542 gimple *return_stmt = gimple_build_return (result_var);
25544 if (clone_isa == CLONE_DEFAULT)
25546 gimple_seq_add_stmt (&gseq, convert_stmt);
25547 gimple_seq_add_stmt (&gseq, return_stmt);
25548 set_bb_seq (new_bb, gseq);
25549 gimple_set_bb (convert_stmt, new_bb);
25550 gimple_set_bb (return_stmt, new_bb);
25551 pop_cfun ();
25552 return new_bb;
25555 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25556 tree cond_var = create_tmp_var (bool_int_type_node);
25557 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25558 const char *arg_str = rs6000_clone_map[clone_isa].name;
25559 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25560 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25561 gimple_call_set_lhs (call_cond_stmt, cond_var);
25563 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25564 gimple_set_bb (call_cond_stmt, new_bb);
25565 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25567 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25568 NULL_TREE, NULL_TREE);
25569 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25570 gimple_set_bb (if_else_stmt, new_bb);
25571 gimple_seq_add_stmt (&gseq, if_else_stmt);
25573 gimple_seq_add_stmt (&gseq, convert_stmt);
25574 gimple_seq_add_stmt (&gseq, return_stmt);
25575 set_bb_seq (new_bb, gseq);
25577 basic_block bb1 = new_bb;
25578 edge e12 = split_block (bb1, if_else_stmt);
25579 basic_block bb2 = e12->dest;
25580 e12->flags &= ~EDGE_FALLTHRU;
25581 e12->flags |= EDGE_TRUE_VALUE;
25583 edge e23 = split_block (bb2, return_stmt);
25584 gimple_set_bb (convert_stmt, bb2);
25585 gimple_set_bb (return_stmt, bb2);
25587 basic_block bb3 = e23->dest;
25588 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25590 remove_edge (e23);
25591 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25593 pop_cfun ();
25594 return bb3;
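/* Net effect of the blocks built above, in C-like pseudocode (illustrative;
   the predicate string comes from rs6000_clone_map):

     if (__builtin_cpu_supports ("arch_3_00"))
       return (void *) sum_power9;    // bb2: convert + return
     // otherwise fall through (bb3) to the next condition or the default

   For CLONE_DEFAULT only the unconditional convert/return pair is
   emitted.  */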
25597 /* This function generates the dispatch function for multi-versioned functions.
25598 DISPATCH_DECL is the function which will contain the dispatch logic.
25599 FNDECLS are the function choices for dispatch, and is a tree chain.
25600 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25601 code is generated. */
25603 static int
25604 dispatch_function_versions (tree dispatch_decl,
25605 void *fndecls_p,
25606 basic_block *empty_bb)
25608 int ix;
25609 tree ele;
25610 vec<tree> *fndecls;
25611 tree clones[CLONE_MAX];
25613 if (TARGET_DEBUG_TARGET)
25614 fputs ("dispatch_function_versions, top\n", stderr);
25616 gcc_assert (dispatch_decl != NULL
25617 && fndecls_p != NULL
25618 && empty_bb != NULL);
25620 /* fndecls_p is actually a vector. */
25621 fndecls = static_cast<vec<tree> *> (fndecls_p);
25623 /* At least one more version other than the default. */
25624 gcc_assert (fndecls->length () >= 2);
25626 /* The first version in the vector is the default decl. */
25627 memset ((void *) clones, '\0', sizeof (clones));
25628 clones[CLONE_DEFAULT] = (*fndecls)[0];
25630 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25631 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25632      __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
25633 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25634 to insert the code here to do the call. */
25636 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25638 int priority = rs6000_clone_priority (ele);
25639 if (!clones[priority])
25640 clones[priority] = ele;
25643 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25644 if (clones[ix])
25646 if (TARGET_DEBUG_TARGET)
25647 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25648 ix, get_decl_name (clones[ix]));
25650 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25651 *empty_bb);
25654 return 0;
25657 /* Generate the dispatching code body to dispatch multi-versioned function
25658 DECL. The target hook is called to process the "target" attributes and
25659 provide the code to dispatch the right function at run-time. NODE points
25660 to the dispatcher decl whose body will be created. */
25662 static tree
25663 rs6000_generate_version_dispatcher_body (void *node_p)
25665 tree resolver;
25666 basic_block empty_bb;
25667 struct cgraph_node *node = (cgraph_node *) node_p;
25668 struct cgraph_function_version_info *ninfo = node->function_version ();
25670 if (ninfo->dispatcher_resolver)
25671 return ninfo->dispatcher_resolver;
25673 /* node is going to be an alias, so remove the finalized bit. */
25674 node->definition = false;
25676 /* The first version in the chain corresponds to the default version. */
25677 ninfo->dispatcher_resolver = resolver
25678 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25680 if (TARGET_DEBUG_TARGET)
25681     fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25682 get_decl_name (resolver));
25684 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25685 auto_vec<tree, 2> fn_ver_vec;
25687 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25688 vinfo;
25689 vinfo = vinfo->next)
25691 struct cgraph_node *version = vinfo->this_node;
25692 /* Check for virtual functions here again, as by this time it should
25693 have been determined if this function needs a vtable index or
25694 not. This happens for methods in derived classes that override
25695 virtual methods in base classes but are not explicitly marked as
25696 virtual. */
25697 if (DECL_VINDEX (version->decl))
25698 sorry ("Virtual function multiversioning not supported");
25700 fn_ver_vec.safe_push (version->decl);
25703 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25704 cgraph_edge::rebuild_edges ();
25705 pop_cfun ();
25706 return resolver;
25709 /* Hook to decide if we need to scan function gimple statements to
25710 collect target specific information for inlining, and update the
25711 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25712 to predict which ISA feature is used at this time. Return true
25713 if we need to scan, otherwise return false. */
25715 static bool
25716 rs6000_need_ipa_fn_target_info (const_tree decl,
25717 unsigned int &info ATTRIBUTE_UNUSED)
25719 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25720 if (!target)
25721 target = target_option_default_node;
25722 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25724   /* See PR102059, we only handle HTM for now, so we will only do the
25725      subsequent scanning when the HTM feature is enabled.  */
25726 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25727 return true;
25729 return false;
25732 /* Hook to update target specific information INFO for inlining by
25733 checking the given STMT. Return false if we don't need to scan
25734 any more, otherwise return true. */
25736 static bool
25737 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25739 #ifndef HAVE_AS_POWER10_HTM
25740 /* Assume inline asm can use any instruction features. */
25741 if (gimple_code (stmt) == GIMPLE_ASM)
25743 const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
25744 /* Ignore empty inline asm string. */
25745 if (strlen (asm_str) > 0)
25746         /* We should set any bits we are concerned with; for now OPTION_MASK_HTM
25747            is the only bit we care about.  */
25748 info |= RS6000_FN_TARGET_INFO_HTM;
25749 return false;
25751 #endif
25753 if (gimple_code (stmt) == GIMPLE_CALL)
25755 tree fndecl = gimple_call_fndecl (stmt);
25756 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25758 enum rs6000_gen_builtins fcode
25759 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25760 /* HTM bifs definitely exploit HTM insns. */
25761 if (bif_is_htm (rs6000_builtin_info[fcode]))
25763 info |= RS6000_FN_TARGET_INFO_HTM;
25764 return false;
25769 return true;
25772 /* Hook to determine if one function can safely inline another. */
25774 static bool
25775 rs6000_can_inline_p (tree caller, tree callee)
25777 bool ret = false;
25778 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25779 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25781 /* If the caller/callee has option attributes, then use them.
25782 Otherwise, use the command line options. */
25783 if (!callee_tree)
25784 callee_tree = target_option_default_node;
25785 if (!caller_tree)
25786 caller_tree = target_option_default_node;
25788 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25789 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
25791 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25792 HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
25793 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25795 cgraph_node *callee_node = cgraph_node::get (callee);
25796 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25798 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25799 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25801 callee_isa &= ~OPTION_MASK_HTM;
25802 explicit_isa &= ~OPTION_MASK_HTM;
25806 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25807 purposes. */
25808 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25809 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25811 /* The callee's options must be a subset of the caller's options, i.e.
25812 a vsx function may inline an altivec function, but a no-vsx function
25813 must not inline a vsx function. However, for those options that the
25814 callee has explicitly enabled or disabled, then we must enforce that
25815 the callee's and caller's options match exactly; see PR70010. */
25816 if (((caller_isa & callee_isa) == callee_isa)
25817 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25818 ret = true;
25820 if (TARGET_DEBUG_TARGET)
25821 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25822 get_decl_name (caller), get_decl_name (callee),
25823 (ret ? "can" : "cannot"));
25825 return ret;
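/* Illustrative outcomes of the subset-plus-explicit rule above:

     caller -mvsx,  callee -maltivec          => inlinable (subset)
     caller plain,  callee explicit -mvsx     => rejected (not a subset)
     caller -mvsx,  callee explicit -mno-vsx  => rejected (explicitly set
                                                 flags must match exactly)

   modulo the HTM relaxation handled just above via the IPA summary.  */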
25828 /* Allocate a stack temp and fix up the address so it meets the particular
25829    memory requirements (either offsettable or REG+REG addressing).  */
25832 rs6000_allocate_stack_temp (machine_mode mode,
25833 bool offsettable_p,
25834 bool reg_reg_p)
25836 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25837 rtx addr = XEXP (stack, 0);
25838 int strict_p = reload_completed;
25840 if (!legitimate_indirect_address_p (addr, strict_p))
25842 if (offsettable_p
25843 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25844 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25846 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25847 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25850 return stack;
25853 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25854 convert to such a form to deal with memory reference instructions
25855 like STFIWX and LDBRX that only take reg+reg addressing. */
25858 rs6000_force_indexed_or_indirect_mem (rtx x)
25860 machine_mode mode = GET_MODE (x);
25862 gcc_assert (MEM_P (x));
25863 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25865 rtx addr = XEXP (x, 0);
25866 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25868 rtx reg = XEXP (addr, 0);
25869 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25870 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25871 gcc_assert (REG_P (reg));
25872 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25873 addr = reg;
25875 else if (GET_CODE (addr) == PRE_MODIFY)
25877 rtx reg = XEXP (addr, 0);
25878 rtx expr = XEXP (addr, 1);
25879 gcc_assert (REG_P (reg));
25880 gcc_assert (GET_CODE (expr) == PLUS);
25881 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25882 addr = reg;
25885 if (GET_CODE (addr) == PLUS)
25887 rtx op0 = XEXP (addr, 0);
25888 rtx op1 = XEXP (addr, 1);
25889 op0 = force_reg (Pmode, op0);
25890 op1 = force_reg (Pmode, op1);
25891 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25893 else
25894 x = replace_equiv_address (x, force_reg (Pmode, addr));
25897 return x;
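/* RTL sketch of the rewrites performed above (illustrative):

     (mem (pre_inc (reg r9)))             =>  r9 += size;  (mem (reg r9))
     (mem (plus (reg r9) (const_int 8)))  =>  rT = 8;      (mem (plus (reg r9)
                                                                      (reg rT)))

   leaving only reg or reg+reg addresses, as insns like STFIWX and LDBRX
   require.  */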
25900 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25902    On the RS/6000, all integer constants are acceptable, though most won't
25903    be valid for particular insns.  Only easy FP constants are acceptable.  */
25905 static bool
25906 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25908 if (TARGET_ELF && tls_referenced_p (x))
25909 return false;
25911 if (CONST_DOUBLE_P (x))
25912 return easy_fp_constant (x, mode);
25914 if (GET_CODE (x) == CONST_VECTOR)
25915 return easy_vector_constant (x, mode);
25917 return true;
25920 #if TARGET_AIX_OS
25921 /* Implement TARGET_PRECOMPUTE_TLS_P.
25923 On the AIX, TLS symbols are in the TOC, which is maintained in the
25924 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25925 must be considered legitimate constants. */
25927 static bool
25928 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25930 return tls_referenced_p (x);
25932 #endif
25935 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25937 static bool
25938 chain_already_loaded (rtx_insn *last)
25940 for (; last != NULL; last = PREV_INSN (last))
25942 if (NONJUMP_INSN_P (last))
25944 rtx patt = PATTERN (last);
25946 if (GET_CODE (patt) == SET)
25948 rtx lhs = XEXP (patt, 0);
25950 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25951 return true;
25955 return false;
25958 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25960 void
25961 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25963 rtx func = func_desc;
25964 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25965 rtx toc_load = NULL_RTX;
25966 rtx toc_restore = NULL_RTX;
25967 rtx func_addr;
25968 rtx abi_reg = NULL_RTX;
25969 rtx call[5];
25970 int n_call;
25971 rtx insn;
25972 bool is_pltseq_longcall;
25974 if (global_tlsarg)
25975 tlsarg = global_tlsarg;
25977 /* Handle longcall attributes. */
25978 is_pltseq_longcall = false;
25979 if ((INTVAL (cookie) & CALL_LONG) != 0
25980 && GET_CODE (func_desc) == SYMBOL_REF)
25982 func = rs6000_longcall_ref (func_desc, tlsarg);
25983 if (TARGET_PLTSEQ)
25984 is_pltseq_longcall = true;
25987 /* Handle indirect calls. */
25988 if (!SYMBOL_REF_P (func)
25989 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25991 if (!rs6000_pcrel_p ())
25993 /* Save the TOC into its reserved slot before the call,
25994 and prepare to restore it after the call. */
25995 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25996 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25997 gen_rtvec (1, stack_toc_offset),
25998 UNSPEC_TOCSLOT);
25999 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
26001 /* Can we optimize saving the TOC in the prologue or
26002 do we need to do it at every call? */
26003 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
26004 cfun->machine->save_toc_in_prologue = true;
26005 else
26007 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26008 rtx stack_toc_mem = gen_frame_mem (Pmode,
26009 gen_rtx_PLUS (Pmode, stack_ptr,
26010 stack_toc_offset));
26011 MEM_VOLATILE_P (stack_toc_mem) = 1;
26012 if (is_pltseq_longcall)
26014 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
26015 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26016 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
26018 else
26019 emit_move_insn (stack_toc_mem, toc_reg);
26023 if (DEFAULT_ABI == ABI_ELFv2)
26025 /* A function pointer in the ELFv2 ABI is just a plain address, but
26026 the ABI requires it to be loaded into r12 before the call. */
26027 func_addr = gen_rtx_REG (Pmode, 12);
26028 emit_move_insn (func_addr, func);
26029 abi_reg = func_addr;
26030 /* Indirect calls via CTR are strongly preferred over indirect
26031 calls via LR, so move the address there. Needed to mark
26032 this insn for linker plt sequence editing too. */
26033 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26034 if (is_pltseq_longcall)
26036 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
26037 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26038 emit_insn (gen_rtx_SET (func_addr, mark_func));
26039 v = gen_rtvec (2, func_addr, func_desc);
26040 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26042 else
26043 emit_move_insn (func_addr, abi_reg);
26045 else
26047 /* A function pointer under AIX is a pointer to a data area whose
26048 first word contains the actual address of the function, whose
26049 second word contains a pointer to its TOC, and whose third word
26050 contains a value to place in the static chain register (r11).
26051 Note that if we load the static chain, our "trampoline" need
26052 not have any executable code. */
26054 /* Load up address of the actual function. */
26055 func = force_reg (Pmode, func);
26056 func_addr = gen_reg_rtx (Pmode);
26057 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
26059 /* Indirect calls via CTR are strongly preferred over indirect
26060 calls via LR, so move the address there. */
26061 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
26062 emit_move_insn (ctr_reg, func_addr);
26063 func_addr = ctr_reg;
26065 /* Prepare to load the TOC of the called function. Note that the
26066 TOC load must happen immediately before the actual call so
26067 that unwinding the TOC registers works correctly. See the
26068 comment in frob_update_context. */
26069 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
26070 rtx func_toc_mem = gen_rtx_MEM (Pmode,
26071 gen_rtx_PLUS (Pmode, func,
26072 func_toc_offset));
26073 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
26075 /* If we have a static chain, load it up. But, if the call was
26076 originally direct, the 3rd word has not been written since no
26077 trampoline has been built, so we ought not to load it, lest we
26078          overwrite a static chain value.  */
26079 if (!(GET_CODE (func_desc) == SYMBOL_REF
26080 && SYMBOL_REF_FUNCTION_P (func_desc))
26081 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
26082 && !chain_already_loaded (get_current_sequence ()->next->last))
26084 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
26085 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
26086 rtx func_sc_mem = gen_rtx_MEM (Pmode,
26087 gen_rtx_PLUS (Pmode, func,
26088 func_sc_offset));
26089 emit_move_insn (sc_reg, func_sc_mem);
26090 abi_reg = sc_reg;
26094 else
26096 /* No TOC register needed for calls from PC-relative callers. */
26097 if (!rs6000_pcrel_p ())
26098 /* Direct calls use the TOC: for local calls, the callee will
26099 assume the TOC register is set; for non-local calls, the
26100 PLT stub needs the TOC register. */
26101 abi_reg = toc_reg;
26102 func_addr = func;
26105 /* Create the call. */
26106 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26107 if (value != NULL_RTX)
26108 call[0] = gen_rtx_SET (value, call[0]);
26109 call[1] = gen_rtx_USE (VOIDmode, cookie);
26110 n_call = 2;
26112 if (toc_load)
26113 call[n_call++] = toc_load;
26114 if (toc_restore)
26115 call[n_call++] = toc_restore;
26117 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26119 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
26120 insn = emit_call_insn (insn);
26122 /* Mention all registers defined by the ABI to hold information
26123 as uses in CALL_INSN_FUNCTION_USAGE. */
26124 if (abi_reg)
26125 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
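/* Illustrative shape of the AIX-descriptor indirect call emitted above
   (assembly sketch; registers and offsets assume 64-bit Pmode and the
   RS6000_TOC_SAVE_SLOT value, with r9 holding the descriptor address):

     std 2,40(1)      # save caller's TOC (or once in the prologue)
     ld 0,0(9)        # word 0: the actual function address
     mtctr 0
     ld 11,16(9)      # word 2: static chain, when needed
     ld 2,8(9)        # word 1: callee's TOC, loaded just before the call
     bctrl
     ld 2,40(1)       # restore caller's TOC

   Under ELFv2 the plain address is instead moved to r12 and CTR, with no
   descriptor loads.  */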
26128 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26130 void
26131 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26133 rtx call[2];
26134 rtx insn;
26135 rtx r12 = NULL_RTX;
26136 rtx func_addr = func_desc;
26138 if (global_tlsarg)
26139 tlsarg = global_tlsarg;
26141 /* Handle longcall attributes. */
26142 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
26144 /* PCREL can do a sibling call to a longcall function
26145 because we don't need to restore the TOC register. */
26146 gcc_assert (rs6000_pcrel_p ());
26147 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
26149 else
26150 gcc_assert (INTVAL (cookie) == 0);
26152 /* For ELFv2, r12 and CTR need to hold the function address
26153 for an indirect call. */
26154 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
26156 r12 = gen_rtx_REG (Pmode, 12);
26157 emit_move_insn (r12, func_desc);
26158 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26159 emit_move_insn (func_addr, r12);
26162 /* Create the call. */
26163 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26164 if (value != NULL_RTX)
26165 call[0] = gen_rtx_SET (value, call[0]);
26167 call[1] = simple_return_rtx;
26169 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
26170 insn = emit_call_insn (insn);
26172 /* Note use of the TOC register. */
26173 if (!rs6000_pcrel_p ())
26174 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
26175 gen_rtx_REG (Pmode, TOC_REGNUM));
26177 /* Note use of r12. */
26178 if (r12)
26179 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
26182 /* Expand code to perform a call under the SYSV4 ABI. */
26184 void
26185 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26187 rtx func = func_desc;
26188 rtx func_addr;
26189 rtx call[4];
26190 rtx insn;
26191 rtx abi_reg = NULL_RTX;
26192 int n;
26194 if (global_tlsarg)
26195 tlsarg = global_tlsarg;
26197 /* Handle longcall attributes. */
26198 if ((INTVAL (cookie) & CALL_LONG) != 0
26199 && GET_CODE (func_desc) == SYMBOL_REF)
26201 func = rs6000_longcall_ref (func_desc, tlsarg);
26202 /* If the longcall was implemented as an inline PLT call using
26203 PLT unspecs then func will be REG:r11. If not, func will be
26204 a pseudo reg. The inline PLT call sequence supports lazy
26205 linking (and longcalls to functions in dlopen'd libraries).
26206          The other style of longcall doesn't.  The lazy linking entry
26207 to the dynamic symbol resolver requires r11 be the function
26208 address (as it is for linker generated PLT stubs). Ensure
26209 r11 stays valid to the bctrl by marking r11 used by the call. */
26210 if (TARGET_PLTSEQ)
26211 abi_reg = func;
26214 /* Handle indirect calls. */
26215 if (GET_CODE (func) != SYMBOL_REF)
26217 func = force_reg (Pmode, func);
26219 /* Indirect calls via CTR are strongly preferred over indirect
26220 calls via LR, so move the address there. That can't be left
26221 to reload because we want to mark every instruction in an
26222 inline PLT call sequence with a reloc, enabling the linker to
26223 edit the sequence back to a direct call when that makes sense. */
26224 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26225 if (abi_reg)
26227 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26228 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26229 emit_insn (gen_rtx_SET (func_addr, mark_func));
26230 v = gen_rtvec (2, func_addr, func_desc);
26231 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26233 else
26234 emit_move_insn (func_addr, func);
26236 else
26237 func_addr = func;
26239 /* Create the call. */
26240 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26241 if (value != NULL_RTX)
26242 call[0] = gen_rtx_SET (value, call[0]);
26244 call[1] = gen_rtx_USE (VOIDmode, cookie);
26245 n = 2;
26246 if (TARGET_SECURE_PLT
26247 && flag_pic
26248 && GET_CODE (func_addr) == SYMBOL_REF
26249 && !SYMBOL_REF_LOCAL_P (func_addr))
26250 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
26252 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26254 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
26255 insn = emit_call_insn (insn);
26256 if (abi_reg)
26257 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26260 /* Expand code to perform a sibling call under the SysV4 ABI. */
26262 void
26263 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26265 rtx func = func_desc;
26266 rtx func_addr;
26267 rtx call[3];
26268 rtx insn;
26269 rtx abi_reg = NULL_RTX;
26271 if (global_tlsarg)
26272 tlsarg = global_tlsarg;
26274 /* Handle longcall attributes. */
26275 if ((INTVAL (cookie) & CALL_LONG) != 0
26276 && GET_CODE (func_desc) == SYMBOL_REF)
26278 func = rs6000_longcall_ref (func_desc, tlsarg);
26279 /* If the longcall was implemented as an inline PLT call using
26280 PLT unspecs then func will be REG:r11. If not, func will be
26281 a pseudo reg. The inline PLT call sequence supports lazy
26282 linking (and longcalls to functions in dlopen'd libraries).
26283          The other style of longcall doesn't.  The lazy linking entry
26284 to the dynamic symbol resolver requires r11 be the function
26285 address (as it is for linker generated PLT stubs). Ensure
26286 r11 stays valid to the bctr by marking r11 used by the call. */
26287 if (TARGET_PLTSEQ)
26288 abi_reg = func;
26291 /* Handle indirect calls. */
26292 if (GET_CODE (func) != SYMBOL_REF)
26294 func = force_reg (Pmode, func);
26296 /* Indirect sibcalls must go via CTR. That can't be left to
26297 reload because we want to mark every instruction in an inline
26298 PLT call sequence with a reloc, enabling the linker to edit
26299 the sequence back to a direct call when that makes sense. */
26300 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26301 if (abi_reg)
26303 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26304 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26305 emit_insn (gen_rtx_SET (func_addr, mark_func));
26306 v = gen_rtvec (2, func_addr, func_desc);
26307 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26309 else
26310 emit_move_insn (func_addr, func);
26312 else
26313 func_addr = func;
26315 /* Create the call. */
26316 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26317 if (value != NULL_RTX)
26318 call[0] = gen_rtx_SET (value, call[0]);
26320 call[1] = gen_rtx_USE (VOIDmode, cookie);
26321 call[2] = simple_return_rtx;
26323 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26324 insn = emit_call_insn (insn);
26325 if (abi_reg)
26326 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26329 #if TARGET_MACHO
26331 /* Expand code to perform a call under the Darwin ABI.
26332 Modulo handling of mlongcall, this is much the same as sysv.
26333    If/when the longcall optimisation is removed, we could drop this
26334 code and use the sysv case (taking care to avoid the tls stuff).
26336 We can use this for sibcalls too, if needed. */
26338 void
26339 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26340 rtx cookie, bool sibcall)
26342 rtx func = func_desc;
26343 rtx func_addr;
26344 rtx call[3];
26345 rtx insn;
26346 int cookie_val = INTVAL (cookie);
26347 bool make_island = false;
26349   /* Handle longcall attributes; there are two cases for Darwin:
26350 1) Newer linkers are capable of synthesising any branch islands needed.
26351 2) We need a helper branch island synthesised by the compiler.
26352 The second case has mostly been retired and we don't use it for m64.
26353      In fact, it is only an optimisation; we could just indirect as sysv
26354      does, but we keep it for backwards compatibility for now.
26355 If we're going to use this, then we need to keep the CALL_LONG bit set,
26356 so that we can pick up the special insn form later. */
26357 if ((cookie_val & CALL_LONG) != 0
26358 && GET_CODE (func_desc) == SYMBOL_REF)
26360 /* FIXME: the longcall opt should not hang off this flag, it is most
26361 likely incorrect for kernel-mode code-generation. */
26362 if (darwin_symbol_stubs && TARGET_32BIT)
26363 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26364 else
26366 /* The linker is capable of doing this, but the user explicitly
26367 asked for -mlongcall, so we'll do the 'normal' version. */
26368 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26369 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26373 /* Handle indirect calls. */
26374 if (GET_CODE (func) != SYMBOL_REF)
26376 func = force_reg (Pmode, func);
26378 /* Indirect calls via CTR are strongly preferred over indirect
26379 calls via LR, and are required for indirect sibcalls, so move
26380 the address there. */
26381 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26382 emit_move_insn (func_addr, func);
26384 else
26385 func_addr = func;
26387 /* Create the call. */
26388 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26389 if (value != NULL_RTX)
26390 call[0] = gen_rtx_SET (value, call[0]);
26392 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26394 if (sibcall)
26395 call[2] = simple_return_rtx;
26396 else
26397 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26399 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26400 insn = emit_call_insn (insn);
26401   /* Now that we have the debug info in the insn, we can set up the branch island
26402 if we're using one. */
26403 if (make_island)
26405 tree funname = get_identifier (XSTR (func_desc, 0));
26407 if (no_previous_def (funname))
26409 rtx label_rtx = gen_label_rtx ();
26410 char *label_buf, temp_buf[256];
26411 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26412 CODE_LABEL_NUMBER (label_rtx));
26413 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26414 tree labelname = get_identifier (label_buf);
26415 add_compiler_branch_island (labelname, funname,
26416 insn_line ((const rtx_insn*)insn));
26420 #endif
26422 void
26423 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26424 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26426 #if TARGET_MACHO
26427 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26428 #else
26429 gcc_unreachable();
26430 #endif
26434 void
26435 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26436 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26438 #if TARGET_MACHO
26439 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26440 #else
26441 gcc_unreachable();
26442 #endif
26445 /* Return whether we should generate PC-relative code for FNDECL. */
26446 bool
26447 rs6000_fndecl_pcrel_p (const_tree fndecl)
26449 if (DEFAULT_ABI != ABI_ELFv2)
26450 return false;
26452 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26454 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26455 && TARGET_CMODEL == CMODEL_MEDIUM);
26458 /* Return whether we should generate PC-relative code for *FN. */
26459 bool
26460 rs6000_function_pcrel_p (struct function *fn)
26462 if (DEFAULT_ABI != ABI_ELFv2)
26463 return false;
26465 /* Optimize usual case. */
26466 if (fn == cfun)
26467 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26468 && TARGET_CMODEL == CMODEL_MEDIUM);
26470 return rs6000_fndecl_pcrel_p (fn->decl);
26473 /* Return whether we should generate PC-relative code for the current
26474 function. */
26475 bool
26476 rs6000_pcrel_p ()
26478 return (DEFAULT_ABI == ABI_ELFv2
26479 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26480 && TARGET_CMODEL == CMODEL_MEDIUM);
26484 /* Given an address (ADDR), a mode (MODE), and what the format of the
26485 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26486 for the address. */
26488 enum insn_form
26489 address_to_insn_form (rtx addr,
26490 machine_mode mode,
26491 enum non_prefixed_form non_prefixed_format)
26493 /* Single register is easy. */
26494 if (REG_P (addr) || SUBREG_P (addr))
26495 return INSN_FORM_BASE_REG;
26497   /* If the non-prefixed instruction format doesn't support offset addressing,
26498 make sure only indexed addressing is allowed.
26500 We special case SDmode so that the register allocator does not try to move
26501 SDmode through GPR registers, but instead uses the 32-bit integer load and
26502 store instructions for the floating point registers. */
26503 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26505 if (GET_CODE (addr) != PLUS)
26506 return INSN_FORM_BAD;
26508 rtx op0 = XEXP (addr, 0);
26509 rtx op1 = XEXP (addr, 1);
26510 if (!REG_P (op0) && !SUBREG_P (op0))
26511 return INSN_FORM_BAD;
26513 if (!REG_P (op1) && !SUBREG_P (op1))
26514 return INSN_FORM_BAD;
26516 return INSN_FORM_X;
26519 /* Deal with update forms. */
26520 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26521 return INSN_FORM_UPDATE;
26523 /* Handle PC-relative symbols and labels. Check for both local and
26524 external symbols. Assume labels are always local. TLS symbols
26525 are not PC-relative for rs6000. */
26526 if (TARGET_PCREL)
26528 if (LABEL_REF_P (addr))
26529 return INSN_FORM_PCREL_LOCAL;
26531 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26533 if (!SYMBOL_REF_LOCAL_P (addr))
26534 return INSN_FORM_PCREL_EXTERNAL;
26535 else
26536 return INSN_FORM_PCREL_LOCAL;
26540 if (GET_CODE (addr) == CONST)
26541 addr = XEXP (addr, 0);
26543 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26544 if (GET_CODE (addr) == LO_SUM)
26545 return INSN_FORM_LO_SUM;
26547 /* Everything below must be an offset address of some form. */
26548 if (GET_CODE (addr) != PLUS)
26549 return INSN_FORM_BAD;
26551 rtx op0 = XEXP (addr, 0);
26552 rtx op1 = XEXP (addr, 1);
26554 /* Check for indexed addresses. */
26555 if (REG_P (op1) || SUBREG_P (op1))
26557 if (REG_P (op0) || SUBREG_P (op0))
26558 return INSN_FORM_X;
26560 return INSN_FORM_BAD;
26563 if (!CONST_INT_P (op1))
26564 return INSN_FORM_BAD;
26566 HOST_WIDE_INT offset = INTVAL (op1);
26567 if (!SIGNED_INTEGER_34BIT_P (offset))
26568 return INSN_FORM_BAD;
26570 /* Check for local and external PC-relative addresses. Labels are always
26571 local. TLS symbols are not PC-relative for rs6000. */
26572 if (TARGET_PCREL)
26574 if (LABEL_REF_P (op0))
26575 return INSN_FORM_PCREL_LOCAL;
26577 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26579 if (!SYMBOL_REF_LOCAL_P (op0))
26580 return INSN_FORM_PCREL_EXTERNAL;
26581 else
26582 return INSN_FORM_PCREL_LOCAL;
26586 /* If it isn't PC-relative, the address must use a base register. */
26587 if (!REG_P (op0) && !SUBREG_P (op0))
26588 return INSN_FORM_BAD;
26590 /* Large offsets must be prefixed. */
26591 if (!SIGNED_INTEGER_16BIT_P (offset))
26593 if (TARGET_PREFIXED)
26594 return INSN_FORM_PREFIXED_NUMERIC;
26596 return INSN_FORM_BAD;
26599 /* We have a 16-bit offset; see what default instruction format to use. */
26600 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26602 unsigned size = GET_MODE_SIZE (mode);
26604 /* On 64-bit systems, assume 64-bit integers need to use DS form
26605 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26606 (for LXV and STXV). TImode is problematic in that its normal usage
26607 is expected to be GPRs where it wants a DS instruction format, but if
26608 it goes into the vector registers, it wants a DQ instruction
26609 format. */
26610 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26611 non_prefixed_format = NON_PREFIXED_DS;
26613 else if (TARGET_VSX && size >= 16
26614 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26615 non_prefixed_format = NON_PREFIXED_DQ;
26617 else
26618 non_prefixed_format = NON_PREFIXED_D;
26621 /* Classify the D/DS/DQ-form addresses. */
26622 switch (non_prefixed_format)
26624 /* Instruction format D, all 16 bits are valid. */
26625 case NON_PREFIXED_D:
26626 return INSN_FORM_D;
26628 /* Instruction format DS, bottom 2 bits must be 0. */
26629 case NON_PREFIXED_DS:
26630 if ((offset & 3) == 0)
26631 return INSN_FORM_DS;
26633 else if (TARGET_PREFIXED)
26634 return INSN_FORM_PREFIXED_NUMERIC;
26636 else
26637 return INSN_FORM_BAD;
26639 /* Instruction format DQ, bottom 4 bits must be 0. */
26640 case NON_PREFIXED_DQ:
26641 if ((offset & 15) == 0)
26642 return INSN_FORM_DQ;
26644 else if (TARGET_PREFIXED)
26645 return INSN_FORM_PREFIXED_NUMERIC;
26647 else
26648 return INSN_FORM_BAD;
26650 default:
26651 break;
26654 return INSN_FORM_BAD;
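/* Worked examples for the classification above (illustrative only, assuming
   hypothetical base registers r9/r10 and prefixed-instruction support):

     (reg r9)                          -> INSN_FORM_BASE_REG
     (plus (reg r9) (reg r10))         -> INSN_FORM_X
     (plus (reg r9) (const_int 16))    -> INSN_FORM_D, _DS or _DQ, matching
                                          the non-prefixed default
     (plus (reg r9) (const_int 6))     -> INSN_FORM_PREFIXED_NUMERIC for a
                                          DS-form default, since 6 is not a
                                          multiple of 4
     (plus (reg r9) (const_int 70000)) -> INSN_FORM_PREFIXED_NUMERIC, since
                                          70000 does not fit in 16 bits.  */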
26657 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26658 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26659 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26660 a D-form or DS-form instruction. X-form and base_reg are always
26661 allowed. */
26662 bool
26663 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26664 enum non_prefixed_form non_prefixed_format)
26666 enum insn_form result_form;
26668 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26670 switch (non_prefixed_format)
26672 case NON_PREFIXED_D:
26673 switch (result_form)
26675 case INSN_FORM_X:
26676 case INSN_FORM_D:
26677 case INSN_FORM_DS:
26678 case INSN_FORM_BASE_REG:
26679 return true;
26680 default:
26681 return false;
26683 break;
26684 case NON_PREFIXED_DS:
26685 switch (result_form)
26687 case INSN_FORM_X:
26688 case INSN_FORM_DS:
26689 case INSN_FORM_BASE_REG:
26690 return true;
26691 default:
26692 return false;
26694 break;
26695 default:
26696 break;
26698 return false;
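/* For instance (a sketch, not from the original source): asking whether
   (plus (reg r9) (const_int 6)) is usable by a DS-form instruction such as
   LWA returns false, because address_to_insn_form classifies that address
   as INSN_FORM_PREFIXED_NUMERIC (or INSN_FORM_BAD without prefixed
   support), neither of which is accepted above for NON_PREFIXED_DS.  */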
26701 /* Return true if a REG with a given MODE is loaded from or stored into a MEM
26702 location that uses a non-prefixed D/DS/DQ-form address. This is used to validate
26703 the load or store with the PCREL_OPT optimization to make sure it is an
26704 instruction that can be optimized.
26706 We need to specify the MODE separately from the REG to allow for loads that
26707 include zero/sign/float extension. */
26709 bool
26710 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26712 /* If the instruction is indexed-only, like LFIWAX/LXSIWAX, we cannot do the
26713 PCREL_OPT optimization. */
26714 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26715 if (non_prefixed == NON_PREFIXED_X)
26716 return false;
26718 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26719 rtx addr = XEXP (mem, 0);
26720 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26721 return (iform == INSN_FORM_BASE_REG
26722 || iform == INSN_FORM_D
26723 || iform == INSN_FORM_DS
26724 || iform == INSN_FORM_DQ);
26727 /* Helper function to see if we're potentially looking at lfs/stfs.
26728 - PARALLEL containing a SET and a CLOBBER
26729 - stfs:
26730 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26731 - CLOBBER is a V4SF
26732 - lfs:
26733 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26734 - CLOBBER is a DI
26737 static bool
26738 is_lfs_stfs_insn (rtx_insn *insn)
26740 rtx pattern = PATTERN (insn);
26741 if (GET_CODE (pattern) != PARALLEL)
26742 return false;
26744 /* This should be a parallel with exactly one set and one clobber. */
26745 if (XVECLEN (pattern, 0) != 2)
26746 return false;
26748 rtx set = XVECEXP (pattern, 0, 0);
26749 if (GET_CODE (set) != SET)
26750 return false;
26752 rtx clobber = XVECEXP (pattern, 0, 1);
26753 if (GET_CODE (clobber) != CLOBBER)
26754 return false;
26756 /* All we care about is that the destination of the SET is a mem:SI,
26757 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26758 should be a scratch:V4SF. */
26760 rtx dest = SET_DEST (set);
26761 rtx src = SET_SRC (set);
26762 rtx scratch = SET_DEST (clobber);
26764 if (GET_CODE (src) != UNSPEC)
26765 return false;
26767 /* stfs case. */
26768 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26769 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26770 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26771 return true;
26773 /* lfs case. */
26774 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26775 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26776 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26777 return true;
26779 return false;
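/* The stfs pattern being matched looks roughly like this (illustrative RTL
   sketch only):

     (parallel [(set (mem:SI ...)
                     (unspec:SI [(reg:SF ...)] UNSPEC_SI_FROM_SF))
                (clobber (scratch:V4SF))])

   and the lfs pattern swaps the roles:

     (parallel [(set (reg:SF ...)
                     (unspec:SF [(mem:SI ...)] UNSPEC_SF_FROM_SI))
                (clobber (scratch:DI))])  */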
26782 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26783 instruction format (D/DS/DQ) used for offset memory. */
26785 enum non_prefixed_form
26786 reg_to_non_prefixed (rtx reg, machine_mode mode)
26788 /* If it isn't a register, use the defaults. */
26789 if (!REG_P (reg) && !SUBREG_P (reg))
26790 return NON_PREFIXED_DEFAULT;
26792 unsigned int r = reg_or_subregno (reg);
26794 /* If we have a pseudo, use the default instruction format. */
26795 if (!HARD_REGISTER_NUM_P (r))
26796 return NON_PREFIXED_DEFAULT;
26798 unsigned size = GET_MODE_SIZE (mode);
26800 /* FPR registers use D-form addresses for scalars, and DQ-form for vectors, IEEE
26801 128-bit floating point, and 128-bit integers. Before power9, only indexed
26802 addressing was available for vectors. */
26803 if (FP_REGNO_P (r))
26805 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26806 return NON_PREFIXED_D;
26808 else if (size < 8)
26809 return NON_PREFIXED_X;
26811 else if (TARGET_VSX && size >= 16
26812 && (VECTOR_MODE_P (mode)
26813 || VECTOR_ALIGNMENT_P (mode)
26814 || mode == TImode || mode == CTImode))
26815 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26817 else
26818 return NON_PREFIXED_DEFAULT;
26821 /* Altivec registers use DS-form addresses for scalars, and DQ-form for vectors, IEEE
26822 128-bit floating point, and 128-bit integers. Before power9, only indexed
26823 addressing was available. */
26824 else if (ALTIVEC_REGNO_P (r))
26826 if (!TARGET_P9_VECTOR)
26827 return NON_PREFIXED_X;
26829 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26830 return NON_PREFIXED_DS;
26832 else if (size < 8)
26833 return NON_PREFIXED_X;
26835 else if (TARGET_VSX && size >= 16
26836 && (VECTOR_MODE_P (mode)
26837 || VECTOR_ALIGNMENT_P (mode)
26838 || mode == TImode || mode == CTImode))
26839 return NON_PREFIXED_DQ;
26841 else
26842 return NON_PREFIXED_DEFAULT;
26845 /* GPR registers use DS-form addresses for 64-bit items on 64-bit systems, and D-form
26846 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26847 through the GPR registers for memory operations. */
26848 else if (TARGET_POWERPC64 && size >= 8)
26849 return NON_PREFIXED_DS;
26851 return NON_PREFIXED_D;
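/* Example mappings (illustrative only):

     FPR,     DFmode   -> NON_PREFIXED_D   (LFD takes a D-form address)
     GPR,     DImode   -> NON_PREFIXED_DS  (LD takes a DS-form address)
     Altivec, V2DImode -> NON_PREFIXED_DQ  (LXV takes a DQ-form address,
                                            power9 and later)
     Altivec, HImode   -> NON_PREFIXED_X   (indexed addressing only).  */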
26855 /* Whether a load instruction is a prefixed instruction. This is called from
26856 the prefixed attribute processing. */
26858 bool
26859 prefixed_load_p (rtx_insn *insn)
26861 /* Validate the insn to make sure it is a normal load insn. */
26862 extract_insn_cached (insn);
26863 if (recog_data.n_operands < 2)
26864 return false;
26866 rtx reg = recog_data.operand[0];
26867 rtx mem = recog_data.operand[1];
26869 if (!REG_P (reg) && !SUBREG_P (reg))
26870 return false;
26872 if (!MEM_P (mem))
26873 return false;
26875 /* Prefixed load instructions do not support update or indexed forms. */
26876 if (get_attr_indexed (insn) == INDEXED_YES
26877 || get_attr_update (insn) == UPDATE_YES)
26878 return false;
26880 /* LWA uses the DS format instead of the D format that LWZ uses. */
26881 enum non_prefixed_form non_prefixed;
26882 machine_mode reg_mode = GET_MODE (reg);
26883 machine_mode mem_mode = GET_MODE (mem);
26885 if (mem_mode == SImode && reg_mode == DImode
26886 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26887 non_prefixed = NON_PREFIXED_DS;
26889 else
26890 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26892 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26893 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26894 else
26895 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
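/* As an illustration (hypothetical registers and offsets): "ld 3,8(9)" is a
   non-prefixed DS-form load and this function returns false for it, while
   an offset that violates the DS constraints, say 0x12345, forces the
   prefixed form "pld 3,74565(9)", for which it returns true.  */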
26898 /* Whether a store instruction is a prefixed instruction. This is called from
26899 the prefixed attribute processing. */
26901 bool
26902 prefixed_store_p (rtx_insn *insn)
26904 /* Validate the insn to make sure it is a normal store insn. */
26905 extract_insn_cached (insn);
26906 if (recog_data.n_operands < 2)
26907 return false;
26909 rtx mem = recog_data.operand[0];
26910 rtx reg = recog_data.operand[1];
26912 if (!REG_P (reg) && !SUBREG_P (reg))
26913 return false;
26915 if (!MEM_P (mem))
26916 return false;
26918 /* Prefixed store instructions do not support update or indexed forms. */
26919 if (get_attr_indexed (insn) == INDEXED_YES
26920 || get_attr_update (insn) == UPDATE_YES)
26921 return false;
26923 machine_mode mem_mode = GET_MODE (mem);
26924 rtx addr = XEXP (mem, 0);
26925 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26927 /* We need to make sure we aren't looking at an stfs, which doesn't look
26928 like the other things reg_to_non_prefixed/address_is_prefixed
26929 look for. */
26930 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26931 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26932 else
26933 return address_is_prefixed (addr, mem_mode, non_prefixed);
26936 /* Whether a load immediate or add instruction is a prefixed instruction. This
26937 is called from the prefixed attribute processing. */
26939 bool
26940 prefixed_paddi_p (rtx_insn *insn)
26942 rtx set = single_set (insn);
26943 if (!set)
26944 return false;
26946 rtx dest = SET_DEST (set);
26947 rtx src = SET_SRC (set);
26949 if (!REG_P (dest) && !SUBREG_P (dest))
26950 return false;
26952 /* Is this a load immediate that can't be done with a simple ADDI or
26953 ADDIS? */
26954 if (CONST_INT_P (src))
26955 return (satisfies_constraint_eI (src)
26956 && !satisfies_constraint_I (src)
26957 && !satisfies_constraint_L (src));
26959 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26960 ADDIS? */
26961 if (GET_CODE (src) == PLUS)
26963 rtx op1 = XEXP (src, 1);
26965 return (CONST_INT_P (op1)
26966 && satisfies_constraint_eI (op1)
26967 && !satisfies_constraint_I (op1)
26968 && !satisfies_constraint_L (op1));
26971 /* If not, is it a load of a PC-relative address? */
26972 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26973 return false;
26975 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26976 return false;
26978 enum insn_form iform = address_to_insn_form (src, Pmode,
26979 NON_PREFIXED_DEFAULT);
26981 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
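/* For example (a sketch): 16-bit immediates and shifted 16-bit immediates
   are handled by the non-prefixed instructions

       li 3,0x1234
       lis 3,0x1234

   so they do not count, while a 34-bit immediate such as

       pli 3,0x123456

   can only be loaded with the prefixed form, and this function returns
   true for it.  */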
26984 /* Whether the next instruction needs a 'p' prefix issued before the
26985 instruction is printed out. */
26986 static bool prepend_p_to_next_insn;
26988 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26989 outputting the assembler code. On the PowerPC, we remember if the current
26990 insn is a prefixed insn where we need to emit a 'p' before the insn.
26992 In addition, if the insn is part of a PC-relative reference to an external
26993 label optimization, this is recorded also. */
26994 void
26995 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26997 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26998 == MAYBE_PREFIXED_YES
26999 && get_attr_prefixed (insn) == PREFIXED_YES);
27000 return;
27003 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
27004 We use it to emit a 'p' for prefixed insns that is set in
27005 FINAL_PRESCAN_INSN. */
27006 void
27007 rs6000_asm_output_opcode (FILE *stream)
27009 if (prepend_p_to_next_insn)
27011 fprintf (stream, "p");
27013 /* Reset the flag in the case where there are separate insn lines in the
27014 sequence, so the 'p' is only emitted for the first line. This shows up
27015 when we are doing the PCREL_OPT optimization, where the label created
27016 with %r<n> would otherwise have a leading 'p' printed. */
27017 prepend_p_to_next_insn = false;
27020 return;
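/* Taken together, the two hooks above mean that an insn whose output
   template starts with, say, "ld %0,%1" is printed as "pld ..." when it was
   recognized as prefixed: rs6000_final_prescan_insn latches the flag and
   rs6000_asm_output_opcode emits the leading 'p' exactly once.  (This is an
   illustrative description; the actual templates live in the machine
   description files.)  */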
27023 /* Emit the relocation to tie the next instruction to a previous instruction
27024 that loads up an external address. This is used to do the PCREL_OPT
27025 optimization. Note, the label is generated after the PLD of the GOT
27026 PC-relative address to allow the assembler to insert NOPs before the PLD
27027 instruction. The operand is a constant integer that is the label
27028 number. */
27030 void
27031 output_pcrel_opt_reloc (rtx label_num)
27033 rtx operands[1] = { label_num };
27034 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
27035 operands);
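/* With label number 5, the emitted assembly would look roughly like this
   (illustrative sketch, hypothetical symbol and registers):

       pld 9,ext_sym@got@pcrel
   .Lpcrel5:
       ...
       .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
       lwz 3,0(9)

   letting the linker rewrite the GOT load and the dependent load into a
   single PC-relative access when possible.  */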
27038 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
27039 should be adjusted to reflect any required changes. This macro is used when
27040 there is some systematic length adjustment required that would be difficult
27041 to express in the length attribute.
27043 In the PowerPC, we use this to adjust the length of an instruction if one or
27044 more prefixed instructions are generated, using the attribute
27045 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
27046 hardware requires that a prefixed instruction does not cross a 64-byte
27047 boundary. This means the compiler has to assume the length of the first
27048 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
27049 already set for the non-prefixed instruction, we just need to update for the
27050 difference. */
27052 int
27053 rs6000_adjust_insn_length (rtx_insn *insn, int length)
27055 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
27057 rtx pattern = PATTERN (insn);
27058 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
27059 && get_attr_prefixed (insn) == PREFIXED_YES)
27061 int num_prefixed = get_attr_max_prefixed_insns (insn);
27062 length += 4 * (num_prefixed + 1);
27066 return length;
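/* Worked example: a single prefixed load has max_prefixed_insns of 1, so
   the 4 bytes already counted for the non-prefixed form grow by
   4 * (1 + 1) = 8, giving the 12 bytes (an 8-byte instruction plus a
   possible alignment NOP) described above.  */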
27070 #ifdef HAVE_GAS_HIDDEN
27071 # define USE_HIDDEN_LINKONCE 1
27072 #else
27073 # define USE_HIDDEN_LINKONCE 0
27074 #endif
27076 /* Fills in the label name that should be used for a 476 link stack thunk. */
27078 void
27079 get_ppc476_thunk_name (char name[32])
27081 gcc_assert (TARGET_LINK_STACK);
27083 if (USE_HIDDEN_LINKONCE)
27084 sprintf (name, "__ppc476.get_thunk");
27085 else
27086 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
27089 /* This function emits the simple thunk routine that is used to preserve
27090 the link stack on the 476 cpu. */
27092 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
27093 static void
27094 rs6000_code_end (void)
27096 char name[32];
27097 tree decl;
27099 if (!TARGET_LINK_STACK)
27100 return;
27102 get_ppc476_thunk_name (name);
27104 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
27105 build_function_type_list (void_type_node, NULL_TREE));
27106 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
27107 NULL_TREE, void_type_node);
27108 TREE_PUBLIC (decl) = 1;
27109 TREE_STATIC (decl) = 1;
27111 #if RS6000_WEAK
27112 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
27114 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
27115 targetm.asm_out.unique_section (decl, 0);
27116 switch_to_section (get_named_section (decl, NULL, 0));
27117 DECL_WEAK (decl) = 1;
27118 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
27119 targetm.asm_out.globalize_label (asm_out_file, name);
27120 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
27121 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
27123 else
27124 #endif
27126 switch_to_section (text_section);
27127 ASM_OUTPUT_LABEL (asm_out_file, name);
27130 DECL_INITIAL (decl) = make_node (BLOCK);
27131 current_function_decl = decl;
27132 allocate_struct_function (decl, false);
27133 init_function_start (decl);
27134 first_function_block_is_cold = false;
27135 /* Make sure unwind info is emitted for the thunk if needed. */
27136 final_start_function (emit_barrier (), asm_out_file, 1);
27138 fputs ("\tblr\n", asm_out_file);
27140 final_end_function ();
27141 init_insn_lengths ();
27142 free_after_compilation (cfun);
27143 set_cfun (NULL);
27144 current_function_decl = NULL;
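/* The emitted thunk is trivial (illustrative; the weak/hidden directives
   are omitted):

       __ppc476.get_thunk:
               blr

   Callers reach it with a real bl and read the link register afterwards,
   which keeps the 476 link stack balanced, unlike a local bcl trick.  */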
27147 /* Add r30 to hard reg set if the prologue sets it up and it is not
27148 pic_offset_table_rtx. */
27150 static void
27151 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
27153 if (!TARGET_SINGLE_PIC_BASE
27154 && TARGET_TOC
27155 && TARGET_MINIMAL_TOC
27156 && !constant_pool_empty_p ())
27157 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27158 if (cfun->machine->split_stack_argp_used)
27159 add_to_hard_reg_set (&set->set, Pmode, 12);
27161 /* Make sure the hard reg set doesn't include r2, which was possibly added
27162 via PIC_OFFSET_TABLE_REGNUM. */
27163 if (TARGET_TOC)
27164 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
27168 /* Helper function for rs6000_split_logical to emit a logical instruction after
27169 splitting the operation into single GPR registers.
27171 DEST is the destination register.
27172 OP1 and OP2 are the input source registers.
27173 CODE is the base operation (AND, IOR, XOR, NOT).
27174 MODE is the machine mode.
27175 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27176 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27177 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27179 static void
27180 rs6000_split_logical_inner (rtx dest,
27181 rtx op1,
27182 rtx op2,
27183 enum rtx_code code,
27184 machine_mode mode,
27185 bool complement_final_p,
27186 bool complement_op1_p,
27187 bool complement_op2_p)
27189 rtx bool_rtx;
27191 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27192 if (op2 && CONST_INT_P (op2)
27193 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
27194 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27196 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
27197 HOST_WIDE_INT value = INTVAL (op2) & mask;
27199 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27200 if (code == AND)
27202 if (value == 0)
27204 emit_insn (gen_rtx_SET (dest, const0_rtx));
27205 return;
27208 else if (value == mask)
27210 if (!rtx_equal_p (dest, op1))
27211 emit_insn (gen_rtx_SET (dest, op1));
27212 return;
27216 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27217 into separate ORI/ORIS or XORI/XORIS instructions. */
27218 else if (code == IOR || code == XOR)
27220 if (value == 0)
27222 if (!rtx_equal_p (dest, op1))
27223 emit_insn (gen_rtx_SET (dest, op1));
27224 return;
27229 if (code == AND && mode == SImode
27230 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27232 emit_insn (gen_andsi3 (dest, op1, op2));
27233 return;
27236 if (complement_op1_p)
27237 op1 = gen_rtx_NOT (mode, op1);
27239 if (complement_op2_p)
27240 op2 = gen_rtx_NOT (mode, op2);
27242 /* For canonical RTL, if only one arm is inverted it is the first. */
27243 if (!complement_op1_p && complement_op2_p)
27244 std::swap (op1, op2);
27246 bool_rtx = ((code == NOT)
27247 ? gen_rtx_NOT (mode, op1)
27248 : gen_rtx_fmt_ee (code, mode, op1, op2));
27250 if (complement_final_p)
27251 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
27253 emit_insn (gen_rtx_SET (dest, bool_rtx));
27256 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27257 operations are split immediately during RTL generation to allow for more
27258 optimizations of the AND/IOR/XOR.
27260 OPERANDS is an array containing the destination and two input operands.
27261 CODE is the base operation (AND, IOR, XOR, NOT).
27262 MODE is the machine mode.
27263 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27264 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27265 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27269 static void
27270 rs6000_split_logical_di (rtx operands[3],
27271 enum rtx_code code,
27272 bool complement_final_p,
27273 bool complement_op1_p,
27274 bool complement_op2_p)
27276 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
27277 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
27278 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
27279 enum hi_lo { hi = 0, lo = 1 };
27280 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
27281 size_t i;
27283 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
27284 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
27285 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
27286 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
27288 if (code == NOT)
27289 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
27290 else
27292 if (!CONST_INT_P (operands[2]))
27294 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
27295 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
27297 else
27299 HOST_WIDE_INT value = INTVAL (operands[2]);
27300 HOST_WIDE_INT value_hi_lo[2];
27302 gcc_assert (!complement_final_p);
27303 gcc_assert (!complement_op1_p);
27304 gcc_assert (!complement_op2_p);
27306 value_hi_lo[hi] = value >> 32;
27307 value_hi_lo[lo] = value & lower_32bits;
27309 for (i = 0; i < 2; i++)
27311 HOST_WIDE_INT sub_value = value_hi_lo[i];
27313 if (sub_value & sign_bit)
27314 sub_value |= upper_32bits;
27316 op2_hi_lo[i] = GEN_INT (sub_value);
27318 /* If this is an AND instruction, check to see if we need to load
27319 the value in a register. */
27320 if (code == AND && sub_value != -1 && sub_value != 0
27321 && !and_operand (op2_hi_lo[i], SImode))
27322 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27327 for (i = 0; i < 2; i++)
27329 /* Split large IOR/XOR operations. */
27330 if ((code == IOR || code == XOR)
27331 && CONST_INT_P (op2_hi_lo[i])
27332 && !complement_final_p
27333 && !complement_op1_p
27334 && !complement_op2_p
27335 && !logical_const_operand (op2_hi_lo[i], SImode))
27337 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27338 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
27339 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
27340 rtx tmp = gen_reg_rtx (SImode);
27342 /* Make sure the constant is sign extended. */
27343 if ((hi_16bits & sign_bit) != 0)
27344 hi_16bits |= upper_32bits;
27346 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27347 code, SImode, false, false, false);
27349 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27350 code, SImode, false, false, false);
27352 else
27353 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27354 code, SImode, complement_final_p,
27355 complement_op1_p, complement_op2_p);
27358 return;
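/* For instance (illustrative registers): when a 32-bit half of the constant
   does not satisfy logical_const_operand, say XOR with 0x12345678, the two
   rs6000_split_logical_inner calls above produce the pair

       xoris 3,4,0x1234
       xori  3,3,0x5678

   instead of forcing the constant into a register.  */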
27361 /* Split the insns that make up boolean operations operating on multiple GPR
27362 registers. The boolean MD patterns ensure that the inputs either are
27363 exactly the same as the output registers, or there is no overlap.
27365 OPERANDS is an array containing the destination and two input operands.
27366 CODE is the base operation (AND, IOR, XOR, NOT).
27367 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27368 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27369 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27371 void
27372 rs6000_split_logical (rtx operands[3],
27373 enum rtx_code code,
27374 bool complement_final_p,
27375 bool complement_op1_p,
27376 bool complement_op2_p)
27378 machine_mode mode = GET_MODE (operands[0]);
27379 machine_mode sub_mode;
27380 rtx op0, op1, op2;
27381 int sub_size, regno0, regno1, nregs, i;
27383 /* If this is DImode, use the specialized version that can run before
27384 register allocation. */
27385 if (mode == DImode && !TARGET_POWERPC64)
27387 rs6000_split_logical_di (operands, code, complement_final_p,
27388 complement_op1_p, complement_op2_p);
27389 return;
27392 op0 = operands[0];
27393 op1 = operands[1];
27394 op2 = (code == NOT) ? NULL_RTX : operands[2];
27395 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27396 sub_size = GET_MODE_SIZE (sub_mode);
27397 regno0 = REGNO (op0);
27398 regno1 = REGNO (op1);
27400 gcc_assert (reload_completed);
27401 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27402 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27404 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27405 gcc_assert (nregs > 1);
27407 if (op2 && REG_P (op2))
27408 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27410 for (i = 0; i < nregs; i++)
27412 int offset = i * sub_size;
27413 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27414 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27415 rtx sub_op2 = ((code == NOT)
27416 ? NULL_RTX
27417 : simplify_subreg (sub_mode, op2, mode, offset));
27419 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27420 complement_final_p, complement_op1_p,
27421 complement_op2_p);
27424 return;
27427 /* Emit instructions to move SRC to DST. Called by splitters for
27428 multi-register moves. It will emit at most one instruction for
27429 each register that is accessed; that is, it won't emit li/lis pairs
27430 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27431 register. */
27433 void
27434 rs6000_split_multireg_move (rtx dst, rtx src)
27436 /* The register number of the first register being moved. */
27437 int reg;
27438 /* The mode that is to be moved. */
27439 machine_mode mode;
27440 /* The mode that the move is being done in, and its size. */
27441 machine_mode reg_mode;
27442 int reg_mode_size;
27443 /* The number of registers that will be moved. */
27444 int nregs;
27446 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27447 mode = GET_MODE (dst);
27448 nregs = hard_regno_nregs (reg, mode);
27450 /* If we have a vector quad register for MMA, and this is a load or store,
27451 see if we can use vector paired load/stores. */
27452 if (mode == XOmode && TARGET_MMA
27453 && (MEM_P (dst) || MEM_P (src)))
27455 reg_mode = OOmode;
27456 nregs /= 2;
27458 /* If we have a vector pair/quad mode, split it into two/four separate
27459 vectors. */
27460 else if (mode == OOmode || mode == XOmode)
27461 reg_mode = V1TImode;
27462 else if (FP_REGNO_P (reg))
27463 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27464 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27465 else if (ALTIVEC_REGNO_P (reg))
27466 reg_mode = V16QImode;
27467 else
27468 reg_mode = word_mode;
27469 reg_mode_size = GET_MODE_SIZE (reg_mode);
27471 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27473 /* TDmode residing in FP registers is special, since the ISA requires that
27474 the lower-numbered word of a register pair is always the most significant
27475 word, even in little-endian mode. This does not match the usual subreg
27476 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27477 the appropriate constituent registers "by hand" in little-endian mode.
27479 Note we do not need to check for destructive overlap here since TDmode
27480 can only reside in even/odd register pairs. */
27481 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27483 rtx p_src, p_dst;
27484 int i;
27486 for (i = 0; i < nregs; i++)
27488 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27489 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27490 else
27491 p_src = simplify_gen_subreg (reg_mode, src, mode,
27492 i * reg_mode_size);
27494 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27495 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27496 else
27497 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27498 i * reg_mode_size);
27500 emit_insn (gen_rtx_SET (p_dst, p_src));
27503 return;
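/* Example (a sketch): copying a TDmode value from the pair fr10/fr11 to
   fr12/fr13 on a little-endian system emits the fr13 <- fr11 move first and
   fr12 <- fr10 second, so the lower-numbered register of each pair keeps
   the most significant word as the ISA requires.  */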
27506 /* The __vector_pair and __vector_quad modes are multi-register
27507 modes, so if we have to load or store the registers, we have to be
27508 careful to properly swap them if we're in little endian mode
27509 below. This means the last register gets the first memory
27510 location. We also need to be careful to use the right register
27511 numbers if we are splitting XO to OO. */
27512 if (mode == OOmode || mode == XOmode)
27514 nregs = hard_regno_nregs (reg, mode);
27515 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27516 if (MEM_P (dst))
27518 unsigned offset = 0;
27519 unsigned size = GET_MODE_SIZE (reg_mode);
27521 /* If we are reading an accumulator register, we have to
27522 deprime it before we can access it. */
27523 if (TARGET_MMA
27524 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27525 emit_insn (gen_mma_xxmfacc (src, src));
27527 for (int i = 0; i < nregs; i += reg_mode_nregs)
27529 unsigned subreg
27530 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27531 rtx dst2 = adjust_address (dst, reg_mode, offset);
27532 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27533 offset += size;
27534 emit_insn (gen_rtx_SET (dst2, src2));
27537 return;
27540 if (MEM_P (src))
27542 unsigned offset = 0;
27543 unsigned size = GET_MODE_SIZE (reg_mode);
27545 for (int i = 0; i < nregs; i += reg_mode_nregs)
27547 unsigned subreg
27548 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27549 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27550 rtx src2 = adjust_address (src, reg_mode, offset);
27551 offset += size;
27552 emit_insn (gen_rtx_SET (dst2, src2));
27555 /* If we are writing an accumulator register, we have to
27556 prime it after we've written it. */
27557 if (TARGET_MMA
27558 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27559 emit_insn (gen_mma_xxmtacc (dst, dst));
27561 return;
27564 if (GET_CODE (src) == UNSPEC
27565 || GET_CODE (src) == UNSPEC_VOLATILE)
27567 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27568 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27569 gcc_assert (REG_P (dst));
27570 if (GET_MODE (src) == XOmode)
27571 gcc_assert (FP_REGNO_P (REGNO (dst)));
27572 if (GET_MODE (src) == OOmode)
27573 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27575 int nvecs = XVECLEN (src, 0);
27576 for (int i = 0; i < nvecs; i++)
27578 rtx op;
27579 int regno = reg + i;
27581 if (WORDS_BIG_ENDIAN)
27583 op = XVECEXP (src, 0, i);
27585 /* If we are loading an even VSX register and the memory location
27586 is adjacent to the next register's memory location (if any),
27587 then we can load them both with one LXVP instruction. */
27588 if ((regno & 1) == 0)
27590 rtx op2 = XVECEXP (src, 0, i + 1);
27591 if (adjacent_mem_locations (op, op2) == op)
27593 op = adjust_address (op, OOmode, 0);
27594 /* Skip the next register, since we're going to
27595 load it together with this register. */
27596 i++;
27600 else
27602 op = XVECEXP (src, 0, nvecs - i - 1);
27604 /* If we are loading an even VSX register and the memory location
27605 is adjacent to the next register's memory location (if any),
27606 then we can load them both with one LXVP instruction. */
27607 if ((regno & 1) == 0)
27609 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27610 if (adjacent_mem_locations (op2, op) == op2)
27612 op = adjust_address (op2, OOmode, 0);
27613 /* Skip the next register, since we're going to
27614 load it together with this register. */
27615 i++;
27620 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27621 emit_insn (gen_rtx_SET (dst_i, op));
27624 /* We are writing an accumulator register, so we have to
27625 prime it after we've written it. */
27626 if (GET_MODE (src) == XOmode)
27627 emit_insn (gen_mma_xxmtacc (dst, dst));
27629 return;
27632 /* Register -> register moves can use common code. */
27635 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27637 /* If we are reading an accumulator register, we have to
27638 deprime it before we can access it. */
27639 if (TARGET_MMA
27640 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27641 emit_insn (gen_mma_xxmfacc (src, src));
27643 /* Move register range backwards, if we might have destructive
27644 overlap. */
27645 int i;
27646 /* XO/OO are opaque so cannot use subregs. */
27647 if (mode == OOmode || mode == XOmode)
27649 for (i = nregs - 1; i >= 0; i--)
27651 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27652 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27653 emit_insn (gen_rtx_SET (dst_i, src_i));
27656 else
27658 for (i = nregs - 1; i >= 0; i--)
27659 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27660 i * reg_mode_size),
27661 simplify_gen_subreg (reg_mode, src, mode,
27662 i * reg_mode_size)));
27665 /* If we are writing an accumulator register, we have to
27666 prime it after we've written it. */
27667 if (TARGET_MMA
27668 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27669 emit_insn (gen_mma_xxmtacc (dst, dst));
27671 else
27673 int i;
27674 int j = -1;
27675 bool used_update = false;
27676 rtx restore_basereg = NULL_RTX;
27678 if (MEM_P (src) && INT_REGNO_P (reg))
27680 rtx breg;
27682 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27683 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27685 rtx delta_rtx;
27686 breg = XEXP (XEXP (src, 0), 0);
27687 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27688 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27689 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27690 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27691 src = replace_equiv_address (src, breg);
27693 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27695 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27697 rtx basereg = XEXP (XEXP (src, 0), 0);
27698 if (TARGET_UPDATE)
27700 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27701 emit_insn (gen_rtx_SET (ndst,
27702 gen_rtx_MEM (reg_mode,
27703 XEXP (src, 0))));
27704 used_update = true;
27706 else
27707 emit_insn (gen_rtx_SET (basereg,
27708 XEXP (XEXP (src, 0), 1)));
27709 src = replace_equiv_address (src, basereg);
27711 else
27713 rtx basereg = gen_rtx_REG (Pmode, reg);
27714 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27715 src = replace_equiv_address (src, basereg);
27719 breg = XEXP (src, 0);
27720 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27721 breg = XEXP (breg, 0);
27723 /* If the base register we are using to address memory is
27724 also a destination reg, then change that register last. */
27725 if (REG_P (breg)
27726 && REGNO (breg) >= REGNO (dst)
27727 && REGNO (breg) < REGNO (dst) + nregs)
27728 j = REGNO (breg) - REGNO (dst);
27730 else if (MEM_P (dst) && INT_REGNO_P (reg))
27732 rtx breg;
27734 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27735 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27737 rtx delta_rtx;
27738 breg = XEXP (XEXP (dst, 0), 0);
27739 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27740 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27741 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27743 /* We have to update the breg before doing the store.
27744 Use store with update, if available. */
27746 if (TARGET_UPDATE)
27748 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27749 emit_insn (TARGET_32BIT
27750 ? (TARGET_POWERPC64
27751 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27752 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27753 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27754 used_update = true;
27756 else
27757 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27758 dst = replace_equiv_address (dst, breg);
27760 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27761 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27763 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27765 rtx basereg = XEXP (XEXP (dst, 0), 0);
27766 if (TARGET_UPDATE)
27768 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27769 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27770 XEXP (dst, 0)),
27771 nsrc));
27772 used_update = true;
27774 else
27775 emit_insn (gen_rtx_SET (basereg,
27776 XEXP (XEXP (dst, 0), 1)));
27777 dst = replace_equiv_address (dst, basereg);
27779 else
27781 rtx basereg = XEXP (XEXP (dst, 0), 0);
27782 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27783 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27784 && REG_P (basereg)
27785 && REG_P (offsetreg)
27786 && REGNO (basereg) != REGNO (offsetreg));
27787 if (REGNO (basereg) == 0)
27789 rtx tmp = offsetreg;
27790 offsetreg = basereg;
27791 basereg = tmp;
27793 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27794 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27795 dst = replace_equiv_address (dst, basereg);
27798 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27799 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27802 /* If we are reading an accumulator register, we have to
27803 deprime it before we can access it. */
27804 if (TARGET_MMA && REG_P (src)
27805 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27806 emit_insn (gen_mma_xxmfacc (src, src));
27808 for (i = 0; i < nregs; i++)
27810 /* Calculate index to next subword. */
27811 ++j;
27812 if (j == nregs)
27813 j = 0;
27815 /* If the compiler already emitted a move of the first word by
27816 store with update, there is no need to do anything. */
27817 if (j == 0 && used_update)
27818 continue;
27820 /* XO/OO are opaque so cannot use subregs. */
27821 if (mode == OOmode || mode == XOmode)
27823 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27824 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27825 emit_insn (gen_rtx_SET (dst_i, src_i));
27827 else
27828 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27829 j * reg_mode_size),
27830 simplify_gen_subreg (reg_mode, src, mode,
27831 j * reg_mode_size)));
27834 /* If we are writing an accumulator register, we have to
27835 prime it after we've written it. */
27836 if (TARGET_MMA && REG_P (dst)
27837 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27838 emit_insn (gen_mma_xxmtacc (dst, dst));
27840 if (restore_basereg != NULL_RTX)
27841 emit_insn (restore_basereg);
27845 /* Return true if the peephole2 can combine an addis instruction and a load
27846 with an offset into a sequence that can be fused together on
27847 a power8. */
27849 bool
27850 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27851 rtx addis_value, /* addis value. */
27852 rtx target, /* target register that is loaded. */
27853 rtx mem) /* bottom part of the memory addr. */
27855 rtx addr;
27856 rtx base_reg;
27858 /* Validate arguments. */
27859 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27860 return false;
27862 if (!base_reg_operand (target, GET_MODE (target)))
27863 return false;
27865 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27866 return false;
27868 /* Allow sign/zero extension. */
27869 if (GET_CODE (mem) == ZERO_EXTEND
27870 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27871 mem = XEXP (mem, 0);
27873 if (!MEM_P (mem))
27874 return false;
27876 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27877 return false;
27879 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27880 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27881 return false;
27883 /* Validate that the register used to load the high value is either the
27884 register being loaded, or we can safely replace its use.
27886 This function is only called from the peephole2 pass and we assume that
27887 there are 2 instructions in the peephole (addis and load), so we want to
27888 check if the target register was not used in the memory address and the
27889 register to hold the addis result is dead after the peephole. */
27890 if (REGNO (addis_reg) != REGNO (target))
27892 if (reg_mentioned_p (target, mem))
27893 return false;
27895 if (!peep2_reg_dead_p (2, addis_reg))
27896 return false;
27898 /* If the target register being loaded is the stack pointer, we must
27899 avoid loading any other value into it, even temporarily. */
27900 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27901 return false;
27904 base_reg = XEXP (addr, 0);
27905 return REGNO (addis_reg) == REGNO (base_reg);
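/* When this predicate accepts, the peephole rewrites the pair into a fused
   sequence along the lines of (hypothetical registers and symbol):

       addis 9,2,sym@toc@ha
       lwz 9,sym@toc@l(9)

   where both instructions target the same register so that a power8 can
   fuse them in its front end.  */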
27908 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27909 sequence. We adjust the addis register to use the target register. If the
27910 load sign extends, we adjust the code to do the zero-extending load, and an
27911 explicit sign extension later, since the fusion only covers zero-extending
27912 loads.
27914 The operands are:
27915 operands[0] register set with addis (to be replaced with target)
27916 operands[1] value set via addis
27917 operands[2] target register being loaded
27918 operands[3] D-form memory reference using operands[0]. */
27920 void
27921 expand_fusion_gpr_load (rtx *operands)
27923 rtx addis_value = operands[1];
27924 rtx target = operands[2];
27925 rtx orig_mem = operands[3];
27926 rtx new_addr, new_mem, orig_addr, offset;
27927 enum rtx_code plus_or_lo_sum;
27928 machine_mode target_mode = GET_MODE (target);
27929 machine_mode extend_mode = target_mode;
27930 machine_mode ptr_mode = Pmode;
27931 enum rtx_code extend = UNKNOWN;
27933 if (GET_CODE (orig_mem) == ZERO_EXTEND
27934 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27936 extend = GET_CODE (orig_mem);
27937 orig_mem = XEXP (orig_mem, 0);
27938 target_mode = GET_MODE (orig_mem);
27941 gcc_assert (MEM_P (orig_mem));
27943 orig_addr = XEXP (orig_mem, 0);
27944 plus_or_lo_sum = GET_CODE (orig_addr);
27945 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27947 offset = XEXP (orig_addr, 1);
27948 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27949 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27951 if (extend != UNKNOWN)
27952 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27954 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27955 UNSPEC_FUSION_GPR);
27956 emit_insn (gen_rtx_SET (target, new_mem));
27958 if (extend == SIGN_EXTEND)
27960 int sub_off = ((BYTES_BIG_ENDIAN)
27961 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27962 : 0);
27963 rtx sign_reg
27964 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27966 emit_insn (gen_rtx_SET (target,
27967 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27970 return;
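/* For a sign-extending SImode load into a DImode register, the expansion is
   roughly (hypothetical registers and symbol):

       addis 9,2,sym@toc@ha
       lwz 9,sym@toc@l(9)
       extsw 9,9

   that is, a fused zero-extending load followed by an explicit sign
   extension, since power8 fusion only covers the zero-extending forms.  */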
27973 /* Emit the addis instruction that will be part of a fused instruction
27974 sequence. */
27976 void
27977 emit_fusion_addis (rtx target, rtx addis_value)
27979 rtx fuse_ops[10];
27980 const char *addis_str = NULL;
27982 /* Emit the addis instruction. */
27983 fuse_ops[0] = target;
27984 if (satisfies_constraint_L (addis_value))
27986 fuse_ops[1] = addis_value;
27987 addis_str = "lis %0,%v1";
27990 else if (GET_CODE (addis_value) == PLUS)
27992 rtx op0 = XEXP (addis_value, 0);
27993 rtx op1 = XEXP (addis_value, 1);
27995 if (REG_P (op0) && CONST_INT_P (op1)
27996 && satisfies_constraint_L (op1))
27998 fuse_ops[1] = op0;
27999 fuse_ops[2] = op1;
28000 addis_str = "addis %0,%1,%v2";
28004 else if (GET_CODE (addis_value) == HIGH)
28006 rtx value = XEXP (addis_value, 0);
28007 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
28009 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
28010 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
28011 if (TARGET_ELF)
28012 addis_str = "addis %0,%2,%1@toc@ha";
28014 else if (TARGET_XCOFF)
28015 addis_str = "addis %0,%1@u(%2)";
28017 else
28018 gcc_unreachable ();
28021 else if (GET_CODE (value) == PLUS)
28023 rtx op0 = XEXP (value, 0);
28024 rtx op1 = XEXP (value, 1);
28026 if (GET_CODE (op0) == UNSPEC
28027 && XINT (op0, 1) == UNSPEC_TOCREL
28028 && CONST_INT_P (op1))
28030 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
28031 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
28032 fuse_ops[3] = op1;
28033 if (TARGET_ELF)
28034 addis_str = "addis %0,%2,%1+%3@toc@ha";
28036 else if (TARGET_XCOFF)
28037 addis_str = "addis %0,%1+%3@u(%2)";
28039 else
28040 gcc_unreachable ();
28044 else if (satisfies_constraint_L (value))
28046 fuse_ops[1] = value;
28047 addis_str = "lis %0,%v1";
28050 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
28052 fuse_ops[1] = value;
28053 addis_str = "lis %0,%1@ha";
28057 if (!addis_str)
28058 fatal_insn ("Could not generate addis value for fusion", addis_value);
28060 output_asm_insn (addis_str, fuse_ops);
28063 /* Emit a D-form load or store instruction that is the second instruction
28064 of a fusion sequence. */
28066 static void
28067 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
28069 rtx fuse_ops[10];
28070 char insn_template[80];
28072 fuse_ops[0] = load_reg;
28073 fuse_ops[1] = addis_reg;
28075 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
28077 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
28078 fuse_ops[2] = offset;
28079 output_asm_insn (insn_template, fuse_ops);
28082 else if (GET_CODE (offset) == UNSPEC
28083 && XINT (offset, 1) == UNSPEC_TOCREL)
28085 if (TARGET_ELF)
28086 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
28088 else if (TARGET_XCOFF)
28089 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28091 else
28092 gcc_unreachable ();
28094 fuse_ops[2] = XVECEXP (offset, 0, 0);
28095 output_asm_insn (insn_template, fuse_ops);
28098 else if (GET_CODE (offset) == PLUS
28099 && GET_CODE (XEXP (offset, 0)) == UNSPEC
28100 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
28101 && CONST_INT_P (XEXP (offset, 1)))
28103 rtx tocrel_unspec = XEXP (offset, 0);
28104 if (TARGET_ELF)
28105 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
28107 else if (TARGET_XCOFF)
28108 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
28110 else
28111 gcc_unreachable ();
28113 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
28114 fuse_ops[3] = XEXP (offset, 1);
28115 output_asm_insn (insn_template, fuse_ops);
28118 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
28120 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28122 fuse_ops[2] = offset;
28123 output_asm_insn (insn_template, fuse_ops);
28126 else
28127 fatal_insn ("Unable to generate load/store offset for fusion", offset);
28129 return;
28132 /* Given an address, convert it into the addis and load offset parts. Addresses
28133 created during the peephole2 process look like:
28134 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28135 (unspec [(...)] UNSPEC_TOCREL)) */
28137 static void
28138 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
28140 rtx hi, lo;
28142 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
28144 hi = XEXP (addr, 0);
28145 lo = XEXP (addr, 1);
28147 else
28148 gcc_unreachable ();
28150 *p_hi = hi;
28151 *p_lo = lo;
28154 /* Return a string to fuse an addis instruction with a GPR load into the same
28155 register that the addis instruction set up. The address that is used
28156 is the logical address that was formed during peephole2:
28157 (lo_sum (high) (low-part))
28159 The code is complicated, so we call output_asm_insn directly, and just
28160 return "". */
28162 const char *
28163 emit_fusion_gpr_load (rtx target, rtx mem)
28165 rtx addis_value;
28166 rtx addr;
28167 rtx load_offset;
28168 const char *load_str = NULL;
28169 machine_mode mode;
28171 if (GET_CODE (mem) == ZERO_EXTEND)
28172 mem = XEXP (mem, 0);
28174 gcc_assert (REG_P (target) && MEM_P (mem));
28176 addr = XEXP (mem, 0);
28177 fusion_split_address (addr, &addis_value, &load_offset);
28179 /* Now emit the load instruction to the same register. */
28180 mode = GET_MODE (mem);
28181 switch (mode)
28183 case E_QImode:
28184 load_str = "lbz";
28185 break;
28187 case E_HImode:
28188 load_str = "lhz";
28189 break;
28191 case E_SImode:
28192 case E_SFmode:
28193 load_str = "lwz";
28194 break;
28196 case E_DImode:
28197 case E_DFmode:
28198 gcc_assert (TARGET_POWERPC64);
28199 load_str = "ld";
28200 break;
28202 default:
28203 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
28206 /* Emit the addis instruction. */
28207 emit_fusion_addis (target, addis_value);
28209 /* Emit the D-form load instruction. */
28210 emit_fusion_load (target, target, load_offset, load_str);
28212 return "";
28215 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28216 would ignore it then. */
28217 static GTY(()) tree atomic_hold_decl;
28218 static GTY(()) tree atomic_clear_decl;
28219 static GTY(()) tree atomic_update_decl;
28221 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28222 static void
28223 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
28225 if (!TARGET_HARD_FLOAT)
28227 #ifdef RS6000_GLIBC_ATOMIC_FENV
28228 if (atomic_hold_decl == NULL_TREE)
28230 atomic_hold_decl
28231 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28232 get_identifier ("__atomic_feholdexcept"),
28233 build_function_type_list (void_type_node,
28234 double_ptr_type_node,
28235 NULL_TREE));
28236 TREE_PUBLIC (atomic_hold_decl) = 1;
28237 DECL_EXTERNAL (atomic_hold_decl) = 1;
28240 if (atomic_clear_decl == NULL_TREE)
28242 atomic_clear_decl
28243 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28244 get_identifier ("__atomic_feclearexcept"),
28245 build_function_type_list (void_type_node,
28246 NULL_TREE));
28247 TREE_PUBLIC (atomic_clear_decl) = 1;
28248 DECL_EXTERNAL (atomic_clear_decl) = 1;
28251 tree const_double = build_qualified_type (double_type_node,
28252 TYPE_QUAL_CONST);
28253 tree const_double_ptr = build_pointer_type (const_double);
28254 if (atomic_update_decl == NULL_TREE)
28256 atomic_update_decl
28257 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28258 get_identifier ("__atomic_feupdateenv"),
28259 build_function_type_list (void_type_node,
28260 const_double_ptr,
28261 NULL_TREE));
28262 TREE_PUBLIC (atomic_update_decl) = 1;
28263 DECL_EXTERNAL (atomic_update_decl) = 1;
28266 tree fenv_var = create_tmp_var_raw (double_type_node);
28267 TREE_ADDRESSABLE (fenv_var) = 1;
28268 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
28269 build4 (TARGET_EXPR, double_type_node, fenv_var,
28270 void_node, NULL_TREE, NULL_TREE));
28272 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
28273 *clear = build_call_expr (atomic_clear_decl, 0);
28274 *update = build_call_expr (atomic_update_decl, 1,
28275 fold_convert (const_double_ptr, fenv_addr));
28276 #endif
28277 return;
28280 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
28281 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
28282 tree call_mffs = build_call_expr (mffs, 0);
28284 /* Generates the equivalent of feholdexcept (&fenv_var)
28286 *fenv_var = __builtin_mffs ();
28287 double fenv_hold;
28288 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28289 __builtin_mtfsf (0xff, fenv_hold); */
28291 /* Mask to clear everything except for the rounding modes and non-IEEE
28292 arithmetic flag. */
28293 const unsigned HOST_WIDE_INT hold_exception_mask
28294 = HOST_WIDE_INT_C (0xffffffff00000007);
28296 tree fenv_var = create_tmp_var_raw (double_type_node);
28298 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
28299 NULL_TREE, NULL_TREE);
28301 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
28302 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28303 build_int_cst (uint64_type_node,
28304 hold_exception_mask));
28306 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28307 fenv_llu_and);
28309 tree hold_mtfsf = build_call_expr (mtfsf, 2,
28310 build_int_cst (unsigned_type_node, 0xff),
28311 fenv_hold_mtfsf);
28313 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28315 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28317 double fenv_clear = __builtin_mffs ();
28318 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
28319 __builtin_mtfsf (0xff, fenv_clear); */
28321 /* Mask that clears the entire lower word of the FPSCR, which holds the
28322 exception and control bits. */
28323 const unsigned HOST_WIDE_INT clear_exception_mask
28324 = HOST_WIDE_INT_C (0xffffffff00000000);
28326 tree fenv_clear = create_tmp_var_raw (double_type_node);
28328 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28329 call_mffs, NULL_TREE, NULL_TREE);
28331 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28332 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28333 fenv_clean_llu,
28334 build_int_cst (uint64_type_node,
28335 clear_exception_mask));
28337 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28338 fenv_clear_llu_and);
28340 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28341 build_int_cst (unsigned_type_node, 0xff),
28342 fenv_clear_mtfsf);
28344 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28346 /* Generates the equivalent of feupdateenv (&fenv_var)
28348 double old_fenv = __builtin_mffs ();
28349 double fenv_update;
28350 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28351 (*(uint64_t*)fenv_var & 0x1ff80fff);
28352 __builtin_mtfsf (0xff, fenv_update); */
28354 const unsigned HOST_WIDE_INT update_exception_mask
28355 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28356 const unsigned HOST_WIDE_INT new_exception_mask
28357 = HOST_WIDE_INT_C (0x1ff80fff);
28359 tree old_fenv = create_tmp_var_raw (double_type_node);
28360 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28361 call_mffs, NULL_TREE, NULL_TREE);
28363 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28364 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28365 build_int_cst (uint64_type_node,
28366 update_exception_mask));
28368 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28369 build_int_cst (uint64_type_node,
28370 new_exception_mask));
28372 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28373 old_llu_and, new_llu_and);
28375 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28376 new_llu_mask);
28378 tree update_mtfsf = build_call_expr (mtfsf, 2,
28379 build_int_cst (unsigned_type_node, 0xff),
28380 fenv_update_mtfsf);
28382 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
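/* A sketch of how the three sequences built above are used, following
   the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook's contract (the exact
   placement is decided by the front end, not by this file):

	_Atomic double d;
	d += 1.0;

   HOLD is evaluated before the compare-and-swap loop to save the
   environment and enter non-stop mode, CLEAR is evaluated inside the
   loop so exceptions raised by a failed iteration are discarded, and
   UPDATE runs after the loop to restore the saved environment and
   re-raise the exceptions of the successful iteration.  */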
28385 void
28386 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28388 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28390 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28391 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28393 /* The destination of the vmrgew instruction layout is:
28394 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28395 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28396 vmrgew instruction will be correct. */
28397 if (BYTES_BIG_ENDIAN)
28399 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28400 GEN_INT (0)));
28401 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28402 GEN_INT (3)));
28404 else
28406 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28407 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28410 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28411 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28413 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28414 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28416 if (BYTES_BIG_ENDIAN)
28417 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28418 else
28419 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
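/* A sketch of the element flow above, assuming the big-endian path with
   src1 = { a0, a1 } and src2 = { b0, b1 } (V2DF): the two xxpermdi
   instructions build rtx_tmp0 = { a0, b0 } and rtx_tmp1 = { a1, b1 },
   xvcvdpsp converts each pair to single precision, and vmrgew merges
   the even-numbered words of its inputs, so that
   dst = { (float) a0, (float) a1, (float) b0, (float) b1 },
   which is the vec_float2 element order.  */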
28422 void
28423 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28425 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28427 rtx_tmp0 = gen_reg_rtx (V2DImode);
28428 rtx_tmp1 = gen_reg_rtx (V2DImode);
28430 /* The destination of the vmrgew instruction layout is:
28431 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28432 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28433 vmrgew instruction will be correct. */
28434 if (BYTES_BIG_ENDIAN)
28436 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28437 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28439 else
28441 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28442 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28445 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28446 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28448 if (signed_convert)
28450 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28451 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28453 else
28455 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28456 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28459 if (BYTES_BIG_ENDIAN)
28460 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28461 else
28462 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
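/* The integer variant follows the same merge scheme as the V2DF case;
   as a sketch, with SIGNED_CONVERT set and src1 = { 1, 2 },
   src2 = { 3, 4 } (V2DI), xvcvsxdsp converts the doubleword pairs and
   the final vmrgew produces dst = { 1.0f, 2.0f, 3.0f, 4.0f }.  */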
28465 void
28466 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28467 rtx src2)
28469 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28471 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28472 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28474 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28475 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28477 rtx_tmp2 = gen_reg_rtx (V4SImode);
28478 rtx_tmp3 = gen_reg_rtx (V4SImode);
28480 if (signed_convert)
28482 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28483 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28485 else
28487 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28488 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28491 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
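/* A sketch of the conversion above: with src1 = { 1.5, 2.5 } and
   src2 = { 3.5, 4.5 } (V2DF), xvcvdpsxws truncates toward zero and the
   vmrgew merge yields dst = { 1, 2, 3, 4 } (V4SI), matching the
   vec_signed2 element order.  */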
28494 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28496 static bool
28497 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28498 optimization_type opt_type)
28500 switch (op)
28502 case rsqrt_optab:
28503 return (opt_type == OPTIMIZE_FOR_SPEED
28504 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28506 default:
28507 return true;
28511 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28513 static HOST_WIDE_INT
28514 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28516 if (TREE_CODE (exp) == STRING_CST
28517 && (STRICT_ALIGNMENT || !optimize_size))
28518 return MAX (align, BITS_PER_WORD);
28519 return align;
28522 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28524 static HOST_WIDE_INT
28525 rs6000_starting_frame_offset (void)
28527 if (FRAME_GROWS_DOWNWARD)
28528 return 0;
28529 return RS6000_STARTING_FRAME_OFFSET;
28532 /* Internal function to return the built-in function id for the complex
28533 multiply operation for a given mode. */
28535 static inline built_in_function
28536 complex_multiply_builtin_code (machine_mode mode)
28538 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28539 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28540 return (built_in_function) func;
28543 /* Internal function to return the built-in function id for the complex divide
28544 operation for a given mode. */
28546 static inline built_in_function
28547 complex_divide_builtin_code (machine_mode mode)
28549 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28550 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28551 return (built_in_function) func;
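/* For example, complex_multiply_builtin_code (KCmode) returns
   BUILT_IN_COMPLEX_MUL_MIN + (KCmode - MIN_MODE_COMPLEX_FLOAT), the
   entry whose assembler name is remapped to __mulkc3 below.  */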
28554 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
28555 function names from <foo>l to <foo>f128 if the default long double type is
28556 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28557 include file switches the names on systems that support long double as IEEE
28558 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28559 In the future, glibc will export names like __ieee128_sinf128 and we can
28560 switch to using those instead of using sinf128, which pollutes the user's
28561 namespace.
28563 This also switches the names for the Fortran math functions (Fortran does
28564 not use math.h). However, Fortran needs other changes to the compiler and
28565 library before the real*16 type can be switched at compile time.
28567 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28568 only do this transformation if the __float128 type is enabled. This
28569 prevents us from doing the transformation on older 32-bit ports that might
28570 have enabled using IEEE 128-bit floating point as the default long double
28571 type.
28573 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28574 function names used for complex multiply and divide to the appropriate
28575 names. */
28577 static tree
28578 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28580 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28581 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28582 if (TARGET_FLOAT128_TYPE
28583 && TREE_CODE (decl) == FUNCTION_DECL
28584 && DECL_IS_UNDECLARED_BUILTIN (decl)
28585 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28587 built_in_function id = DECL_FUNCTION_CODE (decl);
28588 const char *newname = NULL;
28590 if (id == complex_multiply_builtin_code (KCmode))
28591 newname = "__mulkc3";
28593 else if (id == complex_multiply_builtin_code (ICmode))
28594 newname = "__multc3";
28596 else if (id == complex_multiply_builtin_code (TCmode))
28597 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28599 else if (id == complex_divide_builtin_code (KCmode))
28600 newname = "__divkc3";
28602 else if (id == complex_divide_builtin_code (ICmode))
28603 newname = "__divtc3";
28605 else if (id == complex_divide_builtin_code (TCmode))
28606 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28608 if (newname)
28610 if (TARGET_DEBUG_BUILTIN)
28611 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28613 return get_identifier (newname);
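/* As an illustration (a sketch): on a target compiled with
   -mabi=ieeelongdouble,

	_Complex long double x, y, z;
	z = x * y;

   uses the TCmode complex-multiply built-in, and the code above
   rewrites its assembler name to __mulkc3 rather than __multc3.  */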
28617 /* Map long double built-in functions if long double is IEEE 128-bit. */
28618 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28619 && TREE_CODE (decl) == FUNCTION_DECL
28620 && DECL_IS_UNDECLARED_BUILTIN (decl)
28621 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28623 size_t len = IDENTIFIER_LENGTH (id);
28624 const char *name = IDENTIFIER_POINTER (id);
28625 char *newname = NULL;
28627 /* See if it is one of the built-in functions with an unusual name. */
28628 switch (DECL_FUNCTION_CODE (decl))
28630 case BUILT_IN_DREML:
28631 newname = xstrdup ("__remainderieee128");
28632 break;
28634 case BUILT_IN_GAMMAL:
28635 newname = xstrdup ("__lgammaieee128");
28636 break;
28638 case BUILT_IN_GAMMAL_R:
28639 case BUILT_IN_LGAMMAL_R:
28640 newname = xstrdup ("__lgammaieee128_r");
28641 break;
28643 case BUILT_IN_NEXTTOWARD:
28644 newname = xstrdup ("__nexttoward_to_ieee128");
28645 break;
28647 case BUILT_IN_NEXTTOWARDF:
28648 newname = xstrdup ("__nexttowardf_to_ieee128");
28649 break;
28651 case BUILT_IN_NEXTTOWARDL:
28652 newname = xstrdup ("__nexttowardieee128");
28653 break;
28655 case BUILT_IN_POW10L:
28656 newname = xstrdup ("__exp10ieee128");
28657 break;
28659 case BUILT_IN_SCALBL:
28660 newname = xstrdup ("__scalbieee128");
28661 break;
28663 case BUILT_IN_SIGNIFICANDL:
28664 newname = xstrdup ("__significandieee128");
28665 break;
28667 case BUILT_IN_SINCOSL:
28668 newname = xstrdup ("__sincosieee128");
28669 break;
28671 default:
28672 break;
28675 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28676 if (!newname)
28678 size_t printf_len = strlen ("printf");
28679 size_t scanf_len = strlen ("scanf");
28680 size_t printf_chk_len = strlen ("printf_chk");
28682 if (len >= printf_len
28683 && strcmp (name + len - printf_len, "printf") == 0)
28684 newname = xasprintf ("__%sieee128", name);
28686 else if (len >= scanf_len
28687 && strcmp (name + len - scanf_len, "scanf") == 0)
28688 newname = xasprintf ("__isoc99_%sieee128", name);
28690 else if (len >= printf_chk_len
28691 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28692 newname = xasprintf ("%sieee128", name);
28694 else if (name[len - 1] == 'l')
28696 bool uses_ieee128_p = false;
28697 tree type = TREE_TYPE (decl);
28698 machine_mode ret_mode = TYPE_MODE (type);
28700 /* See if the function returns an IEEE 128-bit floating point type or
28701 complex type. */
28702 if (ret_mode == TFmode || ret_mode == TCmode)
28703 uses_ieee128_p = true;
28704 else
28706 function_args_iterator args_iter;
28707 tree arg;
28709 /* See if the function passes an IEEE 128-bit floating point type
28710 or complex type. */
28711 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28713 machine_mode arg_mode = TYPE_MODE (arg);
28714 if (arg_mode == TFmode || arg_mode == TCmode)
28716 uses_ieee128_p = true;
28717 break;
28722 /* If we passed or returned an IEEE 128-bit floating point type,
28723 change the name. Use __<name>ieee128, instead of <name>l. */
28724 if (uses_ieee128_p)
28725 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28729 if (newname)
28731 if (TARGET_DEBUG_BUILTIN)
28732 fprintf (stderr, "Map %s => %s\n", name, newname);
28734 id = get_identifier (newname);
28735 free (newname);
28739 return id;
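/* A few illustrative mappings performed above when long double is IEEE
   128-bit (a sketch, not an exhaustive list): __builtin_sinl becomes
   __sinieee128 because it takes and returns TFmode values; printf
   becomes __printfieee128 via the "printf" suffix check; and scanf
   becomes __isoc99_scanfieee128 via the "scanf" suffix check.  */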
28742 /* Predict whether the given loop in gimple will be transformed in the RTL
28743 doloop_optimize pass. */
28745 static bool
28746 rs6000_predict_doloop_p (struct loop *loop)
28748 gcc_assert (loop);
28750 /* On rs6000, targetm.can_use_doloop_p is actually
28751 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28752 if (loop->inner != NULL)
28754 if (dump_file && (dump_flags & TDF_DETAILS))
28755 fprintf (dump_file, "Predict doloop failure due to"
28756 " loop nesting.\n");
28757 return false;
28760 return true;
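/* For example (a sketch): in

	for (int i = 0; i < n; i++)	/* outer: not innermost, predict false.  */
	  for (int j = 0; j < m; j++)	/* inner: predicted to use the doloop.  */
	    body ();

   only the inner loop is predicted to become a CTR-based counted loop
   (bdnz) by doloop_optimize.  */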
28763 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28765 static machine_mode
28766 rs6000_preferred_doloop_mode (machine_mode)
28768 return word_mode;
28771 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28773 static bool
28774 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28776 gcc_assert (MEM_P (mem));
28778 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
28779 AND-style addresses, so don't allow MEMs with those address types to be
28780 substituted as an equivalent expression. See PR93974 for details. */
28781 if (GET_CODE (XEXP (mem, 0)) == AND)
28782 return true;
28784 return false;
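/* For example, an Altivec-style address that masks off the low four
   bits,

	(mem:V16QI (and:DI (reg:DI 3) (const_int -16)))

   must keep its register form and not be replaced by an equivalent
   memory expression by LRA.  */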
28787 /* Implement TARGET_INVALID_CONVERSION. */
28789 static const char *
28790 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28792 /* Make sure we're working with the canonical types. */
28793 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28794 fromtype = TYPE_CANONICAL (fromtype);
28795 if (TYPE_CANONICAL (totype) != NULL_TREE)
28796 totype = TYPE_CANONICAL (totype);
28798 machine_mode frommode = TYPE_MODE (fromtype);
28799 machine_mode tomode = TYPE_MODE (totype);
28801 if (frommode != tomode)
28803 /* Do not allow conversions to/from XOmode and OOmode types. */
28804 if (frommode == XOmode)
28805 return N_("invalid conversion from type %<__vector_quad%>");
28806 if (tomode == XOmode)
28807 return N_("invalid conversion to type %<__vector_quad%>");
28808 if (frommode == OOmode)
28809 return N_("invalid conversion from type %<__vector_pair%>");
28810 if (tomode == OOmode)
28811 return N_("invalid conversion to type %<__vector_pair%>");
28814 /* Conversion allowed. */
28815 return NULL;
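/* As an illustration (a sketch): given

	__vector_quad vq;
	vector unsigned char vc = (vector unsigned char) vq;

   the source has XOmode and the destination V16QImode, so the cast is
   rejected with "invalid conversion from type __vector_quad".  */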
28818 /* Convert a SFmode constant to the integer bit pattern. */
28820 long
28821 rs6000_const_f32_to_i32 (rtx operand)
28823 long value;
28824 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28826 gcc_assert (GET_MODE (operand) == SFmode);
28827 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28828 return value;
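/* For example, an SFmode constant of 1.0f yields the bit pattern
   0x3f800000 (sign 0, biased exponent 127, mantissa 0).  */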
28831 void
28832 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28834 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28835 inform (input_location,
28836 "the result for the xxspltidp instruction "
28837 "is undefined for subnormal input values");
28838 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28841 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28843 static bool
28844 rs6000_gen_pic_addr_diff_vec (void)
28846 return rs6000_relative_jumptables;
28849 void
28850 rs6000_output_addr_vec_elt (FILE *file, int value)
28852 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28853 char buf[100];
28855 fprintf (file, "%s", directive);
28856 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28857 assemble_name (file, buf);
28858 fprintf (file, "\n");
28862 /* Copy an integer constant to the vector constant structure. */
28864 static void
28865 constant_int_to_128bit_vector (rtx op,
28866 machine_mode mode,
28867 size_t byte_num,
28868 vec_const_128bit_type *info)
28870 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28871 unsigned bitsize = GET_MODE_BITSIZE (mode);
28873 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28874 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
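/* For example, storing the SImode constant 0x01020304 at BYTE_NUM 0
   writes info->bytes[0..3] = { 0x01, 0x02, 0x03, 0x04 }; elements are
   always recorded in big-endian byte order within the array.  */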
28877 /* Copy a floating point constant to the vector constant structure. */
28879 static void
28880 constant_fp_to_128bit_vector (rtx op,
28881 machine_mode mode,
28882 size_t byte_num,
28883 vec_const_128bit_type *info)
28885 unsigned bitsize = GET_MODE_BITSIZE (mode);
28886 unsigned num_words = bitsize / 32;
28887 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28888 long real_words[VECTOR_128BIT_WORDS];
28890 /* Make sure we don't overflow the real_words array and that it is
28891 filled completely. */
28892 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28894 real_to_target (real_words, rtype, mode);
28896 /* Iterate over each 32-bit word in the floating point constant. The
28897 real_to_target function puts out words in target endian fashion. We need
28898 to arrange the order so that the bytes are written in big endian order. */
28899 for (unsigned num = 0; num < num_words; num++)
28901 unsigned endian_num = (BYTES_BIG_ENDIAN
28902 ? num
28903 : num_words - 1 - num);
28905 unsigned uvalue = real_words[endian_num];
28906 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28907 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28910 /* Mark that this constant involves floating point. */
28911 info->fp_constant_p = true;
28914 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28915 structure INFO.
28917 Break the constant out into bytes, half words, words, and double words.
28918 Return true if we have successfully converted the constant.
28920 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28921 constants. Integer and floating point scalar constants are splatted to fill
28922 out the vector. */
28924 bool
28925 vec_const_128bit_to_bytes (rtx op,
28926 machine_mode mode,
28927 vec_const_128bit_type *info)
28929 /* Initialize the constant structure. */
28930 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28932 /* Assume CONST_INTs are DImode. */
28933 if (mode == VOIDmode)
28934 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28936 if (mode == VOIDmode)
28937 return false;
28939 unsigned size = GET_MODE_SIZE (mode);
28940 bool splat_p = false;
28942 if (size > VECTOR_128BIT_BYTES)
28943 return false;
28945 /* Set up the bits. */
28946 switch (GET_CODE (op))
28948 /* Integer constants, default to double word. */
28949 case CONST_INT:
28951 constant_int_to_128bit_vector (op, mode, 0, info);
28952 splat_p = true;
28953 break;
28956 /* Floating point constants. */
28957 case CONST_DOUBLE:
28959 /* Fail if the floating point constant is the wrong mode. */
28960 if (GET_MODE (op) != mode)
28961 return false;
28963 /* SFmode constants stored as scalars are kept in DFmode format. */
28964 if (mode == SFmode)
28966 mode = DFmode;
28967 size = GET_MODE_SIZE (DFmode);
28970 constant_fp_to_128bit_vector (op, mode, 0, info);
28971 splat_p = true;
28972 break;
28975 /* Vector constants, iterate over each element. On little endian
28976 systems, we have to reverse the element numbers. */
28977 case CONST_VECTOR:
28979 /* Fail if the vector constant is the wrong mode or size. */
28980 if (GET_MODE (op) != mode
28981 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28982 return false;
28984 machine_mode ele_mode = GET_MODE_INNER (mode);
28985 size_t ele_size = GET_MODE_SIZE (ele_mode);
28986 size_t nunits = GET_MODE_NUNITS (mode);
28988 for (size_t num = 0; num < nunits; num++)
28990 rtx ele = CONST_VECTOR_ELT (op, num);
28991 size_t byte_num = (BYTES_BIG_ENDIAN
28992 ? num
28993 : nunits - 1 - num) * ele_size;
28995 if (CONST_INT_P (ele))
28996 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28997 else if (CONST_DOUBLE_P (ele))
28998 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28999 else
29000 return false;
29003 break;
29006 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
29007 Since we are duplicating the element, we don't have to worry about
29008 endian issues. */
29009 case VEC_DUPLICATE:
29011 /* Fail if the vector duplicate is the wrong mode or size. */
29012 if (GET_MODE (op) != mode
29013 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
29014 return false;
29016 machine_mode ele_mode = GET_MODE_INNER (mode);
29017 size_t ele_size = GET_MODE_SIZE (ele_mode);
29018 rtx ele = XEXP (op, 0);
29019 size_t nunits = GET_MODE_NUNITS (mode);
29021 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
29022 return false;
29024 for (size_t num = 0; num < nunits; num++)
29026 size_t byte_num = num * ele_size;
29028 if (CONST_INT_P (ele))
29029 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
29030 else
29031 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
29034 break;
29037 /* Anything else, just return failure. */
29038 default:
29039 return false;
29042 /* Splat the constant to fill 128 bits if desired. */
29043 if (splat_p && size < VECTOR_128BIT_BYTES)
29045 if ((VECTOR_128BIT_BYTES % size) != 0)
29046 return false;
29048 for (size_t offset = size;
29049 offset < VECTOR_128BIT_BYTES;
29050 offset += size)
29051 memcpy ((void *) &info->bytes[offset],
29052 (void *) &info->bytes[0],
29053 size);
29056 /* Remember original size. */
29057 info->original_size = size;
29059 /* Determine if the bytes are all the same. */
29060 unsigned char first_byte = info->bytes[0];
29061 info->all_bytes_same = true;
29062 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
29063 if (first_byte != info->bytes[i])
29065 info->all_bytes_same = false;
29066 break;
29069 /* Pack half words together & determine if all of the half words are the
29070 same. */
29071 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
29072 info->half_words[i] = ((info->bytes[i * 2] << 8)
29073 | info->bytes[(i * 2) + 1]);
29075 unsigned short first_hword = info->half_words[0];
29076 info->all_half_words_same = true;
29077 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
29078 if (first_hword != info->half_words[i])
29080 info->all_half_words_same = false;
29081 break;
29084 /* Pack words together & determine if all of the words are the same. */
29085 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
29086 info->words[i] = ((info->bytes[i * 4] << 24)
29087 | (info->bytes[(i * 4) + 1] << 16)
29088 | (info->bytes[(i * 4) + 2] << 8)
29089 | info->bytes[(i * 4) + 3]);
29091 info->all_words_same
29092 = (info->words[0] == info->words[1]
29093 && info->words[0] == info->words[2]
29094 && info->words[0] == info->words[3]);
29096 /* Pack double words together & determine if all of the double words are the
29097 same. */
29098 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
29100 unsigned HOST_WIDE_INT d_word = 0;
29101 for (size_t j = 0; j < 8; j++)
29102 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
29104 info->double_words[i] = d_word;
29107 info->all_double_words_same
29108 = (info->double_words[0] == info->double_words[1]);
29110 return true;
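/* A worked example (a sketch): for the DImode CONST_INT 1, the code
   above records bytes[0..7] = { 0, 0, 0, 0, 0, 0, 0, 1 } and the splat
   copies them to bytes[8..15].  Afterwards all_double_words_same is
   true (both halves are 1), but all_words_same is false because the
   words alternate between 0 and 1.  */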
29113 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29114 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29115 value to be used with the LXVKQ instruction. */
29117 unsigned
29118 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
29120 /* The instruction is only supported if power10 code generation, IEEE
29121 128-bit floating point hardware and VSX registers are all available. */
29122 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
29123 || !TARGET_VSX)
29124 return 0;
29126 /* All of the constants that can be generated by LXVKQ have the bottom 3
29127 words equal to 0. */
29128 if (vsx_const->words[1] != 0
29129 || vsx_const->words[2] != 0
29130 || vsx_const->words[3] != 0)
29131 return 0;
29133 /* See if we have a match for the first word. */
29134 switch (vsx_const->words[0])
29136 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
29137 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
29138 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
29139 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
29140 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
29141 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
29142 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
29143 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
29144 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
29145 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
29146 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
29147 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
29148 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
29149 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
29150 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
29151 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
29152 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
29153 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
29155 /* Anything else cannot be loaded. */
29156 default:
29157 break;
29160 return 0;
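/* For example, the IEEE 128-bit constant +1.0 has words
   { 0x3FFF0000, 0, 0, 0 }, so this returns 1 and the constant can be
   materialized with a single lxvkq using immediate 1.  */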
29163 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29164 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29165 value to be used with the XXSPLTIW instruction. */
29167 unsigned
29168 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
29170 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29171 return 0;
29173 if (!vsx_const->all_words_same)
29174 return 0;
29176 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29177 if (vsx_const->all_bytes_same)
29178 return 0;
29180 /* See if we can use VSPLTISH or VSPLTISW. */
29181 if (vsx_const->all_half_words_same)
29183 short sign_h_word = vsx_const->half_words[0];
29184 if (EASY_VECTOR_15 (sign_h_word))
29185 return 0;
29188 int sign_word = vsx_const->words[0];
29189 if (EASY_VECTOR_15 (sign_word))
29190 return 0;
29192 return vsx_const->words[0];
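/* For example, the V4SI constant { 74565, 74565, 74565, 74565 }
   (0x00012345 in each word) has identical words but not identical half
   words or bytes, and 0x00012345 is out of VSPLTISW range, so this
   returns 0x00012345 for use as the XXSPLTIW immediate.  */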
29195 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29196 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29197 value to be used with the XXSPLTIDP instruction. */
29199 unsigned
29200 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
29202 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29203 return 0;
29205 /* Reject if the two 64-bit segments are not the same. */
29206 if (!vsx_const->all_double_words_same)
29207 return 0;
29209 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29210 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29211 if (vsx_const->all_bytes_same
29212 || vsx_const->all_half_words_same
29213 || vsx_const->all_words_same)
29214 return 0;
29216 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
29218 /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
29219 pattern and the signaling NaN bit pattern. Recognize infinity and
29220 negative infinity. */
29222 /* Bit representation of DFmode normal quiet NaN. */
29223 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29225 /* Bit representation of DFmode normal signaling NaN. */
29226 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29228 /* Bit representation of DFmode positive infinity. */
29229 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29231 /* Bit representation of DFmode negative infinity. */
29232 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29234 if (value != RS6000_CONST_DF_NAN
29235 && value != RS6000_CONST_DF_NANS
29236 && value != RS6000_CONST_DF_INF
29237 && value != RS6000_CONST_DF_NEG_INF)
29239 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29240 the exponent, and 52 bits for the mantissa (not counting the hidden
29241 bit used for normal numbers). NaN values have the exponent set to all
29242 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29244 int df_exponent = (value >> 52) & 0x7ff;
29245 unsigned HOST_WIDE_INT
29246 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
29248 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
29249 return 0;
29251 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29252 the exponent all 0 bits, and the mantissa non-zero. If the value is
29253 subnormal, then the hidden bit in the mantissa is not set. */
29254 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
29255 return 0;
29258 /* Change the representation to a DFmode constant. */
29259 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
29261 /* real_from_target takes the target words in target order. */
29262 if (!BYTES_BIG_ENDIAN)
29263 std::swap (df_words[0], df_words[1]);
29265 REAL_VALUE_TYPE rv_type;
29266 real_from_target (&rv_type, df_words, DFmode);
29268 const REAL_VALUE_TYPE *rv = &rv_type;
29270 /* Validate that the number can be stored as a SFmode value. */
29271 if (!exact_real_truncate (SFmode, rv))
29272 return 0;
29274 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
29275 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29276 instruction. */
29277 long sf_value;
29278 real_to_target (&sf_value, rv, SFmode);
29280 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29281 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29282 0 bits, and the mantissa non-zero. */
29283 long sf_exponent = (sf_value >> 23) & 0xFF;
29284 long sf_mantissa = sf_value & 0x7FFFFF;
29286 if (sf_exponent == 0 && sf_mantissa != 0)
29287 return 0;
29289 /* Return the immediate to be used. */
29290 return sf_value;
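/* For example, the V2DF constant { 1.5, 1.5 } has both double words
   equal to 0x3FF8000000000000, 1.5 truncates exactly to SFmode, and
   the SFmode bit pattern 0x3FC00000 (1.5f) is returned as the
   XXSPLTIDP immediate.  */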
29293 /* Currently we have only two opaque types: the __vector_quad and
29294 __vector_pair built-in types. They are target specific and only
29295 available when MMA is supported. When MMA is supported, this simply
29296 returns false. Otherwise it checks whether the given gimple STMT is
29297 an assignment, asm or call stmt that unexpectedly uses either of
29298 these two opaque types; if so, it raises an error message and
29299 returns true, otherwise it returns false. */
29301 bool
29302 rs6000_opaque_type_invalid_use_p (gimple *stmt)
29304 if (TARGET_MMA)
29305 return false;
29307 /* If the given TYPE is an MMA opaque type, emit the corresponding
29308 error message and return true, otherwise return false. */
29309 auto check_and_error_invalid_use = [](tree type)
29311 tree mv = TYPE_MAIN_VARIANT (type);
29312 if (mv == vector_quad_type_node)
29314 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29315 return true;
29317 else if (mv == vector_pair_type_node)
29319 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29320 return true;
29322 return false;
29325 if (stmt)
29327 /* The usage of MMA opaque types is very limited for now;
29328 checking gassign, gasm and gcall statements is enough so far. */
29329 if (gassign *ga = dyn_cast<gassign *> (stmt))
29331 tree lhs = gimple_assign_lhs (ga);
29332 tree type = TREE_TYPE (lhs);
29333 if (check_and_error_invalid_use (type))
29334 return true;
29336 else if (gasm *gs = dyn_cast<gasm *> (stmt))
29338 unsigned ninputs = gimple_asm_ninputs (gs);
29339 for (unsigned i = 0; i < ninputs; i++)
29341 tree op = gimple_asm_input_op (gs, i);
29342 tree val = TREE_VALUE (op);
29343 tree type = TREE_TYPE (val);
29344 if (check_and_error_invalid_use (type))
29345 return true;
29347 unsigned noutputs = gimple_asm_noutputs (gs);
29348 for (unsigned i = 0; i < noutputs; i++)
29350 tree op = gimple_asm_output_op (gs, i);
29351 tree val = TREE_VALUE (op);
29352 tree type = TREE_TYPE (val);
29353 if (check_and_error_invalid_use (type))
29354 return true;
29357 else if (gcall *gc = dyn_cast<gcall *> (stmt))
29359 unsigned nargs = gimple_call_num_args (gc);
29360 for (unsigned i = 0; i < nargs; i++)
29362 tree arg = gimple_call_arg (gc, i);
29363 tree type = TREE_TYPE (arg);
29364 if (check_and_error_invalid_use (type))
29365 return true;
29370 return false;
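/* As an illustration (a sketch): compiling

	__vector_quad f (__vector_quad *p) { return *p; }

   without -mmma reaches this check with a gassign whose LHS type is
   __vector_quad, so the error "type '__vector_quad' requires the
   '-mmma' option" is reported.  */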
29373 struct gcc_target targetm = TARGET_INITIALIZER;
29375 #include "gt-rs6000.h"