aix: 64 bit AIX TLS libpthread dependency.
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob e073b26b43079fe770020bbb1bbdd320d893fdba
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2021 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-fold.h"
62 #include "gimple-iterator.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "except.h"
75 #if TARGET_XCOFF
76 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
77 #endif
78 #include "case-cfn-macros.h"
79 #include "ppc-auxv.h"
80 #include "rs6000-internal.h"
81 #include "opts.h"
83 /* This file should be included last. */
84 #include "target-def.h"
86 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
87 systems will also set long double to be IEEE 128-bit. AIX and Darwin
88 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
89 those systems will not pick up this default. This needs to be after all
90 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
91 properly defined. */
92 #ifndef TARGET_IEEEQUAD_DEFAULT
93 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
94 #define TARGET_IEEEQUAD_DEFAULT 1
95 #else
96 #define TARGET_IEEEQUAD_DEFAULT 0
97 #endif
98 #endif
100 /* Don't enable PC-relative addressing if the target does not support it. */
101 #ifndef PCREL_SUPPORTED_BY_OS
102 #define PCREL_SUPPORTED_BY_OS 0
103 #endif
105 /* Support targetm.vectorize.builtin_mask_for_load. */
106 tree altivec_builtin_mask_for_load;
108 #ifdef USING_ELFOS_H
109 /* Counter for labels which are to be placed in .fixup. */
110 int fixuplabelno = 0;
111 #endif
113 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
114 int dot_symbols;
116 /* Specify the machine mode that pointers have. After generation of rtl, the
117 compiler makes no further distinction between pointers and any other objects
118 of this machine mode. */
119 scalar_int_mode rs6000_pmode;
121 #if TARGET_ELF
122 /* Note whether IEEE 128-bit floating point was passed or returned, either as
123 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
124 floating point. We changed the default C++ mangling for these types and we
125 may want to generate a weak alias of the old mangling (U10__float128) to the
126 new mangling (u9__ieee128). */
127 bool rs6000_passes_ieee128 = false;
128 #endif
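/* Illustrative sketch (editor's note, not in the original source): with the
   new mangling, a C++ declaration such as
       void f (__float128);
   encodes its argument type as u9__ieee128, whereas GCC 8.1 encoded it as
   U10__float128.  When rs6000_passes_ieee128 is set, a weak alias from the
   old mangled name to the new one can keep GCC 8.1 objects linking.  */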
130 /* Track use of r13 in 64-bit AIX TLS. */
131 static bool xcoff_tls_exec_model_detected = false;
133 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
134 name used in current releases (i.e. u9__ieee128). */
135 static bool ieee128_mangling_gcc_8_1;
137 /* Width in bits of a pointer. */
138 unsigned rs6000_pointer_size;
140 #ifdef HAVE_AS_GNU_ATTRIBUTE
141 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
142 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
143 # endif
144 /* Flag whether floating point values have been passed/returned.
145 Note that this doesn't say whether fprs are used, since the
146 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
147 should be set for soft-float values passed in gprs and ieee128
148 values passed in vsx registers. */
149 bool rs6000_passes_float = false;
150 bool rs6000_passes_long_double = false;
151 /* Flag whether vector values have been passed/returned. */
152 bool rs6000_passes_vector = false;
153 /* Flag whether small (<= 8 byte) structures have been returned. */
154 bool rs6000_returns_struct = false;
155 #endif
157 /* Value is TRUE if register/mode pair is acceptable. */
158 static bool rs6000_hard_regno_mode_ok_p
159 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
161 /* Maximum number of registers needed for a given register class and mode. */
162 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
164 /* How many registers are needed for a given register and mode. */
165 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
167 /* Map register number to register class. */
168 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
170 static int dbg_cost_ctrl;
172 /* Built in types. */
173 tree rs6000_builtin_types[RS6000_BTI_MAX];
174 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
176 /* Flag to say the TOC is initialized */
177 int toc_initialized, need_toc_init;
178 char toc_label_name[10];
180 /* Cached value of rs6000_variable_issue. This is cached in
181 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
182 static short cached_can_issue_more;
184 static GTY(()) section *read_only_data_section;
185 static GTY(()) section *private_data_section;
186 static GTY(()) section *tls_data_section;
187 static GTY(()) section *tls_private_data_section;
188 static GTY(()) section *read_only_private_data_section;
189 static GTY(()) section *sdata2_section;
191 section *toc_section = 0;
193 /* Describe the vector unit used for modes. */
194 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
195 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
197 /* Register classes for various constraints that are based on the target
198 switches. */
199 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
201 /* Describe the alignment of a vector. */
202 int rs6000_vector_align[NUM_MACHINE_MODES];
204 /* Map selected modes to types for builtins. */
205 tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
207 /* What modes to automatically generate reciprocal divide estimate (fre) and
208 reciprocal sqrt (frsqrte) for. */
209 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
211 /* Masks to determine which reciprocal estimate instructions to generate
212 automatically. */
213 enum rs6000_recip_mask {
214 RECIP_SF_DIV = 0x001, /* Use divide estimate */
215 RECIP_DF_DIV = 0x002,
216 RECIP_V4SF_DIV = 0x004,
217 RECIP_V2DF_DIV = 0x008,
219 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
220 RECIP_DF_RSQRT = 0x020,
221 RECIP_V4SF_RSQRT = 0x040,
222 RECIP_V2DF_RSQRT = 0x080,
224 /* Various combination of flags for -mrecip=xxx. */
225 RECIP_NONE = 0,
226 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
227 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
228 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
230 RECIP_HIGH_PRECISION = RECIP_ALL,
232 /* On low precision machines like the power5, don't enable double precision
233 reciprocal square root estimate, since it isn't accurate enough. */
234 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
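/* Worked example (editor's note): with the bit values above,
   RECIP_ALL == 0xff, and
   RECIP_LOW_PRECISION == 0xff & ~(0x020 | 0x080) == 0x5f,
   i.e. everything except the two double-precision rsqrt estimates.  */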
237 /* -mrecip options. */
238 static struct
240 const char *string; /* option name */
241 unsigned int mask; /* mask bits to set */
242 } recip_options[] = {
243 { "all", RECIP_ALL },
244 { "none", RECIP_NONE },
245 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
246 | RECIP_V2DF_DIV) },
247 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
248 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
249 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
250 | RECIP_V2DF_RSQRT) },
251 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
252 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
255 /* On PowerPC, we have a limited number of target clones that we care about
256 which means we can use an array to hold the options, rather than having more
257 elaborate data structures to identify each possible variation. Order the
258 clones from the default to the highest ISA. */
259 enum {
260 CLONE_DEFAULT = 0, /* default clone. */
261 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
262 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
263 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
264 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
265 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
266 CLONE_MAX
269 /* Map compiler ISA bits into HWCAP names. */
270 struct clone_map {
271 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
272 const char *name; /* name to use in __builtin_cpu_supports. */
275 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
276 { 0, "" }, /* Default options. */
277 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
278 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
279 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
280 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
281 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
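/* Usage sketch (editor's note): a function built with a hypothetical
       __attribute__ ((target_clones ("cpu=power9,default")))
   gets one clone per entry used, and the ifunc resolver picks the highest
   clone whose name in this table satisfies __builtin_cpu_supports, e.g.
   __builtin_cpu_supports ("arch_3_00") for the power9 clone.  */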
285 /* Newer LIBCs explicitly export this symbol to declare that they provide
286 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
287 reference to this symbol whenever we expand a CPU builtin, so that
288 we never link against an old LIBC. */
289 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
291 /* True if we have expanded a CPU builtin. */
292 bool cpu_builtin_p = false;
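/* Consequence sketch (editor's note): compiling, say,
       if (__builtin_cpu_supports ("arch_3_1")) ...
   sets cpu_builtin_p and emits a reference to
   __parse_hwcap_and_convert_at_platform, so the link fails against a libc
   too old to fill in the TCB fields the builtin reads.  */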
294 /* Pointer to function (in rs6000-c.c) that can define or undefine target
295 macros that have changed. Languages that don't support the preprocessor
296 don't link in rs6000-c.c, so we can't call it directly. */
297 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
299 /* Simplify register classes into simpler classifications. We assume
300 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
301 check for standard register classes (gpr/floating/altivec/vsx) and
302 floating/vector classes (float/altivec/vsx). */
304 enum rs6000_reg_type {
305 NO_REG_TYPE,
306 PSEUDO_REG_TYPE,
307 GPR_REG_TYPE,
308 VSX_REG_TYPE,
309 ALTIVEC_REG_TYPE,
310 FPR_REG_TYPE,
311 SPR_REG_TYPE,
312 CR_REG_TYPE
315 /* Map register class to register type. */
316 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
318 /* First/last register type for the 'normal' register types (i.e. general
319 purpose, floating point, altivec, and VSX registers). */
320 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
322 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
325 /* Register classes we care about in secondary reload or when deciding whether
326 an address is legitimate. We only need to worry about GPR, FPR, and Altivec
327 registers here, along with an ANY field that is the OR of the 3 register classes. */
329 enum rs6000_reload_reg_type {
330 RELOAD_REG_GPR, /* General purpose registers. */
331 RELOAD_REG_FPR, /* Traditional floating point regs. */
332 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
333 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
334 N_RELOAD_REG
337 /* For setting up register classes, loop through the 3 register classes mapping
338 into real registers, and skip the ANY class, which is just an OR of the
339 bits. */
340 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
341 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
343 /* Map reload register type to a register in the register class. */
344 struct reload_reg_map_type {
345 const char *name; /* Register class name. */
346 int reg; /* Register in the register class. */
349 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
350 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
351 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
352 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
353 { "Any", -1 }, /* RELOAD_REG_ANY. */
356 /* Mask bits for each register class, indexed per mode. Historically the
357 compiler has been more restrictive about which types can do PRE_MODIFY instead
358 of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
359 typedef unsigned char addr_mask_type;
361 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
362 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
363 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
364 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
365 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
366 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
367 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
368 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
370 /* Per-mode reload insns and, for each register type, the mask of valid addressing modes. */
371 struct rs6000_reg_addr {
372 enum insn_code reload_load; /* INSN to reload for loading. */
373 enum insn_code reload_store; /* INSN to reload for storing. */
374 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
375 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
376 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
377 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
378 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
381 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
383 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
384 static inline bool
385 mode_supports_pre_incdec_p (machine_mode mode)
387 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
388 != 0);
391 /* Helper function to say whether a mode supports PRE_MODIFY. */
392 static inline bool
393 mode_supports_pre_modify_p (machine_mode mode)
395 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
396 != 0);
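/* For reference (editor's note): PRE_INC/PRE_DEC imply a step equal to the
   access size, as in (mem (pre_inc (reg))), while PRE_MODIFY carries an
   explicit update expression, e.g.
       (mem (pre_modify (reg r9) (plus (reg r9) (const_int 16))))
   which maps onto update-form instructions such as stwu or stdux.  */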
399 /* Return true if we have D-form addressing in altivec registers. */
400 static inline bool
401 mode_supports_vmx_dform (machine_mode mode)
403 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
406 /* Return true if we have D-form addressing in VSX registers. This addressing
407 is more limited than normal d-form addressing in that the offset must be
408 aligned on a 16-byte boundary. */
409 static inline bool
410 mode_supports_dq_form (machine_mode mode)
412 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
413 != 0);
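/* Example (editor's note): the ISA 3.0 DQ-form loads and stores (lxv/stxv)
   encode the displacement in 16-byte units, so an address like 16(r3) is
   representable while 8(r3) is not and must fall back to an indexed
   (X-form) access such as lxvx.  */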
416 /* Given that there exists at least one variable that is set (produced)
417 by OUT_INSN and read (consumed) by IN_INSN, return true iff
418 IN_INSN represents one or more memory store operations and none of
419 the variables set by OUT_INSN is used by IN_INSN as the address of a
420 store operation. If either IN_INSN or OUT_INSN does not represent
421 a "single" RTL SET expression (as loosely defined by the
422 implementation of the single_set function) or a PARALLEL with only
423 SETs, CLOBBERs, and USEs inside, this function returns false.
425 This rs6000-specific version of store_data_bypass_p checks for
426 certain conditions that result in assertion failures (and internal
427 compiler errors) in the generic store_data_bypass_p function and
428 returns false rather than calling store_data_bypass_p if one of the
429 problematic conditions is detected. */
431 int
432 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
434 rtx out_set, in_set;
435 rtx out_pat, in_pat;
436 rtx out_exp, in_exp;
437 int i, j;
439 in_set = single_set (in_insn);
440 if (in_set)
442 if (MEM_P (SET_DEST (in_set)))
444 out_set = single_set (out_insn);
445 if (!out_set)
447 out_pat = PATTERN (out_insn);
448 if (GET_CODE (out_pat) == PARALLEL)
450 for (i = 0; i < XVECLEN (out_pat, 0); i++)
452 out_exp = XVECEXP (out_pat, 0, i);
453 if ((GET_CODE (out_exp) == CLOBBER)
454 || (GET_CODE (out_exp) == USE))
455 continue;
456 else if (GET_CODE (out_exp) != SET)
457 return false;
463 else
465 in_pat = PATTERN (in_insn);
466 if (GET_CODE (in_pat) != PARALLEL)
467 return false;
469 for (i = 0; i < XVECLEN (in_pat, 0); i++)
471 in_exp = XVECEXP (in_pat, 0, i);
472 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
473 continue;
474 else if (GET_CODE (in_exp) != SET)
475 return false;
477 if (MEM_P (SET_DEST (in_exp)))
479 out_set = single_set (out_insn);
480 if (!out_set)
482 out_pat = PATTERN (out_insn);
483 if (GET_CODE (out_pat) != PARALLEL)
484 return false;
485 for (j = 0; j < XVECLEN (out_pat, 0); j++)
487 out_exp = XVECEXP (out_pat, 0, j);
488 if ((GET_CODE (out_exp) == CLOBBER)
489 || (GET_CODE (out_exp) == USE))
490 continue;
491 else if (GET_CODE (out_exp) != SET)
492 return false;
498 return store_data_bypass_p (out_insn, in_insn);
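/* Illustrative shape (editor's note, hypothetical RTL): an OUT_INSN such as
   a load with update,
       (parallel [(set (reg r10) (mem (reg r9)))
                  (set (reg r9) (plus (reg r9) (const_int 8)))])
   is the kind of multi-SET pattern screened SET by SET above before
   delegating to the generic store_data_bypass_p.  */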
502 /* Processor costs (relative to an add) */
504 const struct processor_costs *rs6000_cost;
506 /* Instruction size costs on 32-bit processors. */
507 static const
508 struct processor_costs size32_cost = {
509 COSTS_N_INSNS (1), /* mulsi */
510 COSTS_N_INSNS (1), /* mulsi_const */
511 COSTS_N_INSNS (1), /* mulsi_const9 */
512 COSTS_N_INSNS (1), /* muldi */
513 COSTS_N_INSNS (1), /* divsi */
514 COSTS_N_INSNS (1), /* divdi */
515 COSTS_N_INSNS (1), /* fp */
516 COSTS_N_INSNS (1), /* dmul */
517 COSTS_N_INSNS (1), /* sdiv */
518 COSTS_N_INSNS (1), /* ddiv */
519 32, /* cache line size */
520 0, /* l1 cache */
521 0, /* l2 cache */
522 0, /* streams */
523 0, /* SF->DF convert */
526 /* Instruction size costs on 64-bit processors. */
527 static const
528 struct processor_costs size64_cost = {
529 COSTS_N_INSNS (1), /* mulsi */
530 COSTS_N_INSNS (1), /* mulsi_const */
531 COSTS_N_INSNS (1), /* mulsi_const9 */
532 COSTS_N_INSNS (1), /* muldi */
533 COSTS_N_INSNS (1), /* divsi */
534 COSTS_N_INSNS (1), /* divdi */
535 COSTS_N_INSNS (1), /* fp */
536 COSTS_N_INSNS (1), /* dmul */
537 COSTS_N_INSNS (1), /* sdiv */
538 COSTS_N_INSNS (1), /* ddiv */
539 128, /* cache line size */
540 0, /* l1 cache */
541 0, /* l2 cache */
542 0, /* streams */
543 0, /* SF->DF convert */
546 /* Instruction costs on RS64A processors. */
547 static const
548 struct processor_costs rs64a_cost = {
549 COSTS_N_INSNS (20), /* mulsi */
550 COSTS_N_INSNS (12), /* mulsi_const */
551 COSTS_N_INSNS (8), /* mulsi_const9 */
552 COSTS_N_INSNS (34), /* muldi */
553 COSTS_N_INSNS (65), /* divsi */
554 COSTS_N_INSNS (67), /* divdi */
555 COSTS_N_INSNS (4), /* fp */
556 COSTS_N_INSNS (4), /* dmul */
557 COSTS_N_INSNS (31), /* sdiv */
558 COSTS_N_INSNS (31), /* ddiv */
559 128, /* cache line size */
560 128, /* l1 cache */
561 2048, /* l2 cache */
562 1, /* streams */
563 0, /* SF->DF convert */
566 /* Instruction costs on MPCCORE processors. */
567 static const
568 struct processor_costs mpccore_cost = {
569 COSTS_N_INSNS (2), /* mulsi */
570 COSTS_N_INSNS (2), /* mulsi_const */
571 COSTS_N_INSNS (2), /* mulsi_const9 */
572 COSTS_N_INSNS (2), /* muldi */
573 COSTS_N_INSNS (6), /* divsi */
574 COSTS_N_INSNS (6), /* divdi */
575 COSTS_N_INSNS (4), /* fp */
576 COSTS_N_INSNS (5), /* dmul */
577 COSTS_N_INSNS (10), /* sdiv */
578 COSTS_N_INSNS (17), /* ddiv */
579 32, /* cache line size */
580 4, /* l1 cache */
581 16, /* l2 cache */
582 1, /* streams */
583 0, /* SF->DF convert */
586 /* Instruction costs on PPC403 processors. */
587 static const
588 struct processor_costs ppc403_cost = {
589 COSTS_N_INSNS (4), /* mulsi */
590 COSTS_N_INSNS (4), /* mulsi_const */
591 COSTS_N_INSNS (4), /* mulsi_const9 */
592 COSTS_N_INSNS (4), /* muldi */
593 COSTS_N_INSNS (33), /* divsi */
594 COSTS_N_INSNS (33), /* divdi */
595 COSTS_N_INSNS (11), /* fp */
596 COSTS_N_INSNS (11), /* dmul */
597 COSTS_N_INSNS (11), /* sdiv */
598 COSTS_N_INSNS (11), /* ddiv */
599 32, /* cache line size */
600 4, /* l1 cache */
601 16, /* l2 cache */
602 1, /* streams */
603 0, /* SF->DF convert */
606 /* Instruction costs on PPC405 processors. */
607 static const
608 struct processor_costs ppc405_cost = {
609 COSTS_N_INSNS (5), /* mulsi */
610 COSTS_N_INSNS (4), /* mulsi_const */
611 COSTS_N_INSNS (3), /* mulsi_const9 */
612 COSTS_N_INSNS (5), /* muldi */
613 COSTS_N_INSNS (35), /* divsi */
614 COSTS_N_INSNS (35), /* divdi */
615 COSTS_N_INSNS (11), /* fp */
616 COSTS_N_INSNS (11), /* dmul */
617 COSTS_N_INSNS (11), /* sdiv */
618 COSTS_N_INSNS (11), /* ddiv */
619 32, /* cache line size */
620 16, /* l1 cache */
621 128, /* l2 cache */
622 1, /* streams */
623 0, /* SF->DF convert */
626 /* Instruction costs on PPC440 processors. */
627 static const
628 struct processor_costs ppc440_cost = {
629 COSTS_N_INSNS (3), /* mulsi */
630 COSTS_N_INSNS (2), /* mulsi_const */
631 COSTS_N_INSNS (2), /* mulsi_const9 */
632 COSTS_N_INSNS (3), /* muldi */
633 COSTS_N_INSNS (34), /* divsi */
634 COSTS_N_INSNS (34), /* divdi */
635 COSTS_N_INSNS (5), /* fp */
636 COSTS_N_INSNS (5), /* dmul */
637 COSTS_N_INSNS (19), /* sdiv */
638 COSTS_N_INSNS (33), /* ddiv */
639 32, /* cache line size */
640 32, /* l1 cache */
641 256, /* l2 cache */
642 1, /* streams */
643 0, /* SF->DF convert */
646 /* Instruction costs on PPC476 processors. */
647 static const
648 struct processor_costs ppc476_cost = {
649 COSTS_N_INSNS (4), /* mulsi */
650 COSTS_N_INSNS (4), /* mulsi_const */
651 COSTS_N_INSNS (4), /* mulsi_const9 */
652 COSTS_N_INSNS (4), /* muldi */
653 COSTS_N_INSNS (11), /* divsi */
654 COSTS_N_INSNS (11), /* divdi */
655 COSTS_N_INSNS (6), /* fp */
656 COSTS_N_INSNS (6), /* dmul */
657 COSTS_N_INSNS (19), /* sdiv */
658 COSTS_N_INSNS (33), /* ddiv */
659 32, /* l1 cache line size */
660 32, /* l1 cache */
661 512, /* l2 cache */
662 1, /* streams */
663 0, /* SF->DF convert */
666 /* Instruction costs on PPC601 processors. */
667 static const
668 struct processor_costs ppc601_cost = {
669 COSTS_N_INSNS (5), /* mulsi */
670 COSTS_N_INSNS (5), /* mulsi_const */
671 COSTS_N_INSNS (5), /* mulsi_const9 */
672 COSTS_N_INSNS (5), /* muldi */
673 COSTS_N_INSNS (36), /* divsi */
674 COSTS_N_INSNS (36), /* divdi */
675 COSTS_N_INSNS (4), /* fp */
676 COSTS_N_INSNS (5), /* dmul */
677 COSTS_N_INSNS (17), /* sdiv */
678 COSTS_N_INSNS (31), /* ddiv */
679 32, /* cache line size */
680 32, /* l1 cache */
681 256, /* l2 cache */
682 1, /* streams */
683 0, /* SF->DF convert */
686 /* Instruction costs on PPC603 processors. */
687 static const
688 struct processor_costs ppc603_cost = {
689 COSTS_N_INSNS (5), /* mulsi */
690 COSTS_N_INSNS (3), /* mulsi_const */
691 COSTS_N_INSNS (2), /* mulsi_const9 */
692 COSTS_N_INSNS (5), /* muldi */
693 COSTS_N_INSNS (37), /* divsi */
694 COSTS_N_INSNS (37), /* divdi */
695 COSTS_N_INSNS (3), /* fp */
696 COSTS_N_INSNS (4), /* dmul */
697 COSTS_N_INSNS (18), /* sdiv */
698 COSTS_N_INSNS (33), /* ddiv */
699 32, /* cache line size */
700 8, /* l1 cache */
701 64, /* l2 cache */
702 1, /* streams */
703 0, /* SF->DF convert */
706 /* Instruction costs on PPC604 processors. */
707 static const
708 struct processor_costs ppc604_cost = {
709 COSTS_N_INSNS (4), /* mulsi */
710 COSTS_N_INSNS (4), /* mulsi_const */
711 COSTS_N_INSNS (4), /* mulsi_const9 */
712 COSTS_N_INSNS (4), /* muldi */
713 COSTS_N_INSNS (20), /* divsi */
714 COSTS_N_INSNS (20), /* divdi */
715 COSTS_N_INSNS (3), /* fp */
716 COSTS_N_INSNS (3), /* dmul */
717 COSTS_N_INSNS (18), /* sdiv */
718 COSTS_N_INSNS (32), /* ddiv */
719 32, /* cache line size */
720 16, /* l1 cache */
721 512, /* l2 cache */
722 1, /* streams */
723 0, /* SF->DF convert */
726 /* Instruction costs on PPC604e processors. */
727 static const
728 struct processor_costs ppc604e_cost = {
729 COSTS_N_INSNS (2), /* mulsi */
730 COSTS_N_INSNS (2), /* mulsi_const */
731 COSTS_N_INSNS (2), /* mulsi_const9 */
732 COSTS_N_INSNS (2), /* muldi */
733 COSTS_N_INSNS (20), /* divsi */
734 COSTS_N_INSNS (20), /* divdi */
735 COSTS_N_INSNS (3), /* fp */
736 COSTS_N_INSNS (3), /* dmul */
737 COSTS_N_INSNS (18), /* sdiv */
738 COSTS_N_INSNS (32), /* ddiv */
739 32, /* cache line size */
740 32, /* l1 cache */
741 1024, /* l2 cache */
742 1, /* streams */
743 0, /* SF->DF convert */
746 /* Instruction costs on PPC620 processors. */
747 static const
748 struct processor_costs ppc620_cost = {
749 COSTS_N_INSNS (5), /* mulsi */
750 COSTS_N_INSNS (4), /* mulsi_const */
751 COSTS_N_INSNS (3), /* mulsi_const9 */
752 COSTS_N_INSNS (7), /* muldi */
753 COSTS_N_INSNS (21), /* divsi */
754 COSTS_N_INSNS (37), /* divdi */
755 COSTS_N_INSNS (3), /* fp */
756 COSTS_N_INSNS (3), /* dmul */
757 COSTS_N_INSNS (18), /* sdiv */
758 COSTS_N_INSNS (32), /* ddiv */
759 128, /* cache line size */
760 32, /* l1 cache */
761 1024, /* l2 cache */
762 1, /* streams */
763 0, /* SF->DF convert */
766 /* Instruction costs on PPC630 processors. */
767 static const
768 struct processor_costs ppc630_cost = {
769 COSTS_N_INSNS (5), /* mulsi */
770 COSTS_N_INSNS (4), /* mulsi_const */
771 COSTS_N_INSNS (3), /* mulsi_const9 */
772 COSTS_N_INSNS (7), /* muldi */
773 COSTS_N_INSNS (21), /* divsi */
774 COSTS_N_INSNS (37), /* divdi */
775 COSTS_N_INSNS (3), /* fp */
776 COSTS_N_INSNS (3), /* dmul */
777 COSTS_N_INSNS (17), /* sdiv */
778 COSTS_N_INSNS (21), /* ddiv */
779 128, /* cache line size */
780 64, /* l1 cache */
781 1024, /* l2 cache */
782 1, /* streams */
783 0, /* SF->DF convert */
786 /* Instruction costs on Cell processor. */
787 /* COSTS_N_INSNS (1) ~ one add. */
788 static const
789 struct processor_costs ppccell_cost = {
790 COSTS_N_INSNS (9/2)+2, /* mulsi */
791 COSTS_N_INSNS (6/2), /* mulsi_const */
792 COSTS_N_INSNS (6/2), /* mulsi_const9 */
793 COSTS_N_INSNS (15/2)+2, /* muldi */
794 COSTS_N_INSNS (38/2), /* divsi */
795 COSTS_N_INSNS (70/2), /* divdi */
796 COSTS_N_INSNS (10/2), /* fp */
797 COSTS_N_INSNS (10/2), /* dmul */
798 COSTS_N_INSNS (74/2), /* sdiv */
799 COSTS_N_INSNS (74/2), /* ddiv */
800 128, /* cache line size */
801 32, /* l1 cache */
802 512, /* l2 cache */
803 6, /* streams */
804 0, /* SF->DF convert */
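/* Arithmetic note (editor's note): COSTS_N_INSNS (N) expands to N * 4, and
   the divisions above are integer, so e.g. the Cell mulsi entry
   COSTS_N_INSNS (9/2)+2 == 4*4 + 2 == 18, approximating a 4.5-insn latency
   relative to an add.  */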
807 /* Instruction costs on PPC750 and PPC7400 processors. */
808 static const
809 struct processor_costs ppc750_cost = {
810 COSTS_N_INSNS (5), /* mulsi */
811 COSTS_N_INSNS (3), /* mulsi_const */
812 COSTS_N_INSNS (2), /* mulsi_const9 */
813 COSTS_N_INSNS (5), /* muldi */
814 COSTS_N_INSNS (17), /* divsi */
815 COSTS_N_INSNS (17), /* divdi */
816 COSTS_N_INSNS (3), /* fp */
817 COSTS_N_INSNS (3), /* dmul */
818 COSTS_N_INSNS (17), /* sdiv */
819 COSTS_N_INSNS (31), /* ddiv */
820 32, /* cache line size */
821 32, /* l1 cache */
822 512, /* l2 cache */
823 1, /* streams */
824 0, /* SF->DF convert */
827 /* Instruction costs on PPC7450 processors. */
828 static const
829 struct processor_costs ppc7450_cost = {
830 COSTS_N_INSNS (4), /* mulsi */
831 COSTS_N_INSNS (3), /* mulsi_const */
832 COSTS_N_INSNS (3), /* mulsi_const9 */
833 COSTS_N_INSNS (4), /* muldi */
834 COSTS_N_INSNS (23), /* divsi */
835 COSTS_N_INSNS (23), /* divdi */
836 COSTS_N_INSNS (5), /* fp */
837 COSTS_N_INSNS (5), /* dmul */
838 COSTS_N_INSNS (21), /* sdiv */
839 COSTS_N_INSNS (35), /* ddiv */
840 32, /* cache line size */
841 32, /* l1 cache */
842 1024, /* l2 cache */
843 1, /* streams */
844 0, /* SF->DF convert */
847 /* Instruction costs on PPC8540 processors. */
848 static const
849 struct processor_costs ppc8540_cost = {
850 COSTS_N_INSNS (4), /* mulsi */
851 COSTS_N_INSNS (4), /* mulsi_const */
852 COSTS_N_INSNS (4), /* mulsi_const9 */
853 COSTS_N_INSNS (4), /* muldi */
854 COSTS_N_INSNS (19), /* divsi */
855 COSTS_N_INSNS (19), /* divdi */
856 COSTS_N_INSNS (4), /* fp */
857 COSTS_N_INSNS (4), /* dmul */
858 COSTS_N_INSNS (29), /* sdiv */
859 COSTS_N_INSNS (29), /* ddiv */
860 32, /* cache line size */
861 32, /* l1 cache */
862 256, /* l2 cache */
863 1, /* prefetch streams */
864 0, /* SF->DF convert */
867 /* Instruction costs on E300C2 and E300C3 cores. */
868 static const
869 struct processor_costs ppce300c2c3_cost = {
870 COSTS_N_INSNS (4), /* mulsi */
871 COSTS_N_INSNS (4), /* mulsi_const */
872 COSTS_N_INSNS (4), /* mulsi_const9 */
873 COSTS_N_INSNS (4), /* muldi */
874 COSTS_N_INSNS (19), /* divsi */
875 COSTS_N_INSNS (19), /* divdi */
876 COSTS_N_INSNS (3), /* fp */
877 COSTS_N_INSNS (4), /* dmul */
878 COSTS_N_INSNS (18), /* sdiv */
879 COSTS_N_INSNS (33), /* ddiv */
880 32, /* cache line size */
881 16, /* l1 cache */
882 16, /* l2 cache */
883 1, /* prefetch streams */
884 0, /* SF->DF convert */
887 /* Instruction costs on PPCE500MC processors. */
888 static const
889 struct processor_costs ppce500mc_cost = {
890 COSTS_N_INSNS (4), /* mulsi */
891 COSTS_N_INSNS (4), /* mulsi_const */
892 COSTS_N_INSNS (4), /* mulsi_const9 */
893 COSTS_N_INSNS (4), /* muldi */
894 COSTS_N_INSNS (14), /* divsi */
895 COSTS_N_INSNS (14), /* divdi */
896 COSTS_N_INSNS (8), /* fp */
897 COSTS_N_INSNS (10), /* dmul */
898 COSTS_N_INSNS (36), /* sdiv */
899 COSTS_N_INSNS (66), /* ddiv */
900 64, /* cache line size */
901 32, /* l1 cache */
902 128, /* l2 cache */
903 1, /* prefetch streams */
904 0, /* SF->DF convert */
907 /* Instruction costs on PPCE500MC64 processors. */
908 static const
909 struct processor_costs ppce500mc64_cost = {
910 COSTS_N_INSNS (4), /* mulsi */
911 COSTS_N_INSNS (4), /* mulsi_const */
912 COSTS_N_INSNS (4), /* mulsi_const9 */
913 COSTS_N_INSNS (4), /* muldi */
914 COSTS_N_INSNS (14), /* divsi */
915 COSTS_N_INSNS (14), /* divdi */
916 COSTS_N_INSNS (4), /* fp */
917 COSTS_N_INSNS (10), /* dmul */
918 COSTS_N_INSNS (36), /* sdiv */
919 COSTS_N_INSNS (66), /* ddiv */
920 64, /* cache line size */
921 32, /* l1 cache */
922 128, /* l2 cache */
923 1, /* prefetch streams */
924 0, /* SF->DF convert */
927 /* Instruction costs on PPCE5500 processors. */
928 static const
929 struct processor_costs ppce5500_cost = {
930 COSTS_N_INSNS (5), /* mulsi */
931 COSTS_N_INSNS (5), /* mulsi_const */
932 COSTS_N_INSNS (4), /* mulsi_const9 */
933 COSTS_N_INSNS (5), /* muldi */
934 COSTS_N_INSNS (14), /* divsi */
935 COSTS_N_INSNS (14), /* divdi */
936 COSTS_N_INSNS (7), /* fp */
937 COSTS_N_INSNS (10), /* dmul */
938 COSTS_N_INSNS (36), /* sdiv */
939 COSTS_N_INSNS (66), /* ddiv */
940 64, /* cache line size */
941 32, /* l1 cache */
942 128, /* l2 cache */
943 1, /* prefetch streams */
944 0, /* SF->DF convert */
947 /* Instruction costs on PPCE6500 processors. */
948 static const
949 struct processor_costs ppce6500_cost = {
950 COSTS_N_INSNS (5), /* mulsi */
951 COSTS_N_INSNS (5), /* mulsi_const */
952 COSTS_N_INSNS (4), /* mulsi_const9 */
953 COSTS_N_INSNS (5), /* muldi */
954 COSTS_N_INSNS (14), /* divsi */
955 COSTS_N_INSNS (14), /* divdi */
956 COSTS_N_INSNS (7), /* fp */
957 COSTS_N_INSNS (10), /* dmul */
958 COSTS_N_INSNS (36), /* sdiv */
959 COSTS_N_INSNS (66), /* ddiv */
960 64, /* cache line size */
961 32, /* l1 cache */
962 128, /* l2 cache */
963 1, /* prefetch streams */
964 0, /* SF->DF convert */
967 /* Instruction costs on AppliedMicro Titan processors. */
968 static const
969 struct processor_costs titan_cost = {
970 COSTS_N_INSNS (5), /* mulsi */
971 COSTS_N_INSNS (5), /* mulsi_const */
972 COSTS_N_INSNS (5), /* mulsi_const9 */
973 COSTS_N_INSNS (5), /* muldi */
974 COSTS_N_INSNS (18), /* divsi */
975 COSTS_N_INSNS (18), /* divdi */
976 COSTS_N_INSNS (10), /* fp */
977 COSTS_N_INSNS (10), /* dmul */
978 COSTS_N_INSNS (46), /* sdiv */
979 COSTS_N_INSNS (72), /* ddiv */
980 32, /* cache line size */
981 32, /* l1 cache */
982 512, /* l2 cache */
983 1, /* prefetch streams */
984 0, /* SF->DF convert */
987 /* Instruction costs on POWER4 and POWER5 processors. */
988 static const
989 struct processor_costs power4_cost = {
990 COSTS_N_INSNS (3), /* mulsi */
991 COSTS_N_INSNS (2), /* mulsi_const */
992 COSTS_N_INSNS (2), /* mulsi_const9 */
993 COSTS_N_INSNS (4), /* muldi */
994 COSTS_N_INSNS (18), /* divsi */
995 COSTS_N_INSNS (34), /* divdi */
996 COSTS_N_INSNS (3), /* fp */
997 COSTS_N_INSNS (3), /* dmul */
998 COSTS_N_INSNS (17), /* sdiv */
999 COSTS_N_INSNS (17), /* ddiv */
1000 128, /* cache line size */
1001 32, /* l1 cache */
1002 1024, /* l2 cache */
1003 8, /* prefetch streams */
1004 0, /* SF->DF convert */
1007 /* Instruction costs on POWER6 processors. */
1008 static const
1009 struct processor_costs power6_cost = {
1010 COSTS_N_INSNS (8), /* mulsi */
1011 COSTS_N_INSNS (8), /* mulsi_const */
1012 COSTS_N_INSNS (8), /* mulsi_const9 */
1013 COSTS_N_INSNS (8), /* muldi */
1014 COSTS_N_INSNS (22), /* divsi */
1015 COSTS_N_INSNS (28), /* divdi */
1016 COSTS_N_INSNS (3), /* fp */
1017 COSTS_N_INSNS (3), /* dmul */
1018 COSTS_N_INSNS (13), /* sdiv */
1019 COSTS_N_INSNS (16), /* ddiv */
1020 128, /* cache line size */
1021 64, /* l1 cache */
1022 2048, /* l2 cache */
1023 16, /* prefetch streams */
1024 0, /* SF->DF convert */
1027 /* Instruction costs on POWER7 processors. */
1028 static const
1029 struct processor_costs power7_cost = {
1030 COSTS_N_INSNS (2), /* mulsi */
1031 COSTS_N_INSNS (2), /* mulsi_const */
1032 COSTS_N_INSNS (2), /* mulsi_const9 */
1033 COSTS_N_INSNS (2), /* muldi */
1034 COSTS_N_INSNS (18), /* divsi */
1035 COSTS_N_INSNS (34), /* divdi */
1036 COSTS_N_INSNS (3), /* fp */
1037 COSTS_N_INSNS (3), /* dmul */
1038 COSTS_N_INSNS (13), /* sdiv */
1039 COSTS_N_INSNS (16), /* ddiv */
1040 128, /* cache line size */
1041 32, /* l1 cache */
1042 256, /* l2 cache */
1043 12, /* prefetch streams */
1044 COSTS_N_INSNS (3), /* SF->DF convert */
1047 /* Instruction costs on POWER8 processors. */
1048 static const
1049 struct processor_costs power8_cost = {
1050 COSTS_N_INSNS (3), /* mulsi */
1051 COSTS_N_INSNS (3), /* mulsi_const */
1052 COSTS_N_INSNS (3), /* mulsi_const9 */
1053 COSTS_N_INSNS (3), /* muldi */
1054 COSTS_N_INSNS (19), /* divsi */
1055 COSTS_N_INSNS (35), /* divdi */
1056 COSTS_N_INSNS (3), /* fp */
1057 COSTS_N_INSNS (3), /* dmul */
1058 COSTS_N_INSNS (14), /* sdiv */
1059 COSTS_N_INSNS (17), /* ddiv */
1060 128, /* cache line size */
1061 32, /* l1 cache */
1062 512, /* l2 cache */
1063 12, /* prefetch streams */
1064 COSTS_N_INSNS (3), /* SF->DF convert */
1067 /* Instruction costs on POWER9 processors. */
1068 static const
1069 struct processor_costs power9_cost = {
1070 COSTS_N_INSNS (3), /* mulsi */
1071 COSTS_N_INSNS (3), /* mulsi_const */
1072 COSTS_N_INSNS (3), /* mulsi_const9 */
1073 COSTS_N_INSNS (3), /* muldi */
1074 COSTS_N_INSNS (8), /* divsi */
1075 COSTS_N_INSNS (12), /* divdi */
1076 COSTS_N_INSNS (3), /* fp */
1077 COSTS_N_INSNS (3), /* dmul */
1078 COSTS_N_INSNS (13), /* sdiv */
1079 COSTS_N_INSNS (18), /* ddiv */
1080 128, /* cache line size */
1081 32, /* l1 cache */
1082 512, /* l2 cache */
1083 8, /* prefetch streams */
1084 COSTS_N_INSNS (3), /* SF->DF convert */
1087 /* Instruction costs on POWER10 processors. */
1088 static const
1089 struct processor_costs power10_cost = {
1090 COSTS_N_INSNS (2), /* mulsi */
1091 COSTS_N_INSNS (2), /* mulsi_const */
1092 COSTS_N_INSNS (2), /* mulsi_const9 */
1093 COSTS_N_INSNS (2), /* muldi */
1094 COSTS_N_INSNS (6), /* divsi */
1095 COSTS_N_INSNS (6), /* divdi */
1096 COSTS_N_INSNS (2), /* fp */
1097 COSTS_N_INSNS (2), /* dmul */
1098 COSTS_N_INSNS (11), /* sdiv */
1099 COSTS_N_INSNS (13), /* ddiv */
1100 128, /* cache line size */
1101 32, /* l1 cache */
1102 512, /* l2 cache */
1103 16, /* prefetch streams */
1104 COSTS_N_INSNS (2), /* SF->DF convert */
1107 /* Instruction costs on POWER A2 processors. */
1108 static const
1109 struct processor_costs ppca2_cost = {
1110 COSTS_N_INSNS (16), /* mulsi */
1111 COSTS_N_INSNS (16), /* mulsi_const */
1112 COSTS_N_INSNS (16), /* mulsi_const9 */
1113 COSTS_N_INSNS (16), /* muldi */
1114 COSTS_N_INSNS (22), /* divsi */
1115 COSTS_N_INSNS (28), /* divdi */
1116 COSTS_N_INSNS (3), /* fp */
1117 COSTS_N_INSNS (3), /* dmul */
1118 COSTS_N_INSNS (59), /* sdiv */
1119 COSTS_N_INSNS (72), /* ddiv */
1120 16, /* cache line size */
1121 16, /* l1 cache */
1122 2048, /* l2 cache */
1123 16, /* prefetch streams */
1124 0, /* SF->DF convert */
1127 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1128 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1131 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1132 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1133 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1134 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1135 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1136 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1137 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1138 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1139 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1140 bool);
1141 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1142 unsigned int);
1143 static bool is_microcoded_insn (rtx_insn *);
1144 static bool is_nonpipeline_insn (rtx_insn *);
1145 static bool is_cracked_insn (rtx_insn *);
1146 static bool is_load_insn (rtx, rtx *);
1147 static bool is_store_insn (rtx, rtx *);
1148 static bool set_to_load_agen (rtx_insn *, rtx_insn *);
1149 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1150 static bool insn_must_be_first_in_group (rtx_insn *);
1151 static bool insn_must_be_last_in_group (rtx_insn *);
1152 bool easy_vector_constant (rtx, machine_mode);
1153 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1154 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1155 #if TARGET_MACHO
1156 static tree get_prev_label (tree);
1157 #endif
1158 static bool rs6000_mode_dependent_address (const_rtx);
1159 static bool rs6000_debug_mode_dependent_address (const_rtx);
1160 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1161 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1162 machine_mode, rtx);
1163 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1164 machine_mode,
1165 rtx);
1166 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1167 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1168 enum reg_class);
1169 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1170 reg_class_t,
1171 reg_class_t);
1172 static bool rs6000_debug_can_change_mode_class (machine_mode,
1173 machine_mode,
1174 reg_class_t);
1176 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1177 = rs6000_mode_dependent_address;
1179 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1180 machine_mode, rtx)
1181 = rs6000_secondary_reload_class;
1183 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1184 = rs6000_preferred_reload_class;
1186 const int INSN_NOT_AVAILABLE = -1;
1188 static void rs6000_print_isa_options (FILE *, int, const char *,
1189 HOST_WIDE_INT);
1190 static void rs6000_print_builtin_options (FILE *, int, const char *,
1191 HOST_WIDE_INT);
1192 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1194 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1195 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1196 enum rs6000_reg_type,
1197 machine_mode,
1198 secondary_reload_info *,
1199 bool);
1200 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1202 /* Hash table stuff for keeping track of TOC entries. */
1204 struct GTY((for_user)) toc_hash_struct
1206 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1207 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1208 rtx key;
1209 machine_mode key_mode;
1210 int labelno;
1213 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1215 static hashval_t hash (toc_hash_struct *);
1216 static bool equal (toc_hash_struct *, toc_hash_struct *);
1219 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1223 /* Default register names. */
1224 char rs6000_reg_names[][8] =
1226 /* GPRs */
1227 "0", "1", "2", "3", "4", "5", "6", "7",
1228 "8", "9", "10", "11", "12", "13", "14", "15",
1229 "16", "17", "18", "19", "20", "21", "22", "23",
1230 "24", "25", "26", "27", "28", "29", "30", "31",
1231 /* FPRs */
1232 "0", "1", "2", "3", "4", "5", "6", "7",
1233 "8", "9", "10", "11", "12", "13", "14", "15",
1234 "16", "17", "18", "19", "20", "21", "22", "23",
1235 "24", "25", "26", "27", "28", "29", "30", "31",
1236 /* VRs */
1237 "0", "1", "2", "3", "4", "5", "6", "7",
1238 "8", "9", "10", "11", "12", "13", "14", "15",
1239 "16", "17", "18", "19", "20", "21", "22", "23",
1240 "24", "25", "26", "27", "28", "29", "30", "31",
1241 /* lr ctr ca ap */
1242 "lr", "ctr", "ca", "ap",
1243 /* cr0..cr7 */
1244 "0", "1", "2", "3", "4", "5", "6", "7",
1245 /* vrsave vscr sfp */
1246 "vrsave", "vscr", "sfp",
1249 #ifdef TARGET_REGNAMES
1250 static const char alt_reg_names[][8] =
1252 /* GPRs */
1253 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1254 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1255 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1256 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1257 /* FPRs */
1258 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1259 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1260 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1261 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1262 /* VRs */
1263 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1264 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1265 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1266 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1267 /* lr ctr ca ap */
1268 "lr", "ctr", "ca", "ap",
1269 /* cr0..cr7 */
1270 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1271 /* vrsave vscr sfp */
1272 "vrsave", "vscr", "sfp",
1274 #endif
1276 /* Table of valid machine attributes. */
1278 static const struct attribute_spec rs6000_attribute_table[] =
1280 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1281 affects_type_identity, handler, exclude } */
1282 { "altivec", 1, 1, false, true, false, false,
1283 rs6000_handle_altivec_attribute, NULL },
1284 { "longcall", 0, 0, false, true, true, false,
1285 rs6000_handle_longcall_attribute, NULL },
1286 { "shortcall", 0, 0, false, true, true, false,
1287 rs6000_handle_longcall_attribute, NULL },
1288 { "ms_struct", 0, 0, false, false, false, false,
1289 rs6000_handle_struct_attribute, NULL },
1290 { "gcc_struct", 0, 0, false, false, false, false,
1291 rs6000_handle_struct_attribute, NULL },
1292 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1293 SUBTARGET_ATTRIBUTE_TABLE,
1294 #endif
1295 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1298 #ifndef TARGET_PROFILE_KERNEL
1299 #define TARGET_PROFILE_KERNEL 0
1300 #endif
1302 /* Initialize the GCC target structure. */
1303 #undef TARGET_ATTRIBUTE_TABLE
1304 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1305 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1306 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1307 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1308 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1310 #undef TARGET_ASM_ALIGNED_DI_OP
1311 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1313 /* Default unaligned ops are only provided for ELF. Find the ops needed
1314 for non-ELF systems. */
1315 #ifndef OBJECT_FORMAT_ELF
1316 #if TARGET_XCOFF
1317 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1318 64-bit targets. */
1319 #undef TARGET_ASM_UNALIGNED_HI_OP
1320 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1321 #undef TARGET_ASM_UNALIGNED_SI_OP
1322 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1323 #undef TARGET_ASM_UNALIGNED_DI_OP
1324 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1325 #else
1326 /* For Darwin. */
1327 #undef TARGET_ASM_UNALIGNED_HI_OP
1328 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1329 #undef TARGET_ASM_UNALIGNED_SI_OP
1330 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1331 #undef TARGET_ASM_UNALIGNED_DI_OP
1332 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1333 #undef TARGET_ASM_ALIGNED_DI_OP
1334 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1335 #endif
1336 #endif
1338 /* This hook deals with fixups for relocatable code and DI-mode objects
1339 in 64-bit code. */
1340 #undef TARGET_ASM_INTEGER
1341 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1343 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1344 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1345 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1346 #endif
1348 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1349 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1350 rs6000_print_patchable_function_entry
1352 #undef TARGET_SET_UP_BY_PROLOGUE
1353 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1355 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1356 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1357 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1358 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1359 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1360 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1361 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1362 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1363 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1364 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1365 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1366 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1368 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1369 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1371 #undef TARGET_INTERNAL_ARG_POINTER
1372 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1374 #undef TARGET_HAVE_TLS
1375 #define TARGET_HAVE_TLS HAVE_AS_TLS
1377 #undef TARGET_CANNOT_FORCE_CONST_MEM
1378 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1380 #undef TARGET_DELEGITIMIZE_ADDRESS
1381 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1383 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1384 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1386 #undef TARGET_LEGITIMATE_COMBINED_INSN
1387 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1389 #undef TARGET_ASM_FUNCTION_PROLOGUE
1390 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1391 #undef TARGET_ASM_FUNCTION_EPILOGUE
1392 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1397 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1398 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1400 #undef TARGET_LEGITIMIZE_ADDRESS
1401 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1403 #undef TARGET_SCHED_VARIABLE_ISSUE
1404 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1406 #undef TARGET_SCHED_ISSUE_RATE
1407 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1408 #undef TARGET_SCHED_ADJUST_COST
1409 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1410 #undef TARGET_SCHED_ADJUST_PRIORITY
1411 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1412 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1413 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1414 #undef TARGET_SCHED_INIT
1415 #define TARGET_SCHED_INIT rs6000_sched_init
1416 #undef TARGET_SCHED_FINISH
1417 #define TARGET_SCHED_FINISH rs6000_sched_finish
1418 #undef TARGET_SCHED_REORDER
1419 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1420 #undef TARGET_SCHED_REORDER2
1421 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1423 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1424 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1426 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1427 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1429 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1430 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1431 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1432 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1433 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1434 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1435 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1436 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1438 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1439 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1441 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1442 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1443 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1444 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1445 rs6000_builtin_support_vector_misalignment
1446 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1447 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1448 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1449 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1450 rs6000_builtin_vectorization_cost
1451 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1452 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1453 rs6000_preferred_simd_mode
1454 #undef TARGET_VECTORIZE_INIT_COST
1455 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1456 #undef TARGET_VECTORIZE_ADD_STMT_COST
1457 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1458 #undef TARGET_VECTORIZE_FINISH_COST
1459 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1460 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1461 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1463 #undef TARGET_LOOP_UNROLL_ADJUST
1464 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1466 #undef TARGET_INIT_BUILTINS
1467 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1468 #undef TARGET_BUILTIN_DECL
1469 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1471 #undef TARGET_FOLD_BUILTIN
1472 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1473 #undef TARGET_GIMPLE_FOLD_BUILTIN
1474 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1476 #undef TARGET_EXPAND_BUILTIN
1477 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1479 #undef TARGET_MANGLE_TYPE
1480 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1482 #undef TARGET_INIT_LIBFUNCS
1483 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1485 #if TARGET_MACHO
1486 #undef TARGET_BINDS_LOCAL_P
1487 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1488 #endif
1490 #undef TARGET_MS_BITFIELD_LAYOUT_P
1491 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1493 #undef TARGET_ASM_OUTPUT_MI_THUNK
1494 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1499 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1500 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1502 #undef TARGET_REGISTER_MOVE_COST
1503 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1504 #undef TARGET_MEMORY_MOVE_COST
1505 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1506 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1507 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1508 rs6000_ira_change_pseudo_allocno_class
1509 #undef TARGET_CANNOT_COPY_INSN_P
1510 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1511 #undef TARGET_RTX_COSTS
1512 #define TARGET_RTX_COSTS rs6000_rtx_costs
1513 #undef TARGET_ADDRESS_COST
1514 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1515 #undef TARGET_INSN_COST
1516 #define TARGET_INSN_COST rs6000_insn_cost
1518 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1519 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1521 #undef TARGET_PROMOTE_FUNCTION_MODE
1522 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1524 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1525 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1527 #undef TARGET_RETURN_IN_MEMORY
1528 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1530 #undef TARGET_RETURN_IN_MSB
1531 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1533 #undef TARGET_SETUP_INCOMING_VARARGS
1534 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1536 /* Always strict argument naming on rs6000. */
1537 #undef TARGET_STRICT_ARGUMENT_NAMING
1538 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1539 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1540 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1541 #undef TARGET_SPLIT_COMPLEX_ARG
1542 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1543 #undef TARGET_MUST_PASS_IN_STACK
1544 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1545 #undef TARGET_PASS_BY_REFERENCE
1546 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1547 #undef TARGET_ARG_PARTIAL_BYTES
1548 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1549 #undef TARGET_FUNCTION_ARG_ADVANCE
1550 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1551 #undef TARGET_FUNCTION_ARG
1552 #define TARGET_FUNCTION_ARG rs6000_function_arg
1553 #undef TARGET_FUNCTION_ARG_PADDING
1554 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1555 #undef TARGET_FUNCTION_ARG_BOUNDARY
1556 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1558 #undef TARGET_BUILD_BUILTIN_VA_LIST
1559 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1561 #undef TARGET_EXPAND_BUILTIN_VA_START
1562 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1564 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1565 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1567 #undef TARGET_EH_RETURN_FILTER_MODE
1568 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1570 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1571 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1573 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1574 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1576 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1577 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1578 rs6000_libgcc_floating_mode_supported_p
1580 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1581 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1583 #undef TARGET_FLOATN_MODE
1584 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1586 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1587 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1589 #undef TARGET_MD_ASM_ADJUST
1590 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1592 #undef TARGET_OPTION_OVERRIDE
1593 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1595 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1596 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1597 rs6000_builtin_vectorized_function
1599 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1600 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1601 rs6000_builtin_md_vectorized_function
1603 #undef TARGET_STACK_PROTECT_GUARD
1604 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1606 #if !TARGET_MACHO
1607 #undef TARGET_STACK_PROTECT_FAIL
1608 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1609 #endif
1611 #ifdef HAVE_AS_TLS
1612 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1613 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1614 #endif
1616 /* Use a 32-bit anchor range. This leads to sequences like:
1618 addis tmp,anchor,high
1619 add dest,tmp,low
1621 where tmp itself acts as an anchor, and can be shared between
1622 accesses to the same 64k page. */
1623 #undef TARGET_MIN_ANCHOR_OFFSET
1624 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1625 #undef TARGET_MAX_ANCHOR_OFFSET
1626 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
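/* Illustrative sketch (not part of the port proper): how a 32-bit anchor
   offset is split for the addis/add sequence above.  Because the low 16 bits
   are added back sign-extended, the high half absorbs a carry when bit 15 of
   the offset is set:

     unsigned int off  = 0x12348000;
     short low         = (short) (off & 0xffff);    // -0x8000
     unsigned int high = (off - (int) low) >> 16;   // 0x1235
     // (high << 16) + low == 0x12348000

   tmp then holds anchor + (high << 16) and can be reused for any access
   whose offset shares the same high half.  */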
1627 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1628 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1629 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1630 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1632 #undef TARGET_BUILTIN_RECIPROCAL
1633 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1635 #undef TARGET_SECONDARY_RELOAD
1636 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1637 #undef TARGET_SECONDARY_MEMORY_NEEDED
1638 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1639 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1640 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1642 #undef TARGET_LEGITIMATE_ADDRESS_P
1643 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1645 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1646 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1648 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1649 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1651 #undef TARGET_CAN_ELIMINATE
1652 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1654 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1655 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1657 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1658 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1660 #undef TARGET_TRAMPOLINE_INIT
1661 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1663 #undef TARGET_FUNCTION_VALUE
1664 #define TARGET_FUNCTION_VALUE rs6000_function_value
1666 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1667 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1669 #undef TARGET_OPTION_SAVE
1670 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1672 #undef TARGET_OPTION_RESTORE
1673 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1675 #undef TARGET_OPTION_PRINT
1676 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1678 #undef TARGET_CAN_INLINE_P
1679 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1681 #undef TARGET_SET_CURRENT_FUNCTION
1682 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1684 #undef TARGET_LEGITIMATE_CONSTANT_P
1685 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1687 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1688 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1690 #undef TARGET_CAN_USE_DOLOOP_P
1691 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1693 #undef TARGET_PREDICT_DOLOOP_P
1694 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1696 #undef TARGET_HAVE_COUNT_REG_DECR_P
1697 #define TARGET_HAVE_COUNT_REG_DECR_P true
1699 /* 1000000000 is infinite cost in IVOPTs. */
1700 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1701 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1703 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1704 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1706 #undef TARGET_PREFERRED_DOLOOP_MODE
1707 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1709 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1710 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1712 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1713 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1714 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1715 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1716 #undef TARGET_UNWIND_WORD_MODE
1717 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1719 #undef TARGET_OFFLOAD_OPTIONS
1720 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1722 #undef TARGET_C_MODE_FOR_SUFFIX
1723 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1725 #undef TARGET_INVALID_BINARY_OP
1726 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1728 #undef TARGET_OPTAB_SUPPORTED_P
1729 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1731 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1732 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1734 #undef TARGET_COMPARE_VERSION_PRIORITY
1735 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1737 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1738 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1739 rs6000_generate_version_dispatcher_body
1741 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1742 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1743 rs6000_get_function_versions_dispatcher
1745 #undef TARGET_OPTION_FUNCTION_VERSIONS
1746 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1748 #undef TARGET_HARD_REGNO_NREGS
1749 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1750 #undef TARGET_HARD_REGNO_MODE_OK
1751 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1753 #undef TARGET_MODES_TIEABLE_P
1754 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1756 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1757 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1758 rs6000_hard_regno_call_part_clobbered
1760 #undef TARGET_SLOW_UNALIGNED_ACCESS
1761 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1763 #undef TARGET_CAN_CHANGE_MODE_CLASS
1764 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1766 #undef TARGET_CONSTANT_ALIGNMENT
1767 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1769 #undef TARGET_STARTING_FRAME_OFFSET
1770 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1772 #if TARGET_ELF && RS6000_WEAK
1773 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1774 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1775 #endif
1777 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1778 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1780 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1781 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1783 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1784 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1785 rs6000_cannot_substitute_mem_equiv_p
1787 #undef TARGET_INVALID_CONVERSION
1788 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1791 /* Processor table. */
1792 struct rs6000_ptt
1794 const char *const name; /* Canonical processor name. */
1795 const enum processor_type processor; /* Processor type enum value. */
1796 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1799 static struct rs6000_ptt const processor_target_table[] =
1801 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1802 #include "rs6000-cpus.def"
1803 #undef RS6000_CPU
1806 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1807 name is invalid. */
1809 static int
1810 rs6000_cpu_name_lookup (const char *name)
1812 size_t i;
1814 if (name != NULL)
1816 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1817 if (! strcmp (name, processor_target_table[i].name))
1818 return (int)i;
1821 return -1;
1825 /* Return number of consecutive hard regs needed starting at reg REGNO
1826 to hold something of mode MODE.
1827 This is ordinarily the length in words of a value of mode MODE
1828 but can be less for certain modes in special long registers.
1830 POWER and PowerPC GPRs hold 32 bits worth;
1831 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1833 static int
1834 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1836 unsigned HOST_WIDE_INT reg_size;
1838 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1839 128-bit floating point that can go in vector registers, which has VSX
1840 memory addressing. */
1841 if (FP_REGNO_P (regno))
1842 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1843 ? UNITS_PER_VSX_WORD
1844 : UNITS_PER_FP_WORD);
1846 else if (ALTIVEC_REGNO_P (regno))
1847 reg_size = UNITS_PER_ALTIVEC_WORD;
1849 else
1850 reg_size = UNITS_PER_WORD;
1852 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
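/* Worked example (illustrative): the expression above is a ceiling division
   of the mode size by the register size.  V2DFmode (16 bytes) in a VSX
   register (reg_size == 16) needs (16 + 16 - 1) / 16 == 1 register; DFmode
   (8 bytes) in 32-bit GPRs (reg_size == 4) needs (8 + 4 - 1) / 4 == 2.  */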
1855 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1856 MODE. */
1857 static int
1858 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1860 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1862 if (COMPLEX_MODE_P (mode))
1863 mode = GET_MODE_INNER (mode);
1865 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1866 registers. */
1867 if (mode == OOmode)
1868 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1870 /* MMA accumulator modes need FPR registers divisible by 4. */
1871 if (mode == XOmode)
1872 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1874 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1875 register combinations, and we use PTImode where we need to deal with quad
1876 word memory operations. Don't allow quad words in the argument or frame
1877 pointer registers, just registers 0..31. */
1878 if (mode == PTImode)
1879 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1880 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1881 && ((regno & 1) == 0));
1883 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1884 implementations. Don't allow an item to be split between a FP register
1885 and an Altivec register. Allow TImode in all VSX registers if the user
1886 asked for it. */
1887 if (TARGET_VSX && VSX_REGNO_P (regno)
1888 && (VECTOR_MEM_VSX_P (mode)
1889 || VECTOR_ALIGNMENT_P (mode)
1890 || reg_addr[mode].scalar_in_vmx_p
1891 || mode == TImode
1892 || (TARGET_VADDUQM && mode == V1TImode)))
1894 if (FP_REGNO_P (regno))
1895 return FP_REGNO_P (last_regno);
1897 if (ALTIVEC_REGNO_P (regno))
1899 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1900 return 0;
1902 return ALTIVEC_REGNO_P (last_regno);
1906 /* The GPRs can hold any mode, but values bigger than one register
1907 cannot go past R31. */
1908 if (INT_REGNO_P (regno))
1909 return INT_REGNO_P (last_regno);
1911 /* The float registers (except for VSX vector modes) can only hold floating
1912 modes and DImode. */
1913 if (FP_REGNO_P (regno))
1915 if (VECTOR_ALIGNMENT_P (mode))
1916 return false;
1918 if (SCALAR_FLOAT_MODE_P (mode)
1919 && (mode != TDmode || (regno % 2) == 0)
1920 && FP_REGNO_P (last_regno))
1921 return 1;
1923 if (GET_MODE_CLASS (mode) == MODE_INT)
1925 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1926 return 1;
1928 if (TARGET_P8_VECTOR && (mode == SImode))
1929 return 1;
1931 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1932 return 1;
1935 return 0;
1938 /* The CR register can only hold CC modes. */
1939 if (CR_REGNO_P (regno))
1940 return GET_MODE_CLASS (mode) == MODE_CC;
1942 if (CA_REGNO_P (regno))
1943 return mode == Pmode || mode == SImode;
1945 /* AltiVec modes can go only in AltiVec registers. */
1946 if (ALTIVEC_REGNO_P (regno))
1947 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1948 || mode == V1TImode);
1950 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1951 registers, and the value must fit within the register set. */
1953 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
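/* Examples of the rules above (illustrative only): OOmode is accepted in an
   even VSX register but rejected in an odd one; XOmode needs an FPR whose
   number is a multiple of 4; TDmode must start in an even-numbered FPR; and
   a CCmode value is rejected everywhere except the CR fields.  */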
1956 /* Implement TARGET_HARD_REGNO_NREGS. */
1958 static unsigned int
1959 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1961 return rs6000_hard_regno_nregs[mode][regno];
1964 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1966 static bool
1967 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1969 return rs6000_hard_regno_mode_ok_p[mode][regno];
1972 /* Implement TARGET_MODES_TIEABLE_P.
1974 PTImode cannot tie with other modes because PTImode is restricted to even
1975 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1976 57744).
1978 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1979 registers) or XOmode (vector quad, restricted to FPR registers divisible
1980 by 4) to tie with other modes.
1982 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1983 128-bit floating point on VSX systems ties with other vectors. */
1985 static bool
1986 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1988 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1989 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1990 return mode1 == mode2;
1992 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1993 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1994 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1995 return false;
1997 if (SCALAR_FLOAT_MODE_P (mode1))
1998 return SCALAR_FLOAT_MODE_P (mode2);
1999 if (SCALAR_FLOAT_MODE_P (mode2))
2000 return false;
2002 if (GET_MODE_CLASS (mode1) == MODE_CC)
2003 return GET_MODE_CLASS (mode2) == MODE_CC;
2004 if (GET_MODE_CLASS (mode2) == MODE_CC)
2005 return false;
2007 return true;
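/* Sample queries against the rules above (illustrative): TImode does not tie
   with PTImode (PTImode ties only with itself); V4SImode ties with V2DFmode
   (both Altivec/VSX vector modes); SFmode ties with DFmode (both scalar
   float); SImode does not tie with CCmode.  */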
2010 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2012 static bool
2013 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2014 machine_mode mode)
2016 if (TARGET_32BIT
2017 && TARGET_POWERPC64
2018 && GET_MODE_SIZE (mode) > 4
2019 && INT_REGNO_P (regno))
2020 return true;
2022 if (TARGET_VSX
2023 && FP_REGNO_P (regno)
2024 && GET_MODE_SIZE (mode) > 8
2025 && !FLOAT128_2REG_P (mode))
2026 return true;
2028 return false;
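/* Illustrative case for the first test above: with -m32 -mpowerpc64 a DImode
   value occupies one 64-bit GPR, but the 32-bit ABI only preserves the low
   32 bits of callee-saved GPRs across calls, so the register counts as
   partially clobbered.  The second test covers 16-byte values in FPRs under
   VSX, where only the low 8 bytes survive a call.  */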
2031 /* Print interesting facts about registers. */
2032 static void
2033 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2035 int r, m;
2037 for (r = first_regno; r <= last_regno; ++r)
2039 const char *comma = "";
2040 int len;
2042 if (first_regno == last_regno)
2043 fprintf (stderr, "%s:\t", reg_name);
2044 else
2045 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2047 len = 8;
2048 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2049 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2051 if (len > 70)
2053 fprintf (stderr, ",\n\t");
2054 len = 8;
2055 comma = "";
2058 if (rs6000_hard_regno_nregs[m][r] > 1)
2059 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2060 rs6000_hard_regno_nregs[m][r]);
2061 else
2062 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2064 comma = ", ";
2067 if (call_used_or_fixed_reg_p (r))
2069 if (len > 70)
2071 fprintf (stderr, ",\n\t");
2072 len = 8;
2073 comma = "";
2076 len += fprintf (stderr, "%s%s", comma, "call-used");
2077 comma = ", ";
2080 if (fixed_regs[r])
2082 if (len > 70)
2084 fprintf (stderr, ",\n\t");
2085 len = 8;
2086 comma = "";
2089 len += fprintf (stderr, "%s%s", comma, "fixed");
2090 comma = ", ";
2093 if (len > 70)
2095 fprintf (stderr, ",\n\t");
2096 comma = "";
2099 len += fprintf (stderr, "%sreg-class = %s", comma,
2100 reg_class_names[(int)rs6000_regno_regclass[r]]);
2101 comma = ", ";
2103 if (len > 70)
2105 fprintf (stderr, ",\n\t");
2106 comma = "";
2109 fprintf (stderr, "%sregno = %d\n", comma, r);
2113 static const char *
2114 rs6000_debug_vector_unit (enum rs6000_vector v)
2116 const char *ret;
2118 switch (v)
2120 case VECTOR_NONE: ret = "none"; break;
2121 case VECTOR_ALTIVEC: ret = "altivec"; break;
2122 case VECTOR_VSX: ret = "vsx"; break;
2123 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2124 default: ret = "unknown"; break;
2127 return ret;
2130 /* Inner function printing just the address mask for a particular reload
2131 register class. */
2132 DEBUG_FUNCTION char *
2133 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2135 static char ret[8];
2136 char *p = ret;
2138 if ((mask & RELOAD_REG_VALID) != 0)
2139 *p++ = 'v';
2140 else if (keep_spaces)
2141 *p++ = ' ';
2143 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2144 *p++ = 'm';
2145 else if (keep_spaces)
2146 *p++ = ' ';
2148 if ((mask & RELOAD_REG_INDEXED) != 0)
2149 *p++ = 'i';
2150 else if (keep_spaces)
2151 *p++ = ' ';
2153 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2154 *p++ = 'O';
2155 else if ((mask & RELOAD_REG_OFFSET) != 0)
2156 *p++ = 'o';
2157 else if (keep_spaces)
2158 *p++ = ' ';
2160 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2161 *p++ = '+';
2162 else if (keep_spaces)
2163 *p++ = ' ';
2165 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2166 *p++ = '+';
2167 else if (keep_spaces)
2168 *p++ = ' ';
2170 if ((mask & RELOAD_REG_AND_M16) != 0)
2171 *p++ = '&';
2172 else if (keep_spaces)
2173 *p++ = ' ';
2175 *p = '\0';
2177 return ret;
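/* Example of the encoding above (illustrative): a mask with RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET prints as "v io   " when
   keep_spaces is true: 'v', a blank for the missing 'm', then 'i' and 'o',
   and blanks for the absent update ('+', '+') and masked ('&') forms.  */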
2180 /* Print the address masks in a human readable fashion. */
2181 DEBUG_FUNCTION void
2182 rs6000_debug_print_mode (ssize_t m)
2184 ssize_t rc;
2185 int spaces = 0;
2187 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2188 for (rc = 0; rc < N_RELOAD_REG; rc++)
2189 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2190 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2192 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2193 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2195 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2196 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2197 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2198 spaces = 0;
2200 else
2201 spaces += strlen (" Reload=sl");
2203 if (reg_addr[m].scalar_in_vmx_p)
2205 fprintf (stderr, "%*s Upper=y", spaces, "");
2206 spaces = 0;
2208 else
2209 spaces += strlen (" Upper=y");
2211 if (rs6000_vector_unit[m] != VECTOR_NONE
2212 || rs6000_vector_mem[m] != VECTOR_NONE)
2214 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2215 spaces, "",
2216 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2217 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2220 fputs ("\n", stderr);
2223 #define DEBUG_FMT_ID "%-32s= "
2224 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2225 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2226 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2228 /* Print various interesting information with -mdebug=reg. */
2229 static void
2230 rs6000_debug_reg_global (void)
2232 static const char *const tf[2] = { "false", "true" };
2233 const char *nl = (const char *)0;
2234 int m;
2235 size_t m1, m2, v;
2236 char costly_num[20];
2237 char nop_num[20];
2238 char flags_buffer[40];
2239 const char *costly_str;
2240 const char *nop_str;
2241 const char *trace_str;
2242 const char *abi_str;
2243 const char *cmodel_str;
2244 struct cl_target_option cl_opts;
2246 /* Modes we want tieable information on. */
2247 static const machine_mode print_tieable_modes[] = {
2248 QImode,
2249 HImode,
2250 SImode,
2251 DImode,
2252 TImode,
2253 PTImode,
2254 SFmode,
2255 DFmode,
2256 TFmode,
2257 IFmode,
2258 KFmode,
2259 SDmode,
2260 DDmode,
2261 TDmode,
2262 V2SImode,
2263 V2SFmode,
2264 V16QImode,
2265 V8HImode,
2266 V4SImode,
2267 V2DImode,
2268 V1TImode,
2269 V32QImode,
2270 V16HImode,
2271 V8SImode,
2272 V4DImode,
2273 V2TImode,
2274 V4SFmode,
2275 V2DFmode,
2276 V8SFmode,
2277 V4DFmode,
2278 OOmode,
2279 XOmode,
2280 CCmode,
2281 CCUNSmode,
2282 CCEQmode,
2283 CCFPmode,
2286 /* Virtual regs we are interested in. */
2287 static const struct {
2288 int regno; /* register number. */
2289 const char *name; /* register name. */
2290 } virtual_regs[] = {
2291 { STACK_POINTER_REGNUM, "stack pointer:" },
2292 { TOC_REGNUM, "toc: " },
2293 { STATIC_CHAIN_REGNUM, "static chain: " },
2294 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2295 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2296 { ARG_POINTER_REGNUM, "arg pointer: " },
2297 { FRAME_POINTER_REGNUM, "frame pointer:" },
2298 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2299 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2300 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2301 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2302 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2303 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2304 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2305 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2306 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2309 fputs ("\nHard register information:\n", stderr);
2310 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2311 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2312 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2313 LAST_ALTIVEC_REGNO,
2314 "vs");
2315 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2316 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2317 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2318 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2319 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2320 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2322 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2323 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2324 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2326 fprintf (stderr,
2327 "\n"
2328 "d reg_class = %s\n"
2329 "f reg_class = %s\n"
2330 "v reg_class = %s\n"
2331 "wa reg_class = %s\n"
2332 "we reg_class = %s\n"
2333 "wr reg_class = %s\n"
2334 "wx reg_class = %s\n"
2335 "wA reg_class = %s\n"
2336 "\n",
2337 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2338 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2339 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2340 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2341 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2342 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2343 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2344 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2346 nl = "\n";
2347 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2348 rs6000_debug_print_mode (m);
2350 fputs ("\n", stderr);
2352 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2354 machine_mode mode1 = print_tieable_modes[m1];
2355 bool first_time = true;
2357 nl = (const char *)0;
2358 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2360 machine_mode mode2 = print_tieable_modes[m2];
2361 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2363 if (first_time)
2365 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2366 nl = "\n";
2367 first_time = false;
2370 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2374 if (!first_time)
2375 fputs ("\n", stderr);
2378 if (nl)
2379 fputs (nl, stderr);
2381 if (rs6000_recip_control)
2383 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2385 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2386 if (rs6000_recip_bits[m])
2388 fprintf (stderr,
2389 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2390 GET_MODE_NAME (m),
2391 (RS6000_RECIP_AUTO_RE_P (m)
2392 ? "auto"
2393 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2394 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2395 ? "auto"
2396 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2399 fputs ("\n", stderr);
2402 if (rs6000_cpu_index >= 0)
2404 const char *name = processor_target_table[rs6000_cpu_index].name;
2405 HOST_WIDE_INT flags
2406 = processor_target_table[rs6000_cpu_index].target_enable;
2408 sprintf (flags_buffer, "-mcpu=%s flags", name);
2409 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2411 else
2412 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2414 if (rs6000_tune_index >= 0)
2416 const char *name = processor_target_table[rs6000_tune_index].name;
2417 HOST_WIDE_INT flags
2418 = processor_target_table[rs6000_tune_index].target_enable;
2420 sprintf (flags_buffer, "-mtune=%s flags", name);
2421 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2423 else
2424 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2426 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2427 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2428 rs6000_isa_flags);
2430 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2431 rs6000_isa_flags_explicit);
2433 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2434 rs6000_builtin_mask);
2436 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2438 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2439 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2441 switch (rs6000_sched_costly_dep)
2443 case max_dep_latency:
2444 costly_str = "max_dep_latency";
2445 break;
2447 case no_dep_costly:
2448 costly_str = "no_dep_costly";
2449 break;
2451 case all_deps_costly:
2452 costly_str = "all_deps_costly";
2453 break;
2455 case true_store_to_load_dep_costly:
2456 costly_str = "true_store_to_load_dep_costly";
2457 break;
2459 case store_to_load_dep_costly:
2460 costly_str = "store_to_load_dep_costly";
2461 break;
2463 default:
2464 costly_str = costly_num;
2465 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2466 break;
2469 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2471 switch (rs6000_sched_insert_nops)
2473 case sched_finish_regroup_exact:
2474 nop_str = "sched_finish_regroup_exact";
2475 break;
2477 case sched_finish_pad_groups:
2478 nop_str = "sched_finish_pad_groups";
2479 break;
2481 case sched_finish_none:
2482 nop_str = "sched_finish_none";
2483 break;
2485 default:
2486 nop_str = nop_num;
2487 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2488 break;
2491 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2493 switch (rs6000_sdata)
2495 default:
2496 case SDATA_NONE:
2497 break;
2499 case SDATA_DATA:
2500 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2501 break;
2503 case SDATA_SYSV:
2504 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2505 break;
2507 case SDATA_EABI:
2508 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2509 break;
2513 switch (rs6000_traceback)
2515 case traceback_default: trace_str = "default"; break;
2516 case traceback_none: trace_str = "none"; break;
2517 case traceback_part: trace_str = "part"; break;
2518 case traceback_full: trace_str = "full"; break;
2519 default: trace_str = "unknown"; break;
2522 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2524 switch (rs6000_current_cmodel)
2526 case CMODEL_SMALL: cmodel_str = "small"; break;
2527 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2528 case CMODEL_LARGE: cmodel_str = "large"; break;
2529 default: cmodel_str = "unknown"; break;
2532 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2534 switch (rs6000_current_abi)
2536 case ABI_NONE: abi_str = "none"; break;
2537 case ABI_AIX: abi_str = "aix"; break;
2538 case ABI_ELFv2: abi_str = "ELFv2"; break;
2539 case ABI_V4: abi_str = "V4"; break;
2540 case ABI_DARWIN: abi_str = "darwin"; break;
2541 default: abi_str = "unknown"; break;
2544 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2546 if (rs6000_altivec_abi)
2547 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2549 if (rs6000_aix_extabi)
2550 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2552 if (rs6000_darwin64_abi)
2553 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2555 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2556 (TARGET_SOFT_FLOAT ? "true" : "false"));
2558 if (TARGET_LINK_STACK)
2559 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2561 if (TARGET_P8_FUSION)
2563 char options[80];
2565 strcpy (options, "power8");
2566 if (TARGET_P8_FUSION_SIGN)
2567 strcat (options, ", sign");
2569 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2572 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2573 TARGET_SECURE_PLT ? "secure" : "bss");
2574 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2575 aix_struct_return ? "aix" : "sysv");
2576 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2577 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2578 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2579 tf[!!rs6000_align_branch_targets]);
2580 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2581 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2582 rs6000_long_double_type_size);
2583 if (rs6000_long_double_type_size > 64)
2585 fprintf (stderr, DEBUG_FMT_S, "long double type",
2586 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2587 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2588 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2590 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2591 (int)rs6000_sched_restricted_insns_priority);
2592 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2593 (int)END_BUILTINS);
2594 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2595 (int)RS6000_BUILTIN_COUNT);
2597 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2598 (int)TARGET_FLOAT128_ENABLE_TYPE);
2600 if (TARGET_VSX)
2601 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2602 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2604 if (TARGET_DIRECT_MOVE_128)
2605 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2606 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2610 /* Update the addr mask bits in reg_addr to help secondary reload and the
2611 legitimate address support figure out the appropriate addressing to
2612 use. */
2614 static void
2615 rs6000_setup_reg_addr_masks (void)
2617 ssize_t rc, reg, m, nregs;
2618 addr_mask_type any_addr_mask, addr_mask;
2620 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2622 machine_mode m2 = (machine_mode) m;
2623 bool complex_p = false;
2624 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2625 size_t msize;
2627 if (COMPLEX_MODE_P (m2))
2629 complex_p = true;
2630 m2 = GET_MODE_INNER (m2);
2633 msize = GET_MODE_SIZE (m2);
2635 /* SDmode is special in that we want to access it only via REG+REG
2636 addressing on power7 and above, since we want to use the LFIWZX and
2637 STFIWX instructions to load and store it. */
2638 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2640 any_addr_mask = 0;
2641 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2643 addr_mask = 0;
2644 reg = reload_reg_map[rc].reg;
2646 /* Can mode values go in the GPR/FPR/Altivec registers? */
2647 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2649 bool small_int_vsx_p = (small_int_p
2650 && (rc == RELOAD_REG_FPR
2651 || rc == RELOAD_REG_VMX));
2653 nregs = rs6000_hard_regno_nregs[m][reg];
2654 addr_mask |= RELOAD_REG_VALID;
2656 /* Indicate if the mode takes more than 1 physical register. If
2657 it takes a single register, indicate it can do REG+REG
2658 addressing. Small integers in VSX registers can only do
2659 REG+REG addressing. */
2660 if (small_int_vsx_p)
2661 addr_mask |= RELOAD_REG_INDEXED;
2662 else if (nregs > 1 || m == BLKmode || complex_p)
2663 addr_mask |= RELOAD_REG_MULTIPLE;
2664 else
2665 addr_mask |= RELOAD_REG_INDEXED;
2667 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2668 addressing. If we allow scalars into Altivec registers,
2669 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2671 For VSX systems, we don't allow update addressing for
2672 DFmode/SFmode if those registers can go in both the
2673 traditional floating point registers and Altivec registers.
2674 The load/store instructions for the Altivec registers do not
2675 have update forms. If we allowed update addressing, it seems
2676 to break IV-OPT code using floating point if the index type is
2677 int instead of long (PR target/81550 and target/84042). */
2679 if (TARGET_UPDATE
2680 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2681 && msize <= 8
2682 && !VECTOR_MODE_P (m2)
2683 && !VECTOR_ALIGNMENT_P (m2)
2684 && !complex_p
2685 && (m != E_DFmode || !TARGET_VSX)
2686 && (m != E_SFmode || !TARGET_P8_VECTOR)
2687 && !small_int_vsx_p)
2689 addr_mask |= RELOAD_REG_PRE_INCDEC;
2691 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2692 we don't allow PRE_MODIFY for some multi-register
2693 operations. */
2694 switch (m)
2696 default:
2697 addr_mask |= RELOAD_REG_PRE_MODIFY;
2698 break;
2700 case E_DImode:
2701 if (TARGET_POWERPC64)
2702 addr_mask |= RELOAD_REG_PRE_MODIFY;
2703 break;
2705 case E_DFmode:
2706 case E_DDmode:
2707 if (TARGET_HARD_FLOAT)
2708 addr_mask |= RELOAD_REG_PRE_MODIFY;
2709 break;
2714 /* GPR and FPR registers can do REG+OFFSET addressing, except
2715 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2716 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
2717 if ((addr_mask != 0) && !indexed_only_p
2718 && msize <= 8
2719 && (rc == RELOAD_REG_GPR
2720 || ((msize == 8 || m2 == SFmode)
2721 && (rc == RELOAD_REG_FPR
2722 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2723 addr_mask |= RELOAD_REG_OFFSET;
2725 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2726 instructions are enabled. The offset for 128-bit VSX registers is
2727 only 12 bits. While GPRs can handle the full offset range, VSX
2728 registers can only handle the restricted range. */
2729 else if ((addr_mask != 0) && !indexed_only_p
2730 && msize == 16 && TARGET_P9_VECTOR
2731 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2732 || (m2 == TImode && TARGET_VSX)))
2734 addr_mask |= RELOAD_REG_OFFSET;
2735 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2736 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2739 /* Vector pairs can do both indexed and offset loads if the
2740 instructions are enabled; otherwise they can only do offset loads,
2741 since the access will be broken into two vector moves. Vector quads can
2742 only do offset loads. */
2743 else if ((addr_mask != 0) && TARGET_MMA
2744 && (m2 == OOmode || m2 == XOmode))
2746 addr_mask |= RELOAD_REG_OFFSET;
2747 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2749 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2750 if (m2 == OOmode)
2751 addr_mask |= RELOAD_REG_INDEXED;
2755 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2756 addressing on 128-bit types. */
2757 if (rc == RELOAD_REG_VMX && msize == 16
2758 && (addr_mask & RELOAD_REG_VALID) != 0)
2759 addr_mask |= RELOAD_REG_AND_M16;
2761 reg_addr[m].addr_mask[rc] = addr_mask;
2762 any_addr_mask |= addr_mask;
2765 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
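/* Putting the cases above together (illustrative, assuming a 64-bit power9
   configuration): DImode in the GPR class ends up with RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC
   | RELOAD_REG_PRE_MODIFY; DFmode loses the update forms because of the VSX
   restriction noted above (PR 81550/84042); and SDmode with
   TARGET_NO_SDMODE_STACK is left indexed-only.  */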
2770 /* Initialize the various global tables that are based on register size. */
2771 static void
2772 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2774 ssize_t r, m, c;
2775 int align64;
2776 int align32;
2778 /* Precalculate REGNO_REG_CLASS. */
2779 rs6000_regno_regclass[0] = GENERAL_REGS;
2780 for (r = 1; r < 32; ++r)
2781 rs6000_regno_regclass[r] = BASE_REGS;
2783 for (r = 32; r < 64; ++r)
2784 rs6000_regno_regclass[r] = FLOAT_REGS;
2786 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2787 rs6000_regno_regclass[r] = NO_REGS;
2789 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2790 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2792 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2793 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2794 rs6000_regno_regclass[r] = CR_REGS;
2796 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2797 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2798 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2799 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2800 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2801 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2802 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2804 /* Precalculate register class to simpler reload register class. We don't
2805 need all of the register classes that are combinations of different
2806 classes, just the simple ones that have constraint letters. */
2807 for (c = 0; c < N_REG_CLASSES; c++)
2808 reg_class_to_reg_type[c] = NO_REG_TYPE;
2810 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2811 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2812 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2813 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2814 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2815 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2816 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2817 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2818 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2819 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2821 if (TARGET_VSX)
2823 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2824 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2826 else
2828 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2829 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2832 /* Precalculate the valid memory formats as well as the vector information;
2833 this must be set up before the rs6000_hard_regno_nregs_internal calls
2834 below. */
2835 gcc_assert ((int)VECTOR_NONE == 0);
2836 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2837 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2839 gcc_assert ((int)CODE_FOR_nothing == 0);
2840 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2842 gcc_assert ((int)NO_REGS == 0);
2843 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2845 /* The VSX hardware allows native alignment for vectors, but we control whether
2846 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2847 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2849 align64 = 64;
2850 align32 = 32;
2852 else
2854 align64 = 128;
2855 align32 = 128;
2858 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2859 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2860 if (TARGET_FLOAT128_TYPE)
2862 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2863 rs6000_vector_align[KFmode] = 128;
2865 if (FLOAT128_IEEE_P (TFmode))
2867 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2868 rs6000_vector_align[TFmode] = 128;
2872 /* V2DF mode, VSX only. */
2873 if (TARGET_VSX)
2875 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2876 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2877 rs6000_vector_align[V2DFmode] = align64;
2880 /* V4SF mode, either VSX or Altivec. */
2881 if (TARGET_VSX)
2883 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2884 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2885 rs6000_vector_align[V4SFmode] = align32;
2887 else if (TARGET_ALTIVEC)
2889 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2890 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2891 rs6000_vector_align[V4SFmode] = align32;
2894 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2895 and stores. */
2896 if (TARGET_ALTIVEC)
2898 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2899 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2900 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2901 rs6000_vector_align[V4SImode] = align32;
2902 rs6000_vector_align[V8HImode] = align32;
2903 rs6000_vector_align[V16QImode] = align32;
2905 if (TARGET_VSX)
2907 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2908 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2909 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2911 else
2913 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2914 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2915 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2919 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2920 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2921 if (TARGET_VSX)
2923 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2924 rs6000_vector_unit[V2DImode]
2925 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2926 rs6000_vector_align[V2DImode] = align64;
2928 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2929 rs6000_vector_unit[V1TImode]
2930 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2931 rs6000_vector_align[V1TImode] = 128;
2934 /* DFmode, see if we want to use the VSX unit. Memory is handled
2935 differently, so don't set rs6000_vector_mem. */
2936 if (TARGET_VSX)
2938 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2939 rs6000_vector_align[DFmode] = 64;
2942 /* SFmode, see if we want to use the VSX unit. */
2943 if (TARGET_P8_VECTOR)
2945 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2946 rs6000_vector_align[SFmode] = 32;
2949 /* Allow TImode in VSX register and set the VSX memory macros. */
2950 if (TARGET_VSX)
2952 rs6000_vector_mem[TImode] = VECTOR_VSX;
2953 rs6000_vector_align[TImode] = align64;
2956 /* Add support for vector pairs and vector quad registers. */
2957 if (TARGET_MMA)
2959 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2960 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2961 rs6000_vector_align[OOmode] = 256;
2963 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2964 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2965 rs6000_vector_align[XOmode] = 512;
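/* Source-level view of these modes (a hedged sketch, assuming the MMA
   builtins documented for power10): OOmode backs the __vector_pair type and
   XOmode backs the __vector_quad accumulator, e.g.

     void ger (__vector_quad *acc, vector unsigned char a,
               vector unsigned char b)
     {
       __builtin_mma_xvf32gerpp (acc, a, b);  // accumulates into XOmode
     }
*/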
2968 /* Register class constraints for the constraints that depend on compile
2969 switches. When the VSX code was added, different constraints were added
2970 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2971 of the VSX registers are used. The register classes for scalar floating
2972 point types are set, based on whether we allow that type into the upper
2973 (Altivec) registers. GCC has register classes to target the Altivec
2974 registers for load/store operations, to select using a VSX memory
2975 operation instead of the traditional floating point operation. The
2976 constraints are:
2978 d - Register class to use with traditional DFmode instructions.
2979 f - Register class to use with traditional SFmode instructions.
2980 v - Altivec register.
2981 wa - Any VSX register.
2982 wc - Reserved to represent individual CR bits (used in LLVM).
2983 wn - always NO_REGS.
2984 wr - GPR if 64-bit mode is permitted.
2985 wx - Float register if we can do 32-bit int stores. */
2987 if (TARGET_HARD_FLOAT)
2989 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2990 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2993 if (TARGET_VSX)
2994 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2996 /* Add conditional constraints based on various options, to allow us to
2997 collapse multiple insn patterns. */
2998 if (TARGET_ALTIVEC)
2999 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3001 if (TARGET_POWERPC64)
3003 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3004 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3007 if (TARGET_STFIWX)
3008 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3010 /* Support for new direct moves (ISA 3.0 + 64-bit). */
3011 if (TARGET_DIRECT_MOVE_128)
3012 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3014 /* Set up the reload helper and direct move functions. */
3015 if (TARGET_VSX || TARGET_ALTIVEC)
3017 if (TARGET_64BIT)
3019 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3020 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3021 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3022 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3023 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3024 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3025 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3026 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3027 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3028 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3029 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3030 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3031 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3032 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3033 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3034 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3035 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3036 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3037 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3038 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3040 if (FLOAT128_VECTOR_P (KFmode))
3042 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3043 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3046 if (FLOAT128_VECTOR_P (TFmode))
3048 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3049 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3052 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3053 available. */
3054 if (TARGET_NO_SDMODE_STACK)
3056 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3057 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3060 if (TARGET_VSX)
3062 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3063 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3066 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3068 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3069 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3070 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3071 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3072 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3073 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3074 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3075 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3076 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3078 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3079 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3080 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3081 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3082 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3083 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3084 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3085 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3086 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3088 if (FLOAT128_VECTOR_P (KFmode))
3090 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3091 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3094 if (FLOAT128_VECTOR_P (TFmode))
3096 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3097 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3100 if (TARGET_MMA)
3102 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3103 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3104 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3105 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3109 else
3111 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3112 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3113 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3114 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3115 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3116 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3117 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3118 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3119 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3120 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3121 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3122 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3123 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3124 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3125 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3126 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3127 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3128 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3129 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3130 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3132 if (FLOAT128_VECTOR_P (KFmode))
3134 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3135 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3138 if (FLOAT128_IEEE_P (TFmode))
3140 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3141 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3144 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3145 available. */
3146 if (TARGET_NO_SDMODE_STACK)
3148 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3149 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3152 if (TARGET_VSX)
3154 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3155 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3158 if (TARGET_DIRECT_MOVE)
3160 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3161 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3162 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3166 reg_addr[DFmode].scalar_in_vmx_p = true;
3167 reg_addr[DImode].scalar_in_vmx_p = true;
3169 if (TARGET_P8_VECTOR)
3171 reg_addr[SFmode].scalar_in_vmx_p = true;
3172 reg_addr[SImode].scalar_in_vmx_p = true;
3174 if (TARGET_P9_VECTOR)
3176 reg_addr[HImode].scalar_in_vmx_p = true;
3177 reg_addr[QImode].scalar_in_vmx_p = true;
3182 /* Precalculate HARD_REGNO_NREGS. */
3183 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3184 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3185 rs6000_hard_regno_nregs[m][r]
3186 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3188 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3189 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3190 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3191 rs6000_hard_regno_mode_ok_p[m][r]
3192 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3194 /* Precalculate CLASS_MAX_NREGS sizes. */
3195 for (c = 0; c < LIM_REG_CLASSES; ++c)
3197 int reg_size;
3199 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3200 reg_size = UNITS_PER_VSX_WORD;
3202 else if (c == ALTIVEC_REGS)
3203 reg_size = UNITS_PER_ALTIVEC_WORD;
3205 else if (c == FLOAT_REGS)
3206 reg_size = UNITS_PER_FP_WORD;
3208 else
3209 reg_size = UNITS_PER_WORD;
3211 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3213 machine_mode m2 = (machine_mode)m;
3214 int reg_size2 = reg_size;
3216 /* TDmode and IBM 128-bit floating point always take 2 registers, even
3217 in VSX. */
3218 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3219 reg_size2 = UNITS_PER_FP_WORD;
3221 rs6000_class_max_nregs[m][c]
3222 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
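/* Worked example (illustrative): IFmode (IBM 128-bit float) is
   FLOAT128_2REG_P, so even for a VSX class reg_size2 falls back to
   UNITS_PER_FP_WORD == 8 and we get (16 + 8 - 1) / 8 == 2 registers, while
   KFmode (IEEE 128-bit) keeps reg_size2 == 16 and needs only
   (16 + 16 - 1) / 16 == 1 VSX register.  */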
3226 /* Calculate the modes for which to automatically generate code using the
3227 reciprocal divide and square root instructions. In the future, possibly
3228 automatically generate the instructions even if the user did not specify
3229 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3230 not accurate enough. */
3231 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3232 if (TARGET_FRES)
3233 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3234 if (TARGET_FRE)
3235 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3236 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3237 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3238 if (VECTOR_UNIT_VSX_P (V2DFmode))
3239 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3241 if (TARGET_FRSQRTES)
3242 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3243 if (TARGET_FRSQRTE)
3244 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3245 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3246 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3247 if (VECTOR_UNIT_VSX_P (V2DFmode))
3248 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3250 if (rs6000_recip_control)
3252 if (!flag_finite_math_only)
3253 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3254 "-ffast-math");
3255 if (flag_trapping_math)
3256 warning (0, "%qs requires %qs or %qs", "-mrecip",
3257 "-fno-trapping-math", "-ffast-math");
3258 if (!flag_reciprocal_math)
3259 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3260 "-ffast-math");
3261 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3263 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3264 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3265 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3267 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3268 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3269 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3271 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3272 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3273 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3275 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3276 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3277 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3279 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3280 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3281 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3283 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3284 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3285 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3287 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3288 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3289 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3291 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3292 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3293 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3297 /* Update the addr mask bits in reg_addr to help secondary reload and the
3298 legitimate address support figure out the appropriate addressing to
3299 use. */
3300 rs6000_setup_reg_addr_masks ();
3302 if (global_init_p || TARGET_DEBUG_TARGET)
3304 if (TARGET_DEBUG_REG)
3305 rs6000_debug_reg_global ();
3307 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3308 fprintf (stderr,
3309 "SImode variable mult cost = %d\n"
3310 "SImode constant mult cost = %d\n"
3311 "SImode short constant mult cost = %d\n"
3312 "DImode multipliciation cost = %d\n"
3313 "SImode division cost = %d\n"
3314 "DImode division cost = %d\n"
3315 "Simple fp operation cost = %d\n"
3316 "DFmode multiplication cost = %d\n"
3317 "SFmode division cost = %d\n"
3318 "DFmode division cost = %d\n"
3319 "cache line size = %d\n"
3320 "l1 cache size = %d\n"
3321 "l2 cache size = %d\n"
3322 "simultaneous prefetches = %d\n"
3323 "\n",
3324 rs6000_cost->mulsi,
3325 rs6000_cost->mulsi_const,
3326 rs6000_cost->mulsi_const9,
3327 rs6000_cost->muldi,
3328 rs6000_cost->divsi,
3329 rs6000_cost->divdi,
3330 rs6000_cost->fp,
3331 rs6000_cost->dmul,
3332 rs6000_cost->sdiv,
3333 rs6000_cost->ddiv,
3334 rs6000_cost->cache_line_size,
3335 rs6000_cost->l1_cache_size,
3336 rs6000_cost->l2_cache_size,
3337 rs6000_cost->simultaneous_prefetches);
3341 #if TARGET_MACHO
3342 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3344 static void
3345 darwin_rs6000_override_options (void)
3347 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3348 off. */
3349 rs6000_altivec_abi = 1;
3350 TARGET_ALTIVEC_VRSAVE = 1;
3351 rs6000_current_abi = ABI_DARWIN;
3353 if (DEFAULT_ABI == ABI_DARWIN
3354 && TARGET_64BIT)
3355 darwin_one_byte_bool = 1;
3357 if (TARGET_64BIT && ! TARGET_POWERPC64)
3359 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3360 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3363 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3364 optimisation, and it will not work with the most generic case (where the
3365 symbol is an undefined external, but there is no symbol stub). */
3366 if (TARGET_64BIT)
3367 rs6000_default_long_calls = 0;
3369 /* ld_classic is (so far) still used for kernel (static) code, and supports
3370 the JBSR longcall / branch islands. */
3371 if (flag_mkernel)
3373 rs6000_default_long_calls = 1;
3375 /* Allow a kext author to do -mkernel -mhard-float. */
3376 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3377 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3380 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3381 Altivec. */
3382 if (!flag_mkernel && !flag_apple_kext
3383 && TARGET_64BIT
3384 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3385 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3387 /* Unless the user (not the configurer) has explicitly overridden
3388 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3389 G4 unless targeting the kernel. */
3390 if (!flag_mkernel
3391 && !flag_apple_kext
3392 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3393 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3394 && ! global_options_set.x_rs6000_cpu_index)
3396 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3399 #endif
3401 /* If not otherwise specified by a target, make 'long double' equivalent to
3402 'double'. */
3404 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3405 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3406 #endif
3408 /* Return the builtin mask for the various options that could affect which
3409 builtins are available. In the past we used target_flags, but we've run out of
3410 bits, and some options are no longer in target_flags. */
3412 HOST_WIDE_INT
3413 rs6000_builtin_mask_calculate (void)
3415 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3416 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3417 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3418 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3419 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3420 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3421 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3422 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3423 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3424 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3425 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3426 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3427 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3428 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3429 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3430 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3431 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3432 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3433 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3434 | ((TARGET_LONG_DOUBLE_128
3435 && TARGET_HARD_FLOAT
3436 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3437 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3438 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3439 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3440 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
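/* Illustrative example (not from the original source): a translation unit
   compiled with -mcpu=power9 ends up with RS6000_BTM_ALTIVEC,
   RS6000_BTM_VSX, RS6000_BTM_P8_VECTOR and RS6000_BTM_P9_VECTOR set in
   this mask, among others, so builtins guarded by those bits expand;
   with -mno-altivec, builtins guarded by RS6000_BTM_ALTIVEC are rejected
   instead.  */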
3443 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3444 to clobber the XER[CA] bit because clobbering that bit without telling
3445 the compiler worked just fine with versions of GCC before GCC 5, and
3446 breaking a lot of older code in ways that are hard to track down is
3447 not such a great idea. */
3449 static rtx_insn *
3450 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3451 vec<machine_mode> & /*input_modes*/,
3452 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3453 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3455 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3456 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3457 return NULL;
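/* A sketch of the kind of pre-GCC 5 user code this keeps working; the
   operands are hypothetical:

     asm ("addic %0,%1,1" : "=r" (res) : "r" (val));

   addic writes XER[CA] without the asm declaring a clobber, so the hook
   above conservatively clobbers it for every asm.  */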
3460 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3461 but is called when the optimize level is changed via an attribute or
3462 pragma or when it is reset at the end of the code affected by the
3463 attribute or pragma. It is not called at the beginning of compilation
3464 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3465 actions then, you should have TARGET_OPTION_OVERRIDE call
3466 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3468 static void
3469 rs6000_override_options_after_change (void)
3471 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3472 turns -frename-registers on. */
3473 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
3474 || (global_options_set.x_flag_unroll_all_loops
3475 && flag_unroll_all_loops))
3477 if (!global_options_set.x_unroll_only_small_loops)
3478 unroll_only_small_loops = 0;
3479 if (!global_options_set.x_flag_rename_registers)
3480 flag_rename_registers = 1;
3481 if (!global_options_set.x_flag_cunroll_grow_size)
3482 flag_cunroll_grow_size = 1;
3484 else if (!global_options_set.x_flag_cunroll_grow_size)
3485 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
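/* Illustrative effect: plain -O2 leaves -munroll-only-small-loops alone,
   while -O2 -funroll-loops clears it and also turns on -frename-registers
   and -fcunroll-grow-size, except for any of those flags the user set
   explicitly.  */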
3488 #ifdef TARGET_USES_LINUX64_OPT
3489 static void
3490 rs6000_linux64_override_options ()
3492 if (!global_options_set.x_rs6000_alignment_flags)
3493 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3494 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3496 if (DEFAULT_ABI != ABI_AIX)
3498 rs6000_current_abi = ABI_AIX;
3499 error (INVALID_64BIT, "call");
3501 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3502 if (ELFv2_ABI_CHECK)
3504 rs6000_current_abi = ABI_ELFv2;
3505 if (dot_symbols)
3506 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3508 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3510 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3511 error (INVALID_64BIT, "relocatable");
3513 if (rs6000_isa_flags & OPTION_MASK_EABI)
3515 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3516 error (INVALID_64BIT, "eabi");
3518 if (TARGET_PROTOTYPE)
3520 target_prototype = 0;
3521 error (INVALID_64BIT, "prototype");
3523 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3525 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3526 error ("%<-m64%> requires a PowerPC64 cpu");
3528 if (!global_options_set.x_rs6000_current_cmodel)
3529 SET_CMODEL (CMODEL_MEDIUM);
3530 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3532 if (global_options_set.x_rs6000_current_cmodel
3533 && rs6000_current_cmodel != CMODEL_SMALL)
3534 error ("%<-mcmodel incompatible with other toc options%>");
3535 if (TARGET_MINIMAL_TOC)
3536 SET_CMODEL (CMODEL_SMALL);
3537 else if (TARGET_PCREL
3538 || (PCREL_SUPPORTED_BY_OS
3539 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3540 /* Ignore -mno-minimal-toc. */
3541 ;
3542 else
3543 SET_CMODEL (CMODEL_SMALL);
3545 if (rs6000_current_cmodel != CMODEL_SMALL)
3547 if (!global_options_set.x_TARGET_NO_FP_IN_TOC)
3548 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3549 if (!global_options_set.x_TARGET_NO_SUM_IN_TOC)
3550 TARGET_NO_SUM_IN_TOC = 0;
3552 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3554 if (global_options_set.x_rs6000_pltseq)
3555 warning (0, "%qs unsupported for this ABI",
3556 "-mpltseq");
3557 rs6000_pltseq = false;
3560 else if (TARGET_64BIT)
3561 error (INVALID_32BIT, "32");
3562 else
3564 if (TARGET_PROFILE_KERNEL)
3566 profile_kernel = 0;
3567 error (INVALID_32BIT, "profile-kernel");
3569 if (global_options_set.x_rs6000_current_cmodel)
3571 SET_CMODEL (CMODEL_SMALL);
3572 error (INVALID_32BIT, "cmodel");
3576 #endif
3578 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3579 This support is only in little endian GLIBC 2.32 or newer. */
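/* For example, GLIBC 2.31 encodes as 2 * 1000 + 31 = 2031 < 2032, so IEEE
   128-bit long double support is treated as unavailable there.  */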
3580 static bool
3581 glibc_supports_ieee_128bit (void)
3583 #ifdef OPTION_GLIBC
3584 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3585 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3586 return true;
3587 #endif /* OPTION_GLIBC. */
3589 return false;
3592 /* Override command line options.
3594 Combine build-specific configuration information with options
3595 specified on the command line to set various state variables which
3596 influence code generation, optimization, and expansion of built-in
3597 functions. Assure that command-line configuration preferences are
3598 compatible with each other and with the build configuration; issue
3599 warnings while adjusting configuration or error messages while
3600 rejecting configuration.
3602 Upon entry to this function:
3604 This function is called once at the beginning of
3605 compilation, and then again at the start and end of compiling
3606 each section of code that has a different configuration, as
3607 indicated, for example, by adding the
3609 __attribute__((__target__("cpu=power9")))
3611 qualifier to a function definition or, for example, by bracketing
3612 code between
3614 #pragma GCC target("altivec")
3618 #pragma GCC reset_options
3620 directives. Parameter global_init_p is true for the initial
3621 invocation, which initializes global variables, and false for all
3622 subsequent invocations.
3625 Various global state information is assumed to be valid. This
3626 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3627 default CPU specified at build configure time, TARGET_DEFAULT,
3628 representing the default set of option flags for the default
3629 target, and global_options_set.x_rs6000_isa_flags, representing
3630 which options were requested on the command line.
3632 Upon return from this function:
3634 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3635 was set by name on the command line. Additionally, if certain
3636 attributes are automatically enabled or disabled by this function
3637 in order to assure compatibility between options and
3638 configuration, the flags associated with those attributes are
3639 also set. By setting these "explicit bits", we avoid the risk
3640 that other code might accidentally overwrite these particular
3641 attributes with "default values".
3643 The various bits of rs6000_isa_flags are set to indicate the
3644 target options that have been selected for the most current
3645 compilation efforts. This has the effect of also turning on the
3646 associated TARGET_XXX values since these are macros which are
3647 generally defined to test the corresponding bit of the
3648 rs6000_isa_flags variable.
3650 The variable rs6000_builtin_mask is set to represent the target
3651 options for the most current compilation efforts, consistent with
3652 the current contents of rs6000_isa_flags. This variable controls
3653 expansion of built-in functions.
3655 Various other global variables and fields of global structures
3656 (over 50 in all) are initialized to reflect the desired options
3657 for the most current compilation efforts. */
3659 static bool
3660 rs6000_option_override_internal (bool global_init_p)
3662 bool ret = true;
3664 HOST_WIDE_INT set_masks;
3665 HOST_WIDE_INT ignore_masks;
3666 int cpu_index = -1;
3667 int tune_index;
3668 struct cl_target_option *main_target_opt
3669 = ((global_init_p || target_option_default_node == NULL)
3670 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3672 /* Print defaults. */
3673 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3674 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3676 /* Remember the explicit arguments. */
3677 if (global_init_p)
3678 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3680 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3681 library functions, so warn about it. The flag may be useful for
3682 performance studies from time to time though, so don't disable it
3683 entirely. */
3684 if (global_options_set.x_rs6000_alignment_flags
3685 && rs6000_alignment_flags == MASK_ALIGN_POWER
3686 && DEFAULT_ABI == ABI_DARWIN
3687 && TARGET_64BIT)
3688 warning (0, "%qs is not supported for 64-bit Darwin;"
3689 " it is incompatible with the installed C and C++ libraries",
3690 "-malign-power");
3692 /* Numerous experiments show that IRA-based loop pressure
3693 calculation works better for RTL loop-invariant motion on targets
3694 with enough (>= 32) registers. It is an expensive optimization,
3695 so it is enabled only for peak performance. */
3696 if (optimize >= 3 && global_init_p
3697 && !global_options_set.x_flag_ira_loop_pressure)
3698 flag_ira_loop_pressure = 1;
3700 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3701 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3702 options were already specified. */
3703 if (flag_sanitize & SANITIZE_USER_ADDRESS
3704 && !global_options_set.x_flag_asynchronous_unwind_tables)
3705 flag_asynchronous_unwind_tables = 1;
3707 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3708 loop unroller is active. It is only checked during unrolling, so
3709 we can just set it on by default. */
3710 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3711 flag_variable_expansion_in_unroller = 1;
3713 /* Set the pointer size. */
3714 if (TARGET_64BIT)
3716 rs6000_pmode = DImode;
3717 rs6000_pointer_size = 64;
3719 else
3721 rs6000_pmode = SImode;
3722 rs6000_pointer_size = 32;
3725 /* Some OSs don't support saving the high part of 64-bit registers on context
3726 switch. Other OSs don't support saving Altivec registers. On those OSs,
3727 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3728 if the user wants either, the user must explicitly specify them and we
3729 won't interfere with the user's specification. */
3731 set_masks = POWERPC_MASKS;
3732 #ifdef OS_MISSING_POWERPC64
3733 if (OS_MISSING_POWERPC64)
3734 set_masks &= ~OPTION_MASK_POWERPC64;
3735 #endif
3736 #ifdef OS_MISSING_ALTIVEC
3737 if (OS_MISSING_ALTIVEC)
3738 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3739 | OTHER_VSX_VECTOR_MASKS);
3740 #endif
3742 /* Don't let the processor default override options given explicitly. */
3743 set_masks &= ~rs6000_isa_flags_explicit;
3745 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3746 the cpu in a target attribute or pragma, but did not specify a tuning
3747 option, use the cpu for the tuning option rather than the option specified
3748 with -mtune on the command line. Process a '--with-cpu' configuration
3749 request as an implicit --cpu. */
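/* Illustrative resolutions under these rules: -mcpu=power9 alone also
   tunes for power9; -mcpu=power9 -mtune=power10 tunes for power10; and a
   compiler configured with --with-cpu=power8, given neither option,
   behaves as if -mcpu=power8 had been passed.  */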
3750 if (rs6000_cpu_index >= 0)
3751 cpu_index = rs6000_cpu_index;
3752 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3753 cpu_index = main_target_opt->x_rs6000_cpu_index;
3754 else if (OPTION_TARGET_CPU_DEFAULT)
3755 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3757 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3758 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3759 with those from the cpu, except for options that were explicitly set. If
3760 we don't have a cpu, do not override the target bits set in
3761 TARGET_DEFAULT. */
3762 if (cpu_index >= 0)
3764 rs6000_cpu_index = cpu_index;
3765 rs6000_isa_flags &= ~set_masks;
3766 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3767 & set_masks);
3769 else
3771 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3772 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3773 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3774 to using rs6000_isa_flags, we need to do the initialization here.
3776 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3777 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3778 HOST_WIDE_INT flags;
3779 if (TARGET_DEFAULT)
3780 flags = TARGET_DEFAULT;
3781 else
3783 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3784 const char *default_cpu = (!TARGET_POWERPC64
3785 ? "powerpc"
3786 : (BYTES_BIG_ENDIAN
3787 ? "powerpc64"
3788 : "powerpc64le"));
3789 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3790 flags = processor_target_table[default_cpu_index].target_enable;
3792 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3795 if (rs6000_tune_index >= 0)
3796 tune_index = rs6000_tune_index;
3797 else if (cpu_index >= 0)
3798 rs6000_tune_index = tune_index = cpu_index;
3799 else
3801 size_t i;
3802 enum processor_type tune_proc
3803 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3805 tune_index = -1;
3806 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3807 if (processor_target_table[i].processor == tune_proc)
3809 tune_index = i;
3810 break;
3814 if (cpu_index >= 0)
3815 rs6000_cpu = processor_target_table[cpu_index].processor;
3816 else
3817 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3819 gcc_assert (tune_index >= 0);
3820 rs6000_tune = processor_target_table[tune_index].processor;
3822 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3823 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3824 || rs6000_cpu == PROCESSOR_PPCE5500)
3826 if (TARGET_ALTIVEC)
3827 error ("AltiVec not supported in this target");
3830 /* If we are optimizing big endian systems for space, use the load/store
3831 multiple instructions. */
3832 if (BYTES_BIG_ENDIAN && optimize_size)
3833 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3835 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3836 because the hardware doesn't support the instructions used in little
3837 endian mode, and they cause an alignment trap. The 750 does not cause an
3838 alignment trap (except when the target is unaligned). */
3840 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3842 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3843 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3844 warning (0, "%qs is not supported on little endian systems",
3845 "-mmultiple");
3848 /* If little-endian, default to -mstrict-align on older processors.
3849 Testing for direct_move matches power8 and later. */
3850 if (!BYTES_BIG_ENDIAN
3851 && !(processor_target_table[tune_index].target_enable
3852 & OPTION_MASK_DIRECT_MOVE))
3853 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3855 if (!rs6000_fold_gimple)
3856 fprintf (stderr,
3857 "gimple folding of rs6000 builtins has been disabled.\n");
3859 /* Add some warnings for VSX. */
3860 if (TARGET_VSX)
3862 const char *msg = NULL;
3863 if (!TARGET_HARD_FLOAT)
3865 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3866 msg = N_("%<-mvsx%> requires hardware floating point");
3867 else
3869 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3870 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3873 else if (TARGET_AVOID_XFORM > 0)
3874 msg = N_("%<-mvsx%> needs indexed addressing");
3875 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3876 & OPTION_MASK_ALTIVEC))
3878 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3879 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3880 else
3881 msg = N_("%<-mno-altivec%> disables vsx");
3884 if (msg)
3886 warning (0, msg);
3887 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3888 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3892 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3893 the -mcpu setting to enable options that conflict. */
3894 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3895 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3896 | OPTION_MASK_ALTIVEC
3897 | OPTION_MASK_VSX)) != 0)
3898 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3899 | OPTION_MASK_DIRECT_MOVE)
3900 & ~rs6000_isa_flags_explicit);
3902 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3903 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3905 #ifdef XCOFF_DEBUGGING_INFO
3906 /* For AIX default to 64-bit DWARF. */
3907 if (!global_options_set.x_dwarf_offset_size)
3908 dwarf_offset_size = POINTER_SIZE_UNITS;
3909 #endif
3911 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3912 off all of the options that depend on those flags. */
3913 ignore_masks = rs6000_disable_incompatible_switches ();
3915 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3916 unless the user explicitly used the -mno-<option> to disable the code. */
3917 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3918 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3919 else if (TARGET_P9_MINMAX)
3921 if (cpu_index >= 0)
3923 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
3925 /* Legacy behavior: allow -mcpu=power9 with certain
3926 capabilities explicitly disabled. */
3927 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3929 else
3930 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3931 "for <xxx> less than power9", "-mcpu");
3933 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3934 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3935 & rs6000_isa_flags_explicit))
3936 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3937 were explicitly cleared. */
3938 error ("%qs incompatible with explicitly disabled options",
3939 "-mpower9-minmax");
3940 else
3941 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3943 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3944 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3945 else if (TARGET_VSX)
3946 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3947 else if (TARGET_POPCNTD)
3948 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3949 else if (TARGET_DFP)
3950 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3951 else if (TARGET_CMPB)
3952 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3953 else if (TARGET_FPRND)
3954 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3955 else if (TARGET_POPCNTB)
3956 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3957 else if (TARGET_ALTIVEC)
3958 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3960 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3962 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3963 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3964 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3967 if (!TARGET_FPRND && TARGET_VSX)
3969 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3970 /* TARGET_VSX = 1 implies Power7 and newer. */
3971 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3972 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3975 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3977 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3978 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3979 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3982 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3984 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3985 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3986 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3989 if (TARGET_P8_VECTOR && !TARGET_VSX)
3991 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3992 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3993 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3994 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3996 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3997 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3998 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4000 else
4002 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4003 not explicit. */
4004 rs6000_isa_flags |= OPTION_MASK_VSX;
4005 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4009 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4011 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4012 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4013 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4016 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4017 silently turn off quad memory mode. */
4018 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4020 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4021 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
4023 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4024 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
4026 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4027 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4030 /* Non-atomic quad memory load/store are disabled for little endian, since
4031 the words are reversed, but atomic operations can still be done by
4032 swapping the words. */
4033 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4035 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4036 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4037 "mode"));
4039 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4042 /* Assume that if the user asked for normal quad memory instructions, they
4043 want the atomic versions as well, unless they explicitly told us not to
4044 use quad word atomic instructions. */
4045 if (TARGET_QUAD_MEMORY
4046 && !TARGET_QUAD_MEMORY_ATOMIC
4047 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4048 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4050 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
4051 if (rs6000_rop_protect)
4052 flag_shrink_wrap = 0;
4054 /* If we can shrink-wrap the TOC register save separately, then use
4055 -msave-toc-indirect unless explicitly disabled. */
4056 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4057 && flag_shrink_wrap_separate
4058 && optimize_function_for_speed_p (cfun))
4059 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4061 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4062 generating power8 instructions. Power9 does not optimize power8 fusion
4063 cases. */
4064 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4066 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4067 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4068 else
4069 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4072 /* Setting additional fusion flags turns on base fusion. */
4073 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4075 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4077 if (TARGET_P8_FUSION_SIGN)
4078 error ("%qs requires %qs", "-mpower8-fusion-sign",
4079 "-mpower8-fusion");
4081 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4083 else
4084 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4087 /* Power8 does not fuse sign-extended loads with the addis. If we are
4088 optimizing at high levels for speed, convert a sign-extended load into a
4089 zero-extending load and an explicit sign extension. */
4090 if (TARGET_P8_FUSION
4091 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4092 && optimize_function_for_speed_p (cfun)
4093 && optimize >= 3)
4094 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
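/* A sketch of the conversion (hypothetical register numbers): a
   sign-extending load such as

     lwa 9,0(3)

   is instead emitted as the fusible zero-extending load plus an explicit
   sign extension:

     lwz 9,0(3)
     extsw 9,9  */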
4096 /* ISA 3.0 vector instructions include ISA 2.07. */
4097 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4099 /* We prefer to not mention undocumented options in
4100 error messages. However, if users have managed to select
4101 power9-vector without selecting power8-vector, they
4102 already know about undocumented flags. */
4103 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4104 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4105 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4106 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4108 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4109 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4110 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4112 else
4114 /* OPTION_MASK_P9_VECTOR is explicit and
4115 OPTION_MASK_P8_VECTOR is not explicit. */
4116 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4117 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4121 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4122 support. If we only have ISA 2.06 support, and the user did not specify
4123 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4124 but we don't enable the full vectorization support. */
4125 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4126 TARGET_ALLOW_MOVMISALIGN = 1;
4128 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4130 if (TARGET_ALLOW_MOVMISALIGN > 0
4131 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4132 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4134 TARGET_ALLOW_MOVMISALIGN = 0;
4137 /* Determine when unaligned vector accesses are permitted, and when
4138 they are preferred over masked Altivec loads. Note that if
4139 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4140 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4141 not true. */
4142 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4144 if (!TARGET_VSX)
4146 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4147 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4149 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4152 else if (!TARGET_ALLOW_MOVMISALIGN)
4154 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4155 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4156 "-mallow-movmisalign");
4158 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4162 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4164 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4165 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4166 else
4167 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4170 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4172 if (TARGET_MMA && TARGET_EFFICIENT_UNALIGNED_VSX)
4173 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4174 else
4175 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4178 /* Use long double size to select the appropriate long double. We use
4179 TYPE_PRECISION to differentiate the 3 different long double types. We map
4180 128 into the precision used for TFmode. */
4181 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4182 ? 64
4183 : FLOAT_PRECISION_TFmode);
4185 /* Set long double size before the IEEE 128-bit tests. */
4186 if (!global_options_set.x_rs6000_long_double_type_size)
4188 if (main_target_opt != NULL
4189 && (main_target_opt->x_rs6000_long_double_type_size
4190 != default_long_double_size))
4191 error ("target attribute or pragma changes %<long double%> size");
4192 else
4193 rs6000_long_double_type_size = default_long_double_size;
4195 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4196 ; /* The option value can be seen when cl_target_option_restore is called. */
4197 else if (rs6000_long_double_type_size == 128)
4198 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4199 else if (global_options_set.x_rs6000_ieeequad)
4201 if (global_options.x_rs6000_ieeequad)
4202 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4203 else
4204 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
4207 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4208 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4209 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4210 those systems will not pick up this default. Warn if the user changes the
4211 default unless -Wno-psabi. */
4212 if (!global_options_set.x_rs6000_ieeequad)
4213 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4215 else
4217 if (global_options.x_rs6000_ieeequad
4218 && (!TARGET_POPCNTD || !TARGET_VSX))
4219 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4221 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4223 /* Determine if the user can change the default long double type at
4224 compilation time. Only C and C++ support this, and you need GLIBC
4225 2.32 or newer. Only issue one warning. */
4226 static bool warned_change_long_double;
4228 if (!warned_change_long_double
4229 && (!glibc_supports_ieee_128bit ()
4230 || (!lang_GNU_C () && !lang_GNU_CXX ())))
4232 warned_change_long_double = true;
4233 if (TARGET_IEEEQUAD)
4234 warning (OPT_Wpsabi, "Using IEEE extended precision "
4235 "%<long double%>");
4236 else
4237 warning (OPT_Wpsabi, "Using IBM extended precision "
4238 "%<long double%>");
4243 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4244 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4245 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4246 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4247 the keyword as well as the type. */
4248 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4250 /* IEEE 128-bit floating point requires VSX support. */
4251 if (TARGET_FLOAT128_KEYWORD)
4253 if (!TARGET_VSX)
4255 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4256 error ("%qs requires VSX support", "-mfloat128");
4258 TARGET_FLOAT128_TYPE = 0;
4259 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4260 | OPTION_MASK_FLOAT128_HW);
4262 else if (!TARGET_FLOAT128_TYPE)
4264 TARGET_FLOAT128_TYPE = 1;
4265 warning (0, "The %<-mfloat128%> option may not be fully supported");
4269 /* Enable the __float128 keyword under Linux by default. */
4270 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4271 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4272 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4274 /* If we are supporting the float128 type and have full ISA 3.0 support,
4275 enable -mfloat128-hardware by default. However, don't enable the
4276 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4277 because sometimes the compiler wants to put things in an integer
4278 container, and if we don't have __int128 support, it is impossible. */
4279 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4280 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4281 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4282 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4284 if (TARGET_FLOAT128_HW
4285 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4287 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4288 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4290 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4293 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4295 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4296 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4298 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4301 /* Enable -mprefixed by default on power10 systems. */
4302 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4303 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4305 /* -mprefixed requires -mcpu=power10 (or later). */
4306 else if (TARGET_PREFIXED && !TARGET_POWER10)
4308 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4309 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4311 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4314 /* -mpcrel requires prefixed load/store addressing. */
4315 if (TARGET_PCREL && !TARGET_PREFIXED)
4317 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4318 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4320 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
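/* Net effect on power10 (illustrative): -mcpu=power10 implies -mprefixed,
   and -mpcrel requires -mprefixed in turn, so the dependency chain is
   power10 -> prefixed -> pcrel; an explicit -mpcrel -mno-prefixed is
   rejected above.  */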
4323 /* Print the options after updating the defaults. */
4324 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4325 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4327 /* E500mc does "better" if we inline more aggressively. Respect the
4328 user's opinion, though. */
4329 if (rs6000_block_move_inline_limit == 0
4330 && (rs6000_tune == PROCESSOR_PPCE500MC
4331 || rs6000_tune == PROCESSOR_PPCE500MC64
4332 || rs6000_tune == PROCESSOR_PPCE5500
4333 || rs6000_tune == PROCESSOR_PPCE6500))
4334 rs6000_block_move_inline_limit = 128;
4336 /* store_one_arg depends on expand_block_move to handle at least the
4337 size of reg_parm_stack_space. */
4338 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4339 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4341 if (global_init_p)
4343 /* If the appropriate debug option is enabled, replace the target hooks
4344 with debug versions that call the real version and then prints
4345 debugging information. */
4346 if (TARGET_DEBUG_COST)
4348 targetm.rtx_costs = rs6000_debug_rtx_costs;
4349 targetm.address_cost = rs6000_debug_address_cost;
4350 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4353 if (TARGET_DEBUG_ADDR)
4355 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4356 targetm.legitimize_address = rs6000_debug_legitimize_address;
4357 rs6000_secondary_reload_class_ptr
4358 = rs6000_debug_secondary_reload_class;
4359 targetm.secondary_memory_needed
4360 = rs6000_debug_secondary_memory_needed;
4361 targetm.can_change_mode_class
4362 = rs6000_debug_can_change_mode_class;
4363 rs6000_preferred_reload_class_ptr
4364 = rs6000_debug_preferred_reload_class;
4365 rs6000_mode_dependent_address_ptr
4366 = rs6000_debug_mode_dependent_address;
4369 if (rs6000_veclibabi_name)
4371 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4372 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4373 else
4375 error ("unknown vectorization library ABI type (%qs) for "
4376 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4377 ret = false;
4382 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4383 target attribute or pragma which automatically enables both options,
4384 unless the altivec ABI was set. This is set by default for 64-bit, but
4385 not for 32-bit. */
4386 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4388 TARGET_FLOAT128_TYPE = 0;
4389 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4390 | OPTION_MASK_FLOAT128_KEYWORD)
4391 & ~rs6000_isa_flags_explicit);
4394 /* Enable Altivec ABI for AIX -maltivec. */
4395 if (TARGET_XCOFF
4396 && (TARGET_ALTIVEC || TARGET_VSX)
4397 && !global_options_set.x_rs6000_altivec_abi)
4399 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4400 error ("target attribute or pragma changes AltiVec ABI");
4401 else
4402 rs6000_altivec_abi = 1;
4405 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4406 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4407 be explicitly overridden in either case. */
4408 if (TARGET_ELF)
4410 if (!global_options_set.x_rs6000_altivec_abi
4411 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4413 if (main_target_opt != NULL
4414 && !main_target_opt->x_rs6000_altivec_abi)
4415 error ("target attribute or pragma changes AltiVec ABI");
4416 else
4417 rs6000_altivec_abi = 1;
4421 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4422 So far, the only darwin64 targets are also Mach-O. */
4423 if (TARGET_MACHO
4424 && DEFAULT_ABI == ABI_DARWIN
4425 && TARGET_64BIT)
4427 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4428 error ("target attribute or pragma changes darwin64 ABI");
4429 else
4431 rs6000_darwin64_abi = 1;
4432 /* Default to natural alignment, for better performance. */
4433 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4437 /* Place FP constants in the constant pool instead of TOC
4438 if section anchors enabled. */
4439 if (flag_section_anchors
4440 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4441 TARGET_NO_FP_IN_TOC = 1;
4443 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4444 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4446 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4447 SUBTARGET_OVERRIDE_OPTIONS;
4448 #endif
4449 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4450 SUBSUBTARGET_OVERRIDE_OPTIONS;
4451 #endif
4452 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4453 SUB3TARGET_OVERRIDE_OPTIONS;
4454 #endif
4456 /* If the ABI has support for PC-relative relocations, enable it by default.
4457 This test depends on the sub-target tests above setting the code model to
4458 medium for ELF v2 systems. */
4459 if (PCREL_SUPPORTED_BY_OS
4460 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4461 rs6000_isa_flags |= OPTION_MASK_PCREL;
4463 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4464 after the subtarget override options are done. */
4465 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4467 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4468 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4470 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4473 /* Enable -mmma by default on power10 systems. */
4474 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4475 rs6000_isa_flags |= OPTION_MASK_MMA;
4477 if (TARGET_POWER10
4478 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
4479 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4481 if (TARGET_POWER10
4482 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
4483 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
4485 if (TARGET_POWER10
4486 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
4487 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
4489 if (TARGET_POWER10
4490 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LOGADD) == 0)
4491 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LOGADD;
4493 if (TARGET_POWER10
4494 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ADDLOG) == 0)
4495 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ADDLOG;
4497 if (TARGET_POWER10
4498 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
4499 rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
4501 /* Turn off vector pair/mma options on non-power10 systems. */
4502 else if (!TARGET_POWER10 && TARGET_MMA)
4504 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4505 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4507 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4510 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4511 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4513 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4514 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4516 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4517 && rs6000_tune != PROCESSOR_POWER5
4518 && rs6000_tune != PROCESSOR_POWER6
4519 && rs6000_tune != PROCESSOR_POWER7
4520 && rs6000_tune != PROCESSOR_POWER8
4521 && rs6000_tune != PROCESSOR_POWER9
4522 && rs6000_tune != PROCESSOR_POWER10
4523 && rs6000_tune != PROCESSOR_PPCA2
4524 && rs6000_tune != PROCESSOR_CELL
4525 && rs6000_tune != PROCESSOR_PPC476);
4526 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4527 || rs6000_tune == PROCESSOR_POWER5
4528 || rs6000_tune == PROCESSOR_POWER7
4529 || rs6000_tune == PROCESSOR_POWER8);
4530 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4531 || rs6000_tune == PROCESSOR_POWER5
4532 || rs6000_tune == PROCESSOR_POWER6
4533 || rs6000_tune == PROCESSOR_POWER7
4534 || rs6000_tune == PROCESSOR_POWER8
4535 || rs6000_tune == PROCESSOR_POWER9
4536 || rs6000_tune == PROCESSOR_POWER10
4537 || rs6000_tune == PROCESSOR_PPCE500MC
4538 || rs6000_tune == PROCESSOR_PPCE500MC64
4539 || rs6000_tune == PROCESSOR_PPCE5500
4540 || rs6000_tune == PROCESSOR_PPCE6500);
4542 /* Allow debug switches to override the above settings. These are set to -1
4543 in rs6000.opt to indicate the user hasn't directly set the switch. */
4544 if (TARGET_ALWAYS_HINT >= 0)
4545 rs6000_always_hint = TARGET_ALWAYS_HINT;
4547 if (TARGET_SCHED_GROUPS >= 0)
4548 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4550 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4551 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4553 rs6000_sched_restricted_insns_priority
4554 = (rs6000_sched_groups ? 1 : 0);
4556 /* Handle -msched-costly-dep option. */
4557 rs6000_sched_costly_dep
4558 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4560 if (rs6000_sched_costly_dep_str)
4562 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4563 rs6000_sched_costly_dep = no_dep_costly;
4564 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4565 rs6000_sched_costly_dep = all_deps_costly;
4566 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4567 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4568 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4569 rs6000_sched_costly_dep = store_to_load_dep_costly;
4570 else
4571 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4572 atoi (rs6000_sched_costly_dep_str));
4575 /* Handle -minsert-sched-nops option. */
4576 rs6000_sched_insert_nops
4577 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4579 if (rs6000_sched_insert_nops_str)
4581 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4582 rs6000_sched_insert_nops = sched_finish_none;
4583 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4584 rs6000_sched_insert_nops = sched_finish_pad_groups;
4585 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4586 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4587 else
4588 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4589 atoi (rs6000_sched_insert_nops_str));
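/* For example, -minsert-sched-nops=pad pads dispatch groups with nops,
   while a bare number such as -minsert-sched-nops=3 falls through to
   atoi and is used directly as the insertion-scheme value.  */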
4592 /* Handle the stack protector options. */
4593 if (!global_options_set.x_rs6000_stack_protector_guard)
4594 #ifdef TARGET_THREAD_SSP_OFFSET
4595 rs6000_stack_protector_guard = SSP_TLS;
4596 #else
4597 rs6000_stack_protector_guard = SSP_GLOBAL;
4598 #endif
4600 #ifdef TARGET_THREAD_SSP_OFFSET
4601 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4602 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4603 #endif
4605 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4607 char *endp;
4608 const char *str = rs6000_stack_protector_guard_offset_str;
4610 errno = 0;
4611 long offset = strtol (str, &endp, 0);
4612 if (!*str || *endp || errno)
4613 error ("%qs is not a valid number in %qs", str,
4614 "-mstack-protector-guard-offset=");
4616 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4617 || (TARGET_64BIT && (offset & 3)))
4618 error ("%qs is not a valid offset in %qs", str,
4619 "-mstack-protector-guard-offset=");
4621 rs6000_stack_protector_guard_offset = offset;
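/* For example, a hypothetical -mstack-protector-guard-offset=0x7008 is
   accepted, while 0x10000 is rejected: the offset must fit a signed
   16-bit displacement, and in 64-bit mode it must also be a multiple of
   4 to suit the DS-form ld instruction.  */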
4624 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4626 const char *str = rs6000_stack_protector_guard_reg_str;
4627 int reg = decode_reg_name (str);
4629 if (!IN_RANGE (reg, 1, 31))
4630 error ("%qs is not a valid base register in %qs", str,
4631 "-mstack-protector-guard-reg=");
4633 rs6000_stack_protector_guard_reg = reg;
4636 if (rs6000_stack_protector_guard == SSP_TLS
4637 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4638 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4640 if (global_init_p)
4642 #ifdef TARGET_REGNAMES
4643 /* If the user desires alternate register names, copy in the
4644 alternate names now. */
4645 if (TARGET_REGNAMES)
4646 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4647 #endif
4649 /* Set aix_struct_return last, after the ABI is determined.
4650 If -maix-struct-return or -msvr4-struct-return was explicitly
4651 used, don't override with the ABI default. */
4652 if (!global_options_set.x_aix_struct_return)
4653 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4655 #if 0
4656 /* IBM XL compiler defaults to unsigned bitfields. */
4657 if (TARGET_XL_COMPAT)
4658 flag_signed_bitfields = 0;
4659 #endif
4661 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4662 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4664 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4666 /* We can only guarantee the availability of DI pseudo-ops when
4667 assembling for 64-bit targets. */
4668 if (!TARGET_64BIT)
4670 targetm.asm_out.aligned_op.di = NULL;
4671 targetm.asm_out.unaligned_op.di = NULL;
4675 /* Set branch target alignment, if not optimizing for size. */
4676 if (!optimize_size)
4678 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4679 8-byte aligned to avoid misprediction by the branch predictor. */
4680 if (rs6000_tune == PROCESSOR_TITAN
4681 || rs6000_tune == PROCESSOR_CELL)
4683 if (flag_align_functions && !str_align_functions)
4684 str_align_functions = "8";
4685 if (flag_align_jumps && !str_align_jumps)
4686 str_align_jumps = "8";
4687 if (flag_align_loops && !str_align_loops)
4688 str_align_loops = "8";
4690 if (rs6000_align_branch_targets)
4692 if (flag_align_functions && !str_align_functions)
4693 str_align_functions = "16";
4694 if (flag_align_jumps && !str_align_jumps)
4695 str_align_jumps = "16";
4696 if (flag_align_loops && !str_align_loops)
4698 can_override_loop_align = 1;
4699 str_align_loops = "16";
4704 /* Arrange to save and restore machine status around nested functions. */
4705 init_machine_status = rs6000_init_machine_status;
4707 /* We should always be splitting complex arguments, but we can't break
4708 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4709 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4710 targetm.calls.split_complex_arg = NULL;
4712 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4713 if (DEFAULT_ABI == ABI_AIX)
4714 targetm.calls.custom_function_descriptors = 0;
4717 /* Initialize rs6000_cost with the appropriate target costs. */
4718 if (optimize_size)
4719 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4720 else
4721 switch (rs6000_tune)
4723 case PROCESSOR_RS64A:
4724 rs6000_cost = &rs64a_cost;
4725 break;
4727 case PROCESSOR_MPCCORE:
4728 rs6000_cost = &mpccore_cost;
4729 break;
4731 case PROCESSOR_PPC403:
4732 rs6000_cost = &ppc403_cost;
4733 break;
4735 case PROCESSOR_PPC405:
4736 rs6000_cost = &ppc405_cost;
4737 break;
4739 case PROCESSOR_PPC440:
4740 rs6000_cost = &ppc440_cost;
4741 break;
4743 case PROCESSOR_PPC476:
4744 rs6000_cost = &ppc476_cost;
4745 break;
4747 case PROCESSOR_PPC601:
4748 rs6000_cost = &ppc601_cost;
4749 break;
4751 case PROCESSOR_PPC603:
4752 rs6000_cost = &ppc603_cost;
4753 break;
4755 case PROCESSOR_PPC604:
4756 rs6000_cost = &ppc604_cost;
4757 break;
4759 case PROCESSOR_PPC604e:
4760 rs6000_cost = &ppc604e_cost;
4761 break;
4763 case PROCESSOR_PPC620:
4764 rs6000_cost = &ppc620_cost;
4765 break;
4767 case PROCESSOR_PPC630:
4768 rs6000_cost = &ppc630_cost;
4769 break;
4771 case PROCESSOR_CELL:
4772 rs6000_cost = &ppccell_cost;
4773 break;
4775 case PROCESSOR_PPC750:
4776 case PROCESSOR_PPC7400:
4777 rs6000_cost = &ppc750_cost;
4778 break;
4780 case PROCESSOR_PPC7450:
4781 rs6000_cost = &ppc7450_cost;
4782 break;
4784 case PROCESSOR_PPC8540:
4785 case PROCESSOR_PPC8548:
4786 rs6000_cost = &ppc8540_cost;
4787 break;
4789 case PROCESSOR_PPCE300C2:
4790 case PROCESSOR_PPCE300C3:
4791 rs6000_cost = &ppce300c2c3_cost;
4792 break;
4794 case PROCESSOR_PPCE500MC:
4795 rs6000_cost = &ppce500mc_cost;
4796 break;
4798 case PROCESSOR_PPCE500MC64:
4799 rs6000_cost = &ppce500mc64_cost;
4800 break;
4802 case PROCESSOR_PPCE5500:
4803 rs6000_cost = &ppce5500_cost;
4804 break;
4806 case PROCESSOR_PPCE6500:
4807 rs6000_cost = &ppce6500_cost;
4808 break;
4810 case PROCESSOR_TITAN:
4811 rs6000_cost = &titan_cost;
4812 break;
4814 case PROCESSOR_POWER4:
4815 case PROCESSOR_POWER5:
4816 rs6000_cost = &power4_cost;
4817 break;
4819 case PROCESSOR_POWER6:
4820 rs6000_cost = &power6_cost;
4821 break;
4823 case PROCESSOR_POWER7:
4824 rs6000_cost = &power7_cost;
4825 break;
4827 case PROCESSOR_POWER8:
4828 rs6000_cost = &power8_cost;
4829 break;
4831 case PROCESSOR_POWER9:
4832 rs6000_cost = &power9_cost;
4833 break;
4835 case PROCESSOR_POWER10:
4836 rs6000_cost = &power10_cost;
4837 break;
4839 case PROCESSOR_PPCA2:
4840 rs6000_cost = &ppca2_cost;
4841 break;
4843 default:
4844 gcc_unreachable ();
4847 if (global_init_p)
4849 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4850 param_simultaneous_prefetches,
4851 rs6000_cost->simultaneous_prefetches);
4852 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4853 param_l1_cache_size,
4854 rs6000_cost->l1_cache_size);
4855 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4856 param_l1_cache_line_size,
4857 rs6000_cost->cache_line_size);
4858 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4859 param_l2_cache_size,
4860 rs6000_cost->l2_cache_size);
4862 /* Increase loop peeling limits based on performance analysis. */
4863 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4864 param_max_peeled_insns, 400);
4865 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4866 param_max_completely_peeled_insns, 400);
4868 /* The lxvl/stxvl instructions don't perform well before Power10. */
4869 if (TARGET_POWER10)
4870 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4871 param_vect_partial_vector_usage, 1);
4872 else
4873 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4874 param_vect_partial_vector_usage, 0);
4876 /* Use the 'model' -fsched-pressure algorithm by default. */
4877 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4878 param_sched_pressure_algorithm,
4879 SCHED_PRESSURE_MODEL);
4881 /* If using typedef char *va_list, signal that
4882 __builtin_va_start (&ap, 0) can be optimized to
4883 ap = __builtin_next_arg (0). */
4884 if (DEFAULT_ABI != ABI_V4)
4885 targetm.expand_builtin_va_start = NULL;
4888 rs6000_override_options_after_change ();
4890 /* If not explicitly specified via option, decide whether to generate indexed
4891 load/store instructions. A value of -1 indicates that the
4892 initial value of this variable has not been overwritten. During
4893 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4894 if (TARGET_AVOID_XFORM == -1)
4895 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4896 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4897 need indexed accesses and the type used is the scalar type of the element
4898 being loaded or stored. */
4899 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4900 && !TARGET_ALTIVEC);
4902 /* Set the -mrecip options. */
4903 if (rs6000_recip_name)
4905 char *p = ASTRDUP (rs6000_recip_name);
4906 char *q;
4907 unsigned int mask, i;
4908 bool invert;
4910 while ((q = strtok (p, ",")) != NULL)
4912 p = NULL;
4913 if (*q == '!')
4915 invert = true;
4916 q++;
4918 else
4919 invert = false;
4921 if (!strcmp (q, "default"))
4922 mask = ((TARGET_RECIP_PRECISION)
4923 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4924 else
4926 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4927 if (!strcmp (q, recip_options[i].string))
4929 mask = recip_options[i].mask;
4930 break;
4933 if (i == ARRAY_SIZE (recip_options))
4935 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4936 invert = false;
4937 mask = 0;
4938 ret = false;
4942 if (invert)
4943 rs6000_recip_control &= ~mask;
4944 else
4945 rs6000_recip_control |= mask;
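/* For example, -mrecip=rsqrtf,!divd turns on the single-precision
   reciprocal square root estimate and turns off the double-precision
   divide estimate, while "default" expands to the low- or high-precision
   preset depending on -mrecip-precision.  */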
4949 /* Set the builtin mask of the various options used that could affect which
4950 builtins were used. In the past we used target_flags, but we've run out
4951 of bits, and some options are no longer in target_flags. */
4952 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4953 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4954 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4955 rs6000_builtin_mask);
4957 /* Initialize all of the registers. */
4958 rs6000_init_hard_regno_mode_ok (global_init_p);
4960 /* Save the initial options in case the user uses function-specific options. */
4961 if (global_init_p)
4962 target_option_default_node = target_option_current_node
4963 = build_target_option_node (&global_options, &global_options_set);
4965 /* If not explicitly specified via option, decide whether to generate the
4966 extra blrs required to preserve the link stack on some CPUs (e.g., 476). */
4967 if (TARGET_LINK_STACK == -1)
4968 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4970 /* Deprecate use of -mno-speculate-indirect-jumps. */
4971 if (!rs6000_speculate_indirect_jumps)
4972 warning (0, "%qs is deprecated and not recommended in any circumstances",
4973 "-mno-speculate-indirect-jumps");
4975 return ret;
4978 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4979 define the target cpu type. */
4981 static void
4982 rs6000_option_override (void)
4984 (void) rs6000_option_override_internal (true);
4988 /* Implement targetm.vectorize.builtin_mask_for_load. */
4989 static tree
4990 rs6000_builtin_mask_for_load (void)
4992 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4993 if ((TARGET_ALTIVEC && !TARGET_VSX)
4994 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4995 return altivec_builtin_mask_for_load;
4996 else
4997 return 0;
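/* On VMX-only machines the vectorizer realigns a misaligned load roughly
   as follows (a sketch, with hypothetical operand names):

     mask = lvsl (addr, 0);       // permute control from low address bits
     lo   = lvx (addr, 0);        // aligned load at or below addr
     hi   = lvx (addr, 15);       // aligned load above addr
     val  = vperm (lo, hi, mask); // splice the two aligned halves

   so the mask builtin is only offered where this beats a native
   unaligned access.  */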
5000 /* Implement LOOP_ALIGN. */
5001 align_flags
5002 rs6000_loop_align (rtx label)
5004 basic_block bb;
5005 int ninsns;
5007 /* Don't override loop alignment if -falign-loops was specified. */
5008 if (!can_override_loop_align)
5009 return align_loops;
5011 bb = BLOCK_FOR_INSN (label);
5012 ninsns = num_loop_insns(bb->loop_father);
5014 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5015 if (ninsns > 4 && ninsns <= 8
5016 && (rs6000_tune == PROCESSOR_POWER4
5017 || rs6000_tune == PROCESSOR_POWER5
5018 || rs6000_tune == PROCESSOR_POWER6
5019 || rs6000_tune == PROCESSOR_POWER7
5020 || rs6000_tune == PROCESSOR_POWER8))
5021 return align_flags (5);
5022 else
5023 return align_loops;
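/* align_flags (5) requests 2**5 = 32-byte alignment, so a loop of 5 to 8
   insns on the processors above starts on an icache-sector boundary;
   smaller or larger loops keep the -falign-loops default.  */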
5026 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5027 after applying N iterations. This routine does not determine
5028 how many iterations are required to reach the desired alignment. */
5030 static bool
5031 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5033 if (is_packed)
5034 return false;
5036 if (TARGET_32BIT)
5038 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5039 return true;
5041 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5042 return true;
5044 return false;
5046 else
5048 if (TARGET_MACHO)
5049 return false;
5051 /* Assume that all other types are naturally aligned. CHECKME! */
5052 return true;
5056 /* Return true if the vector misalignment factor is supported by the
5057 target. */
5058 static bool
5059 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5060 const_tree type,
5061 int misalignment,
5062 bool is_packed)
5064 if (TARGET_VSX)
5066 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5067 return true;
5069 /* Return false if the movmisalign pattern is not supported for this mode. */
5070 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5071 return false;
5073 if (misalignment == -1)
5075 /* Misalignment factor is unknown at compile time but we know
5076 it's word aligned. */
5077 if (rs6000_vector_alignment_reachable (type, is_packed))
5079 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5081 if (element_size == 64 || element_size == 32)
5082 return true;
5085 return false;
5088 /* VSX supports word-aligned vectors. */
5089 if (misalignment % 4 == 0)
5090 return true;
5092 return false;
5095 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5096 static int
5097 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5098 tree vectype, int misalign)
5100 unsigned elements;
5101 tree elem_type;
5103 switch (type_of_cost)
5105 case scalar_stmt:
5106 case scalar_store:
5107 case vector_stmt:
5108 case vector_store:
5109 case vec_to_scalar:
5110 case scalar_to_vec:
5111 case cond_branch_not_taken:
5112 return 1;
5113 case scalar_load:
5114 case vector_load:
5115 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5116 return 2;
5118 case vec_perm:
5119 /* Power7 has only one permute unit, make it a bit expensive. */
5120 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5121 return 3;
5122 else
5123 return 1;
5125 case vec_promote_demote:
5126 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5127 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5128 return 4;
5129 else
5130 return 1;
5132 case cond_branch_taken:
5133 return 3;
5135 case unaligned_load:
5136 case vector_gather_load:
5137 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5138 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5139 return 2;
5141 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5143 elements = TYPE_VECTOR_SUBPARTS (vectype);
5144 if (elements == 2)
5145 /* Double word aligned. */
5146 return 4;
5148 if (elements == 4)
5150 switch (misalign)
5152 case 8:
5153 /* Double word aligned. */
5154 return 4;
5156 case -1:
5157 /* Unknown misalignment. */
5158 case 4:
5159 case 12:
5160 /* Word aligned. */
5161 return 33;
5163 default:
5164 gcc_unreachable ();
5169 if (TARGET_ALTIVEC)
5170 /* Misaligned loads are not supported. */
5171 gcc_unreachable ();
5173 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5174 return 4;
5176 case unaligned_store:
5177 case vector_scatter_store:
5178 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5179 return 1;
5181 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5183 elements = TYPE_VECTOR_SUBPARTS (vectype);
5184 if (elements == 2)
5185 /* Double word aligned. */
5186 return 2;
5188 if (elements == 4)
5190 switch (misalign)
5192 case 8:
5193 /* Double word aligned. */
5194 return 2;
5196 case -1:
5197 /* Unknown misalignment. */
5198 case 4:
5199 case 12:
5200 /* Word aligned. */
5201 return 23;
5203 default:
5204 gcc_unreachable ();
5209 if (TARGET_ALTIVEC)
5210 /* Misaligned stores are not supported. */
5211 gcc_unreachable ();
5213 return 2;
5215 case vec_construct:
5216 /* This is a rough approximation assuming non-constant elements
5217 constructed into a vector via element insertion. FIXME:
5218 vec_construct is not granular enough for uniformly good
5219 decisions. If the initialization is a splat, this is
5220 cheaper than we estimate. Improve this someday. */
5221 elem_type = TREE_TYPE (vectype);
5222 /* 32-bit vectors loaded into registers are stored as double
5223 precision, so we need 2 permutes, 2 converts, and 1 merge
5224 to construct a vector of short floats from them. */
5225 if (SCALAR_FLOAT_TYPE_P (elem_type)
5226 && TYPE_PRECISION (elem_type) == 32)
5227 return 5;
5228 /* On POWER9, integer vector types are built up in GPRs and then
5229 use a direct move (2 cycles). For POWER8 this is even worse,
5230 as we need two direct moves and a merge, and the direct moves
5231 are five cycles. */
5232 else if (INTEGRAL_TYPE_P (elem_type))
5234 if (TARGET_P9_VECTOR)
5235 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5236 else
5237 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5239 else
5240 /* V2DFmode doesn't need a direct move. */
5241 return 2;
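/* For example, a V4SImode vec_construct is costed 4 - 1 + 2 = 5 on
   POWER9 and 4 - 1 + 5 = 8 on earlier VSX cpus, per the formulas above. */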
5243 default:
5244 gcc_unreachable ();
5248 /* Implement targetm.vectorize.preferred_simd_mode. */
5250 static machine_mode
5251 rs6000_preferred_simd_mode (scalar_mode mode)
5253 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5255 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5256 return vmode.require ();
5258 return word_mode;
5261 typedef struct _rs6000_cost_data
5263 struct loop *loop_info;
5264 unsigned cost[3];
5265 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5266 instruction is needed by the vectorization. */
5267 bool vect_nonmem;
5268 /* Indicates this is costing for the scalar version of a loop or block. */
5269 bool costing_for_scalar;
5270 } rs6000_cost_data;
5272 /* Test for likely overcommitment of vector hardware resources. If a
5273 loop iteration is relatively large, and too large a percentage of
5274 instructions in the loop are vectorized, the cost model may not
5275 adequately reflect delays from unavailable vector resources.
5276 Penalize the loop body cost for this case. */
5278 static void
5279 rs6000_density_test (rs6000_cost_data *data)
5281 const int DENSITY_PCT_THRESHOLD = 85;
5282 const int DENSITY_SIZE_THRESHOLD = 70;
5283 const int DENSITY_PENALTY = 10;
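/* As an illustration: a loop body with vec_cost 90 and not_vec_cost 10 has
   density 90% and size 100, exceeding both thresholds, so its body cost is
   penalized by 10% (90 becomes 99). */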
5284 struct loop *loop = data->loop_info;
5285 basic_block *bbs = get_loop_body (loop);
5286 int nbbs = loop->num_nodes;
5287 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5288 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5289 int i, density_pct;
5291 /* This density test only cares about the cost of the vector version of the
5292 loop, so immediately return if we are passed costing for the scalar
5293 version (namely computing the single scalar iteration cost). */
5294 if (data->costing_for_scalar)
5295 return;
5297 for (i = 0; i < nbbs; i++)
5299 basic_block bb = bbs[i];
5300 gimple_stmt_iterator gsi;
5302 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5304 gimple *stmt = gsi_stmt (gsi);
5305 if (is_gimple_debug (stmt))
5306 continue;
5308 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5310 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5311 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5312 not_vec_cost++;
5316 free (bbs);
5317 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5319 if (density_pct > DENSITY_PCT_THRESHOLD
5320 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5322 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5323 if (dump_enabled_p ())
5324 dump_printf_loc (MSG_NOTE, vect_location,
5325 "density %d%%, cost %d exceeds threshold, penalizing "
5326 "loop body cost by %d%%", density_pct,
5327 vec_cost + not_vec_cost, DENSITY_PENALTY);
5331 /* Implement targetm.vectorize.init_cost. */
5333 static void *
5334 rs6000_init_cost (struct loop *loop_info, bool costing_for_scalar)
5336 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5337 data->loop_info = loop_info;
5338 data->cost[vect_prologue] = 0;
5339 data->cost[vect_body] = 0;
5340 data->cost[vect_epilogue] = 0;
5341 data->vect_nonmem = false;
5342 data->costing_for_scalar = costing_for_scalar;
5343 return data;
5346 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5347 For some statements, we would like to further fine-tune the cost on top
5348 of the rs6000_builtin_vectorization_cost handling, which doesn't have any
5349 information on statement operation codes etc. One typical case here is
5350 COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
5351 for the scalar cost, but it should be priced higher since it is transformed
5352 into either compare + branch or compare + isel instructions. */
5354 static unsigned
5355 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5356 struct _stmt_vec_info *stmt_info)
5358 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5359 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5361 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5362 if (subcode == COND_EXPR)
5363 return 2;
5366 return 0;
5369 /* Implement targetm.vectorize.add_stmt_cost. */
5371 static unsigned
5372 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5373 enum vect_cost_for_stmt kind,
5374 struct _stmt_vec_info *stmt_info, tree vectype,
5375 int misalign, enum vect_cost_model_location where)
5377 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5378 unsigned retval = 0;
5380 if (flag_vect_cost_model)
5382 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5383 misalign);
5384 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5385 /* Statements in an inner loop relative to the loop being
5386 vectorized are weighted more heavily. The value here is
5387 arbitrary and could potentially be improved with analysis. */
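/* Illustratively, with the default vect-inner-loop-cost-factor (50 at the
   time of writing), a statement of cost 2 inside the inner loop contributes
   2 * 50 = 100 to the body cost. */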
5388 if (where == vect_body && stmt_info
5389 && stmt_in_inner_loop_p (vinfo, stmt_info))
5391 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
5392 gcc_assert (loop_vinfo);
5393 count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */
5396 retval = (unsigned) (count * stmt_cost);
5397 cost_data->cost[where] += retval;
5399 /* Check whether we're doing something other than just a copy loop.
5400 Not all such loops may be profitably vectorized; see
5401 rs6000_finish_cost. */
5402 if ((kind == vec_to_scalar || kind == vec_perm
5403 || kind == vec_promote_demote || kind == vec_construct
5404 || kind == scalar_to_vec)
5405 || (where == vect_body && kind == vector_stmt))
5406 cost_data->vect_nonmem = true;
5409 return retval;
5412 /* For target-specific vectorization costs which can't be handled per stmt,
5413 check the requisite conditions and adjust the vectorization cost
5414 accordingly if they are satisfied. One typical example is to model the
5415 shift cost for vectors with length by counting the number of required
5416 lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds. */
5418 static void
5419 rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
5421 struct loop *loop = data->loop_info;
5422 gcc_assert (loop);
5423 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
5425 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5427 rgroup_controls *rgc;
5428 unsigned int num_vectors_m1;
5429 unsigned int shift_cnt = 0;
5430 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5431 if (rgc->type)
5432 /* Each length needs one shift to fill into bits 0-7. */
5433 shift_cnt += num_vectors_m1 + 1;
5435 rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
5436 NULL, NULL_TREE, 0, vect_body);
5440 /* Implement targetm.vectorize.finish_cost. */
5442 static void
5443 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5444 unsigned *body_cost, unsigned *epilogue_cost)
5446 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5448 if (cost_data->loop_info)
5450 rs6000_adjust_vect_cost_per_loop (cost_data);
5451 rs6000_density_test (cost_data);
5454 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5455 that require versioning for any reason. The vectorization is at
5456 best a wash inside the loop, and the versioning checks make
5457 profitability highly unlikely and potentially quite harmful. */
5458 if (cost_data->loop_info)
5460 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5461 if (!cost_data->vect_nonmem
5462 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5463 && LOOP_REQUIRES_VERSIONING (vec_info))
5464 cost_data->cost[vect_body] += 10000;
5467 *prologue_cost = cost_data->cost[vect_prologue];
5468 *body_cost = cost_data->cost[vect_body];
5469 *epilogue_cost = cost_data->cost[vect_epilogue];
5472 /* Implement targetm.vectorize.destroy_cost_data. */
5474 static void
5475 rs6000_destroy_cost_data (void *data)
5477 free (data);
5480 /* Implement targetm.loop_unroll_adjust. */
5482 static unsigned
5483 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5485 if (unroll_only_small_loops)
5487 /* TODO: These are hardcoded values right now. We probably should use
5488 a PARAM here. */
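/* Concretely: a loop of at most 6 insns may be unrolled up to 4 times, one
   of at most 10 insns up to 2 times, and larger loops not at all. */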
5489 if (loop->ninsns <= 6)
5490 return MIN (4, nunroll);
5491 if (loop->ninsns <= 10)
5492 return MIN (2, nunroll);
5494 return 0;
5497 return nunroll;
5500 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5501 library with vectorized intrinsics. */
5503 static tree
5504 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5505 tree type_in)
5507 char name[32];
5508 const char *suffix = NULL;
5509 tree fntype, new_fndecl, bdecl = NULL_TREE;
5510 int n_args = 1;
5511 const char *bname;
5512 machine_mode el_mode, in_mode;
5513 int n, in_n;
5515 /* Libmass is suitable for unsafe math only, as it does not correctly support
5516 parts of IEEE with the required precision such as denormals. Only support
5517 it if we have VSX to use the simd d2 or f4 functions.
5518 XXX: Add variable length support. */
5519 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5520 return NULL_TREE;
5522 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5523 n = TYPE_VECTOR_SUBPARTS (type_out);
5524 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5525 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5526 if (el_mode != in_mode
5527 || n != in_n)
5528 return NULL_TREE;
5530 switch (fn)
5532 CASE_CFN_ATAN2:
5533 CASE_CFN_HYPOT:
5534 CASE_CFN_POW:
5535 n_args = 2;
5536 gcc_fallthrough ();
5538 CASE_CFN_ACOS:
5539 CASE_CFN_ACOSH:
5540 CASE_CFN_ASIN:
5541 CASE_CFN_ASINH:
5542 CASE_CFN_ATAN:
5543 CASE_CFN_ATANH:
5544 CASE_CFN_CBRT:
5545 CASE_CFN_COS:
5546 CASE_CFN_COSH:
5547 CASE_CFN_ERF:
5548 CASE_CFN_ERFC:
5549 CASE_CFN_EXP2:
5550 CASE_CFN_EXP:
5551 CASE_CFN_EXPM1:
5552 CASE_CFN_LGAMMA:
5553 CASE_CFN_LOG10:
5554 CASE_CFN_LOG1P:
5555 CASE_CFN_LOG2:
5556 CASE_CFN_LOG:
5557 CASE_CFN_SIN:
5558 CASE_CFN_SINH:
5559 CASE_CFN_SQRT:
5560 CASE_CFN_TAN:
5561 CASE_CFN_TANH:
5562 if (el_mode == DFmode && n == 2)
5564 bdecl = mathfn_built_in (double_type_node, fn);
5565 suffix = "d2"; /* pow -> powd2 */
5567 else if (el_mode == SFmode && n == 4)
5569 bdecl = mathfn_built_in (float_type_node, fn);
5570 suffix = "4"; /* powf -> powf4 */
5572 else
5573 return NULL_TREE;
5574 if (!bdecl)
5575 return NULL_TREE;
5576 break;
5578 default:
5579 return NULL_TREE;
5582 gcc_assert (suffix != NULL);
5583 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5584 if (!bname)
5585 return NULL_TREE;
5587 strcpy (name, bname + strlen ("__builtin_"));
5588 strcat (name, suffix);
5590 if (n_args == 1)
5591 fntype = build_function_type_list (type_out, type_in, NULL);
5592 else if (n_args == 2)
5593 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5594 else
5595 gcc_unreachable ();
5597 /* Build a function declaration for the vectorized function. */
5598 new_fndecl = build_decl (BUILTINS_LOCATION,
5599 FUNCTION_DECL, get_identifier (name), fntype);
5600 TREE_PUBLIC (new_fndecl) = 1;
5601 DECL_EXTERNAL (new_fndecl) = 1;
5602 DECL_IS_NOVOPS (new_fndecl) = 1;
5603 TREE_READONLY (new_fndecl) = 1;
5605 return new_fndecl;
5608 /* Returns a function decl for a vectorized version of the builtin function
5609 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5610 if it is not available. */
5612 static tree
5613 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5614 tree type_in)
5616 machine_mode in_mode, out_mode;
5617 int in_n, out_n;
5619 if (TARGET_DEBUG_BUILTIN)
5620 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5621 combined_fn_name (combined_fn (fn)),
5622 GET_MODE_NAME (TYPE_MODE (type_out)),
5623 GET_MODE_NAME (TYPE_MODE (type_in)));
5625 if (TREE_CODE (type_out) != VECTOR_TYPE
5626 || TREE_CODE (type_in) != VECTOR_TYPE)
5627 return NULL_TREE;
5629 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5630 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5631 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5632 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5634 switch (fn)
5636 CASE_CFN_COPYSIGN:
5637 if (VECTOR_UNIT_VSX_P (V2DFmode)
5638 && out_mode == DFmode && out_n == 2
5639 && in_mode == DFmode && in_n == 2)
5640 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5641 if (VECTOR_UNIT_VSX_P (V4SFmode)
5642 && out_mode == SFmode && out_n == 4
5643 && in_mode == SFmode && in_n == 4)
5644 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5645 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5646 && out_mode == SFmode && out_n == 4
5647 && in_mode == SFmode && in_n == 4)
5648 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5649 break;
5650 CASE_CFN_CEIL:
5651 if (VECTOR_UNIT_VSX_P (V2DFmode)
5652 && out_mode == DFmode && out_n == 2
5653 && in_mode == DFmode && in_n == 2)
5654 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5655 if (VECTOR_UNIT_VSX_P (V4SFmode)
5656 && out_mode == SFmode && out_n == 4
5657 && in_mode == SFmode && in_n == 4)
5658 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5659 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5660 && out_mode == SFmode && out_n == 4
5661 && in_mode == SFmode && in_n == 4)
5662 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5663 break;
5664 CASE_CFN_FLOOR:
5665 if (VECTOR_UNIT_VSX_P (V2DFmode)
5666 && out_mode == DFmode && out_n == 2
5667 && in_mode == DFmode && in_n == 2)
5668 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5669 if (VECTOR_UNIT_VSX_P (V4SFmode)
5670 && out_mode == SFmode && out_n == 4
5671 && in_mode == SFmode && in_n == 4)
5672 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5673 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5674 && out_mode == SFmode && out_n == 4
5675 && in_mode == SFmode && in_n == 4)
5676 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5677 break;
5678 CASE_CFN_FMA:
5679 if (VECTOR_UNIT_VSX_P (V2DFmode)
5680 && out_mode == DFmode && out_n == 2
5681 && in_mode == DFmode && in_n == 2)
5682 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5683 if (VECTOR_UNIT_VSX_P (V4SFmode)
5684 && out_mode == SFmode && out_n == 4
5685 && in_mode == SFmode && in_n == 4)
5686 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5687 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5688 && out_mode == SFmode && out_n == 4
5689 && in_mode == SFmode && in_n == 4)
5690 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5691 break;
5692 CASE_CFN_TRUNC:
5693 if (VECTOR_UNIT_VSX_P (V2DFmode)
5694 && out_mode == DFmode && out_n == 2
5695 && in_mode == DFmode && in_n == 2)
5696 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5697 if (VECTOR_UNIT_VSX_P (V4SFmode)
5698 && out_mode == SFmode && out_n == 4
5699 && in_mode == SFmode && in_n == 4)
5700 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5701 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5702 && out_mode == SFmode && out_n == 4
5703 && in_mode == SFmode && in_n == 4)
5704 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5705 break;
5706 CASE_CFN_NEARBYINT:
5707 if (VECTOR_UNIT_VSX_P (V2DFmode)
5708 && flag_unsafe_math_optimizations
5709 && out_mode == DFmode && out_n == 2
5710 && in_mode == DFmode && in_n == 2)
5711 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5712 if (VECTOR_UNIT_VSX_P (V4SFmode)
5713 && flag_unsafe_math_optimizations
5714 && out_mode == SFmode && out_n == 4
5715 && in_mode == SFmode && in_n == 4)
5716 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5717 break;
5718 CASE_CFN_RINT:
5719 if (VECTOR_UNIT_VSX_P (V2DFmode)
5720 && !flag_trapping_math
5721 && out_mode == DFmode && out_n == 2
5722 && in_mode == DFmode && in_n == 2)
5723 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5724 if (VECTOR_UNIT_VSX_P (V4SFmode)
5725 && !flag_trapping_math
5726 && out_mode == SFmode && out_n == 4
5727 && in_mode == SFmode && in_n == 4)
5728 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5729 break;
5730 default:
5731 break;
5734 /* Generate calls to libmass if appropriate. */
5735 if (rs6000_veclib_handler)
5736 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5738 return NULL_TREE;
5741 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5743 static tree
5744 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5745 tree type_in)
5747 machine_mode in_mode, out_mode;
5748 int in_n, out_n;
5750 if (TARGET_DEBUG_BUILTIN)
5751 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5752 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5753 GET_MODE_NAME (TYPE_MODE (type_out)),
5754 GET_MODE_NAME (TYPE_MODE (type_in)));
5756 if (TREE_CODE (type_out) != VECTOR_TYPE
5757 || TREE_CODE (type_in) != VECTOR_TYPE)
5758 return NULL_TREE;
5760 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5761 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5762 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5763 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5765 enum rs6000_builtins fn
5766 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5767 switch (fn)
5769 case RS6000_BUILTIN_RSQRTF:
5770 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5771 && out_mode == SFmode && out_n == 4
5772 && in_mode == SFmode && in_n == 4)
5773 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5774 break;
5775 case RS6000_BUILTIN_RSQRT:
5776 if (VECTOR_UNIT_VSX_P (V2DFmode)
5777 && out_mode == DFmode && out_n == 2
5778 && in_mode == DFmode && in_n == 2)
5779 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5780 break;
5781 case RS6000_BUILTIN_RECIPF:
5782 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5783 && out_mode == SFmode && out_n == 4
5784 && in_mode == SFmode && in_n == 4)
5785 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5786 break;
5787 case RS6000_BUILTIN_RECIP:
5788 if (VECTOR_UNIT_VSX_P (V2DFmode)
5789 && out_mode == DFmode && out_n == 2
5790 && in_mode == DFmode && in_n == 2)
5791 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5792 break;
5793 default:
5794 break;
5796 return NULL_TREE;
5799 /* Default CPU string for rs6000*_file_start functions. */
5800 static const char *rs6000_default_cpu;
5802 #ifdef USING_ELFOS_H
5803 const char *rs6000_machine;
5805 const char *
5806 rs6000_machine_from_flags (void)
5808 HOST_WIDE_INT flags = rs6000_isa_flags;
5810 /* Disable the flags that should never influence the .machine selection. */
5811 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
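/* Each test below checks for a flag that the given ISA level added over the
   previous one, so the newest ISA whose distinguishing flags are enabled
   determines the .machine string. */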
5813 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5814 return "power10";
5815 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5816 return "power9";
5817 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5818 return "power8";
5819 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5820 return "power7";
5821 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5822 return "power6";
5823 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5824 return "power5";
5825 if ((flags & ISA_2_1_MASKS) != 0)
5826 return "power4";
5827 if ((flags & OPTION_MASK_POWERPC64) != 0)
5828 return "ppc64";
5829 return "ppc";
5832 void
5833 emit_asm_machine (void)
5835 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5837 #endif
5839 /* Do anything needed at the start of the asm file. */
5841 static void
5842 rs6000_file_start (void)
5844 char buffer[80];
5845 const char *start = buffer;
5846 FILE *file = asm_out_file;
5848 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5850 default_file_start ();
5852 if (flag_verbose_asm)
5854 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5856 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5858 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5859 start = "";
5862 if (global_options_set.x_rs6000_cpu_index)
5864 fprintf (file, "%s -mcpu=%s", start,
5865 processor_target_table[rs6000_cpu_index].name);
5866 start = "";
5869 if (global_options_set.x_rs6000_tune_index)
5871 fprintf (file, "%s -mtune=%s", start,
5872 processor_target_table[rs6000_tune_index].name);
5873 start = "";
5876 if (PPC405_ERRATUM77)
5878 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5879 start = "";
5882 #ifdef USING_ELFOS_H
5883 switch (rs6000_sdata)
5885 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5886 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5887 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5888 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5891 if (rs6000_sdata && g_switch_value)
5893 fprintf (file, "%s -G %d", start,
5894 g_switch_value);
5895 start = "";
5897 #endif
5899 if (*start == '\0')
5900 putc ('\n', file);
5903 #ifdef USING_ELFOS_H
5904 rs6000_machine = rs6000_machine_from_flags ();
5905 emit_asm_machine ();
5906 #endif
5908 if (DEFAULT_ABI == ABI_ELFv2)
5909 fprintf (file, "\t.abiversion 2\n");
5913 /* Return nonzero if this function is known to have a null epilogue. */
5915 int
5916 direct_return (void)
5918 if (reload_completed)
5920 rs6000_stack_t *info = rs6000_stack_info ();
5922 if (info->first_gp_reg_save == 32
5923 && info->first_fp_reg_save == 64
5924 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5925 && ! info->lr_save_p
5926 && ! info->cr_save_p
5927 && info->vrsave_size == 0
5928 && ! info->push_p)
5929 return 1;
5932 return 0;
5935 /* Helper for num_insns_constant. Calculate number of instructions to
5936 load VALUE to a single gpr using combinations of addi, addis, ori,
5937 oris, sldi and rldimi instructions. */
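/* For example, under TARGET_POWERPC64 the constant 0x123456789abcdef0 costs
   5 instructions: 2 for each 32-bit half (addis/ori style) plus 1 rldimi to
   combine them, matching the recursion below. */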
5939 static int
5940 num_insns_constant_gpr (HOST_WIDE_INT value)
5942 /* signed constant loadable with addi */
5943 if (SIGNED_INTEGER_16BIT_P (value))
5944 return 1;
5946 /* constant loadable with addis */
5947 else if ((value & 0xffff) == 0
5948 && (value >> 31 == -1 || value >> 31 == 0))
5949 return 1;
5951 /* PADDI can support up to 34-bit signed integers. */
5952 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5953 return 1;
5955 else if (TARGET_POWERPC64)
5957 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5958 HOST_WIDE_INT high = value >> 31;
5960 if (high == 0 || high == -1)
5961 return 2;
5963 high >>= 1;
5965 if (low == 0 || low == high)
5966 return num_insns_constant_gpr (high) + 1;
5967 else if (high == 0)
5968 return num_insns_constant_gpr (low) + 1;
5969 else
5970 return (num_insns_constant_gpr (high)
5971 + num_insns_constant_gpr (low) + 1);
5974 else
5975 return 2;
5978 /* Helper for num_insns_constant. Allow constants formed by the
5979 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5980 and handle modes that require multiple gprs. */
5982 static int
5983 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5985 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5986 int total = 0;
5987 while (nregs-- > 0)
5989 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5990 int insns = num_insns_constant_gpr (low);
5991 if (insns > 2
5992 /* We won't get more than 2 from num_insns_constant_gpr
5993 except when TARGET_POWERPC64 and mode is DImode or
5994 wider, so the register mode must be DImode. */
5995 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5996 insns = 2;
5997 total += insns;
5998 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5999 it all at once would be UB. */
6000 value >>= (BITS_PER_WORD - 1);
6001 value >>= 1;
6003 return total;
6006 /* Return the number of instructions it takes to form a constant in as
6007 many gprs as are needed for MODE. */
6009 int
6010 num_insns_constant (rtx op, machine_mode mode)
6012 HOST_WIDE_INT val;
6014 switch (GET_CODE (op))
6016 case CONST_INT:
6017 val = INTVAL (op);
6018 break;
6020 case CONST_WIDE_INT:
6022 int insns = 0;
6023 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6024 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6025 DImode);
6026 return insns;
6029 case CONST_DOUBLE:
6031 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6033 if (mode == SFmode || mode == SDmode)
6035 long l;
6037 if (mode == SDmode)
6038 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6039 else
6040 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6041 /* See the first define_split in rs6000.md handling a
6042 const_double_operand. */
6043 val = l;
6044 mode = SImode;
6046 else if (mode == DFmode || mode == DDmode)
6048 long l[2];
6050 if (mode == DDmode)
6051 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6052 else
6053 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6055 /* See the second (32-bit) and third (64-bit) define_split
6056 in rs6000.md handling a const_double_operand. */
6057 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6058 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6059 mode = DImode;
6061 else if (mode == TFmode || mode == TDmode
6062 || mode == KFmode || mode == IFmode)
6064 long l[4];
6065 int insns;
6067 if (mode == TDmode)
6068 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6069 else
6070 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6072 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6073 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6074 insns = num_insns_constant_multi (val, DImode);
6075 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6076 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6077 insns += num_insns_constant_multi (val, DImode);
6078 return insns;
6080 else
6081 gcc_unreachable ();
6083 break;
6085 default:
6086 gcc_unreachable ();
6089 return num_insns_constant_multi (val, mode);
6092 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6093 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6094 corresponding element of the vector, but for V4SFmode, the
6095 corresponding "float" is interpreted as an SImode integer. */
6097 HOST_WIDE_INT
6098 const_vector_elt_as_int (rtx op, unsigned int elt)
6100 rtx tmp;
6102 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6103 gcc_assert (GET_MODE (op) != V2DImode
6104 && GET_MODE (op) != V2DFmode);
6106 tmp = CONST_VECTOR_ELT (op, elt);
6107 if (GET_MODE (op) == V4SFmode)
6108 tmp = gen_lowpart (SImode, tmp);
6109 return INTVAL (tmp);
6112 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6113 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6114 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6115 all items are set to the same value and contain COPIES replicas of the
6116 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
6117 operand and the others are set to the value of the operand's msb. */
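/* For example, the V8HImode constant { 0x0303, ..., 0x0303 } is matched
   with STEP 1 and COPIES 2: each halfword is two replicas of the byte 3,
   so a single vspltisb 3 generates it. */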
6119 static bool
6120 vspltis_constant (rtx op, unsigned step, unsigned copies)
6122 machine_mode mode = GET_MODE (op);
6123 machine_mode inner = GET_MODE_INNER (mode);
6125 unsigned i;
6126 unsigned nunits;
6127 unsigned bitsize;
6128 unsigned mask;
6130 HOST_WIDE_INT val;
6131 HOST_WIDE_INT splat_val;
6132 HOST_WIDE_INT msb_val;
6134 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6135 return false;
6137 nunits = GET_MODE_NUNITS (mode);
6138 bitsize = GET_MODE_BITSIZE (inner);
6139 mask = GET_MODE_MASK (inner);
6141 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6142 splat_val = val;
6143 msb_val = val >= 0 ? 0 : -1;
6145 if (val == 0 && step > 1)
6147 /* Special case for loading the most significant bit with step > 1.
6148 In that case, the elements at positions i with i % step == step - 1 must
6149 match EASY_VECTOR_MSB, and all other elements must be 0. */
6150 for (i = 1; i < nunits; ++i)
6152 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6153 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6154 if ((i & (step - 1)) == step - 1)
6156 if (!EASY_VECTOR_MSB (elt_val, inner))
6157 break;
6159 else if (elt_val)
6160 break;
6162 if (i == nunits)
6163 return true;
6166 /* Construct the value to be splatted, if possible. If not, return 0. */
6167 for (i = 2; i <= copies; i *= 2)
6169 HOST_WIDE_INT small_val;
6170 bitsize /= 2;
6171 small_val = splat_val >> bitsize;
6172 mask >>= bitsize;
6173 if (splat_val != ((HOST_WIDE_INT)
6174 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6175 | (small_val & mask)))
6176 return false;
6177 splat_val = small_val;
6178 inner = smallest_int_mode_for_size (bitsize);
6181 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6182 if (EASY_VECTOR_15 (splat_val))
6183 ;
6185 /* Also check if we can splat, and then add the result to itself. Do so if
6186 the value is positive, or if the splat instruction is using OP's mode;
6187 for splat_val < 0, the splat and the add should use the same mode. */
6188 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6189 && (splat_val >= 0 || (step == 1 && copies == 1)))
6190 ;
6192 /* Also check if we are loading up the most significant bit, which can be
6193 done by loading up -1 and shifting the value left by -1. Only do this
6194 for step 1 here; for larger steps it is done earlier. */
6195 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6196 ;
6198 else
6199 return false;
6201 /* Check if VAL is present in every STEP-th element, and the
6202 other elements are filled with its most significant bit. */
6203 for (i = 1; i < nunits; ++i)
6205 HOST_WIDE_INT desired_val;
6206 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6207 if ((i & (step - 1)) == 0)
6208 desired_val = val;
6209 else
6210 desired_val = msb_val;
6212 if (desired_val != const_vector_elt_as_int (op, elt))
6213 return false;
6216 return true;
6219 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6220 instruction, filling in the bottom elements with 0 or -1.
6222 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6223 for the number of zeroes to shift in, or negative for the number of 0xff
6224 bytes to shift in.
6226 OP is a CONST_VECTOR. */
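/* For example, on a big-endian target the V4SImode constant { 12, 0, 0, 0 }
   yields 12 here: a vspltisw 12 followed by a VSLDOI shifting in 12 zero
   bytes reconstructs it. */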
6228 int
6229 vspltis_shifted (rtx op)
6231 machine_mode mode = GET_MODE (op);
6232 machine_mode inner = GET_MODE_INNER (mode);
6234 unsigned i, j;
6235 unsigned nunits;
6236 unsigned mask;
6238 HOST_WIDE_INT val;
6240 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6241 return 0;
6243 /* We need to create pseudo registers to do the shift, so don't recognize
6244 shift vector constants after reload. */
6245 if (!can_create_pseudo_p ())
6246 return 0;
6248 nunits = GET_MODE_NUNITS (mode);
6249 mask = GET_MODE_MASK (inner);
6251 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6253 /* Check if the value can really be the operand of a vspltis[bhw]. */
6254 if (EASY_VECTOR_15 (val))
6255 ;
6257 /* Also check if we are loading up the most significant bit, which can be
6258 done by loading up -1 and shifting the value left by -1. */
6259 else if (EASY_VECTOR_MSB (val, inner))
6260 ;
6262 else
6263 return 0;
6265 /* Check if VAL is present in every STEP-th element until we find elements
6266 that are 0 or all 1 bits. */
6267 for (i = 1; i < nunits; ++i)
6269 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6270 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6272 /* If the value isn't the splat value, check for the remaining elements
6273 being 0/-1. */
6274 if (val != elt_val)
6276 if (elt_val == 0)
6278 for (j = i+1; j < nunits; ++j)
6280 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6281 if (const_vector_elt_as_int (op, elt2) != 0)
6282 return 0;
6285 return (nunits - i) * GET_MODE_SIZE (inner);
6288 else if ((elt_val & mask) == mask)
6290 for (j = i+1; j < nunits; ++j)
6292 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6293 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6294 return 0;
6297 return -((nunits - i) * GET_MODE_SIZE (inner));
6300 else
6301 return 0;
6305 /* If all elements are equal, we don't need to do VSLDOI. */
6306 return 0;
6310 /* Return non-zero (element mode byte size) if OP is of the given MODE
6311 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6313 int
6314 easy_altivec_constant (rtx op, machine_mode mode)
6316 unsigned step, copies;
6318 if (mode == VOIDmode)
6319 mode = GET_MODE (op);
6320 else if (mode != GET_MODE (op))
6321 return 0;
6323 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6324 constants. */
6325 if (mode == V2DFmode)
6326 return zero_constant (op, mode) ? 8 : 0;
6328 else if (mode == V2DImode)
6330 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6331 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6332 return 0;
6334 if (zero_constant (op, mode))
6335 return 8;
6337 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6338 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6339 return 8;
6341 return 0;
6344 /* V1TImode is a special container for TImode. Ignore for now. */
6345 else if (mode == V1TImode)
6346 return 0;
6348 /* Start with a vspltisw. */
6349 step = GET_MODE_NUNITS (mode) / 4;
6350 copies = 1;
6352 if (vspltis_constant (op, step, copies))
6353 return 4;
6355 /* Then try with a vspltish. */
6356 if (step == 1)
6357 copies <<= 1;
6358 else
6359 step >>= 1;
6361 if (vspltis_constant (op, step, copies))
6362 return 2;
6364 /* And finally a vspltisb. */
6365 if (step == 1)
6366 copies <<= 1;
6367 else
6368 step >>= 1;
6370 if (vspltis_constant (op, step, copies))
6371 return 1;
6373 if (vspltis_shifted (op) != 0)
6374 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6376 return 0;
6379 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6380 result is OP. Abort if it is not possible. */
6382 rtx
6383 gen_easy_altivec_constant (rtx op)
6385 machine_mode mode = GET_MODE (op);
6386 int nunits = GET_MODE_NUNITS (mode);
6387 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6388 unsigned step = nunits / 4;
6389 unsigned copies = 1;
6391 /* Start with a vspltisw. */
6392 if (vspltis_constant (op, step, copies))
6393 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6395 /* Then try with a vspltish. */
6396 if (step == 1)
6397 copies <<= 1;
6398 else
6399 step >>= 1;
6401 if (vspltis_constant (op, step, copies))
6402 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6404 /* And finally a vspltisb. */
6405 if (step == 1)
6406 copies <<= 1;
6407 else
6408 step >>= 1;
6410 if (vspltis_constant (op, step, copies))
6411 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6413 gcc_unreachable ();
6416 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6417 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6419 Return the number of instructions needed (1 or 2) via the address pointed
6420 to by NUM_INSNS_PTR.
6422 Return the constant that is being split via CONSTANT_PTR. */
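/* For example, a V4SImode splat of 100 is accepted with *NUM_INSNS_PTR set
   to 2 (xxspltib plus a sign extension), whereas a splat of 5 is rejected
   below because a single vspltisw 5 is preferable. */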
6424 bool
6425 xxspltib_constant_p (rtx op,
6426 machine_mode mode,
6427 int *num_insns_ptr,
6428 int *constant_ptr)
6430 size_t nunits = GET_MODE_NUNITS (mode);
6431 size_t i;
6432 HOST_WIDE_INT value;
6433 rtx element;
6435 /* Set the returned values to out-of-bounds values. */
6436 *num_insns_ptr = -1;
6437 *constant_ptr = 256;
6439 if (!TARGET_P9_VECTOR)
6440 return false;
6442 if (mode == VOIDmode)
6443 mode = GET_MODE (op);
6445 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6446 return false;
6448 /* Handle (vec_duplicate <constant>). */
6449 if (GET_CODE (op) == VEC_DUPLICATE)
6451 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6452 && mode != V2DImode)
6453 return false;
6455 element = XEXP (op, 0);
6456 if (!CONST_INT_P (element))
6457 return false;
6459 value = INTVAL (element);
6460 if (!IN_RANGE (value, -128, 127))
6461 return false;
6464 /* Handle (const_vector [...]). */
6465 else if (GET_CODE (op) == CONST_VECTOR)
6467 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6468 && mode != V2DImode)
6469 return false;
6471 element = CONST_VECTOR_ELT (op, 0);
6472 if (!CONST_INT_P (element))
6473 return false;
6475 value = INTVAL (element);
6476 if (!IN_RANGE (value, -128, 127))
6477 return false;
6479 for (i = 1; i < nunits; i++)
6481 element = CONST_VECTOR_ELT (op, i);
6482 if (!CONST_INT_P (element))
6483 return false;
6485 if (value != INTVAL (element))
6486 return false;
6490 /* Handle integer constants being loaded into the upper part of the VSX
6491 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6492 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6493 else if (CONST_INT_P (op))
6495 if (!SCALAR_INT_MODE_P (mode))
6496 return false;
6498 value = INTVAL (op);
6499 if (!IN_RANGE (value, -128, 127))
6500 return false;
6502 if (!IN_RANGE (value, -1, 0))
6504 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6505 return false;
6507 if (EASY_VECTOR_15 (value))
6508 return false;
6512 else
6513 return false;
6515 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6516 sign extend. Special case 0/-1 to allow getting any VSX register instead
6517 of an Altivec register. */
6518 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6519 && EASY_VECTOR_15 (value))
6520 return false;
6522 /* Return # of instructions and the constant byte for XXSPLTIB. */
6523 if (mode == V16QImode)
6524 *num_insns_ptr = 1;
6526 else if (IN_RANGE (value, -1, 0))
6527 *num_insns_ptr = 1;
6529 else
6530 *num_insns_ptr = 2;
6532 *constant_ptr = (int) value;
6533 return true;
6536 const char *
6537 output_vec_const_move (rtx *operands)
6539 int shift;
6540 machine_mode mode;
6541 rtx dest, vec;
6543 dest = operands[0];
6544 vec = operands[1];
6545 mode = GET_MODE (dest);
6547 if (TARGET_VSX)
6549 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6550 int xxspltib_value = 256;
6551 int num_insns = -1;
6553 if (zero_constant (vec, mode))
6555 if (TARGET_P9_VECTOR)
6556 return "xxspltib %x0,0";
6558 else if (dest_vmx_p)
6559 return "vspltisw %0,0";
6561 else
6562 return "xxlxor %x0,%x0,%x0";
6565 if (all_ones_constant (vec, mode))
6567 if (TARGET_P9_VECTOR)
6568 return "xxspltib %x0,255";
6570 else if (dest_vmx_p)
6571 return "vspltisw %0,-1";
6573 else if (TARGET_P8_VECTOR)
6574 return "xxlorc %x0,%x0,%x0";
6576 else
6577 gcc_unreachable ();
6580 if (TARGET_P9_VECTOR
6581 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6583 if (num_insns == 1)
6585 operands[2] = GEN_INT (xxspltib_value & 0xff);
6586 return "xxspltib %x0,%2";
6589 return "#";
6593 if (TARGET_ALTIVEC)
6595 rtx splat_vec;
6597 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6598 if (zero_constant (vec, mode))
6599 return "vspltisw %0,0";
6601 if (all_ones_constant (vec, mode))
6602 return "vspltisw %0,-1";
6604 /* Do we need to construct a value using VSLDOI? */
6605 shift = vspltis_shifted (vec);
6606 if (shift != 0)
6607 return "#";
6609 splat_vec = gen_easy_altivec_constant (vec);
6610 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6611 operands[1] = XEXP (splat_vec, 0);
6612 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6613 return "#";
6615 switch (GET_MODE (splat_vec))
6617 case E_V4SImode:
6618 return "vspltisw %0,%1";
6620 case E_V8HImode:
6621 return "vspltish %0,%1";
6623 case E_V16QImode:
6624 return "vspltisb %0,%1";
6626 default:
6627 gcc_unreachable ();
6631 gcc_unreachable ();
6634 /* Initialize vector TARGET to VALS. */
6636 void
6637 rs6000_expand_vector_init (rtx target, rtx vals)
6639 machine_mode mode = GET_MODE (target);
6640 machine_mode inner_mode = GET_MODE_INNER (mode);
6641 unsigned int n_elts = GET_MODE_NUNITS (mode);
6642 int n_var = 0, one_var = -1;
6643 bool all_same = true, all_const_zero = true;
6644 rtx x, mem;
6645 unsigned int i;
6647 for (i = 0; i < n_elts; ++i)
6649 x = XVECEXP (vals, 0, i);
6650 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6651 ++n_var, one_var = i;
6652 else if (x != CONST0_RTX (inner_mode))
6653 all_const_zero = false;
6655 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6656 all_same = false;
6659 if (n_var == 0)
6661 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6662 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6663 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6665 /* Zero register. */
6666 emit_move_insn (target, CONST0_RTX (mode));
6667 return;
6669 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6671 /* Splat immediate. */
6672 emit_insn (gen_rtx_SET (target, const_vec));
6673 return;
6675 else
6677 /* Load from constant pool. */
6678 emit_move_insn (target, const_vec);
6679 return;
6683 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6684 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6686 rtx op[2];
6687 size_t i;
6688 size_t num_elements = all_same ? 1 : 2;
6689 for (i = 0; i < num_elements; i++)
6691 op[i] = XVECEXP (vals, 0, i);
6692 /* Just in case there is a SUBREG with a smaller mode, do a
6693 conversion. */
6694 if (GET_MODE (op[i]) != inner_mode)
6696 rtx tmp = gen_reg_rtx (inner_mode);
6697 convert_move (tmp, op[i], 0);
6698 op[i] = tmp;
6700 /* Allow load with splat double word. */
6701 else if (MEM_P (op[i]))
6703 if (!all_same)
6704 op[i] = force_reg (inner_mode, op[i]);
6706 else if (!REG_P (op[i]))
6707 op[i] = force_reg (inner_mode, op[i]);
6710 if (all_same)
6712 if (mode == V2DFmode)
6713 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6714 else
6715 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6717 else
6719 if (mode == V2DFmode)
6720 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6721 else
6722 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6724 return;
6727 /* Special case initializing vector int if we are on 64-bit systems with
6728 direct move or we have the ISA 3.0 instructions. */
6729 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6730 && TARGET_DIRECT_MOVE_64BIT)
6732 if (all_same)
6734 rtx element0 = XVECEXP (vals, 0, 0);
6735 if (MEM_P (element0))
6736 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6737 else
6738 element0 = force_reg (SImode, element0);
6740 if (TARGET_P9_VECTOR)
6741 emit_insn (gen_vsx_splat_v4si (target, element0));
6742 else
6744 rtx tmp = gen_reg_rtx (DImode);
6745 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6746 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6748 return;
6750 else
6752 rtx elements[4];
6753 size_t i;
6755 for (i = 0; i < 4; i++)
6756 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6758 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6759 elements[2], elements[3]));
6760 return;
6764 /* With single-precision floating point on VSX, we know that internally
6765 single precision is actually represented as a double, so either make
6766 two V2DF vectors and convert those vectors to single precision, or do
6767 one conversion and splat the result to the other elements. */
6768 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6770 if (all_same)
6772 rtx element0 = XVECEXP (vals, 0, 0);
6774 if (TARGET_P9_VECTOR)
6776 if (MEM_P (element0))
6777 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6779 emit_insn (gen_vsx_splat_v4sf (target, element0));
6782 else
6784 rtx freg = gen_reg_rtx (V4SFmode);
6785 rtx sreg = force_reg (SFmode, element0);
6786 rtx cvt = (TARGET_XSCVDPSPN
6787 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6788 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6790 emit_insn (cvt);
6791 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6792 const0_rtx));
6795 else
6797 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6799 rtx tmp_sf[4];
6800 rtx tmp_si[4];
6801 rtx tmp_di[4];
6802 rtx mrg_di[4];
6803 for (i = 0; i < 4; i++)
6805 tmp_si[i] = gen_reg_rtx (SImode);
6806 tmp_di[i] = gen_reg_rtx (DImode);
6807 mrg_di[i] = gen_reg_rtx (DImode);
6808 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6809 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6810 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6813 if (!BYTES_BIG_ENDIAN)
6815 std::swap (tmp_di[0], tmp_di[1]);
6816 std::swap (tmp_di[2], tmp_di[3]);
6819 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6820 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6821 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6822 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6824 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6825 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6826 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6828 else
6830 rtx dbl_even = gen_reg_rtx (V2DFmode);
6831 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6832 rtx flt_even = gen_reg_rtx (V4SFmode);
6833 rtx flt_odd = gen_reg_rtx (V4SFmode);
6834 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6835 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6836 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6837 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6839 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6840 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6841 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6842 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6843 rs6000_expand_extract_even (target, flt_even, flt_odd);
6846 return;
6849 /* Special case for initializing vector short/char splats if we are on
6850 64-bit systems with direct move. */
6851 if (all_same && TARGET_DIRECT_MOVE_64BIT
6852 && (mode == V16QImode || mode == V8HImode))
6854 rtx op0 = XVECEXP (vals, 0, 0);
6855 rtx di_tmp = gen_reg_rtx (DImode);
6857 if (!REG_P (op0))
6858 op0 = force_reg (GET_MODE_INNER (mode), op0);
6860 if (mode == V16QImode)
6862 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6863 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6864 return;
6867 if (mode == V8HImode)
6869 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6870 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6871 return;
6875 /* Store value to stack temp. Load vector element. Splat. However, splat
6876 of 64-bit items is not supported on Altivec. */
6877 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6879 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6880 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6881 XVECEXP (vals, 0, 0));
6882 x = gen_rtx_UNSPEC (VOIDmode,
6883 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6884 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6885 gen_rtvec (2,
6886 gen_rtx_SET (target, mem),
6887 x)));
6888 x = gen_rtx_VEC_SELECT (inner_mode, target,
6889 gen_rtx_PARALLEL (VOIDmode,
6890 gen_rtvec (1, const0_rtx)));
6891 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6892 return;
6895 /* One field is non-constant. Load constant then overwrite
6896 varying field. */
6897 if (n_var == 1)
6899 rtx copy = copy_rtx (vals);
6901 /* Load constant part of vector, substitute neighboring value for
6902 varying element. */
6903 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6904 rs6000_expand_vector_init (target, copy);
6906 /* Insert variable. */
6907 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
6908 GEN_INT (one_var));
6909 return;
6912 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
6914 rtx op[16];
6915 /* Force the values into word_mode registers. */
6916 for (i = 0; i < n_elts; i++)
6918 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
6919 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
6920 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
6923 /* The construction below is illustrated for unsigned char on a 64-bit
6924 big-endian target; the input values are: A, B, C, D, ..., O, P. */
6926 if (TARGET_DIRECT_MOVE_128)
6928 /* Move to VSX register with vec_concat, each has 2 values.
6929 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
6930 vr1[1] = { xxxxxxxC, xxxxxxxD };
6932 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
6933 rtx vr1[8];
6934 for (i = 0; i < n_elts / 2; i++)
6936 vr1[i] = gen_reg_rtx (V2DImode);
6937 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
6938 op[i * 2 + 1]));
6941 /* Pack vectors with 2 values into vectors with 4 values.
6942 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
6943 vr2[1] = { xxxExxxF, xxxGxxxH };
6944 vr2[2] = { xxxIxxxJ, xxxKxxxL };
6945 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
6946 rtx vr2[4];
6947 for (i = 0; i < n_elts / 4; i++)
6949 vr2[i] = gen_reg_rtx (V4SImode);
6950 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
6951 vr1[i * 2 + 1]));
6954 /* Pack vectors with 4 values into vectors with 8 values.
6955 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
6956 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
6957 rtx vr3[2];
6958 for (i = 0; i < n_elts / 8; i++)
6960 vr3[i] = gen_reg_rtx (V8HImode);
6961 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
6962 vr2[i * 2 + 1]));
6965 /* If it's V8HImode, we are done; set the target and return. */
6966 if (mode == V8HImode)
6968 emit_insn (gen_rtx_SET (target, vr3[0]));
6969 return;
6972 /* Pack the two vectors with 8 values each into one with 16 values. */
6973 rtx res = gen_reg_rtx (V16QImode);
6974 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
6975 emit_insn (gen_rtx_SET (target, res));
6977 else
6979 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
6980 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
6981 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
6982 rtx perm_idx;
6984 /* Set up some common gen routines and values. */
6985 if (BYTES_BIG_ENDIAN)
6987 if (mode == V16QImode)
6989 merge_v16qi = gen_altivec_vmrghb;
6990 merge_v8hi = gen_altivec_vmrglh;
6992 else
6993 merge_v8hi = gen_altivec_vmrghh;
6995 merge_v4si = gen_altivec_vmrglw;
6996 perm_idx = GEN_INT (3);
6998 else
7000 if (mode == V16QImode)
7002 merge_v16qi = gen_altivec_vmrglb;
7003 merge_v8hi = gen_altivec_vmrghh;
7005 else
7006 merge_v8hi = gen_altivec_vmrglh;
7008 merge_v4si = gen_altivec_vmrghw;
7009 perm_idx = GEN_INT (0);
7012 /* Move to VSX register with direct move.
7013 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7014 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7016 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7017 rtx vr_qi[16];
7018 for (i = 0; i < n_elts; i++)
7020 vr_qi[i] = gen_reg_rtx (V16QImode);
7021 if (TARGET_POWERPC64)
7022 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7023 else
7024 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7027 /* Merge/move to vector short.
7028 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7029 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7031 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7032 rtx vr_hi[8];
7033 for (i = 0; i < 8; i++)
7035 rtx tmp = vr_qi[i];
7036 if (mode == V16QImode)
7038 tmp = gen_reg_rtx (V16QImode);
7039 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7041 vr_hi[i] = gen_reg_rtx (V8HImode);
7042 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7045 /* Merge vector short to vector int.
7046 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7047 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7049 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7050 rtx vr_si[4];
7051 for (i = 0; i < 4; i++)
7053 rtx tmp = gen_reg_rtx (V8HImode);
7054 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7055 vr_si[i] = gen_reg_rtx (V4SImode);
7056 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7059 /* Merge vector int to vector long.
7060 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7061 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7062 rtx vr_di[2];
7063 for (i = 0; i < 2; i++)
7065 rtx tmp = gen_reg_rtx (V4SImode);
7066 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7067 vr_di[i] = gen_reg_rtx (V2DImode);
7068 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7071 rtx res = gen_reg_rtx (V2DImode);
7072 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7073 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7076 return;
7079 /* Construct the vector in memory one field at a time
7080 and load the whole vector. */
7081 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7082 for (i = 0; i < n_elts; i++)
7083 emit_move_insn (adjust_address_nv (mem, inner_mode,
7084 i * GET_MODE_SIZE (inner_mode)),
7085 XVECEXP (vals, 0, i));
7086 emit_move_insn (target, mem);
7089 /* Insert VAL into element IDX of TARGET, where VAL's size equals the vector
7090 element size and IDX is a variable counted in elements; for p9 and above. */
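/* The rough idea: rotate TARGET so that element IDX lines up with element 0
   (using the lvsr-generated permute), insert VAL at element 0, then rotate
   back with the lvsl-generated permute. */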
7092 static void
7093 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7095 machine_mode mode = GET_MODE (target);
7097 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7099 machine_mode inner_mode = GET_MODE (val);
7101 int width = GET_MODE_SIZE (inner_mode);
7103 gcc_assert (width >= 1 && width <= 8);
7105 int shift = exact_log2 (width);
7107 machine_mode idx_mode = GET_MODE (idx);
7109 machine_mode shift_mode;
7110 rtx (*gen_ashl)(rtx, rtx, rtx);
7111 rtx (*gen_lvsl)(rtx, rtx);
7112 rtx (*gen_lvsr)(rtx, rtx);
7114 if (TARGET_POWERPC64)
7116 shift_mode = DImode;
7117 gen_ashl = gen_ashldi3;
7118 gen_lvsl = gen_altivec_lvsl_reg_di;
7119 gen_lvsr = gen_altivec_lvsr_reg_di;
7121 else
7123 shift_mode = SImode;
7124 gen_ashl = gen_ashlsi3;
7125 gen_lvsl = gen_altivec_lvsl_reg_si;
7126 gen_lvsr = gen_altivec_lvsr_reg_si;
7128 /* Generate the IDX for the permute shift, where width is the vector
7129 element size: idx = idx * width. */
7130 rtx tmp = gen_reg_rtx (shift_mode);
7131 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7133 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7135 /* lvsr v1,0,idx. */
7136 rtx pcvr = gen_reg_rtx (V16QImode);
7137 emit_insn (gen_lvsr (pcvr, tmp));
7139 /* lvsl v2,0,idx. */
7140 rtx pcvl = gen_reg_rtx (V16QImode);
7141 emit_insn (gen_lvsl (pcvl, tmp));
7143 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7145 rtx permr
7146 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvr);
7147 emit_insn (permr);
7149 rs6000_expand_vector_set (target, val, const0_rtx);
7151 rtx perml
7152 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, pcvl);
7153 emit_insn (perml);
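/* In outline: the first vperm (lvsr selector) rotates TARGET so that the
   element addressed by IDX lines up with element 0, the constant-index
   rs6000_expand_vector_set call then replaces element 0, and the second
   vperm (lvsl selector) rotates the vector back into its original
   element order. */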
7156 /* Insert VAL at index IDX of TARGET.  VAL has the same size as the vector
7157 element; IDX is variable and counts in vector elements.  For p7 and p8. */
7159 static void
7160 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7162 machine_mode mode = GET_MODE (target);
7164 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7166 machine_mode inner_mode = GET_MODE (val);
7167 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7169 int width = GET_MODE_SIZE (inner_mode);
7170 gcc_assert (width >= 1 && width <= 4);
7172 int shift = exact_log2 (width);
7174 machine_mode idx_mode = GET_MODE (idx);
7176 machine_mode shift_mode;
7177 rtx (*gen_ashl)(rtx, rtx, rtx);
7178 rtx (*gen_add)(rtx, rtx, rtx);
7179 rtx (*gen_sub)(rtx, rtx, rtx);
7180 rtx (*gen_lvsl)(rtx, rtx);
7182 if (TARGET_POWERPC64)
7184 shift_mode = DImode;
7185 gen_ashl = gen_ashldi3;
7186 gen_add = gen_adddi3;
7187 gen_sub = gen_subdi3;
7188 gen_lvsl = gen_altivec_lvsl_reg_di;
7190 else
7192 shift_mode = SImode;
7193 gen_ashl = gen_ashlsi3;
7194 gen_add = gen_addsi3;
7195 gen_sub = gen_subsi3;
7196 gen_lvsl = gen_altivec_lvsl_reg_si;
7199 /* idx = idx * width. */
7200 rtx tmp = gen_reg_rtx (shift_mode);
7201 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7203 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7205 /* For LE: idx = idx + 8.  For BE: idx = (24 - width) - idx. */
7206 if (!BYTES_BIG_ENDIAN)
7207 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7208 else
7209 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7211 /* lxv vs33, mask.
7212 DImode: 0xffffffffffffffff0000000000000000
7213 SImode: 0x00000000ffffffff0000000000000000
7214 HImode: 0x000000000000ffff0000000000000000
7215 QImode: 0x00000000000000ff0000000000000000. */
7216 rtx mask = gen_reg_rtx (V16QImode);
7217 rtx mask_v2di = gen_reg_rtx (V2DImode);
7218 rtvec v = rtvec_alloc (2);
7219 if (!BYTES_BIG_ENDIAN)
7221 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7222 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7224 else
7226 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7227 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7229 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7230 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7231 emit_insn (gen_rtx_SET (mask, sub_mask));
7233 /* mtvsrd[wz] f0,tmp_val. */
7234 rtx tmp_val = gen_reg_rtx (SImode);
7235 if (inner_mode == E_SFmode)
7236 if (TARGET_DIRECT_MOVE_64BIT)
7237 emit_insn (gen_movsi_from_sf (tmp_val, val));
7238 else
7240 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7241 emit_insn (gen_movsf_hardfloat (stack, val));
7242 rtx stack2 = copy_rtx (stack);
7243 PUT_MODE (stack2, SImode);
7244 emit_move_insn (tmp_val, stack2);
7246 else
7247 tmp_val = force_reg (SImode, val);
7249 rtx val_v16qi = gen_reg_rtx (V16QImode);
7250 rtx val_v2di = gen_reg_rtx (V2DImode);
7251 rtvec vec_val = rtvec_alloc (2);
7252 if (!BYTES_BIG_ENDIAN)
7254 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7255 RTVEC_ELT (vec_val, 1) = tmp_val;
7257 else
7259 RTVEC_ELT (vec_val, 0) = tmp_val;
7260 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7262 emit_insn (
7263 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7264 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7265 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7267 /* lvsl 13,0,idx. */
7268 rtx pcv = gen_reg_rtx (V16QImode);
7269 emit_insn (gen_lvsl (pcv, tmp));
7271 /* vperm 1,1,1,13. */
7272 /* vperm 0,0,0,13. */
7273 rtx val_perm = gen_reg_rtx (V16QImode);
7274 rtx mask_perm = gen_reg_rtx (V16QImode);
7275 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7276 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7278 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7280 /* xxsel 34,34,32,33. */
7281 emit_insn (
7282 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
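/* In outline: VAL and an element-wide all-ones mask are each placed in
   one doubleword of a vector, the lvsl-based vperms rotate both into the
   byte lanes selected by IDX, and the final xxsel takes bytes from the
   rotated VAL where the rotated mask is ones and from the old TARGET
   elsewhere. */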
7285 /* Set field ELT_RTX of TARGET to VAL. */
7287 void
7288 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7290 machine_mode mode = GET_MODE (target);
7291 machine_mode inner_mode = GET_MODE_INNER (mode);
7292 rtx reg = gen_reg_rtx (mode);
7293 rtx mask, mem, x;
7294 int width = GET_MODE_SIZE (inner_mode);
7295 int i;
7297 val = force_reg (GET_MODE (val), val);
7299 if (VECTOR_MEM_VSX_P (mode))
7301 if (!CONST_INT_P (elt_rtx))
7303 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7304 when elt_rtx is variable. */
7305 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7307 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7308 return;
7310 else if (TARGET_VSX)
7312 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7313 return;
7315 else
7316 gcc_assert (CONST_INT_P (elt_rtx));
7319 rtx insn = NULL_RTX;
7321 if (mode == V2DFmode)
7322 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7324 else if (mode == V2DImode)
7325 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7327 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7329 if (mode == V4SImode)
7330 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7331 else if (mode == V8HImode)
7332 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7333 else if (mode == V16QImode)
7334 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7335 else if (mode == V4SFmode)
7336 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7339 if (insn)
7341 emit_insn (insn);
7342 return;
7346 /* Simplify setting single element vectors like V1TImode. */
7347 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7348 && INTVAL (elt_rtx) == 0)
7350 emit_move_insn (target, gen_lowpart (mode, val));
7351 return;
7354 /* Load single variable value. */
7355 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7356 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7357 x = gen_rtx_UNSPEC (VOIDmode,
7358 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7359 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7360 gen_rtvec (2,
7361 gen_rtx_SET (reg, mem),
7362 x)));
7364 /* Linear sequence. */
7365 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7366 for (i = 0; i < 16; ++i)
7367 XVECEXP (mask, 0, i) = GEN_INT (i);
7369 /* Set permute mask to insert element into target. */
7370 for (i = 0; i < width; ++i)
7371 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
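/* E.g. for V4SImode with ELT_RTX == 1 and WIDTH == 4 the selector becomes
   { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }: entries 16..19 pick
   the new value out of REG, everything else keeps the bytes of TARGET. */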
7372 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7374 if (BYTES_BIG_ENDIAN)
7375 x = gen_rtx_UNSPEC (mode,
7376 gen_rtvec (3, target, reg,
7377 force_reg (V16QImode, x)),
7378 UNSPEC_VPERM);
7379 else
7381 if (TARGET_P9_VECTOR)
7382 x = gen_rtx_UNSPEC (mode,
7383 gen_rtvec (3, reg, target,
7384 force_reg (V16QImode, x)),
7385 UNSPEC_VPERMR);
7386 else
7388 /* Invert selector. We prefer to generate VNAND on P8 so
7389 that future fusion opportunities can kick in, but must
7390 generate VNOR elsewhere. */
7391 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7392 rtx iorx = (TARGET_P8_VECTOR
7393 ? gen_rtx_IOR (V16QImode, notx, notx)
7394 : gen_rtx_AND (V16QImode, notx, notx));
7395 rtx tmp = gen_reg_rtx (V16QImode);
7396 emit_insn (gen_rtx_SET (tmp, iorx));
7398 /* Permute with operands reversed and adjusted selector. */
7399 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7400 UNSPEC_VPERM);
7404 emit_insn (gen_rtx_SET (target, x));
7407 /* Extract field ELT from VEC into TARGET. */
7409 void
7410 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7412 machine_mode mode = GET_MODE (vec);
7413 machine_mode inner_mode = GET_MODE_INNER (mode);
7414 rtx mem;
7416 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7418 switch (mode)
7420 default:
7421 break;
7422 case E_V1TImode:
7423 emit_move_insn (target, gen_lowpart (TImode, vec));
7424 break;
7425 case E_V2DFmode:
7426 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7427 return;
7428 case E_V2DImode:
7429 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7430 return;
7431 case E_V4SFmode:
7432 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7433 return;
7434 case E_V16QImode:
7435 if (TARGET_DIRECT_MOVE_64BIT)
7437 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7438 return;
7440 else
7441 break;
7442 case E_V8HImode:
7443 if (TARGET_DIRECT_MOVE_64BIT)
7445 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7446 return;
7448 else
7449 break;
7450 case E_V4SImode:
7451 if (TARGET_DIRECT_MOVE_64BIT)
7453 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7454 return;
7456 break;
7459 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7460 && TARGET_DIRECT_MOVE_64BIT)
7462 if (GET_MODE (elt) != DImode)
7464 rtx tmp = gen_reg_rtx (DImode);
7465 convert_move (tmp, elt, 0);
7466 elt = tmp;
7468 else if (!REG_P (elt))
7469 elt = force_reg (DImode, elt);
7471 switch (mode)
7473 case E_V1TImode:
7474 emit_move_insn (target, gen_lowpart (TImode, vec));
7475 return;
7477 case E_V2DFmode:
7478 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7479 return;
7481 case E_V2DImode:
7482 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7483 return;
7485 case E_V4SFmode:
7486 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7487 return;
7489 case E_V4SImode:
7490 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7491 return;
7493 case E_V8HImode:
7494 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7495 return;
7497 case E_V16QImode:
7498 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7499 return;
7501 default:
7502 gcc_unreachable ();
7506 /* Allocate mode-sized buffer. */
7507 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7509 emit_move_insn (mem, vec);
7510 if (CONST_INT_P (elt))
7512 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7514 /* Add offset to field within buffer matching vector element. */
7515 mem = adjust_address_nv (mem, inner_mode,
7516 modulo_elt * GET_MODE_SIZE (inner_mode));
7517 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7519 else
7521 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7522 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7523 rtx new_addr = gen_reg_rtx (Pmode);
7525 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7526 if (ele_size > 1)
7527 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7528 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7529 new_addr = change_address (mem, inner_mode, new_addr);
7530 emit_move_insn (target, new_addr);
7534 /* Return the offset within a memory object (MEM) of a vector type to a given
7535 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7536 the element is constant, we return a constant integer.
7538 Otherwise, we use a base register temporary to calculate the offset after
7539 masking it to fit within the bounds of the vector and scaling it. The
7540 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7541 built-in function. */
7543 static rtx
7544 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7546 if (CONST_INT_P (element))
7547 return GEN_INT (INTVAL (element) * scalar_size);
7549 /* All insns should use the 'Q' constraint (address is a single register) if
7550 the element number is not a constant. */
7551 gcc_assert (satisfies_constraint_Q (mem));
7553 /* Mask the element to make sure the element number is between 0 and the
7554 maximum number of elements - 1 so that we don't generate an address
7555 outside the vector. */
7556 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7557 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7558 emit_insn (gen_rtx_SET (base_tmp, and_op));
7560 /* Shift the element to get the byte offset from the element number. */
7561 int shift = exact_log2 (scalar_size);
7562 gcc_assert (shift >= 0);
7564 if (shift > 0)
7566 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7567 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7570 return base_tmp;
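/* For example, for a V4SImode MEM with a variable ELEMENT, the RTL above
   computes BASE_TMP = (ELEMENT & 3) << 2, so ELEMENT == 5 wraps around to
   byte offset 4 instead of addressing past the end of the vector. */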
7573 /* Helper function to update PC-relative addresses when we are adjusting a memory
7574 address (ADDR) to a vector to point to a scalar field within the vector with
7575 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7576 use the base register temporary (BASE_TMP) to form the address. */
7578 static rtx
7579 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7581 rtx new_addr = NULL;
7583 gcc_assert (CONST_INT_P (element_offset));
7585 if (GET_CODE (addr) == CONST)
7586 addr = XEXP (addr, 0);
7588 if (GET_CODE (addr) == PLUS)
7590 rtx op0 = XEXP (addr, 0);
7591 rtx op1 = XEXP (addr, 1);
7593 if (CONST_INT_P (op1))
7595 HOST_WIDE_INT offset
7596 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7598 if (offset == 0)
7599 new_addr = op0;
7601 else
7603 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7604 new_addr = gen_rtx_CONST (Pmode, plus);
7608 else
7610 emit_move_insn (base_tmp, addr);
7611 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7615 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7617 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7618 new_addr = gen_rtx_CONST (Pmode, plus);
7621 else
7622 gcc_unreachable ();
7624 return new_addr;
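/* For instance, if ADDR is (const (plus (symbol_ref "x") 16)) and
   ELEMENT_OFFSET is 8 (hypothetical values), the result is
   (const (plus (symbol_ref "x") 24)), so the pc-relative access needs
   no separate add. */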
7627 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7628 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7629 temporary (BASE_TMP) to fix up the address. Return the new memory address
7630 that is valid for reads or writes to a given register (SCALAR_REG).
7632 This function is expected to be called after reload is completed when we are
7633 splitting insns. The temporary BASE_TMP might be set multiple times with
7634 this code. */
7636 rtx
7637 rs6000_adjust_vec_address (rtx scalar_reg,
7638 rtx mem,
7639 rtx element,
7640 rtx base_tmp,
7641 machine_mode scalar_mode)
7643 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7644 rtx addr = XEXP (mem, 0);
7645 rtx new_addr;
7647 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7648 gcc_assert (!reg_mentioned_p (base_tmp, element));
7650 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7651 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7653 /* Calculate what we need to add to the address to get the element
7654 address. */
7655 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7657 /* Create the new address pointing to the element within the vector. If we
7658 are adding 0, we don't have to change the address. */
7659 if (element_offset == const0_rtx)
7660 new_addr = addr;
7662 /* A simple indirect address can be converted into a reg + offset
7663 address. */
7664 else if (REG_P (addr) || SUBREG_P (addr))
7665 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7667 /* For references to local static variables, fold a constant offset into the
7668 address. */
7669 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7670 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7672 /* Optimize D-FORM addresses with constant offset with a constant element, to
7673 include the element offset in the address directly. */
7674 else if (GET_CODE (addr) == PLUS)
7676 rtx op0 = XEXP (addr, 0);
7677 rtx op1 = XEXP (addr, 1);
7679 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7680 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7682 /* op0 should never be r0, because r0+offset is not valid. But it
7683 doesn't hurt to make sure it is not r0. */
7684 gcc_assert (reg_or_subregno (op0) != 0);
7686 /* D-FORM address with constant element number. */
7687 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7688 rtx offset_rtx = GEN_INT (offset);
7689 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7691 else
7693 /* If we don't have a D-FORM address with a constant element number,
7694 add the two elements in the current address. Then add the offset.
7696 Previously, we tried to add the offset to OP1 and change the
7697 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7698 complicated because we had to verify that op1 was not GPR0 and we
7699 had a constant element offset (due to the way ADDI is defined).
7700 By doing the add of OP0 and OP1 first, and then adding in the
7701 offset, it has the benefit that if D-FORM instructions are
7702 allowed, the offset is part of the memory access to the vector
7703 element. */
7704 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7705 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7709 else
7711 emit_move_insn (base_tmp, addr);
7712 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7715 /* If the address isn't valid, move the address into the temporary base
7716 register. Some reasons it could not be valid include:
7718 The address offset overflowed the 16 or 34 bit offset size;
7719 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7720 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7721 Only X_FORM loads can be done, and the address is D_FORM. */
7723 enum insn_form iform
7724 = address_to_insn_form (new_addr, scalar_mode,
7725 reg_to_non_prefixed (scalar_reg, scalar_mode));
7727 if (iform == INSN_FORM_BAD)
7729 emit_move_insn (base_tmp, new_addr);
7730 new_addr = base_tmp;
7733 return change_address (mem, scalar_mode, new_addr);
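/* As an example of the D-FORM case above: extracting constant element 2
   from a V4SImode vector in memory at (plus r9 32) (a made-up address)
   folds the element offset 8 into the displacement, giving the scalar
   reference (plus r9 40). */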
7736 /* Split a variable vec_extract operation into the component instructions. */
7738 void
7739 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7740 rtx tmp_altivec)
7742 machine_mode mode = GET_MODE (src);
7743 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7744 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7745 int byte_shift = exact_log2 (scalar_size);
7747 gcc_assert (byte_shift >= 0);
7749 /* If we are given a memory address, optimize to load just the element. We
7750 don't have to adjust the vector element number on little endian
7751 systems. */
7752 if (MEM_P (src))
7754 emit_move_insn (dest,
7755 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7756 scalar_mode));
7757 return;
7760 else if (REG_P (src) || SUBREG_P (src))
7762 int num_elements = GET_MODE_NUNITS (mode);
7763 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7764 int bit_shift = 7 - exact_log2 (num_elements);
7765 rtx element2;
7766 unsigned int dest_regno = reg_or_subregno (dest);
7767 unsigned int src_regno = reg_or_subregno (src);
7768 unsigned int element_regno = reg_or_subregno (element);
7770 gcc_assert (REG_P (tmp_gpr));
7772 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7773 a general purpose register. */
7774 if (TARGET_P9_VECTOR
7775 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7776 && INT_REGNO_P (dest_regno)
7777 && ALTIVEC_REGNO_P (src_regno)
7778 && INT_REGNO_P (element_regno))
7780 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7781 rtx element_si = gen_rtx_REG (SImode, element_regno);
7783 if (mode == V16QImode)
7784 emit_insn (BYTES_BIG_ENDIAN
7785 ? gen_vextublx (dest_si, element_si, src)
7786 : gen_vextubrx (dest_si, element_si, src));
7788 else if (mode == V8HImode)
7790 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7791 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7792 emit_insn (BYTES_BIG_ENDIAN
7793 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7794 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7798 else
7800 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7801 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7802 emit_insn (BYTES_BIG_ENDIAN
7803 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7804 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7807 return;
7811 gcc_assert (REG_P (tmp_altivec));
7813 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7814 an XOR, otherwise we need to subtract. The shift amount is chosen so
7815 that VSLO will shift the element into the upper position (3 is added to
7816 convert a byte shift into a bit shift). */
7817 if (scalar_size == 8)
7819 if (!BYTES_BIG_ENDIAN)
7821 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7822 element2 = tmp_gpr;
7824 else
7825 element2 = element;
7827 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7828 bit. */
7829 emit_insn (gen_rtx_SET (tmp_gpr,
7830 gen_rtx_AND (DImode,
7831 gen_rtx_ASHIFT (DImode,
7832 element2,
7833 GEN_INT (6)),
7834 GEN_INT (64))));
7836 else
7838 if (!BYTES_BIG_ENDIAN)
7840 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7842 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7843 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7844 element2 = tmp_gpr;
7846 else
7847 element2 = element;
7849 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7852 /* Get the value into the lower byte of the Altivec register where VSLO
7853 expects it. */
7854 if (TARGET_P9_VECTOR)
7855 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7856 else if (can_create_pseudo_p ())
7857 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7858 else
7860 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7861 emit_move_insn (tmp_di, tmp_gpr);
7862 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7865 /* Do the VSLO to get the value into the final location. */
7866 switch (mode)
7868 case E_V2DFmode:
7869 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7870 return;
7872 case E_V2DImode:
7873 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7874 return;
7876 case E_V4SFmode:
7878 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7879 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7880 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7881 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7882 tmp_altivec));
7884 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7885 return;
7888 case E_V4SImode:
7889 case E_V8HImode:
7890 case E_V16QImode:
7892 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7893 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7894 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7895 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7896 tmp_altivec));
7897 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7898 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7899 GEN_INT (64 - bits_in_element)));
7900 return;
7903 default:
7904 gcc_unreachable ();
7907 return;
7909 else
7910 gcc_unreachable ();
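/* A worked example of the register path, assuming a V4SImode extract on
   little endian without the P9 vextu* insns: the element number is masked
   to 0..3 and reflected by the subtract, shifted left by 5 to become a
   bit count, splatted into the shift-amount vector for VSLO, and after
   VSLO the value sits in the upper doubleword, so the final shift right
   by 64 - 32 bits leaves it in the low word of DEST. */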
7913 /* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
7914 selects whether the alignment is ABI-mandated, optional, or
7915 both ABI-mandated and optional alignment. */
7917 unsigned int
7918 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7920 if (how != align_opt)
7922 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7923 align = 128;
7926 if (how != align_abi)
7928 if (TREE_CODE (type) == ARRAY_TYPE
7929 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7931 if (align < BITS_PER_WORD)
7932 align = BITS_PER_WORD;
7936 return align;
7939 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7940 instructions simply ignore the low bits; VSX memory instructions
7941 are aligned to 4 or 8 bytes. */
7943 static bool
7944 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7946 return (STRICT_ALIGNMENT
7947 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7948 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7949 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7950 && (int) align < VECTOR_ALIGN (mode)))));
7953 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
7955 unsigned int
7956 rs6000_special_adjust_field_align (tree type, unsigned int computed)
7958 if (computed <= 32)
7959 return computed;
7961 /* Strip initial arrays. */
7962 while (TREE_CODE (type) == ARRAY_TYPE)
7963 type = TREE_TYPE (type);
7965 /* If RECORD or UNION, recursively find the first field. */
7966 while (AGGREGATE_TYPE_P (type))
7968 tree field = TYPE_FIELDS (type);
7970 /* Skip all non-field decls.  */
7971 while (field != NULL
7972 && (TREE_CODE (field) != FIELD_DECL
7973 || DECL_FIELD_ABI_IGNORED (field)))
7974 field = DECL_CHAIN (field);
7976 if (! field)
7977 break;
7979 /* A packed field does not contribute any extra alignment. */
7980 if (DECL_PACKED (field))
7981 return computed;
7983 type = TREE_TYPE (field);
7985 /* Strip arrays. */
7986 while (TREE_CODE (type) == ARRAY_TYPE)
7987 type = TREE_TYPE (type);
7990 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
7991 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
7992 computed = MIN (computed, 32);
7994 return computed;
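/* For example, a "double" field (or an array/aggregate whose first field
   is a double) whose natural alignment would be 64 bits is capped at
   32 bits here, matching the AIX power alignment rules. */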
7997 /* AIX increases natural record alignment to doubleword if the innermost first
7998 field is an FP double, while the FP fields themselves remain word-aligned.
7999 Only called if TYPE initially is a RECORD or UNION. */
8001 unsigned int
8002 rs6000_special_round_type_align (tree type, unsigned int computed,
8003 unsigned int specified)
8005 unsigned int align = MAX (computed, specified);
8007 if (TYPE_PACKED (type) || align >= 64)
8008 return align;
8010 /* If RECORD or UNION, recursively find the first field. */
8011 do
8012 {
8013 tree field = TYPE_FIELDS (type);
8015 /* Skip all non-field decls.  */
8016 while (field != NULL
8017 && (TREE_CODE (field) != FIELD_DECL
8018 || DECL_FIELD_ABI_IGNORED (field)))
8019 field = DECL_CHAIN (field);
8021 if (! field)
8022 break;
8024 /* A packed field does not contribute any extra alignment. */
8025 if (DECL_PACKED (field))
8026 return align;
8028 type = TREE_TYPE (field);
8030 /* Strip arrays. */
8031 while (TREE_CODE (type) == ARRAY_TYPE)
8032 type = TREE_TYPE (type);
8033 } while (AGGREGATE_TYPE_P (type));
8035 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8036 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8037 align = MAX (align, 64);
8039 return align;
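/* For example, the record alignment of "struct { double d; int i; }" is
   raised from 32 to 64 bits here, while the double field itself stays
   word-aligned via rs6000_special_adjust_field_align above. */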
8042 /* Darwin increases record alignment to the natural alignment of
8043 the first field. */
8045 unsigned int
8046 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8047 unsigned int specified)
8049 unsigned int align = MAX (computed, specified);
8051 if (TYPE_PACKED (type))
8052 return align;
8054 /* Find the first field, looking down into aggregates. */
8055 do {
8056 tree field = TYPE_FIELDS (type);
8057 /* Skip all non-field decls.  */
8058 while (field != NULL
8059 && (TREE_CODE (field) != FIELD_DECL
8060 || DECL_FIELD_ABI_IGNORED (field)))
8061 field = DECL_CHAIN (field);
8062 if (! field)
8063 break;
8064 /* A packed field does not contribute any extra alignment. */
8065 if (DECL_PACKED (field))
8066 return align;
8067 type = TREE_TYPE (field);
8068 while (TREE_CODE (type) == ARRAY_TYPE)
8069 type = TREE_TYPE (type);
8070 } while (AGGREGATE_TYPE_P (type));
8072 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
8073 align = MAX (align, TYPE_ALIGN (type));
8075 return align;
8078 /* Return 1 for an operand in small memory on V.4/eabi. */
8080 int
8081 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8082 machine_mode mode ATTRIBUTE_UNUSED)
8084 #if TARGET_ELF
8085 rtx sym_ref;
8087 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8088 return 0;
8090 if (DEFAULT_ABI != ABI_V4)
8091 return 0;
8093 if (SYMBOL_REF_P (op))
8094 sym_ref = op;
8096 else if (GET_CODE (op) != CONST
8097 || GET_CODE (XEXP (op, 0)) != PLUS
8098 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8099 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8100 return 0;
8102 else
8104 rtx sum = XEXP (op, 0);
8105 HOST_WIDE_INT summand;
8107 /* We have to be careful here, because it is the referenced address
8108 that must be 32k from _SDA_BASE_, not just the symbol. */
8109 summand = INTVAL (XEXP (sum, 1));
8110 if (summand < 0 || summand > g_switch_value)
8111 return 0;
8113 sym_ref = XEXP (sum, 0);
8116 return SYMBOL_REF_SMALL_P (sym_ref);
8117 #else
8118 return 0;
8119 #endif
8122 /* Return true if either operand is a general purpose register. */
8124 bool
8125 gpr_or_gpr_p (rtx op0, rtx op1)
8127 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8128 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8131 /* Return true if this is a move direct operation between GPR registers and
8132 floating point/VSX registers. */
8134 bool
8135 direct_move_p (rtx op0, rtx op1)
8137 if (!REG_P (op0) || !REG_P (op1))
8138 return false;
8140 if (!TARGET_DIRECT_MOVE)
8141 return false;
8143 int regno0 = REGNO (op0);
8144 int regno1 = REGNO (op1);
8145 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8146 return false;
8148 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8149 return true;
8151 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8152 return true;
8154 return false;
8157 /* Return true if ADDR is an acceptable address for a quad memory
8158 operation of mode MODE (either LQ/STQ for general purpose registers, or
8159 LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
8160 address must satisfy strict address checking, as required once register
8161 allocation is complete. */
8163 bool
8164 quad_address_p (rtx addr, machine_mode mode, bool strict)
8166 rtx op0, op1;
8168 if (GET_MODE_SIZE (mode) < 16)
8169 return false;
8171 if (legitimate_indirect_address_p (addr, strict))
8172 return true;
8174 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8175 return false;
8177 /* Is this a valid prefixed address? If the bottom four bits of the offset
8178 are non-zero, we could use a prefixed instruction (which does not have the
8179 DQ-form constraint that the traditional instruction had) instead of
8180 forcing the unaligned offset to a GPR. */
8181 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8182 return true;
8184 if (GET_CODE (addr) != PLUS)
8185 return false;
8187 op0 = XEXP (addr, 0);
8188 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8189 return false;
8191 op1 = XEXP (addr, 1);
8192 if (!CONST_INT_P (op1))
8193 return false;
8195 return quad_address_offset_p (INTVAL (op1));
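/* quad_address_offset_p accepts a signed 16-bit offset whose bottom four
   bits are zero, so e.g. (plus r4 32) is an acceptable LXV/STXV address
   while (plus r4 20) is not, unless (per the check above) the access can
   be done with a prefixed load or store instead. */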
8198 /* Return true if this is a load or store quad operation. This function does
8199 not handle the atomic quad memory instructions. */
8201 bool
8202 quad_load_store_p (rtx op0, rtx op1)
8204 bool ret;
8206 if (!TARGET_QUAD_MEMORY)
8207 ret = false;
8209 else if (REG_P (op0) && MEM_P (op1))
8210 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8211 && quad_memory_operand (op1, GET_MODE (op1))
8212 && !reg_overlap_mentioned_p (op0, op1));
8214 else if (MEM_P (op0) && REG_P (op1))
8215 ret = (quad_memory_operand (op0, GET_MODE (op0))
8216 && quad_int_reg_operand (op1, GET_MODE (op1)));
8218 else
8219 ret = false;
8221 if (TARGET_DEBUG_ADDR)
8223 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8224 ret ? "true" : "false");
8225 debug_rtx (gen_rtx_SET (op0, op1));
8228 return ret;
8231 /* Given an address, return a constant offset term if one exists. */
8233 static rtx
8234 address_offset (rtx op)
8236 if (GET_CODE (op) == PRE_INC
8237 || GET_CODE (op) == PRE_DEC)
8238 op = XEXP (op, 0);
8239 else if (GET_CODE (op) == PRE_MODIFY
8240 || GET_CODE (op) == LO_SUM)
8241 op = XEXP (op, 1);
8243 if (GET_CODE (op) == CONST)
8244 op = XEXP (op, 0);
8246 if (GET_CODE (op) == PLUS)
8247 op = XEXP (op, 1);
8249 if (CONST_INT_P (op))
8250 return op;
8252 return NULL_RTX;
8255 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8256 the mode. If we can't find (or don't know) the alignment of the symbol
8257 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8258 should be pessimistic]. Offsets are validated in the same way as for
8259 reg + offset. */
8260 static bool
8261 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8263 /* We should not get here with this. */
8264 gcc_checking_assert (! mode_supports_dq_form (mode));
8266 if (GET_CODE (x) == CONST)
8267 x = XEXP (x, 0);
8269 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8270 x = XVECEXP (x, 0, 0);
8272 rtx sym = NULL_RTX;
8273 unsigned HOST_WIDE_INT offset = 0;
8275 if (GET_CODE (x) == PLUS)
8277 sym = XEXP (x, 0);
8278 if (! SYMBOL_REF_P (sym))
8279 return false;
8280 if (!CONST_INT_P (XEXP (x, 1)))
8281 return false;
8282 offset = INTVAL (XEXP (x, 1));
8284 else if (SYMBOL_REF_P (x))
8285 sym = x;
8286 else if (CONST_INT_P (x))
8287 offset = INTVAL (x);
8288 else if (GET_CODE (x) == LABEL_REF)
8289 offset = 0; // We assume code labels are Pmode aligned
8290 else
8291 return false; // not sure what we have here.
8293 /* If we don't know the alignment of the thing to which the symbol refers,
8294 we assume optimistically it is "enough".
8295 ??? maybe we should be pessimistic instead. */
8296 unsigned align = 0;
8298 if (sym)
8300 tree decl = SYMBOL_REF_DECL (sym);
8301 #if TARGET_MACHO
8302 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8303 /* The decl in an indirection symbol is the original one, which might
8304 be less aligned than the indirection. Our indirections are always
8305 pointer-aligned. */
8307 else
8308 #endif
8309 if (decl && DECL_ALIGN (decl))
8310 align = DECL_ALIGN_UNIT (decl);
8313 unsigned int extra = 0;
8314 switch (mode)
8316 case E_DFmode:
8317 case E_DDmode:
8318 case E_DImode:
8319 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8320 addressing. */
8321 if (VECTOR_MEM_VSX_P (mode))
8322 return false;
8324 if (!TARGET_POWERPC64)
8325 extra = 4;
8326 else if ((offset & 3) || (align & 3))
8327 return false;
8328 break;
8330 case E_TFmode:
8331 case E_IFmode:
8332 case E_KFmode:
8333 case E_TDmode:
8334 case E_TImode:
8335 case E_PTImode:
8336 extra = 8;
8337 if (!TARGET_POWERPC64)
8338 extra = 12;
8339 else if ((offset & 3) || (align & 3))
8340 return false;
8341 break;
8343 default:
8344 break;
8347 /* We only care if the access(es) would cause a change to the high part. */
8348 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8349 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
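/* The ((offset & 0xffff) ^ 0x8000) - 0x8000 idiom above sign-extends the
   low 16 bits of the offset: e.g. 0x18000 becomes -0x8000 and 0x12345
   becomes 0x2345, which is exactly the displacement the low half of a
   HIGH/LO_SUM pair contributes. */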
8352 /* Return true if the MEM operand is a memory operand suitable for use
8353 with a (full width, possibly multiple) gpr load/store. On
8354 powerpc64 this means the offset must be divisible by 4.
8355 Implements 'Y' constraint.
8357 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8358 a constraint function we know the operand has satisfied a suitable
8359 memory predicate.
8361 Offsetting a lo_sum should not be allowed, except where we know by
8362 alignment that a 32k boundary is not crossed. Note that by
8363 "offsetting" here we mean a further offset to access parts of the
8364 MEM. It's fine to have a lo_sum where the inner address is offset
8365 from a sym, since the same sym+offset will appear in the high part
8366 of the address calculation. */
8368 bool
8369 mem_operand_gpr (rtx op, machine_mode mode)
8371 unsigned HOST_WIDE_INT offset;
8372 int extra;
8373 rtx addr = XEXP (op, 0);
8375 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8376 if (TARGET_UPDATE
8377 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8378 && mode_supports_pre_incdec_p (mode)
8379 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8380 return true;
8382 /* Allow prefixed instructions if supported. If the bottom two bits of the
8383 offset are non-zero, we could use a prefixed instruction (which does not
8384 have the DS-form constraint that the traditional instruction had) instead
8385 of forcing the unaligned offset to a GPR. */
8386 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8387 return true;
8389 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8390 really OK. Doing this early avoids teaching all the other machinery
8391 about them. */
8392 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8393 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8395 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8396 if (!rs6000_offsettable_memref_p (op, mode, false))
8397 return false;
8399 op = address_offset (addr);
8400 if (op == NULL_RTX)
8401 return true;
8403 offset = INTVAL (op);
8404 if (TARGET_POWERPC64 && (offset & 3) != 0)
8405 return false;
8407 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8408 if (extra < 0)
8409 extra = 0;
8411 if (GET_CODE (addr) == LO_SUM)
8412 /* For lo_sum addresses, we must allow any offset except one that
8413 causes a wrap, so test only the low 16 bits. */
8414 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8416 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
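/* E.g. on powerpc64 a DImode access at (plus r3 6) is rejected above,
   since LD/STD are DS-form and need a displacement divisible by 4, while
   (plus r3 8) is fine; a prefixed load/store, when available, lifts the
   restriction via the address_is_prefixed check. */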
8419 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8420 enforce an offset divisible by 4 even for 32-bit. */
8422 bool
8423 mem_operand_ds_form (rtx op, machine_mode mode)
8425 unsigned HOST_WIDE_INT offset;
8426 int extra;
8427 rtx addr = XEXP (op, 0);
8429 /* Allow prefixed instructions if supported. If the bottom two bits of the
8430 offset are non-zero, we could use a prefixed instruction (which does not
8431 have the DS-form constraint that the traditional instruction had) instead
8432 of forcing the unaligned offset to a GPR. */
8433 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8434 return true;
8436 if (!offsettable_address_p (false, mode, addr))
8437 return false;
8439 op = address_offset (addr);
8440 if (op == NULL_RTX)
8441 return true;
8443 offset = INTVAL (op);
8444 if ((offset & 3) != 0)
8445 return false;
8447 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8448 if (extra < 0)
8449 extra = 0;
8451 if (GET_CODE (addr) == LO_SUM)
8452 /* For lo_sum addresses, we must allow any offset except one that
8453 causes a wrap, so test only the low 16 bits. */
8454 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8456 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8459 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8461 static bool
8462 reg_offset_addressing_ok_p (machine_mode mode)
8464 switch (mode)
8466 case E_V16QImode:
8467 case E_V8HImode:
8468 case E_V4SFmode:
8469 case E_V4SImode:
8470 case E_V2DFmode:
8471 case E_V2DImode:
8472 case E_V1TImode:
8473 case E_TImode:
8474 case E_TFmode:
8475 case E_KFmode:
8476 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8477 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8478 a vector mode, if we want to use the VSX registers to move it around,
8479 we need to restrict ourselves to reg+reg addressing. Similarly for
8480 IEEE 128-bit floating point that is passed in a single vector
8481 register. */
8482 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8483 return mode_supports_dq_form (mode);
8484 break;
8486 /* The vector pair/quad types support offset addressing if the
8487 underlying vectors support offset addressing. */
8488 case E_OOmode:
8489 case E_XOmode:
8490 return TARGET_MMA;
8492 case E_SDmode:
8493 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8494 addressing for the LFIWZX and STFIWX instructions. */
8495 if (TARGET_NO_SDMODE_STACK)
8496 return false;
8497 break;
8499 default:
8500 break;
8503 return true;
8506 static bool
8507 virtual_stack_registers_memory_p (rtx op)
8509 int regnum;
8511 if (REG_P (op))
8512 regnum = REGNO (op);
8514 else if (GET_CODE (op) == PLUS
8515 && REG_P (XEXP (op, 0))
8516 && CONST_INT_P (XEXP (op, 1)))
8517 regnum = REGNO (XEXP (op, 0));
8519 else
8520 return false;
8522 return (regnum >= FIRST_VIRTUAL_REGISTER
8523 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8526 /* Return true if a MODE-sized memory access to OP plus OFFSET
8527 is known not to straddle a 32k boundary. This function is used
8528 to determine whether -mcmodel=medium code can use TOC pointer
8529 relative addressing for OP. This means the alignment of the TOC
8530 pointer must also be taken into account, and unfortunately that is
8531 only 8 bytes. */
8533 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8534 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8535 #endif
8537 static bool
8538 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8539 machine_mode mode)
8541 tree decl;
8542 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8544 if (!SYMBOL_REF_P (op))
8545 return false;
8547 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8548 SYMBOL_REF. */
8549 if (mode_supports_dq_form (mode))
8550 return false;
8552 dsize = GET_MODE_SIZE (mode);
8553 decl = SYMBOL_REF_DECL (op);
8554 if (!decl)
8556 if (dsize == 0)
8557 return false;
8559 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8560 replacing memory addresses with an anchor plus offset. We
8561 could find the decl by rummaging around in the block->objects
8562 VEC for the given offset but that seems like too much work. */
8563 dalign = BITS_PER_UNIT;
8564 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8565 && SYMBOL_REF_ANCHOR_P (op)
8566 && SYMBOL_REF_BLOCK (op) != NULL)
8568 struct object_block *block = SYMBOL_REF_BLOCK (op);
8570 dalign = block->alignment;
8571 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8573 else if (CONSTANT_POOL_ADDRESS_P (op))
8575 /* It would be nice to have get_pool_align ().  */
8576 machine_mode cmode = get_pool_mode (op);
8578 dalign = GET_MODE_ALIGNMENT (cmode);
8581 else if (DECL_P (decl))
8583 dalign = DECL_ALIGN (decl);
8585 if (dsize == 0)
8587 /* Allow BLKmode when the entire object is known to not
8588 cross a 32k boundary. */
8589 if (!DECL_SIZE_UNIT (decl))
8590 return false;
8592 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8593 return false;
8595 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8596 if (dsize > 32768)
8597 return false;
8599 dalign /= BITS_PER_UNIT;
8600 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8601 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8602 return dalign >= dsize;
8605 else
8606 gcc_unreachable ();
8608 /* Find how many bits of the alignment we know for this access. */
8609 dalign /= BITS_PER_UNIT;
8610 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8611 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8612 mask = dalign - 1;
8613 lsb = offset & -offset;
8614 mask &= lsb - 1;
8615 dalign = mask + 1;
8617 return dalign >= dsize;
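/* The lsb/mask arithmetic above derives the alignment OP + OFFSET is
   known to have: e.g. with dalign == 8 and offset == 48 the lsb is 16,
   the known alignment stays 8, and any access of dsize <= 8 cannot cross
   a 32k boundary; with offset == 4 the known alignment drops to 4. */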
8620 static bool
8621 constant_pool_expr_p (rtx op)
8623 rtx base, offset;
8625 split_const (op, &base, &offset);
8626 return (SYMBOL_REF_P (base)
8627 && CONSTANT_POOL_ADDRESS_P (base)
8628 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8631 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8632 use that as the register to put the HIGH value into if register allocation
8633 is already done. */
8635 rtx
8636 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8638 rtx tocrel, tocreg, hi;
8640 gcc_assert (TARGET_TOC);
8642 if (TARGET_DEBUG_ADDR)
8644 if (SYMBOL_REF_P (symbol))
8645 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8646 XSTR (symbol, 0));
8647 else
8649 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8650 GET_RTX_NAME (GET_CODE (symbol)));
8651 debug_rtx (symbol);
8655 if (!can_create_pseudo_p ())
8656 df_set_regs_ever_live (TOC_REGISTER, true);
8658 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8659 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8660 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8661 return tocrel;
8663 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8664 if (largetoc_reg != NULL)
8666 emit_move_insn (largetoc_reg, hi);
8667 hi = largetoc_reg;
8669 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8672 /* These are only used to pass through from print_operand/print_operand_address
8673 to rs6000_output_addr_const_extra over the intervening function
8674 output_addr_const which is not target code. */
8675 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8677 /* Return true if OP is a toc pointer relative address (the output
8678 of create_TOC_reference). If STRICT, do not match non-split
8679 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8680 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8681 TOCREL_OFFSET_RET respectively. */
8683 bool
8684 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8685 const_rtx *tocrel_offset_ret)
8687 if (!TARGET_TOC)
8688 return false;
8690 if (TARGET_CMODEL != CMODEL_SMALL)
8692 /* When strict, ensure we have everything tidy. */
8693 if (strict
8694 && !(GET_CODE (op) == LO_SUM
8695 && REG_P (XEXP (op, 0))
8696 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8697 return false;
8699 /* When not strict, allow non-split TOC addresses and also allow
8700 (lo_sum (high ..)) TOC addresses created during reload. */
8701 if (GET_CODE (op) == LO_SUM)
8702 op = XEXP (op, 1);
8705 const_rtx tocrel_base = op;
8706 const_rtx tocrel_offset = const0_rtx;
8708 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8710 tocrel_base = XEXP (op, 0);
8711 tocrel_offset = XEXP (op, 1);
8714 if (tocrel_base_ret)
8715 *tocrel_base_ret = tocrel_base;
8716 if (tocrel_offset_ret)
8717 *tocrel_offset_ret = tocrel_offset;
8719 return (GET_CODE (tocrel_base) == UNSPEC
8720 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8721 && REG_P (XVECEXP (tocrel_base, 0, 1))
8722 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8725 /* Return true if X is a constant pool address, and also for cmodel=medium
8726 if X is a toc-relative address known to be offsettable within MODE. */
8728 bool
8729 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8730 bool strict)
8732 const_rtx tocrel_base, tocrel_offset;
8733 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8734 && (TARGET_CMODEL != CMODEL_MEDIUM
8735 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8736 || mode == QImode
8737 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8738 INTVAL (tocrel_offset), mode)));
8741 static bool
8742 legitimate_small_data_p (machine_mode mode, rtx x)
8744 return (DEFAULT_ABI == ABI_V4
8745 && !flag_pic && !TARGET_TOC
8746 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8747 && small_data_operand (x, mode));
8750 bool
8751 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8752 bool strict, bool worst_case)
8754 unsigned HOST_WIDE_INT offset;
8755 unsigned int extra;
8757 if (GET_CODE (x) != PLUS)
8758 return false;
8759 if (!REG_P (XEXP (x, 0)))
8760 return false;
8761 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8762 return false;
8763 if (mode_supports_dq_form (mode))
8764 return quad_address_p (x, mode, strict);
8765 if (!reg_offset_addressing_ok_p (mode))
8766 return virtual_stack_registers_memory_p (x);
8767 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8768 return true;
8769 if (!CONST_INT_P (XEXP (x, 1)))
8770 return false;
8772 offset = INTVAL (XEXP (x, 1));
8773 extra = 0;
8774 switch (mode)
8776 case E_DFmode:
8777 case E_DDmode:
8778 case E_DImode:
8779 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8780 addressing. */
8781 if (VECTOR_MEM_VSX_P (mode))
8782 return false;
8784 if (!worst_case)
8785 break;
8786 if (!TARGET_POWERPC64)
8787 extra = 4;
8788 else if (offset & 3)
8789 return false;
8790 break;
8792 case E_TFmode:
8793 case E_IFmode:
8794 case E_KFmode:
8795 case E_TDmode:
8796 case E_TImode:
8797 case E_PTImode:
8798 extra = 8;
8799 if (!worst_case)
8800 break;
8801 if (!TARGET_POWERPC64)
8802 extra = 12;
8803 else if (offset & 3)
8804 return false;
8805 break;
8807 default:
8808 break;
8811 if (TARGET_PREFIXED)
8812 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8813 else
8814 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8817 bool
8818 legitimate_indexed_address_p (rtx x, int strict)
8820 rtx op0, op1;
8822 if (GET_CODE (x) != PLUS)
8823 return false;
8825 op0 = XEXP (x, 0);
8826 op1 = XEXP (x, 1);
8828 return (REG_P (op0) && REG_P (op1)
8829 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8830 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8831 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8832 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8835 bool
8836 avoiding_indexed_address_p (machine_mode mode)
8838 unsigned int msize = GET_MODE_SIZE (mode);
8840 /* Avoid indexed addressing for modes that have non-indexed load/store
8841 instruction forms. On power10, vector pairs have an indexed
8842 form, but vector quads don't. */
8843 if (msize > 16)
8844 return msize != 32;
8846 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8849 bool
8850 legitimate_indirect_address_p (rtx x, int strict)
8852 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8855 bool
8856 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8858 if (!TARGET_MACHO || !flag_pic
8859 || mode != SImode || !MEM_P (x))
8860 return false;
8861 x = XEXP (x, 0);
8863 if (GET_CODE (x) != LO_SUM)
8864 return false;
8865 if (!REG_P (XEXP (x, 0)))
8866 return false;
8867 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8868 return false;
8869 x = XEXP (x, 1);
8871 return CONSTANT_P (x);
8874 static bool
8875 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8877 if (GET_CODE (x) != LO_SUM)
8878 return false;
8879 if (!REG_P (XEXP (x, 0)))
8880 return false;
8881 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8882 return false;
8883 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
8884 if (mode_supports_dq_form (mode))
8885 return false;
8886 x = XEXP (x, 1);
8888 if (TARGET_ELF || TARGET_MACHO)
8890 bool large_toc_ok;
8892 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8893 return false;
8894 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8895 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8896 recognizes some LO_SUM addresses as valid although this
8897 function says opposite. In most cases, LRA through different
8898 transformations can generate correct code for address reloads.
8899 It cannot manage only some LO_SUM cases. So we need to add
8900 code here saying that some addresses are still valid. */
8901 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8902 && small_toc_ref (x, VOIDmode));
8903 if (TARGET_TOC && ! large_toc_ok)
8904 return false;
8905 if (GET_MODE_NUNITS (mode) != 1)
8906 return false;
8907 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8908 && !(/* ??? Assume floating point reg based on mode? */
8909 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8910 return false;
8912 return CONSTANT_P (x) || large_toc_ok;
8915 return false;
8919 /* Try machine-dependent ways of modifying an illegitimate address
8920 to be legitimate. If we find one, return the new, valid address.
8921 This is used from only one place: `memory_address' in explow.c.
8923 OLDX is the address as it was before break_out_memory_refs was
8924 called. In some cases it is useful to look at this to decide what
8925 needs to be done.
8927 It is always safe for this function to do nothing. It exists to
8928 recognize opportunities to optimize the output.
8930 On RS/6000, first check for the sum of a register with a constant
8931 integer that is out of range. If so, generate code to add the
8932 constant with the low-order 16 bits masked to the register and force
8933 this result into another register (this can be done with `cau').
8934 Then generate an address of REG+(CONST&0xffff), allowing for the
8935 possibility of bit 16 being a one.
8937 Then check for the sum of a register and something not constant, try to
8938 load the other things into a register and return the sum. */
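/* For example (with a made-up register), r3 + 0x12345 does not fit a
   16-bit displacement; the code below splits it into high_int = 0x10000
   and low_int = 0x2345, forces r3 + 0x10000 into a new register (an
   addis), and returns (plus that_reg 0x2345). */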
8940 static rtx
8941 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8942 machine_mode mode)
8944 unsigned int extra;
8946 if (!reg_offset_addressing_ok_p (mode)
8947 || mode_supports_dq_form (mode))
8949 if (virtual_stack_registers_memory_p (x))
8950 return x;
8952 /* In theory we should not be seeing addresses of the form reg+0,
8953 but just in case it is generated, optimize it away. */
8954 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8955 return force_reg (Pmode, XEXP (x, 0));
8957 /* For TImode with load/store quad, restrict addresses to just a single
8958 pointer, so it works with both GPRs and VSX registers. */
8959 /* Make sure both operands are registers. */
8960 else if (GET_CODE (x) == PLUS
8961 && (mode != TImode || !TARGET_VSX))
8962 return gen_rtx_PLUS (Pmode,
8963 force_reg (Pmode, XEXP (x, 0)),
8964 force_reg (Pmode, XEXP (x, 1)));
8965 else
8966 return force_reg (Pmode, x);
8968 if (SYMBOL_REF_P (x))
8970 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8971 if (model != 0)
8972 return rs6000_legitimize_tls_address (x, model);
8975 extra = 0;
8976 switch (mode)
8978 case E_TFmode:
8979 case E_TDmode:
8980 case E_TImode:
8981 case E_PTImode:
8982 case E_IFmode:
8983 case E_KFmode:
8984 /* As in legitimate_offset_address_p we do not assume
8985 worst-case. The mode here is just a hint as to the registers
8986 used. A TImode is usually in gprs, but may actually be in
8987 fprs. Leave worst-case scenario for reload to handle via
8988 insn constraints. PTImode is only GPRs. */
8989 extra = 8;
8990 break;
8991 default:
8992 break;
8995 if (GET_CODE (x) == PLUS
8996 && REG_P (XEXP (x, 0))
8997 && CONST_INT_P (XEXP (x, 1))
8998 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8999 >= 0x10000 - extra))
9001 HOST_WIDE_INT high_int, low_int;
9002 rtx sum;
9003 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
9004 if (low_int >= 0x8000 - extra)
9005 low_int = 0;
9006 high_int = INTVAL (XEXP (x, 1)) - low_int;
9007 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9008 gen_int_mode (high_int, Pmode)), 0);
9009 return plus_constant (Pmode, sum, low_int);
9011 else if (GET_CODE (x) == PLUS
9012 && REG_P (XEXP (x, 0))
9013 && !CONST_INT_P (XEXP (x, 1))
9014 && GET_MODE_NUNITS (mode) == 1
9015 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9016 || (/* ??? Assume floating point reg based on mode? */
9017 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9018 && !avoiding_indexed_address_p (mode))
9020 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9021 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9023 else if ((TARGET_ELF
9024 #if TARGET_MACHO
9025 || !MACHO_DYNAMIC_NO_PIC_P
9026 #endif
9028 && TARGET_32BIT
9029 && TARGET_NO_TOC_OR_PCREL
9030 && !flag_pic
9031 && !CONST_INT_P (x)
9032 && !CONST_WIDE_INT_P (x)
9033 && !CONST_DOUBLE_P (x)
9034 && CONSTANT_P (x)
9035 && GET_MODE_NUNITS (mode) == 1
9036 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9037 || (/* ??? Assume floating point reg based on mode? */
9038 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9040 rtx reg = gen_reg_rtx (Pmode);
9041 if (TARGET_ELF)
9042 emit_insn (gen_elf_high (reg, x));
9043 else
9044 emit_insn (gen_macho_high (Pmode, reg, x));
9045 return gen_rtx_LO_SUM (Pmode, reg, x);
9047 else if (TARGET_TOC
9048 && SYMBOL_REF_P (x)
9049 && constant_pool_expr_p (x)
9050 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9051 return create_TOC_reference (x, NULL_RTX);
9052 else
9053 return x;
9056 /* Debug version of rs6000_legitimize_address. */
9057 static rtx
9058 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9060 rtx ret;
9061 rtx_insn *insns;
9063 start_sequence ();
9064 ret = rs6000_legitimize_address (x, oldx, mode);
9065 insns = get_insns ();
9066 end_sequence ();
9068 if (ret != x)
9070 fprintf (stderr,
9071 "\nrs6000_legitimize_address: mode %s, old code %s, "
9072 "new code %s, modified\n",
9073 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9074 GET_RTX_NAME (GET_CODE (ret)));
9076 fprintf (stderr, "Original address:\n");
9077 debug_rtx (x);
9079 fprintf (stderr, "oldx:\n");
9080 debug_rtx (oldx);
9082 fprintf (stderr, "New address:\n");
9083 debug_rtx (ret);
9085 if (insns)
9087 fprintf (stderr, "Insns added:\n");
9088 debug_rtx_list (insns, 20);
9091 else
9093 fprintf (stderr,
9094 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9095 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9097 debug_rtx (x);
9100 if (insns)
9101 emit_insn (insns);
9103 return ret;
9106 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9107 We need to emit DTP-relative relocations. */
9109 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9110 static void
9111 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9113 switch (size)
9115 case 4:
9116 fputs ("\t.long\t", file);
9117 break;
9118 case 8:
9119 fputs (DOUBLE_INT_ASM_OP, file);
9120 break;
9121 default:
9122 gcc_unreachable ();
9124 output_addr_const (file, x);
9125 if (TARGET_ELF)
9126 fputs ("@dtprel+0x8000", file);
9129 /* Return true if X is a symbol that refers to real (rather than emulated)
9130 TLS. */
9132 static bool
9133 rs6000_real_tls_symbol_ref_p (rtx x)
9135 return (SYMBOL_REF_P (x)
9136 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9139 /* In the name of slightly smaller debug output, and to cater to
9140 general assembler lossage, recognize various UNSPEC sequences
9141 and turn them back into a direct symbol reference. */
9143 static rtx
9144 rs6000_delegitimize_address (rtx orig_x)
9146 rtx x, y, offset;
9148 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9149 encodes loading up the high part of the address of a TOC reference along
9150 with a load of a GPR using the same base register used for the load. We
9151 return the original SYMBOL_REF.
9153 (set (reg:INT1 <reg>
9154 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9156 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9157 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9158 We return the original SYMBOL_REF.
9160 (parallel [(set (reg:DI <base-reg>)
9161 (unspec:DI [(symbol_ref <symbol>)
9162 (const_int <marker>)]
9163 UNSPEC_PCREL_OPT_LD_ADDR))
9164 (set (reg:DI <load-reg>)
9165 (unspec:DI [(const_int 0)]
9166 UNSPEC_PCREL_OPT_LD_DATA))])
9168 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9169 GPR being loaded is the same as the GPR used to hold the external address.
9171 (set (reg:DI <base-reg>)
9172 (unspec:DI [(symbol_ref <symbol>)
9173 (const_int <marker>)]
9174 UNSPEC_PCREL_OPT_LD_SAME_REG))
9176 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9177 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9178 We return the original SYMBOL_REF.
9180 (parallel [(set (reg:DI <base-reg>)
9181 (unspec:DI [(symbol_ref <symbol>)
9182 (const_int <marker>)]
9183 UNSPEC_PCREL_OPT_ST_ADDR))
9184 (use (reg <store-reg>))]) */
9186 if (GET_CODE (orig_x) == UNSPEC)
9187 switch (XINT (orig_x, 1))
9189 case UNSPEC_FUSION_GPR:
9190 case UNSPEC_PCREL_OPT_LD_ADDR:
9191 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9192 case UNSPEC_PCREL_OPT_ST_ADDR:
9193 orig_x = XVECEXP (orig_x, 0, 0);
9194 break;
9196 default:
9197 break;
9200 orig_x = delegitimize_mem_from_attrs (orig_x);
9202 x = orig_x;
9203 if (MEM_P (x))
9204 x = XEXP (x, 0);
9206 y = x;
9207 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9208 y = XEXP (y, 1);
9210 offset = NULL_RTX;
9211 if (GET_CODE (y) == PLUS
9212 && GET_MODE (y) == Pmode
9213 && CONST_INT_P (XEXP (y, 1)))
9215 offset = XEXP (y, 1);
9216 y = XEXP (y, 0);
9219 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9221 y = XVECEXP (y, 0, 0);
9223 #ifdef HAVE_AS_TLS
9224 /* Do not associate thread-local symbols with the original
9225 constant pool symbol. */
9226 if (TARGET_XCOFF
9227 && SYMBOL_REF_P (y)
9228 && CONSTANT_POOL_ADDRESS_P (y)
9229 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9230 return orig_x;
9231 #endif
9233 if (offset != NULL_RTX)
9234 y = gen_rtx_PLUS (Pmode, y, offset);
9235 if (!MEM_P (orig_x))
9236 return y;
9237 else
9238 return replace_equiv_address_nv (orig_x, y);
9241 if (TARGET_MACHO
9242 && GET_CODE (orig_x) == LO_SUM
9243 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9245 y = XEXP (XEXP (orig_x, 1), 0);
9246 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9247 return XVECEXP (y, 0, 0);
9250 return orig_x;
9253 /* Return true if X shouldn't be emitted into the debug info.
9254 The linker doesn't like .toc section references from
9255 .debug_* sections, so reject .toc section symbols. */
9257 static bool
9258 rs6000_const_not_ok_for_debug_p (rtx x)
9260 if (GET_CODE (x) == UNSPEC)
9261 return true;
9262 if (SYMBOL_REF_P (x)
9263 && CONSTANT_POOL_ADDRESS_P (x))
9265 rtx c = get_pool_constant (x);
9266 machine_mode cmode = get_pool_mode (x);
9267 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9268 return true;
9271 return false;
9274 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9276 static bool
9277 rs6000_legitimate_combined_insn (rtx_insn *insn)
9279 int icode = INSN_CODE (insn);
9281 /* Reject creating doloop insns. Combine should not be allowed
9282 to create these for a number of reasons:
9283 1) In a nested loop, if combine creates one of these in an
9284 outer loop and the register allocator happens to allocate ctr
9285 to the outer loop insn, then the inner loop can't use ctr.
9286 Inner loops ought to be more highly optimized.
9287 2) Combine often wants to create one of these from what was
9288 originally a three insn sequence, first combining the three
9289 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9290 allocated ctr, the splitter takes us back to the three insn
9291 sequence. It's better to stop combine at the two insn
9292 sequence.
9293 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9294 insns, the register allocator sometimes uses floating point
9295 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9296 jump insn and output reloads are not implemented for jumps,
9297 the ctrsi/ctrdi splitters need to handle all possible cases.
9298 That's a pain, and it gets to be seriously difficult when a
9299 splitter that runs after reload needs memory to transfer from
9300 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9301 for the difficult case. It's better to not create problems
9302 in the first place. */
9303 if (icode != CODE_FOR_nothing
9304 && (icode == CODE_FOR_bdz_si
9305 || icode == CODE_FOR_bdz_di
9306 || icode == CODE_FOR_bdnz_si
9307 || icode == CODE_FOR_bdnz_di
9308 || icode == CODE_FOR_bdztf_si
9309 || icode == CODE_FOR_bdztf_di
9310 || icode == CODE_FOR_bdnztf_si
9311 || icode == CODE_FOR_bdnztf_di))
9312 return false;
9314 return true;
9317 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9319 static GTY(()) rtx rs6000_tls_symbol;
9320 static rtx
9321 rs6000_tls_get_addr (void)
9323 if (!rs6000_tls_symbol)
9324 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9326 return rs6000_tls_symbol;
9329 /* Construct the SYMBOL_REF for TLS GOT references. */
9331 static GTY(()) rtx rs6000_got_symbol;
9332 static rtx
9333 rs6000_got_sym (void)
9335 if (!rs6000_got_symbol)
9337 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9338 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9339 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9342 return rs6000_got_symbol;
9345 /* AIX Thread-Local Address support. */
9347 static rtx
9348 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9350 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9351 const char *name;
9352 char *tlsname;
9354 /* Place addr into TOC constant pool. */
9355 sym = force_const_mem (GET_MODE (addr), addr);
9357 /* Output the TOC entry and create the MEM referencing the value. */
9358 if (constant_pool_expr_p (XEXP (sym, 0))
9359 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9361 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9362 mem = gen_const_mem (Pmode, tocref);
9363 set_mem_alias_set (mem, get_TOC_alias_set ());
9365 else
9366 return sym;
9368 /* Use global-dynamic for local-dynamic. */
9369 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9370 || model == TLS_MODEL_LOCAL_DYNAMIC)
9372 /* Create new TOC reference for @m symbol. */
9373 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9374 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9375 strcpy (tlsname, "*LCM");
9376 strcat (tlsname, name + 3);
9377 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9378 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9379 tocref = create_TOC_reference (modaddr, NULL_RTX);
9380 rtx modmem = gen_const_mem (Pmode, tocref);
9381 set_mem_alias_set (modmem, get_TOC_alias_set ());
9383 rtx modreg = gen_reg_rtx (Pmode);
9384 emit_insn (gen_rtx_SET (modreg, modmem));
9386 tmpreg = gen_reg_rtx (Pmode);
9387 emit_insn (gen_rtx_SET (tmpreg, mem));
9389 dest = gen_reg_rtx (Pmode);
9390 if (TARGET_32BIT)
9391 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9392 else
9393 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9394 return dest;
9396 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9397 else if (TARGET_32BIT)
9399 tlsreg = gen_reg_rtx (SImode);
9400 emit_insn (gen_tls_get_tpointer (tlsreg));
9402 else
9404 tlsreg = gen_rtx_REG (DImode, 13);
9405 xcoff_tls_exec_model_detected = true;
9408 /* Load the TOC value into temporary register. */
9409 tmpreg = gen_reg_rtx (Pmode);
9410 emit_insn (gen_rtx_SET (tmpreg, mem));
9411 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9412 gen_rtx_MINUS (Pmode, addr, tlsreg));
9414 /* Add TOC symbol value to TLS pointer. */
9415 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9417 return dest;
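/* A rough sketch of the exec-model code emitted above, assuming 64-bit
   (GPR 13 is the thread pointer) and a hypothetical TOC label LCsym for
   the constant pool entry:

	ld   tmp,LCsym(2)	# TP-relative offset from the TOC
	add  dest,tmp,13	# offset + thread pointer

   matching the addr - tlsreg REG_EQUAL note set above.  */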
9420 /* Passes the TLS arg value from the global-dynamic and local-dynamic
9421 emit_library_call_value calls in rs6000_legitimize_tls_address down to
9422 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9423 marker relocs put on __tls_get_addr calls. */
9424 static rtx global_tlsarg;
9426 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9427 this (thread-local) address. */
9429 static rtx
9430 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9432 rtx dest, insn;
9434 if (TARGET_XCOFF)
9435 return rs6000_legitimize_tls_address_aix (addr, model);
9437 dest = gen_reg_rtx (Pmode);
9438 if (model == TLS_MODEL_LOCAL_EXEC
9439 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9441 rtx tlsreg;
9443 if (TARGET_64BIT)
9445 tlsreg = gen_rtx_REG (Pmode, 13);
9446 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9448 else
9450 tlsreg = gen_rtx_REG (Pmode, 2);
9451 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9453 emit_insn (insn);
9455 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9457 rtx tlsreg, tmp;
9459 tmp = gen_reg_rtx (Pmode);
9460 if (TARGET_64BIT)
9462 tlsreg = gen_rtx_REG (Pmode, 13);
9463 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9465 else
9467 tlsreg = gen_rtx_REG (Pmode, 2);
9468 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9470 emit_insn (insn);
9471 if (TARGET_64BIT)
9472 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9473 else
9474 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9475 emit_insn (insn);
9477 else
9479 rtx got, tga, tmp1, tmp2;
9481 /* We currently use relocations like @got@tlsgd for tls, which
9482 means the linker will handle allocation of tls entries, placing
9483 them in the .got section. So use a pointer to the .got section,
9484 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9485 or to secondary GOT sections used by 32-bit -fPIC. */
9486 if (rs6000_pcrel_p ())
9487 got = const0_rtx;
9488 else if (TARGET_64BIT)
9489 got = gen_rtx_REG (Pmode, 2);
9490 else
9492 if (flag_pic == 1)
9493 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9494 else
9496 rtx gsym = rs6000_got_sym ();
9497 got = gen_reg_rtx (Pmode);
9498 if (flag_pic == 0)
9499 rs6000_emit_move (got, gsym, Pmode);
9500 else
9502 rtx mem, lab;
9504 tmp1 = gen_reg_rtx (Pmode);
9505 tmp2 = gen_reg_rtx (Pmode);
9506 mem = gen_const_mem (Pmode, tmp1);
9507 lab = gen_label_rtx ();
9508 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9509 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9510 if (TARGET_LINK_STACK)
9511 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9512 emit_move_insn (tmp2, mem);
9513 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9514 set_unique_reg_note (last, REG_EQUAL, gsym);
9519 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9521 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9522 UNSPEC_TLSGD);
9523 tga = rs6000_tls_get_addr ();
9524 rtx argreg = gen_rtx_REG (Pmode, 3);
9525 emit_insn (gen_rtx_SET (argreg, arg));
9526 global_tlsarg = arg;
9527 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9528 global_tlsarg = NULL_RTX;
9530 /* Make a note so that the result of this call can be CSEd. */
9531 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9532 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9533 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9535 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9537 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9538 tga = rs6000_tls_get_addr ();
9539 tmp1 = gen_reg_rtx (Pmode);
9540 rtx argreg = gen_rtx_REG (Pmode, 3);
9541 emit_insn (gen_rtx_SET (argreg, arg));
9542 global_tlsarg = arg;
9543 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9544 global_tlsarg = NULL_RTX;
9546 /* Make a note so that the result of this call can be CSEd. */
9547 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9548 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9549 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9551 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9553 if (TARGET_64BIT)
9554 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9555 else
9556 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9558 else if (rs6000_tls_size == 32)
9560 tmp2 = gen_reg_rtx (Pmode);
9561 if (TARGET_64BIT)
9562 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9563 else
9564 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9565 emit_insn (insn);
9566 if (TARGET_64BIT)
9567 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9568 else
9569 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9571 else
9573 tmp2 = gen_reg_rtx (Pmode);
9574 if (TARGET_64BIT)
9575 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9576 else
9577 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9578 emit_insn (insn);
9579 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9581 emit_insn (insn);
9583 else
9585 /* IE, or 64-bit offset LE. */
9586 tmp2 = gen_reg_rtx (Pmode);
9587 if (TARGET_64BIT)
9588 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9589 else
9590 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9591 emit_insn (insn);
9592 if (rs6000_pcrel_p ())
9594 if (TARGET_64BIT)
9595 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9596 else
9597 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9599 else if (TARGET_64BIT)
9600 insn = gen_tls_tls_64 (dest, tmp2, addr);
9601 else
9602 insn = gen_tls_tls_32 (dest, tmp2, addr);
9603 emit_insn (insn);
9607 return dest;
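/* For reference, a sketch of the classic 64-bit ELF sequences expanded
   here (register numbers illustrative, non-pcrel):

   local-exec, -mtls-size=16:
	addi 9,13,sym@tprel

   global-dynamic:
	addi 3,2,sym@got@tlsgd
	bl   __tls_get_addr(sym@tlsgd)
	nop

   where the call marker comes from the global_tlsarg handling above.  */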
9610 /* Only create the global variable for the stack protect guard if we are using
9611 the global flavor of that guard. */
9612 static tree
9613 rs6000_init_stack_protect_guard (void)
9615 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9616 return default_stack_protect_guard ();
9618 return NULL_TREE;
9621 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9623 static bool
9624 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9626 if (GET_CODE (x) == HIGH
9627 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9628 return true;
9630 /* A TLS symbol in the TOC cannot contain a sum. */
9631 if (GET_CODE (x) == CONST
9632 && GET_CODE (XEXP (x, 0)) == PLUS
9633 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9634 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9635 return true;
9637 /* Allow AIX TOC TLS symbols in the constant pool,
9638 but not ELF TLS symbols. */
9639 return TARGET_ELF && tls_referenced_p (x);
9642 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9643 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9644 can be addressed relative to the toc pointer. */
9646 static bool
9647 use_toc_relative_ref (rtx sym, machine_mode mode)
9649 return ((constant_pool_expr_p (sym)
9650 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9651 get_pool_mode (sym)))
9652 || (TARGET_CMODEL == CMODEL_MEDIUM
9653 && SYMBOL_REF_LOCAL_P (sym)
9654 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9657 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9658 that is a valid memory address for an instruction.
9659 The MODE argument is the machine mode for the MEM expression
9660 that wants to use this address.
9662 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9663 refers to a constant pool entry of an address (or the sum of it
9664 plus a constant), a short (16-bit signed) constant plus a register,
9665 the sum of two registers, or a register indirect, possibly with an
9666 auto-increment. For DFmode, DDmode and DImode with a constant plus
9667 register, we must ensure that both words are addressable, or on
9668 PowerPC64 that the offset is word aligned.
9670 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9671 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9672 because adjacent memory cells are accessed by adding word-sized offsets
9673 during assembly output. */
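/* Roughly, the four forms as RTL (an illustrative sketch):

	(mem:SI (reg:DI base))				 register indirect
	(mem:SI (plus:DI (reg:DI base) (const_int 16)))	 reg + 16-bit offset
	(mem:SI (plus:DI (reg:DI base) (reg:DI index)))	 indexed, reg + reg
	(mem:SI (lo_sum:DI (reg:DI base) (symbol_ref)))	 lo_sum  */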
9674 static bool
9675 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9677 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9678 bool quad_offset_p = mode_supports_dq_form (mode);
9680 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9681 return 0;
9683 /* Handle unaligned altivec lvx/stvx type addresses. */
9684 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9685 && GET_CODE (x) == AND
9686 && CONST_INT_P (XEXP (x, 1))
9687 && INTVAL (XEXP (x, 1)) == -16)
9689 x = XEXP (x, 0);
9690 return (legitimate_indirect_address_p (x, reg_ok_strict)
9691 || legitimate_indexed_address_p (x, reg_ok_strict)
9692 || virtual_stack_registers_memory_p (x));
9695 if (legitimate_indirect_address_p (x, reg_ok_strict))
9696 return 1;
9697 if (TARGET_UPDATE
9698 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9699 && mode_supports_pre_incdec_p (mode)
9700 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9701 return 1;
9703 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9704 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9705 return 1;
9707 /* Handle restricted vector d-form offsets in ISA 3.0. */
9708 if (quad_offset_p)
9710 if (quad_address_p (x, mode, reg_ok_strict))
9711 return 1;
9713 else if (virtual_stack_registers_memory_p (x))
9714 return 1;
9716 else if (reg_offset_p)
9718 if (legitimate_small_data_p (mode, x))
9719 return 1;
9720 if (legitimate_constant_pool_address_p (x, mode,
9721 reg_ok_strict || lra_in_progress))
9722 return 1;
9725 /* For TImode, if we have TImode in VSX registers, only allow register
9726 indirect addresses. This will allow the values to go in either GPRs
9727 or VSX registers without reloading. The vector types would tend to
9728 go into VSX registers, so we allow REG+REG, while TImode seems
9729 somewhat split, in that some uses are GPR based, and some VSX based. */
9730 /* FIXME: We could loosen this by changing the following to
9731 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9732 but currently we cannot allow REG+REG addressing for TImode. See
9733 PR72827 for complete details on how this ends up hoodwinking DSE. */
9734 if (mode == TImode && TARGET_VSX)
9735 return 0;
9736 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9737 if (! reg_ok_strict
9738 && reg_offset_p
9739 && GET_CODE (x) == PLUS
9740 && REG_P (XEXP (x, 0))
9741 && (XEXP (x, 0) == virtual_stack_vars_rtx
9742 || XEXP (x, 0) == arg_pointer_rtx)
9743 && CONST_INT_P (XEXP (x, 1)))
9744 return 1;
9745 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9746 return 1;
9747 if (!FLOAT128_2REG_P (mode)
9748 && (TARGET_HARD_FLOAT
9749 || TARGET_POWERPC64
9750 || (mode != DFmode && mode != DDmode))
9751 && (TARGET_POWERPC64 || mode != DImode)
9752 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9753 && mode != PTImode
9754 && !avoiding_indexed_address_p (mode)
9755 && legitimate_indexed_address_p (x, reg_ok_strict))
9756 return 1;
9757 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9758 && mode_supports_pre_modify_p (mode)
9759 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9760 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9761 reg_ok_strict, false)
9762 || (!avoiding_indexed_address_p (mode)
9763 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9764 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9766 /* There is no prefixed version of the load/store with update. */
9767 rtx addr = XEXP (x, 1);
9768 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9770 if (reg_offset_p && !quad_offset_p
9771 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9772 return 1;
9773 return 0;
9776 /* Debug version of rs6000_legitimate_address_p. */
9777 static bool
9778 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9779 bool reg_ok_strict)
9781 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9782 fprintf (stderr,
9783 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9784 "strict = %d, reload = %s, code = %s\n",
9785 ret ? "true" : "false",
9786 GET_MODE_NAME (mode),
9787 reg_ok_strict,
9788 (reload_completed ? "after" : "before"),
9789 GET_RTX_NAME (GET_CODE (x)));
9790 debug_rtx (x);
9792 return ret;
9795 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9797 static bool
9798 rs6000_mode_dependent_address_p (const_rtx addr,
9799 addr_space_t as ATTRIBUTE_UNUSED)
9801 return rs6000_mode_dependent_address_ptr (addr);
9804 /* Go to LABEL if ADDR (a legitimate address expression)
9805 has an effect that depends on the machine mode it is used for.
9807 On the RS/6000 this is true of all integral offsets (since AltiVec
9808 and VSX modes don't allow them) or is a pre-increment or decrement.
9810 ??? Except that due to conceptual problems in offsettable_address_p
9811 we can't really report the problems of integral offsets. So leave
9812 this assuming that the adjustable offset must be valid for the
9813 sub-words of a TFmode operand, which is what we had before. */
9815 static bool
9816 rs6000_mode_dependent_address (const_rtx addr)
9818 switch (GET_CODE (addr))
9820 case PLUS:
9821 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9822 is considered a legitimate address before reload, so there
9823 are no offset restrictions in that case. Note that this
9824 condition is safe in strict mode because any address involving
9825 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9826 been rejected as illegitimate. */
9827 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9828 && XEXP (addr, 0) != arg_pointer_rtx
9829 && CONST_INT_P (XEXP (addr, 1)))
9831 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9832 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9833 if (TARGET_PREFIXED)
9834 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9835 else
9836 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9838 break;
9840 case LO_SUM:
9841 /* Anything in the constant pool is sufficiently aligned that
9842 all bytes have the same high part address. */
9843 return !legitimate_constant_pool_address_p (addr, QImode, false);
9845 /* Auto-increment cases are now treated generically in recog.c. */
9846 case PRE_MODIFY:
9847 return TARGET_UPDATE;
9849 /* AND is only allowed in Altivec loads. */
9850 case AND:
9851 return true;
9853 default:
9854 break;
9857 return false;
9860 /* Debug version of rs6000_mode_dependent_address. */
9861 static bool
9862 rs6000_debug_mode_dependent_address (const_rtx addr)
9864 bool ret = rs6000_mode_dependent_address (addr);
9866 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9867 ret ? "true" : "false");
9868 debug_rtx (addr);
9870 return ret;
9873 /* Implement FIND_BASE_TERM. */
9875 rtx
9876 rs6000_find_base_term (rtx op)
9878 rtx base;
9880 base = op;
9881 if (GET_CODE (base) == CONST)
9882 base = XEXP (base, 0);
9883 if (GET_CODE (base) == PLUS)
9884 base = XEXP (base, 0);
9885 if (GET_CODE (base) == UNSPEC)
9886 switch (XINT (base, 1))
9888 case UNSPEC_TOCREL:
9889 case UNSPEC_MACHOPIC_OFFSET:
9890 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9891 for aliasing purposes. */
9892 return XVECEXP (base, 0, 0);
9895 return op;
9898 /* More elaborate version of recog's offsettable_memref_p predicate
9899 that works around the ??? note of rs6000_mode_dependent_address.
9900 In particular it accepts
9902 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9904 in 32-bit mode, which the recog predicate rejects.
9906 static bool
9907 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9909 bool worst_case;
9911 if (!MEM_P (op))
9912 return false;
9914 /* First mimic offsettable_memref_p. */
9915 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9916 return true;
9918 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9919 the latter predicate knows nothing about the mode of the memory
9920 reference and, therefore, assumes that it is the largest supported
9921 mode (TFmode). As a consequence, legitimate offsettable memory
9922 references are rejected. rs6000_legitimate_offset_address_p contains
9923 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9924 at least with a little bit of help here given that we know the
9925 actual registers used. */
9926 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9927 || GET_MODE_SIZE (reg_mode) == 4);
9928 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9929 strict, worst_case);
9932 /* Determine the reassociation width to be used in reassociate_bb.
9933 This takes into account how many parallel operations we
9934 can actually do of a given type, and also the latency.
9936 int add/sub 6/cycle
9937 mul 2/cycle
9938 vect add/sub/mul 2/cycle
9939 fp add/sub/mul 2/cycle
9940 dfp 1/cycle
9943 static int
9944 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9945 machine_mode mode)
9947 switch (rs6000_tune)
9949 case PROCESSOR_POWER8:
9950 case PROCESSOR_POWER9:
9951 case PROCESSOR_POWER10:
9952 if (DECIMAL_FLOAT_MODE_P (mode))
9953 return 1;
9954 if (VECTOR_MODE_P (mode))
9955 return 4;
9956 if (INTEGRAL_MODE_P (mode))
9957 return 1;
9958 if (FLOAT_MODE_P (mode))
9959 return 4;
9960 break;
9961 default:
9962 break;
9964 return 1;
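/* For example (a sketch of the effect): with a width of 4 for FP modes,
   a chain such as a+b+c+d+e+f+g+h can be rebalanced to
   ((a+b)+(c+d)) + ((e+f)+(g+h)), allowing up to four independent adds
   to be in flight at once.  */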
9967 /* Change register usage conditional on target flags. */
9968 static void
9969 rs6000_conditional_register_usage (void)
9971 int i;
9973 if (TARGET_DEBUG_TARGET)
9974 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9976 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9977 if (TARGET_64BIT)
9978 fixed_regs[13] = call_used_regs[13] = 1;
9980 /* Conditionally disable FPRs. */
9981 if (TARGET_SOFT_FLOAT)
9982 for (i = 32; i < 64; i++)
9983 fixed_regs[i] = call_used_regs[i] = 1;
9985 /* The TOC register is not killed across calls in a way that is
9986 visible to the compiler. */
9987 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9988 call_used_regs[2] = 0;
9990 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9991 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9993 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9994 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9995 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9997 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9998 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9999 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10001 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10002 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10004 if (!TARGET_ALTIVEC && !TARGET_VSX)
10006 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10007 fixed_regs[i] = call_used_regs[i] = 1;
10008 call_used_regs[VRSAVE_REGNO] = 1;
10011 if (TARGET_ALTIVEC || TARGET_VSX)
10012 global_regs[VSCR_REGNO] = 1;
10014 if (TARGET_ALTIVEC_ABI)
10016 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10017 call_used_regs[i] = 1;
10019 /* AIX reserves VR20:31 in non-extended ABI mode. */
10020 if (TARGET_XCOFF && !rs6000_aix_extabi)
10021 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10022 fixed_regs[i] = call_used_regs[i] = 1;
10027 /* Output insns to set DEST equal to the constant SOURCE as a series of
10028 lis, ori and shl instructions and return TRUE. */
10030 bool
10031 rs6000_emit_set_const (rtx dest, rtx source)
10033 machine_mode mode = GET_MODE (dest);
10034 rtx temp, set;
10035 rtx_insn *insn;
10036 HOST_WIDE_INT c;
10038 gcc_checking_assert (CONST_INT_P (source));
10039 c = INTVAL (source);
10040 switch (mode)
10042 case E_QImode:
10043 case E_HImode:
10044 emit_insn (gen_rtx_SET (dest, source));
10045 return true;
10047 case E_SImode:
10048 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10050 emit_insn (gen_rtx_SET (copy_rtx (temp),
10051 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10052 emit_insn (gen_rtx_SET (dest,
10053 gen_rtx_IOR (SImode, copy_rtx (temp),
10054 GEN_INT (c & 0xffff))));
10055 break;
10057 case E_DImode:
10058 if (!TARGET_POWERPC64)
10060 rtx hi, lo;
10062 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10063 DImode);
10064 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10065 DImode);
10066 emit_move_insn (hi, GEN_INT (c >> 32));
10067 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10068 emit_move_insn (lo, GEN_INT (c));
10070 else
10071 rs6000_emit_set_long_const (dest, c);
10072 break;
10074 default:
10075 gcc_unreachable ();
10078 insn = get_last_insn ();
10079 set = single_set (insn);
10080 if (! CONSTANT_P (SET_SRC (set)))
10081 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10083 return true;
10086 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10087 Output insns to set DEST equal to the constant C as a series of
10088 lis, ori and shl instructions. */
10090 static void
10091 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10093 rtx temp;
10094 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10096 ud1 = c & 0xffff;
10097 c = c >> 16;
10098 ud2 = c & 0xffff;
10099 c = c >> 16;
10100 ud3 = c & 0xffff;
10101 c = c >> 16;
10102 ud4 = c & 0xffff;
10104 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10105 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10106 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10108 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10109 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10111 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10113 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10114 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10115 if (ud1 != 0)
10116 emit_move_insn (dest,
10117 gen_rtx_IOR (DImode, copy_rtx (temp),
10118 GEN_INT (ud1)));
10120 else if (ud3 == 0 && ud4 == 0)
10122 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10124 gcc_assert (ud2 & 0x8000);
10125 emit_move_insn (copy_rtx (temp),
10126 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10127 if (ud1 != 0)
10128 emit_move_insn (copy_rtx (temp),
10129 gen_rtx_IOR (DImode, copy_rtx (temp),
10130 GEN_INT (ud1)));
10131 emit_move_insn (dest,
10132 gen_rtx_ZERO_EXTEND (DImode,
10133 gen_lowpart (SImode,
10134 copy_rtx (temp))));
10136 else if (ud1 == ud3 && ud2 == ud4)
10138 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10139 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10140 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
10141 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10142 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10143 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10145 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10146 || (ud4 == 0 && ! (ud3 & 0x8000)))
10148 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10150 emit_move_insn (copy_rtx (temp),
10151 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10152 if (ud2 != 0)
10153 emit_move_insn (copy_rtx (temp),
10154 gen_rtx_IOR (DImode, copy_rtx (temp),
10155 GEN_INT (ud2)));
10156 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10157 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10158 GEN_INT (16)));
10159 if (ud1 != 0)
10160 emit_move_insn (dest,
10161 gen_rtx_IOR (DImode, copy_rtx (temp),
10162 GEN_INT (ud1)));
10164 else
10166 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10168 emit_move_insn (copy_rtx (temp),
10169 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10170 if (ud3 != 0)
10171 emit_move_insn (copy_rtx (temp),
10172 gen_rtx_IOR (DImode, copy_rtx (temp),
10173 GEN_INT (ud3)));
10175 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10176 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10177 GEN_INT (32)));
10178 if (ud2 != 0)
10179 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10180 gen_rtx_IOR (DImode, copy_rtx (temp),
10181 GEN_INT (ud2 << 16)));
10182 if (ud1 != 0)
10183 emit_move_insn (dest,
10184 gen_rtx_IOR (DImode, copy_rtx (temp),
10185 GEN_INT (ud1)));
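/* A worked example of the general case above (illustrative): for
   c = 0x123456789abcdef0, ud4..ud1 are 0x1234, 0x5678, 0x9abc, 0xdef0,
   and the emitted sequence is roughly

	lis  tmp,0x1234
	ori  tmp,tmp,0x5678
	sldi tmp,tmp,32
	oris tmp,tmp,0x9abc
	ori  dest,tmp,0xdef0  */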
10189 /* Helper for the following. Get rid of [r+r] memory refs
10190 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10192 static void
10193 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10195 if (MEM_P (operands[0])
10196 && !REG_P (XEXP (operands[0], 0))
10197 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10198 GET_MODE (operands[0]), false))
10199 operands[0]
10200 = replace_equiv_address (operands[0],
10201 copy_addr_to_reg (XEXP (operands[0], 0)));
10203 if (MEM_P (operands[1])
10204 && !REG_P (XEXP (operands[1], 0))
10205 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10206 GET_MODE (operands[1]), false))
10207 operands[1]
10208 = replace_equiv_address (operands[1],
10209 copy_addr_to_reg (XEXP (operands[1], 0)));
10212 /* Generate a vector of constants to permute MODE for a little-endian
10213 storage operation by swapping the two halves of a vector. */
10214 static rtvec
10215 rs6000_const_vec (machine_mode mode)
10217 int i, subparts;
10218 rtvec v;
10220 switch (mode)
10222 case E_V1TImode:
10223 subparts = 1;
10224 break;
10225 case E_V2DFmode:
10226 case E_V2DImode:
10227 subparts = 2;
10228 break;
10229 case E_V4SFmode:
10230 case E_V4SImode:
10231 subparts = 4;
10232 break;
10233 case E_V8HImode:
10234 subparts = 8;
10235 break;
10236 case E_V16QImode:
10237 subparts = 16;
10238 break;
10239 default:
10240 gcc_unreachable();
10243 v = rtvec_alloc (subparts);
10245 for (i = 0; i < subparts / 2; ++i)
10246 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10247 for (i = subparts / 2; i < subparts; ++i)
10248 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10250 return v;
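/* For example, for V4SImode this yields the permutation { 2, 3, 0, 1 },
   i.e. the two doubleword halves of the vector swapped.  */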
10253 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10254 store operation. */
10255 void
10256 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10258 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10259 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10261 /* Scalar permutations are easier to express in integer modes than in
10262 floating-point modes, so cast them here. We use V1TImode instead
10263 of TImode to ensure that the values don't go through GPRs. */
10264 if (FLOAT128_VECTOR_P (mode))
10266 dest = gen_lowpart (V1TImode, dest);
10267 source = gen_lowpart (V1TImode, source);
10268 mode = V1TImode;
10271 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10272 scalar. */
10273 if (mode == TImode || mode == V1TImode)
10274 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10275 GEN_INT (64))));
10276 else
10278 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10279 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10283 /* Emit a little-endian load from vector memory location SOURCE to VSX
10284 register DEST in mode MODE. The load is done with two permuting
10285 insns that represent an lxvd2x and an xxpermdi.
10286 void
10287 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10289 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10290 V1TImode). */
10291 if (mode == TImode || mode == V1TImode)
10293 mode = V2DImode;
10294 dest = gen_lowpart (V2DImode, dest);
10295 source = adjust_address (source, V2DImode, 0);
10298 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10299 rs6000_emit_le_vsx_permute (tmp, source, mode);
10300 rs6000_emit_le_vsx_permute (dest, tmp, mode);
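/* A sketch of the resulting code (illustrative):

	lxvd2x   vs_tmp,0,addr		# doublewords arrive swapped on LE
	xxpermdi vs_dst,vs_tmp,vs_tmp,2	# swap them back

   The two permutes compose to the identity on the element order, so
   vs_dst ends up in the expected register layout.  */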
10303 /* Emit a little-endian store to vector memory location DEST from VSX
10304 register SOURCE in mode MODE. The store is done with two permuting
10305 insns that represent an xxpermdi and an stxvd2x.
10306 void
10307 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10309 /* This should never be called after LRA. */
10310 gcc_assert (can_create_pseudo_p ());
10312 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10313 V1TImode). */
10314 if (mode == TImode || mode == V1TImode)
10316 mode = V2DImode;
10317 dest = adjust_address (dest, V2DImode, 0);
10318 source = gen_lowpart (V2DImode, source);
10321 rtx tmp = gen_reg_rtx_and_attrs (source);
10322 rs6000_emit_le_vsx_permute (tmp, source, mode);
10323 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10326 /* Emit a sequence representing a little-endian VSX load or store,
10327 moving data from SOURCE to DEST in mode MODE. This is done
10328 separately from rs6000_emit_move to ensure it is called only
10329 during expand. LE VSX loads and stores introduced later are
10330 handled with a split. The expand-time RTL generation allows
10331 us to optimize away redundant pairs of register-permutes. */
10332 void
10333 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10335 gcc_assert (!BYTES_BIG_ENDIAN
10336 && VECTOR_MEM_VSX_P (mode)
10337 && !TARGET_P9_VECTOR
10338 && !gpr_or_gpr_p (dest, source)
10339 && (MEM_P (source) ^ MEM_P (dest)));
10341 if (MEM_P (source))
10343 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10344 rs6000_emit_le_vsx_load (dest, source, mode);
10346 else
10348 if (!REG_P (source))
10349 source = force_reg (mode, source);
10350 rs6000_emit_le_vsx_store (dest, source, mode);
10354 /* Return whether an SFmode or SImode move can be done without converting one
10355 mode to another. This arises when we have:
10357 (SUBREG:SF (REG:SI ...))
10358 (SUBREG:SI (REG:SF ...))
10360 and one of the values is in a floating point/vector register, where SFmode
10361 scalars are stored in DFmode format. */
10363 bool
10364 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10366 if (TARGET_ALLOW_SF_SUBREG)
10367 return true;
10369 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10370 return true;
10372 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10373 return true;
10375 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10376 if (SUBREG_P (dest))
10378 rtx dest_subreg = SUBREG_REG (dest);
10379 rtx src_subreg = SUBREG_REG (src);
10380 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10383 return false;
10387 /* Helper function to change moves with:
10389 (SUBREG:SF (REG:SI)) and
10390 (SUBREG:SI (REG:SF))
10392 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10393 values are stored as DFmode values in the VSX registers. We need to convert
10394 the bits before we can use a direct move or operate on the bits in the
10395 vector register as an integer type.
10397 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10399 static bool
10400 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10402 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10403 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10404 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10406 rtx inner_source = SUBREG_REG (source);
10407 machine_mode inner_mode = GET_MODE (inner_source);
10409 if (mode == SImode && inner_mode == SFmode)
10411 emit_insn (gen_movsi_from_sf (dest, inner_source));
10412 return true;
10415 if (mode == SFmode && inner_mode == SImode)
10417 emit_insn (gen_movsf_from_si (dest, inner_source));
10418 return true;
10422 return false;
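/* For example (a sketch), a move such as

	(set (reg:SI r) (subreg:SI (reg:SF f) 0))

   is rewritten to the movsi_from_sf pattern, which converts the DFmode
   storage format in the vector register to SFmode bits before moving
   them to the GPR.  */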
10425 /* Emit a move from SOURCE to DEST in mode MODE. */
10426 void
10427 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10429 rtx operands[2];
10430 operands[0] = dest;
10431 operands[1] = source;
10433 if (TARGET_DEBUG_ADDR)
10435 fprintf (stderr,
10436 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10437 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10438 GET_MODE_NAME (mode),
10439 lra_in_progress,
10440 reload_completed,
10441 can_create_pseudo_p ());
10442 debug_rtx (dest);
10443 fprintf (stderr, "source:\n");
10444 debug_rtx (source);
10447 /* Check that we get CONST_WIDE_INT only when we should. */
10448 if (CONST_WIDE_INT_P (operands[1])
10449 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10450 gcc_unreachable ();
10452 #ifdef HAVE_AS_GNU_ATTRIBUTE
10453 /* If we use a long double type, set the flags in .gnu_attribute that say
10454 what the long double type is. This is to allow the linker's warning
10455 message for the wrong long double to be useful, even if the function does
10456 not do a call (for example, doing a 128-bit add on power9 if the long
10457 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
10458 used and they aren't the default long double type. */
10459 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10461 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10462 rs6000_passes_float = rs6000_passes_long_double = true;
10464 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10465 rs6000_passes_float = rs6000_passes_long_double = true;
10467 #endif
10469 /* See if we need to special case SImode/SFmode SUBREG moves. */
10470 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10471 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10472 return;
10474 /* Check if GCC is setting up a block move that will end up using FP
10475 registers as temporaries. We must make sure this is acceptable. */
10476 if (MEM_P (operands[0])
10477 && MEM_P (operands[1])
10478 && mode == DImode
10479 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10480 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10481 && ! (rs6000_slow_unaligned_access (SImode,
10482 (MEM_ALIGN (operands[0]) > 32
10483 ? 32 : MEM_ALIGN (operands[0])))
10484 || rs6000_slow_unaligned_access (SImode,
10485 (MEM_ALIGN (operands[1]) > 32
10486 ? 32 : MEM_ALIGN (operands[1]))))
10487 && ! MEM_VOLATILE_P (operands [0])
10488 && ! MEM_VOLATILE_P (operands [1]))
10490 emit_move_insn (adjust_address (operands[0], SImode, 0),
10491 adjust_address (operands[1], SImode, 0));
10492 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10493 adjust_address (copy_rtx (operands[1]), SImode, 4));
10494 return;
10497 if (can_create_pseudo_p () && MEM_P (operands[0])
10498 && !gpc_reg_operand (operands[1], mode))
10499 operands[1] = force_reg (mode, operands[1]);
10501 /* Recognize the case where operand[1] is a reference to thread-local
10502 data and load its address to a register. */
10503 if (tls_referenced_p (operands[1]))
10505 enum tls_model model;
10506 rtx tmp = operands[1];
10507 rtx addend = NULL;
10509 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10511 addend = XEXP (XEXP (tmp, 0), 1);
10512 tmp = XEXP (XEXP (tmp, 0), 0);
10515 gcc_assert (SYMBOL_REF_P (tmp));
10516 model = SYMBOL_REF_TLS_MODEL (tmp);
10517 gcc_assert (model != 0);
10519 tmp = rs6000_legitimize_tls_address (tmp, model);
10520 if (addend)
10522 tmp = gen_rtx_PLUS (mode, tmp, addend);
10523 tmp = force_operand (tmp, operands[0]);
10525 operands[1] = tmp;
10528 /* 128-bit constant floating-point values on Darwin should really be loaded
10529 as two parts. However, this premature splitting is a problem when DFmode
10530 values can go into Altivec registers. */
10531 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10532 && !reg_addr[DFmode].scalar_in_vmx_p)
10534 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10535 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10536 DFmode);
10537 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10538 GET_MODE_SIZE (DFmode)),
10539 simplify_gen_subreg (DFmode, operands[1], mode,
10540 GET_MODE_SIZE (DFmode)),
10541 DFmode);
10542 return;
10545 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10546 p1:SD) if p1 is not of floating-point class and p0 is spilled, since
10547 we have no analogous movsd_store for this. */
10548 if (lra_in_progress && mode == DDmode
10549 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10550 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10551 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10552 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10554 enum reg_class cl;
10555 int regno = REGNO (SUBREG_REG (operands[1]));
10557 if (!HARD_REGISTER_NUM_P (regno))
10559 cl = reg_preferred_class (regno);
10560 regno = reg_renumber[regno];
10561 if (regno < 0)
10562 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10564 if (regno >= 0 && ! FP_REGNO_P (regno))
10566 mode = SDmode;
10567 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10568 operands[1] = SUBREG_REG (operands[1]);
10571 if (lra_in_progress
10572 && mode == SDmode
10573 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10574 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10575 && (REG_P (operands[1])
10576 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10578 int regno = reg_or_subregno (operands[1]);
10579 enum reg_class cl;
10581 if (!HARD_REGISTER_NUM_P (regno))
10583 cl = reg_preferred_class (regno);
10584 gcc_assert (cl != NO_REGS);
10585 regno = reg_renumber[regno];
10586 if (regno < 0)
10587 regno = ira_class_hard_regs[cl][0];
10589 if (FP_REGNO_P (regno))
10591 if (GET_MODE (operands[0]) != DDmode)
10592 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10593 emit_insn (gen_movsd_store (operands[0], operands[1]));
10595 else if (INT_REGNO_P (regno))
10596 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10597 else
10598 gcc_unreachable();
10599 return;
10601 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10602 p1:DD)) if p0 is not of floating-point class and p1 is spilled, since
10603 we have no analogous movsd_load for this. */
10604 if (lra_in_progress && mode == DDmode
10605 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10606 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10607 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10608 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10610 enum reg_class cl;
10611 int regno = REGNO (SUBREG_REG (operands[0]));
10613 if (!HARD_REGISTER_NUM_P (regno))
10615 cl = reg_preferred_class (regno);
10616 regno = reg_renumber[regno];
10617 if (regno < 0)
10618 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10620 if (regno >= 0 && ! FP_REGNO_P (regno))
10622 mode = SDmode;
10623 operands[0] = SUBREG_REG (operands[0]);
10624 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10627 if (lra_in_progress
10628 && mode == SDmode
10629 && (REG_P (operands[0])
10630 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10631 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10632 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10634 int regno = reg_or_subregno (operands[0]);
10635 enum reg_class cl;
10637 if (!HARD_REGISTER_NUM_P (regno))
10639 cl = reg_preferred_class (regno);
10640 gcc_assert (cl != NO_REGS);
10641 regno = reg_renumber[regno];
10642 if (regno < 0)
10643 regno = ira_class_hard_regs[cl][0];
10645 if (FP_REGNO_P (regno))
10647 if (GET_MODE (operands[1]) != DDmode)
10648 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10649 emit_insn (gen_movsd_load (operands[0], operands[1]));
10651 else if (INT_REGNO_P (regno))
10652 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10653 else
10654 gcc_unreachable();
10655 return;
10658 /* FIXME: In the long term, this switch statement should go away
10659 and be replaced by a sequence of tests based on things like
10660 mode == Pmode. */
10661 switch (mode)
10663 case E_HImode:
10664 case E_QImode:
10665 if (CONSTANT_P (operands[1])
10666 && !CONST_INT_P (operands[1]))
10667 operands[1] = force_const_mem (mode, operands[1]);
10668 break;
10670 case E_TFmode:
10671 case E_TDmode:
10672 case E_IFmode:
10673 case E_KFmode:
10674 if (FLOAT128_2REG_P (mode))
10675 rs6000_eliminate_indexed_memrefs (operands);
10676 /* fall through */
10678 case E_DFmode:
10679 case E_DDmode:
10680 case E_SFmode:
10681 case E_SDmode:
10682 if (CONSTANT_P (operands[1])
10683 && ! easy_fp_constant (operands[1], mode))
10684 operands[1] = force_const_mem (mode, operands[1]);
10685 break;
10687 case E_V16QImode:
10688 case E_V8HImode:
10689 case E_V4SFmode:
10690 case E_V4SImode:
10691 case E_V2DFmode:
10692 case E_V2DImode:
10693 case E_V1TImode:
10694 if (CONSTANT_P (operands[1])
10695 && !easy_vector_constant (operands[1], mode))
10696 operands[1] = force_const_mem (mode, operands[1]);
10697 break;
10699 case E_OOmode:
10700 case E_XOmode:
10701 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10702 error ("%qs is an opaque type, and you cannot set it to other values",
10703 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
10704 break;
10706 case E_SImode:
10707 case E_DImode:
10708 /* Use the default pattern for the address of ELF small data. */
10709 if (TARGET_ELF
10710 && mode == Pmode
10711 && DEFAULT_ABI == ABI_V4
10712 && (SYMBOL_REF_P (operands[1])
10713 || GET_CODE (operands[1]) == CONST)
10714 && small_data_operand (operands[1], mode))
10716 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10717 return;
10720 /* Use the default pattern for loading up PC-relative addresses. */
10721 if (TARGET_PCREL && mode == Pmode
10722 && pcrel_local_or_external_address (operands[1], Pmode))
10724 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10725 return;
10728 if (DEFAULT_ABI == ABI_V4
10729 && mode == Pmode && mode == SImode
10730 && flag_pic == 1 && got_operand (operands[1], mode))
10732 emit_insn (gen_movsi_got (operands[0], operands[1]));
10733 return;
10736 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10737 && TARGET_NO_TOC_OR_PCREL
10738 && ! flag_pic
10739 && mode == Pmode
10740 && CONSTANT_P (operands[1])
10741 && GET_CODE (operands[1]) != HIGH
10742 && !CONST_INT_P (operands[1]))
10744 rtx target = (!can_create_pseudo_p ()
10745 ? operands[0]
10746 : gen_reg_rtx (mode));
10748 /* If this is a function address on -mcall-aixdesc,
10749 convert it to the address of the descriptor. */
10750 if (DEFAULT_ABI == ABI_AIX
10751 && SYMBOL_REF_P (operands[1])
10752 && XSTR (operands[1], 0)[0] == '.')
10754 const char *name = XSTR (operands[1], 0);
10755 rtx new_ref;
10756 while (*name == '.')
10757 name++;
10758 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10759 CONSTANT_POOL_ADDRESS_P (new_ref)
10760 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10761 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10762 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10763 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10764 operands[1] = new_ref;
10767 if (DEFAULT_ABI == ABI_DARWIN)
10769 #if TARGET_MACHO
10770 /* This is not PIC code, but could require the subset of
10771 indirections used by mdynamic-no-pic. */
10772 if (MACHO_DYNAMIC_NO_PIC_P)
10774 /* Take care of any required data indirection. */
10775 operands[1] = rs6000_machopic_legitimize_pic_address (
10776 operands[1], mode, operands[0]);
10777 if (operands[0] != operands[1])
10778 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10779 return;
10781 #endif
10782 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10783 emit_insn (gen_macho_low (Pmode, operands[0],
10784 target, operands[1]));
10785 return;
10788 emit_insn (gen_elf_high (target, operands[1]));
10789 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10790 return;
10793 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10794 and we have put it in the TOC, we just need to make a TOC-relative
10795 reference to it. */
10796 if (TARGET_TOC
10797 && SYMBOL_REF_P (operands[1])
10798 && use_toc_relative_ref (operands[1], mode))
10799 operands[1] = create_TOC_reference (operands[1], operands[0]);
10800 else if (mode == Pmode
10801 && CONSTANT_P (operands[1])
10802 && GET_CODE (operands[1]) != HIGH
10803 && ((REG_P (operands[0])
10804 && FP_REGNO_P (REGNO (operands[0])))
10805 || !CONST_INT_P (operands[1])
10806 || (num_insns_constant (operands[1], mode)
10807 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10808 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10809 && (TARGET_CMODEL == CMODEL_SMALL
10810 || can_create_pseudo_p ()
10811 || (REG_P (operands[0])
10812 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10815 #if TARGET_MACHO
10816 /* Darwin uses a special PIC legitimizer. */
10817 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10819 operands[1] =
10820 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10821 operands[0]);
10822 if (operands[0] != operands[1])
10823 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10824 return;
10826 #endif
10828 /* If we are to limit the number of things we put in the TOC and
10829 this is a symbol plus a constant we can add in one insn,
10830 just put the symbol in the TOC and add the constant. */
10831 if (GET_CODE (operands[1]) == CONST
10832 && TARGET_NO_SUM_IN_TOC
10833 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10834 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10835 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10836 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10837 && ! side_effects_p (operands[0]))
10839 rtx sym =
10840 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10841 rtx other = XEXP (XEXP (operands[1], 0), 1);
10843 sym = force_reg (mode, sym);
10844 emit_insn (gen_add3_insn (operands[0], sym, other));
10845 return;
10848 operands[1] = force_const_mem (mode, operands[1]);
10850 if (TARGET_TOC
10851 && SYMBOL_REF_P (XEXP (operands[1], 0))
10852 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10854 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10855 operands[0]);
10856 operands[1] = gen_const_mem (mode, tocref);
10857 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10860 break;
10862 case E_TImode:
10863 if (!VECTOR_MEM_VSX_P (TImode))
10864 rs6000_eliminate_indexed_memrefs (operands);
10865 break;
10867 case E_PTImode:
10868 rs6000_eliminate_indexed_memrefs (operands);
10869 break;
10871 default:
10872 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10875 /* Above, we may have called force_const_mem which may have returned
10876 an invalid address. If we can, fix this up; otherwise, reload will
10877 have to deal with it. */
10878 if (MEM_P (operands[1]))
10879 operands[1] = validize_mem (operands[1]);
10881 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10885 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10886 static void
10887 init_float128_ibm (machine_mode mode)
10889 if (!TARGET_XL_COMPAT)
10891 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10892 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10893 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10894 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10896 if (!TARGET_HARD_FLOAT)
10898 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10899 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10900 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10901 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10902 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10903 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10904 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10905 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10907 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10908 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10909 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10910 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10911 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10912 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10913 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10914 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10917 else
10919 set_optab_libfunc (add_optab, mode, "_xlqadd");
10920 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10921 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10922 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10925 /* Add various conversions for IFmode to use the traditional TFmode
10926 names. */
10927 if (mode == IFmode)
10929 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10930 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10931 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10932 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10933 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10934 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10936 if (TARGET_POWERPC64)
10938 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10939 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10940 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10941 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
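/* For illustration: an IBM long double is a pair of doubles whose sum
   is the value, and the __gcc_q* routines above take the halves of
   each operand as separate arguments, roughly

     long double __gcc_qadd (double a, double aa, double b, double bb);

   so an IBM-format 128-bit addition becomes a single libcall.  The
   parameter names here are illustrative only.  */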
10946 /* Create a decl for either complex long double multiply or complex long double
10947 divide when long double is IEEE 128-bit floating point. We can't use
10948 __multc3 and __divtc3 because the original long double, using IBM extended
10949 double, used those names. The complex multiply/divide functions are encoded
10950 as builtin functions with a complex result and 4 scalar inputs. */
10952 static void
10953 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10955 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10956 name, NULL_TREE);
10958 set_builtin_decl (fncode, fndecl, true);
10960 if (TARGET_DEBUG_BUILTIN)
10961 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10963 return;
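/* Illustrative shape of the decls created here (the function type is
   supplied by the caller, init_float128_ieee below):

     _Complex long double __mulkc3 (long double a, long double b,
                                    long double c, long double d);

   which computes (a + b*i) * (c + d*i); __divkc3 is the corresponding
   quotient.  The parameter names are illustrative.  */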
10966 /* Set up IEEE 128-bit floating point routines. Use different names if the
10967 arguments can be passed in a vector register. The historical PowerPC
10968 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10969 continue to use that if we aren't using vector registers to pass IEEE
10970 128-bit floating point. */
10972 static void
10973 init_float128_ieee (machine_mode mode)
10975 if (FLOAT128_VECTOR_P (mode))
10977 static bool complex_muldiv_init_p = false;
10979 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10980 we have clone or target attributes, this will be called a second
10981 time. We want to create the built-in function only once. */
10982 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10984 complex_muldiv_init_p = true;
10985 built_in_function fncode_mul =
10986 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10987 - MIN_MODE_COMPLEX_FLOAT);
10988 built_in_function fncode_div =
10989 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10990 - MIN_MODE_COMPLEX_FLOAT);
10992 tree fntype = build_function_type_list (complex_long_double_type_node,
10993 long_double_type_node,
10994 long_double_type_node,
10995 long_double_type_node,
10996 long_double_type_node,
10997 NULL_TREE);
10999 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
11000 create_complex_muldiv ("__divkc3", fncode_div, fntype);
11003 set_optab_libfunc (add_optab, mode, "__addkf3");
11004 set_optab_libfunc (sub_optab, mode, "__subkf3");
11005 set_optab_libfunc (neg_optab, mode, "__negkf2");
11006 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11007 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11008 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11009 set_optab_libfunc (abs_optab, mode, "__abskf2");
11010 set_optab_libfunc (powi_optab, mode, "__powikf2");
11012 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11013 set_optab_libfunc (ne_optab, mode, "__nekf2");
11014 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11015 set_optab_libfunc (ge_optab, mode, "__gekf2");
11016 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11017 set_optab_libfunc (le_optab, mode, "__lekf2");
11018 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11020 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11021 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11022 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11023 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11025 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11026 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11027 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11029 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11030 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11031 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11033 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11034 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11035 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11036 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11037 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11038 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11040 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11041 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11042 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11043 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11045 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11046 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11047 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11048 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11050 if (TARGET_POWERPC64)
11052 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11053 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11054 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11055 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11059 else
11061 set_optab_libfunc (add_optab, mode, "_q_add");
11062 set_optab_libfunc (sub_optab, mode, "_q_sub");
11063 set_optab_libfunc (neg_optab, mode, "_q_neg");
11064 set_optab_libfunc (smul_optab, mode, "_q_mul");
11065 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11066 if (TARGET_PPC_GPOPT)
11067 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11069 set_optab_libfunc (eq_optab, mode, "_q_feq");
11070 set_optab_libfunc (ne_optab, mode, "_q_fne");
11071 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11072 set_optab_libfunc (ge_optab, mode, "_q_fge");
11073 set_optab_libfunc (lt_optab, mode, "_q_flt");
11074 set_optab_libfunc (le_optab, mode, "_q_fle");
11076 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11077 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11078 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11079 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11080 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11081 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11082 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11083 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11087 static void
11088 rs6000_init_libfuncs (void)
11090 /* __float128 support. */
11091 if (TARGET_FLOAT128_TYPE)
11093 init_float128_ibm (IFmode);
11094 init_float128_ieee (KFmode);
11097 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11098 if (TARGET_LONG_DOUBLE_128)
11100 if (!TARGET_IEEEQUAD)
11101 init_float128_ibm (TFmode);
11103 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11104 else
11105 init_float128_ieee (TFmode);
11109 /* Emit a potentially record-form instruction, setting DST from SRC.
11110 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11111 signed comparison of DST with zero. If DOT is 1, the generated RTL
11112 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11113 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11114 a separate COMPARE. */
11116 void
11117 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11119 if (dot == 0)
11121 emit_move_insn (dst, src);
11122 return;
11125 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11127 emit_move_insn (dst, src);
11128 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11129 return;
11132 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11133 if (dot == 1)
11135 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11136 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11138 else
11140 rtx set = gen_rtx_SET (dst, src);
11141 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
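/* For example, with DOT == 1 and CCREG == CR0 this emits one
   record-form pattern of the shape

     (parallel [(set (reg:CC CR0) (compare:CC (...) (const_int 0)))
                (clobber (reg:DI 3))])

   whereas DOT == 2 uses a real SET of DST in place of the clobber.
   The register numbers here are for illustration only.  */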
11146 /* A validation routine: say whether CODE, a condition code, and MODE
11147 match. The other alternatives either don't make sense or should
11148 never be generated. */
11150 void
11151 validate_condition_mode (enum rtx_code code, machine_mode mode)
11153 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11154 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11155 && GET_MODE_CLASS (mode) == MODE_CC);
11157 /* These don't make sense. */
11158 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11159 || mode != CCUNSmode);
11161 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11162 || mode == CCUNSmode);
11164 gcc_assert (mode == CCFPmode
11165 || (code != ORDERED && code != UNORDERED
11166 && code != UNEQ && code != LTGT
11167 && code != UNGT && code != UNLT
11168 && code != UNGE && code != UNLE));
11170 /* These are invalid; the information is not there. */
11171 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11175 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11176 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11177 not zero, store there the bit offset (counted from the right) where
11178 the single stretch of 1 bits begins; and similarly for B, the bit
11179 offset where it ends. */
11181 bool
11182 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11184 unsigned HOST_WIDE_INT val = INTVAL (mask);
11185 unsigned HOST_WIDE_INT bit;
11186 int nb, ne;
11187 int n = GET_MODE_PRECISION (mode);
11189 if (mode != DImode && mode != SImode)
11190 return false;
11192 if (INTVAL (mask) >= 0)
11194 bit = val & -val;
11195 ne = exact_log2 (bit);
11196 nb = exact_log2 (val + bit);
11198 else if (val + 1 == 0)
11200 nb = n;
11201 ne = 0;
11203 else if (val & 1)
11205 val = ~val;
11206 bit = val & -val;
11207 nb = exact_log2 (bit);
11208 ne = exact_log2 (val + bit);
11210 else
11212 bit = val & -val;
11213 ne = exact_log2 (bit);
11214 if (val + bit == 0)
11215 nb = n;
11216 else
11217 nb = 0;
11220 nb--;
11222 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11223 return false;
11225 if (b)
11226 *b = nb;
11227 if (e)
11228 *e = ne;
11230 return true;
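/* Worked example: MASK == 0x00fff000 in SImode.  The lowest set bit
   gives ne == 12; val + bit == 0x01000000, so nb becomes 24 - 1 == 23,
   and the single run of ones spans bits 12..23 (*E == 12, *B == 23).
   A value like 0x00ff00f0 has two separate runs, so exact_log2 fails
   and the function returns false.  */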
11233 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11234 or rldicr instruction, to implement an AND with it in mode MODE. */
11236 bool
11237 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11239 int nb, ne;
11241 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11242 return false;
11244 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11245 does not wrap. */
11246 if (mode == DImode)
11247 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11249 /* For SImode, rlwinm can do everything. */
11250 if (mode == SImode)
11251 return (nb < 32 && ne < 32);
11253 return false;
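/* E.g. in DImode, 0x00000000ffffffff (ne == 0) suits rldicl and
   0xffffffff00000000 (nb == 63) suits rldicr, while
   0x0000ffff00000000 (nb == 47, ne == 32) is rejected: the run
   touches neither end of the register and does not fit in the low
   32 bits that rlwinm can handle.  */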
11256 /* Return the instruction template for an AND with mask in mode MODE, with
11257 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11259 const char *
11260 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11262 int nb, ne;
11264 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11265 gcc_unreachable ();
11267 if (mode == DImode && ne == 0)
11269 operands[3] = GEN_INT (63 - nb);
11270 if (dot)
11271 return "rldicl. %0,%1,0,%3";
11272 return "rldicl %0,%1,0,%3";
11275 if (mode == DImode && nb == 63)
11277 operands[3] = GEN_INT (63 - ne);
11278 if (dot)
11279 return "rldicr. %0,%1,0,%3";
11280 return "rldicr %0,%1,0,%3";
11283 if (nb < 32 && ne < 32)
11285 operands[3] = GEN_INT (31 - nb);
11286 operands[4] = GEN_INT (31 - ne);
11287 if (dot)
11288 return "rlwinm. %0,%1,0,%3,%4";
11289 return "rlwinm %0,%1,0,%3,%4";
11292 gcc_unreachable ();
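/* Continuing the example: for mask 0x00000000ffffffff in DImode,
   ne == 0 and nb == 31, so operands[3] becomes 63 - 31 == 32 and the
   template returned is "rldicl %0,%1,0,32" (clear the upper 32
   bits).  */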
11295 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11296 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11297 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11299 bool
11300 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11302 int nb, ne;
11304 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11305 return false;
11307 int n = GET_MODE_PRECISION (mode);
11308 int sh = -1;
11310 if (CONST_INT_P (XEXP (shift, 1)))
11312 sh = INTVAL (XEXP (shift, 1));
11313 if (sh < 0 || sh >= n)
11314 return false;
11317 rtx_code code = GET_CODE (shift);
11319 /* Convert any shift by 0 to a rotate, to simplify below code. */
11320 if (sh == 0)
11321 code = ROTATE;
11323 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11324 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11325 code = ASHIFT;
11326 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11328 code = LSHIFTRT;
11329 sh = n - sh;
11332 /* DImode rotates need rld*. */
11333 if (mode == DImode && code == ROTATE)
11334 return (nb == 63 || ne == 0 || ne == sh);
11336 /* SImode rotates need rlw*. */
11337 if (mode == SImode && code == ROTATE)
11338 return (nb < 32 && ne < 32 && sh < 32);
11340 /* Wrap-around masks are only okay for rotates. */
11341 if (ne > nb)
11342 return false;
11344 /* Variable shifts are only okay for rotates. */
11345 if (sh < 0)
11346 return false;
11348 /* Don't allow ASHIFT if the mask is wrong for that. */
11349 if (code == ASHIFT && ne < sh)
11350 return false;
11352 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11353 if the mask is wrong for that. */
11354 if (nb < 32 && ne < 32 && sh < 32
11355 && !(code == LSHIFTRT && nb >= 32 - sh))
11356 return true;
11358 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11359 if the mask is wrong for that. */
11360 if (code == LSHIFTRT)
11361 sh = 64 - sh;
11362 if (nb == 63 || ne == 0 || ne == sh)
11363 return !(code == LSHIFTRT && nb >= sh);
11365 return false;
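/* Example: in SImode, (ashift X 8) under mask 0x0000ff00 has
   nb == 15, ne == 8, sh == 8; since ne >= sh and everything fits in
   the low 32 bits, a single rlwinm works and this returns true.
   With mask 0x000000ff instead (ne == 0 < sh), bits the shift must
   clear would survive the mask, so the ASHIFT case is rejected.  */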
11368 /* Return the instruction template for a shift with mask in mode MODE, with
11369 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11371 const char *
11372 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11374 int nb, ne;
11376 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11377 gcc_unreachable ();
11379 if (mode == DImode && ne == 0)
11381 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11382 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11383 operands[3] = GEN_INT (63 - nb);
11384 if (dot)
11385 return "rld%I2cl. %0,%1,%2,%3";
11386 return "rld%I2cl %0,%1,%2,%3";
11389 if (mode == DImode && nb == 63)
11391 operands[3] = GEN_INT (63 - ne);
11392 if (dot)
11393 return "rld%I2cr. %0,%1,%2,%3";
11394 return "rld%I2cr %0,%1,%2,%3";
11397 if (mode == DImode
11398 && GET_CODE (operands[4]) != LSHIFTRT
11399 && CONST_INT_P (operands[2])
11400 && ne == INTVAL (operands[2]))
11402 operands[3] = GEN_INT (63 - nb);
11403 if (dot)
11404 return "rld%I2c. %0,%1,%2,%3";
11405 return "rld%I2c %0,%1,%2,%3";
11408 if (nb < 32 && ne < 32)
11410 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11411 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11412 operands[3] = GEN_INT (31 - nb);
11413 operands[4] = GEN_INT (31 - ne);
11414 /* This insn can also be a 64-bit rotate with mask that really makes
11415 it just a shift right (with mask); the %h below are to adjust for
11416 that situation (shift count is >= 32 in that case). */
11417 if (dot)
11418 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11419 return "rlw%I2nm %0,%1,%h2,%3,%4";
11422 gcc_unreachable ();
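/* For the rlwinm case of the previous example (mask 0x0000ff00,
   shift count 8), operands[3] and operands[4] become 16 and 23 and
   the template produces "rlwinm %0,%1,8,16,23".  */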
11425 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11426 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11427 ASHIFT, or LSHIFTRT) in mode MODE. */
11429 bool
11430 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11432 int nb, ne;
11434 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11435 return false;
11437 int n = GET_MODE_PRECISION (mode);
11439 int sh = INTVAL (XEXP (shift, 1));
11440 if (sh < 0 || sh >= n)
11441 return false;
11443 rtx_code code = GET_CODE (shift);
11445 /* Convert any shift by 0 to a rotate, to simplify below code. */
11446 if (sh == 0)
11447 code = ROTATE;
11449 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11450 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11451 code = ASHIFT;
11452 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11454 code = LSHIFTRT;
11455 sh = n - sh;
11458 /* DImode rotates need rldimi. */
11459 if (mode == DImode && code == ROTATE)
11460 return (ne == sh);
11462 /* SImode rotates need rlwimi. */
11463 if (mode == SImode && code == ROTATE)
11464 return (nb < 32 && ne < 32 && sh < 32);
11466 /* Wrap-around masks are only okay for rotates. */
11467 if (ne > nb)
11468 return false;
11470 /* Don't allow ASHIFT if the mask is wrong for that. */
11471 if (code == ASHIFT && ne < sh)
11472 return false;
11474 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11475 if the mask is wrong for that. */
11476 if (nb < 32 && ne < 32 && sh < 32
11477 && !(code == LSHIFTRT && nb >= 32 - sh))
11478 return true;
11480 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11481 if the mask is wrong for that. */
11482 if (code == LSHIFTRT)
11483 sh = 64 - sh;
11484 if (ne == sh)
11485 return !(code == LSHIFTRT && nb >= sh);
11487 return false;
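/* Example: inserting the low byte of a register into bits 8..15 of
   the destination uses (ashift X 8) with mask 0x0000ff00; then
   ne == 8 == sh and nb == 15 satisfy the rlwimi conditions above, so
   this returns true for SImode.  */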
11490 /* Return the instruction template for an insert with mask in mode MODE, with
11491 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11493 const char *
11494 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11496 int nb, ne;
11498 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11499 gcc_unreachable ();
11501 /* Prefer rldimi because rlwimi is cracked. */
11502 if (TARGET_POWERPC64
11503 && (!dot || mode == DImode)
11504 && GET_CODE (operands[4]) != LSHIFTRT
11505 && ne == INTVAL (operands[2]))
11507 operands[3] = GEN_INT (63 - nb);
11508 if (dot)
11509 return "rldimi. %0,%1,%2,%3";
11510 return "rldimi %0,%1,%2,%3";
11513 if (nb < 32 && ne < 32)
11515 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11516 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11517 operands[3] = GEN_INT (31 - nb);
11518 operands[4] = GEN_INT (31 - ne);
11519 if (dot)
11520 return "rlwimi. %0,%1,%2,%3,%4";
11521 return "rlwimi %0,%1,%2,%3,%4";
11524 gcc_unreachable ();
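/* For that same example (mask 0x0000ff00, shift count 8), a 64-bit
   target prefers "rldimi %0,%1,8,48" (operands[3] == 63 - nb);
   otherwise operands[3] and operands[4] become 16 and 23, giving
   "rlwimi %0,%1,8,16,23".  */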
11527 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11528 using two machine instructions. */
11530 bool
11531 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11533 /* There are two kinds of AND we can handle with two insns:
11534 1) those we can do with two rl* insns;
11535 2) ori[s];xori[s].
11537 We do not handle that last case yet. */
11539 /* If there is just one stretch of ones, we can do it. */
11540 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11541 return true;
11543 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11544 one insn, we can do the whole thing with two. */
11545 unsigned HOST_WIDE_INT val = INTVAL (c);
11546 unsigned HOST_WIDE_INT bit1 = val & -val;
11547 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11548 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11549 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11550 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
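/* Worked example: VAL == 0x00ff0ff0 has two runs of ones.  Then
   bit1 == 0x10 (lowest set bit), bit2 == 0x1000 (lowest bit of the
   hole), bit3 == 0x10000 (lowest bit of the upper run), and
   val + bit3 - bit2 == 0x00fffff0 is VAL with its lowest hole filled
   in: a single run, hence a valid AND mask, so two insns suffice.  */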
11553 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11554 If EXPAND is true, split rotate-and-mask instructions we generate to
11555 their constituent parts as well (this is used during expand); if DOT
11556 is 1, make the last insn a record-form instruction clobbering the
11557 destination GPR and setting the CC reg (from operands[3]); if 2, set
11558 that GPR as well as the CC reg. */
11560 void
11561 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11563 gcc_assert (!(expand && dot));
11565 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11567 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11568 shift right. This generates better code than doing the masks without
11569 shifts, or shifting first right and then left. */
11570 int nb, ne;
11571 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11573 gcc_assert (mode == DImode);
11575 int shift = 63 - nb;
11576 if (expand)
11578 rtx tmp1 = gen_reg_rtx (DImode);
11579 rtx tmp2 = gen_reg_rtx (DImode);
11580 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11581 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11582 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11584 else
11586 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11587 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11588 emit_move_insn (operands[0], tmp);
11589 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11590 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11592 return;
11595 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11596 that does the rest. */
11597 unsigned HOST_WIDE_INT bit1 = val & -val;
11598 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11599 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11600 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11602 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11603 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
11605 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11607 /* Two "no-rotate"-and-mask instructions, for SImode. */
11608 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11610 gcc_assert (mode == SImode);
11612 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11613 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11614 emit_move_insn (reg, tmp);
11615 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11616 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11617 return;
11620 gcc_assert (mode == DImode);
11622 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11623 insns; we have to do the first in SImode, because it wraps. */
11624 if (mask2 <= 0xffffffff
11625 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11627 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11628 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11629 GEN_INT (mask1));
11630 rtx reg_low = gen_lowpart (SImode, reg);
11631 emit_move_insn (reg_low, tmp);
11632 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11633 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11634 return;
11637 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11638 at the top end), rotate back and clear the other hole. */
11639 int right = exact_log2 (bit3);
11640 int left = 64 - right;
11642 /* Rotate the mask too. */
11643 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11645 if (expand)
11647 rtx tmp1 = gen_reg_rtx (DImode);
11648 rtx tmp2 = gen_reg_rtx (DImode);
11649 rtx tmp3 = gen_reg_rtx (DImode);
11650 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11651 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11652 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11653 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11655 else
11657 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11658 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11659 emit_move_insn (operands[0], tmp);
11660 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11661 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11662 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
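/* For the 0x00ff0ff0 example this takes the SImode path above:
   mask1 == ~0xf000 (a wrap-around mask clearing the hole) and
   mask2 == 0x00fffff0, so the two emitted ANDs correspond to

     rlwinm rT,rS,0,20,15    # x & ~0xf000
     rlwinm rD,rT,0,8,27     # ... & 0x00fffff0

   with the second insn optionally record-form.  The register names
   are illustrative.  */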
11666 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11667 for lfq and stfq insns iff the registers are hard registers. */
11669 int
11670 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11672 /* We might have been passed a SUBREG. */
11673 if (!REG_P (reg1) || !REG_P (reg2))
11674 return 0;
11676 /* We might have been passed non floating point registers. */
11677 if (!FP_REGNO_P (REGNO (reg1))
11678 || !FP_REGNO_P (REGNO (reg2)))
11679 return 0;
11681 return (REGNO (reg1) == REGNO (reg2) - 1);
11684 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11685 addr1 and addr2 must be in consecutive memory locations
11686 (addr2 == addr1 + 8). */
11688 int
11689 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11691 rtx addr1, addr2;
11692 unsigned int reg1, reg2;
11693 int offset1, offset2;
11695 /* The mems cannot be volatile. */
11696 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11697 return 0;
11699 addr1 = XEXP (mem1, 0);
11700 addr2 = XEXP (mem2, 0);
11702 /* Extract an offset (if used) from the first addr. */
11703 if (GET_CODE (addr1) == PLUS)
11705 /* If not a REG, return zero. */
11706 if (!REG_P (XEXP (addr1, 0)))
11707 return 0;
11708 else
11710 reg1 = REGNO (XEXP (addr1, 0));
11711 /* The offset must be constant! */
11712 if (!CONST_INT_P (XEXP (addr1, 1)))
11713 return 0;
11714 offset1 = INTVAL (XEXP (addr1, 1));
11717 else if (!REG_P (addr1))
11718 return 0;
11719 else
11721 reg1 = REGNO (addr1);
11722 /* This was a simple (mem (reg)) expression. Offset is 0. */
11723 offset1 = 0;
11726 /* And now for the second addr. */
11727 if (GET_CODE (addr2) == PLUS)
11729 /* If not a REG, return zero. */
11730 if (!REG_P (XEXP (addr2, 0)))
11731 return 0;
11732 else
11734 reg2 = REGNO (XEXP (addr2, 0));
11735 /* The offset must be constant. */
11736 if (!CONST_INT_P (XEXP (addr2, 1)))
11737 return 0;
11738 offset2 = INTVAL (XEXP (addr2, 1));
11741 else if (!REG_P (addr2))
11742 return 0;
11743 else
11745 reg2 = REGNO (addr2);
11746 /* This was a simple (mem (reg)) expression. Offset is 0. */
11747 offset2 = 0;
11750 /* Both of these must have the same base register. */
11751 if (reg1 != reg2)
11752 return 0;
11754 /* The offset for the second addr must be 8 more than the first addr. */
11755 if (offset2 != offset1 + 8)
11756 return 0;
11758 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11759 instructions. */
11760 return 1;
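/* Example of the peephole these two predicates enable:

     lfd 10,16(9)
     lfd 11,24(9)

   uses consecutive FPRs, the same base register, and offsets that
   differ by 8, so the pair can be replaced by a single quad load,
   "lfq 10,16(9)".  */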
12763 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11764 need to use DDmode, in all other cases we can use the same mode. */
11765 static machine_mode
11766 rs6000_secondary_memory_needed_mode (machine_mode mode)
11768 if (lra_in_progress && mode == SDmode)
11769 return DDmode;
11770 return mode;
11773 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11774 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11775 only work on the traditional altivec registers, note if an altivec register
11776 was chosen. */
11778 static enum rs6000_reg_type
11779 register_to_reg_type (rtx reg, bool *is_altivec)
11781 HOST_WIDE_INT regno;
11782 enum reg_class rclass;
11784 if (SUBREG_P (reg))
11785 reg = SUBREG_REG (reg);
11787 if (!REG_P (reg))
11788 return NO_REG_TYPE;
11790 regno = REGNO (reg);
11791 if (!HARD_REGISTER_NUM_P (regno))
11793 if (!lra_in_progress && !reload_completed)
11794 return PSEUDO_REG_TYPE;
11796 regno = true_regnum (reg);
11797 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11798 return PSEUDO_REG_TYPE;
11801 gcc_assert (regno >= 0);
11803 if (is_altivec && ALTIVEC_REGNO_P (regno))
11804 *is_altivec = true;
11806 rclass = rs6000_regno_regclass[regno];
11807 return reg_class_to_reg_type[(int)rclass];
11810 /* Helper function to return the cost of adding a TOC entry address. */
11812 static inline int
11813 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11815 int ret;
11817 if (TARGET_CMODEL != CMODEL_SMALL)
11818 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11820 else
11821 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11823 return ret;
11826 /* Helper function for rs6000_secondary_reload to determine whether the memory
11827 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11828 needs reloading. Return negative if the memory is not handled by the memory
11829 helper functions and to try a different reload method, 0 if no additional
11830 instructions are need, and positive to give the extra cost for the
11831 memory. */
11833 static int
11834 rs6000_secondary_reload_memory (rtx addr,
11835 enum reg_class rclass,
11836 machine_mode mode)
11838 int extra_cost = 0;
11839 rtx reg, and_arg, plus_arg0, plus_arg1;
11840 addr_mask_type addr_mask;
11841 const char *type = NULL;
11842 const char *fail_msg = NULL;
11844 if (GPR_REG_CLASS_P (rclass))
11845 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11847 else if (rclass == FLOAT_REGS)
11848 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11850 else if (rclass == ALTIVEC_REGS)
11851 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11853 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11854 else if (rclass == VSX_REGS)
11855 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11856 & ~RELOAD_REG_AND_M16);
11858 /* If the register allocator hasn't made up its mind yet on the register
11859 class to use, settle on defaults to use. */
11860 else if (rclass == NO_REGS)
11862 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11863 & ~RELOAD_REG_AND_M16);
11865 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11866 addr_mask &= ~(RELOAD_REG_INDEXED
11867 | RELOAD_REG_PRE_INCDEC
11868 | RELOAD_REG_PRE_MODIFY);
11871 else
11872 addr_mask = 0;
11874 /* If the register isn't valid in this register class, just return now. */
11875 if ((addr_mask & RELOAD_REG_VALID) == 0)
11877 if (TARGET_DEBUG_ADDR)
11879 fprintf (stderr,
11880 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11881 "not valid in class\n",
11882 GET_MODE_NAME (mode), reg_class_names[rclass]);
11883 debug_rtx (addr);
11886 return -1;
11889 switch (GET_CODE (addr))
11891 /* Does the register class support auto update forms for this mode? We
11892 don't need a scratch register, since the powerpc only supports
11893 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11894 case PRE_INC:
11895 case PRE_DEC:
11896 reg = XEXP (addr, 0);
11897 if (!base_reg_operand (addr, GET_MODE (reg)))
11899 fail_msg = "no base register #1";
11900 extra_cost = -1;
11903 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11905 extra_cost = 1;
11906 type = "update";
11908 break;
11910 case PRE_MODIFY:
11911 reg = XEXP (addr, 0);
11912 plus_arg1 = XEXP (addr, 1);
11913 if (!base_reg_operand (reg, GET_MODE (reg))
11914 || GET_CODE (plus_arg1) != PLUS
11915 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11917 fail_msg = "bad PRE_MODIFY";
11918 extra_cost = -1;
11921 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11923 extra_cost = 1;
11924 type = "update";
11926 break;
11928 /* Do we need to simulate AND -16 to clear the bottom address bits used
11929 in VMX load/stores? Only allow the AND for vector sizes. */
11930 case AND:
11931 and_arg = XEXP (addr, 0);
11932 if (GET_MODE_SIZE (mode) != 16
11933 || !CONST_INT_P (XEXP (addr, 1))
11934 || INTVAL (XEXP (addr, 1)) != -16)
11936 fail_msg = "bad Altivec AND #1";
11937 extra_cost = -1;
11940 if (rclass != ALTIVEC_REGS)
11942 if (legitimate_indirect_address_p (and_arg, false))
11943 extra_cost = 1;
11945 else if (legitimate_indexed_address_p (and_arg, false))
11946 extra_cost = 2;
11948 else
11950 fail_msg = "bad Altivec AND #2";
11951 extra_cost = -1;
11954 type = "and";
11956 break;
11958 /* If this is an indirect address, make sure it is a base register. */
11959 case REG:
11960 case SUBREG:
11961 if (!legitimate_indirect_address_p (addr, false))
11963 extra_cost = 1;
11964 type = "move";
11966 break;
11968 /* If this is an indexed address, make sure the register class can handle
11969 indexed addresses for this mode. */
11970 case PLUS:
11971 plus_arg0 = XEXP (addr, 0);
11972 plus_arg1 = XEXP (addr, 1);
11974 /* (plus (plus (reg) (constant)) (constant)) is generated during
11975 push_reload processing, so handle it now. */
11976 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11978 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11980 extra_cost = 1;
11981 type = "offset";
11985 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11986 push_reload processing, so handle it now. */
11987 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11989 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11991 extra_cost = 1;
11992 type = "indexed #2";
11996 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11998 fail_msg = "no base register #2";
11999 extra_cost = -1;
12002 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12004 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12005 || !legitimate_indexed_address_p (addr, false))
12007 extra_cost = 1;
12008 type = "indexed";
12012 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12013 && CONST_INT_P (plus_arg1))
12015 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12017 extra_cost = 1;
12018 type = "vector d-form offset";
12022 /* Make sure the register class can handle offset addresses. */
12023 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12025 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12027 extra_cost = 1;
12028 type = "offset #2";
12032 else
12034 fail_msg = "bad PLUS";
12035 extra_cost = -1;
12038 break;
12040 case LO_SUM:
12041 /* Quad offsets are restricted and can't handle normal addresses. */
12042 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12044 extra_cost = -1;
12045 type = "vector d-form lo_sum";
12048 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12050 fail_msg = "bad LO_SUM";
12051 extra_cost = -1;
12054 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12056 extra_cost = 1;
12057 type = "lo_sum";
12059 break;
12061 /* Static addresses need to create a TOC entry. */
12062 case CONST:
12063 case SYMBOL_REF:
12064 case LABEL_REF:
12065 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12067 extra_cost = -1;
12068 type = "vector d-form lo_sum #2";
12071 else
12073 type = "address";
12074 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12076 break;
12078 /* TOC references look like offsetable memory. */
12079 case UNSPEC:
12080 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12082 fail_msg = "bad UNSPEC";
12083 extra_cost = -1;
12086 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12088 extra_cost = -1;
12089 type = "vector d-form lo_sum #3";
12092 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12094 extra_cost = 1;
12095 type = "toc reference";
12097 break;
12099 default:
12101 fail_msg = "bad address";
12102 extra_cost = -1;
12106 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12108 if (extra_cost < 0)
12109 fprintf (stderr,
12110 "rs6000_secondary_reload_memory error: mode = %s, "
12111 "class = %s, addr_mask = '%s', %s\n",
12112 GET_MODE_NAME (mode),
12113 reg_class_names[rclass],
12114 rs6000_debug_addr_mask (addr_mask, false),
12115 (fail_msg != NULL) ? fail_msg : "<bad address>");
12117 else
12118 fprintf (stderr,
12119 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12120 "addr_mask = '%s', extra cost = %d, %s\n",
12121 GET_MODE_NAME (mode),
12122 reg_class_names[rclass],
12123 rs6000_debug_addr_mask (addr_mask, false),
12124 extra_cost,
12125 (type) ? type : "<none>");
12127 debug_rtx (addr);
12130 return extra_cost;
12133 /* Helper function for rs6000_secondary_reload to return true if a move to a
12134 different register class is really a simple move. */
12136 static bool
12137 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12138 enum rs6000_reg_type from_type,
12139 machine_mode mode)
12141 int size = GET_MODE_SIZE (mode);
12143 /* Add support for various direct moves available. In this function, we only
12144 look at cases where we don't need any extra registers, and one or more
12145 simple move insns are issued. Originally, small integers were not allowed
12146 in FPR/VSX registers. Single precision binary floating point is not a simple
12147 move because we need to convert to the single precision memory layout.
12148 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12149 need special direct move handling, which we do not support yet. */
12150 if (TARGET_DIRECT_MOVE
12151 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12152 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12154 if (TARGET_POWERPC64)
12156 /* ISA 2.07: MTVSRD or MVFVSRD. */
12157 if (size == 8)
12158 return true;
12160 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12161 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12162 return true;
12165 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12166 if (TARGET_P8_VECTOR)
12168 if (mode == SImode)
12169 return true;
12171 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12172 return true;
12175 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12176 if (mode == SDmode)
12177 return true;
12180 /* Move to/from SPR. */
12181 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12182 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12183 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12184 return true;
12186 return false;
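/* For instance, a DImode copy between a GPR and a VSX register on a
   64-bit ISA 2.07 target is a single mtvsrd/mfvsrd, so it counts as
   a simple move and needs no scratch register, while TDmode is never
   treated as simple because of the special handling noted above.  */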
12189 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
12190 special direct moves that involve allocating an extra register. Return true
12191 and store the helper's insn code and extra cost in SRI if there is such a
12192 function, or false if not. */
12194 static bool
12195 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12196 enum rs6000_reg_type from_type,
12197 machine_mode mode,
12198 secondary_reload_info *sri,
12199 bool altivec_p)
12201 bool ret = false;
12202 enum insn_code icode = CODE_FOR_nothing;
12203 int cost = 0;
12204 int size = GET_MODE_SIZE (mode);
12206 if (TARGET_POWERPC64 && size == 16)
12208 /* Handle moving 128-bit values from GPRs to VSX registers on
12209 ISA 2.07 (power8, power9) when running in 64-bit mode using
12210 XXPERMDI to glue the two 64-bit values back together. */
12211 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12213 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12214 icode = reg_addr[mode].reload_vsx_gpr;
12217 /* Handle moving 128-bit values from VSX registers to GPRs on
12218 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12219 bottom 64-bit value. */
12220 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12222 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12223 icode = reg_addr[mode].reload_gpr_vsx;
12227 else if (TARGET_POWERPC64 && mode == SFmode)
12229 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12231 cost = 3; /* xscvdpspn, mfvsrd, and. */
12232 icode = reg_addr[mode].reload_gpr_vsx;
12235 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12237 cost = 2; /* mtvsrz, xscvspdpn. */
12238 icode = reg_addr[mode].reload_vsx_gpr;
12242 else if (!TARGET_POWERPC64 && size == 8)
12244 /* Handle moving 64-bit values from GPRs to floating point registers on
12245 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12246 32-bit values back together. Altivec register classes must be handled
12247 specially since a different instruction is used, and the secondary
12248 reload support requires a single instruction class in the scratch
12249 register constraint. However, right now TFmode is not allowed in
12250 Altivec registers, so the pattern will never match. */
12251 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12253 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12254 icode = reg_addr[mode].reload_fpr_gpr;
12258 if (icode != CODE_FOR_nothing)
12260 ret = true;
12261 if (sri)
12263 sri->icode = icode;
12264 sri->extra_cost = cost;
12268 return ret;
12271 /* Return whether a move between two register classes can be done either
12272 directly (simple move) or via a pattern that uses a single extra temporary
12273 (using ISA 2.07's direct move in this case). */
12275 static bool
12276 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12277 enum rs6000_reg_type from_type,
12278 machine_mode mode,
12279 secondary_reload_info *sri,
12280 bool altivec_p)
12282 /* Fall back to load/store reloads if either type is not a register. */
12283 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12284 return false;
12286 /* If we haven't allocated registers yet, assume the move can be done for the
12287 standard register types. */
12288 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12289 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12290 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12291 return true;
12293 /* A move within the same set of registers is a simple move for
12294 non-specialized registers. */
12295 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12296 return true;
12298 /* Check whether a simple move can be done directly. */
12299 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12301 if (sri)
12303 sri->icode = CODE_FOR_nothing;
12304 sri->extra_cost = 0;
12306 return true;
12309 /* Now check if we can do it in a few steps. */
12310 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12311 altivec_p);
12314 /* Inform reload about cases where moving X with a mode MODE to a register in
12315 RCLASS requires an extra scratch or immediate register. Return the class
12316 needed for the immediate register.
12318 For VSX and Altivec, we may need a register to convert sp+offset into
12319 reg+sp.
12321 For misaligned 64-bit gpr loads and stores we need a register to
12322 convert an offset address to indirect. */
12324 static reg_class_t
12325 rs6000_secondary_reload (bool in_p,
12326 rtx x,
12327 reg_class_t rclass_i,
12328 machine_mode mode,
12329 secondary_reload_info *sri)
12331 enum reg_class rclass = (enum reg_class) rclass_i;
12332 reg_class_t ret = ALL_REGS;
12333 enum insn_code icode;
12334 bool default_p = false;
12335 bool done_p = false;
12337 /* Allow subreg of memory before/during reload. */
12338 bool memory_p = (MEM_P (x)
12339 || (!reload_completed && SUBREG_P (x)
12340 && MEM_P (SUBREG_REG (x))));
12342 sri->icode = CODE_FOR_nothing;
12343 sri->t_icode = CODE_FOR_nothing;
12344 sri->extra_cost = 0;
12345 icode = ((in_p)
12346 ? reg_addr[mode].reload_load
12347 : reg_addr[mode].reload_store);
12349 if (REG_P (x) || register_operand (x, mode))
12351 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12352 bool altivec_p = (rclass == ALTIVEC_REGS);
12353 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12355 if (!in_p)
12356 std::swap (to_type, from_type);
12358 /* Can we do a direct move of some sort? */
12359 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12360 altivec_p))
12362 icode = (enum insn_code)sri->icode;
12363 default_p = false;
12364 done_p = true;
12365 ret = NO_REGS;
12369 /* Make sure 0.0 is not reloaded or forced into memory. */
12370 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12372 ret = NO_REGS;
12373 default_p = false;
12374 done_p = true;
12377 /* If this is a scalar floating point value and we want to load it into the
12378 traditional Altivec registers, do it via a traditional floating
12379 point register, unless we have D-form addressing. Also make sure that
12380 non-zero constants use a FPR. */
12381 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12382 && !mode_supports_vmx_dform (mode)
12383 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12384 && (memory_p || CONST_DOUBLE_P (x)))
12386 ret = FLOAT_REGS;
12387 default_p = false;
12388 done_p = true;
12391 /* Handle reload of load/stores if we have reload helper functions. */
12392 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12394 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12395 mode);
12397 if (extra_cost >= 0)
12399 done_p = true;
12400 ret = NO_REGS;
12401 if (extra_cost > 0)
12403 sri->extra_cost = extra_cost;
12404 sri->icode = icode;
12409 /* Handle unaligned loads and stores of integer registers. */
12410 if (!done_p && TARGET_POWERPC64
12411 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12412 && memory_p
12413 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12415 rtx addr = XEXP (x, 0);
12416 rtx off = address_offset (addr);
12418 if (off != NULL_RTX)
12420 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12421 unsigned HOST_WIDE_INT offset = INTVAL (off);
12423 /* We need a secondary reload when our legitimate_address_p
12424 says the address is good (as otherwise the entire address
12425 will be reloaded), and the offset is not a multiple of
12426 four or we have an address wrap. Address wrap will only
12427 occur for LO_SUMs since legitimate_offset_address_p
12428 rejects addresses for 16-byte mems that will wrap. */
12429 if (GET_CODE (addr) == LO_SUM
12430 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12431 && ((offset & 3) != 0
12432 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12433 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12434 && (offset & 3) != 0))
12436 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12437 if (in_p)
12438 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12439 : CODE_FOR_reload_di_load);
12440 else
12441 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12442 : CODE_FOR_reload_di_store);
12443 sri->extra_cost = 2;
12444 ret = NO_REGS;
12445 done_p = true;
12447 else
12448 default_p = true;
12450 else
12451 default_p = true;
12454 if (!done_p && !TARGET_POWERPC64
12455 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12456 && memory_p
12457 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12459 rtx addr = XEXP (x, 0);
12460 rtx off = address_offset (addr);
12462 if (off != NULL_RTX)
12464 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12465 unsigned HOST_WIDE_INT offset = INTVAL (off);
12467 /* We need a secondary reload when our legitimate_address_p
12468 says the address is good (as otherwise the entire address
12469 will be reloaded), and we have a wrap.
12471 legitimate_lo_sum_address_p allows LO_SUM addresses to
12472 have any offset so test for wrap in the low 16 bits.
12474 legitimate_offset_address_p checks for the range
12475 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12476 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12477 [0x7ff4,0x7fff] respectively, so test for the
12478 intersection of these ranges, [0x7ffc,0x7fff] and
12479 [0x7ff4,0x7ff7] respectively.
12481 Note that the address we see here may have been
12482 manipulated by legitimize_reload_address. */
12483 if (GET_CODE (addr) == LO_SUM
12484 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12485 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12487 if (in_p)
12488 sri->icode = CODE_FOR_reload_si_load;
12489 else
12490 sri->icode = CODE_FOR_reload_si_store;
12491 sri->extra_cost = 2;
12492 ret = NO_REGS;
12493 done_p = true;
12495 else
12496 default_p = true;
12498 else
12499 default_p = true;
12502 if (!done_p)
12503 default_p = true;
12505 if (default_p)
12506 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12508 gcc_assert (ret != ALL_REGS);
12510 if (TARGET_DEBUG_ADDR)
12512 fprintf (stderr,
12513 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12514 "mode = %s",
12515 reg_class_names[ret],
12516 in_p ? "true" : "false",
12517 reg_class_names[rclass],
12518 GET_MODE_NAME (mode));
12520 if (reload_completed)
12521 fputs (", after reload", stderr);
12523 if (!done_p)
12524 fputs (", done_p not set", stderr);
12526 if (default_p)
12527 fputs (", default secondary reload", stderr);
12529 if (sri->icode != CODE_FOR_nothing)
12530 fprintf (stderr, ", reload func = %s, extra cost = %d",
12531 insn_data[sri->icode].name, sri->extra_cost);
12533 else if (sri->extra_cost > 0)
12534 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12536 fputs ("\n", stderr);
12537 debug_rtx (x);
12540 return ret;
12543 /* Better tracing for rs6000_secondary_reload_inner. */
12545 static void
12546 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12547 bool store_p)
12549 rtx set, clobber;
12551 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12553 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12554 store_p ? "store" : "load");
12556 if (store_p)
12557 set = gen_rtx_SET (mem, reg);
12558 else
12559 set = gen_rtx_SET (reg, mem);
12561 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12562 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12565 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12566 ATTRIBUTE_NORETURN;
12568 static void
12569 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12570 bool store_p)
12572 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12573 gcc_unreachable ();
12576 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12577 reload helper functions. These were identified in
12578 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12579 reload, it calls the insns:
12580 reload_<RELOAD:mode>_<P:mptrsize>_store
12581 reload_<RELOAD:mode>_<P:mptrsize>_load
12583 which in turn calls this function, to do whatever is necessary to create
12584 valid addresses. */
12586 void
12587 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12589 int regno = true_regnum (reg);
12590 machine_mode mode = GET_MODE (reg);
12591 addr_mask_type addr_mask;
12592 rtx addr;
12593 rtx new_addr;
12594 rtx op_reg, op0, op1;
12595 rtx and_op;
12596 rtx cc_clobber;
12597 rtvec rv;
12599 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12600 || !base_reg_operand (scratch, GET_MODE (scratch)))
12601 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12603 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12604 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12606 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12607 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12609 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12610 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12612 else
12613 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12615 /* Make sure the mode is valid in this register class. */
12616 if ((addr_mask & RELOAD_REG_VALID) == 0)
12617 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12619 if (TARGET_DEBUG_ADDR)
12620 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12622 new_addr = addr = XEXP (mem, 0);
12623 switch (GET_CODE (addr))
12625 /* Does the register class support auto update forms for this mode? If
12626 not, do the update now. We don't need a scratch register, since the
12627 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12628 case PRE_INC:
12629 case PRE_DEC:
12630 op_reg = XEXP (addr, 0);
12631 if (!base_reg_operand (op_reg, Pmode))
12632 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12634 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12636 int delta = GET_MODE_SIZE (mode);
12637 if (GET_CODE (addr) == PRE_DEC)
12638 delta = -delta;
12639 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12640 new_addr = op_reg;
12642 break;
12644 case PRE_MODIFY:
12645 op0 = XEXP (addr, 0);
12646 op1 = XEXP (addr, 1);
12647 if (!base_reg_operand (op0, Pmode)
12648 || GET_CODE (op1) != PLUS
12649 || !rtx_equal_p (op0, XEXP (op1, 0)))
12650 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12652 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12654 emit_insn (gen_rtx_SET (op0, op1));
12655 new_addr = reg;
12657 break;
12659 /* Do we need to simulate AND -16 to clear the bottom address bits used
12660 in VMX load/stores? */
12661 case AND:
12662 op0 = XEXP (addr, 0);
12663 op1 = XEXP (addr, 1);
12664 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12666 if (REG_P (op0) || SUBREG_P (op0))
12667 op_reg = op0;
12669 else if (GET_CODE (op1) == PLUS)
12671 emit_insn (gen_rtx_SET (scratch, op1));
12672 op_reg = scratch;
12675 else
12676 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12678 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12679 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12680 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12681 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12682 new_addr = scratch;
12684 break;
12686 /* If this is an indirect address, make sure it is a base register. */
12687 case REG:
12688 case SUBREG:
12689 if (!base_reg_operand (addr, GET_MODE (addr)))
12691 emit_insn (gen_rtx_SET (scratch, addr));
12692 new_addr = scratch;
12694 break;
12696 /* If this is an indexed address, make sure the register class can handle
12697 indexed addresses for this mode. */
12698 case PLUS:
12699 op0 = XEXP (addr, 0);
12700 op1 = XEXP (addr, 1);
12701 if (!base_reg_operand (op0, Pmode))
12702 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12704 else if (int_reg_operand (op1, Pmode))
12706 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12708 emit_insn (gen_rtx_SET (scratch, addr));
12709 new_addr = scratch;
12713 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12715 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12716 || !quad_address_p (addr, mode, false))
12718 emit_insn (gen_rtx_SET (scratch, addr));
12719 new_addr = scratch;
12723 /* Make sure the register class can handle offset addresses. */
12724 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12726 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12728 emit_insn (gen_rtx_SET (scratch, addr));
12729 new_addr = scratch;
12733 else
12734 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12736 break;
12738 case LO_SUM:
12739 op0 = XEXP (addr, 0);
12740 op1 = XEXP (addr, 1);
12741 if (!base_reg_operand (op0, Pmode))
12742 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12744 else if (int_reg_operand (op1, Pmode))
12746 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12748 emit_insn (gen_rtx_SET (scratch, addr));
12749 new_addr = scratch;
12753 /* Quad offsets are restricted and can't handle normal addresses. */
12754 else if (mode_supports_dq_form (mode))
12756 emit_insn (gen_rtx_SET (scratch, addr));
12757 new_addr = scratch;
12760 /* Make sure the register class can handle offset addresses. */
12761 else if (legitimate_lo_sum_address_p (mode, addr, false))
12763 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12765 emit_insn (gen_rtx_SET (scratch, addr));
12766 new_addr = scratch;
12770 else
12771 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12773 break;
12775 case SYMBOL_REF:
12776 case CONST:
12777 case LABEL_REF:
12778 rs6000_emit_move (scratch, addr, Pmode);
12779 new_addr = scratch;
12780 break;
12782 default:
12783 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12786 /* Adjust the address if it changed. */
12787 if (addr != new_addr)
12789 mem = replace_equiv_address_nv (mem, new_addr);
12790 if (TARGET_DEBUG_ADDR)
12791 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12794 /* Now create the move. */
12795 if (store_p)
12796 emit_insn (gen_rtx_SET (mem, reg));
12797 else
12798 emit_insn (gen_rtx_SET (reg, mem));
12800 return;
12803 /* Convert reloads involving 64-bit gprs and misaligned offset
12804 addressing, or multiple 32-bit gprs and offsets that are too large,
12805 to use indirect addressing. */
12807 void
12808 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12810 int regno = true_regnum (reg);
12811 enum reg_class rclass;
12812 rtx addr;
12813 rtx scratch_or_premodify = scratch;
12815 if (TARGET_DEBUG_ADDR)
12817 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12818 store_p ? "store" : "load");
12819 fprintf (stderr, "reg:\n");
12820 debug_rtx (reg);
12821 fprintf (stderr, "mem:\n");
12822 debug_rtx (mem);
12823 fprintf (stderr, "scratch:\n");
12824 debug_rtx (scratch);
12827 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12828 gcc_assert (MEM_P (mem));
12829 rclass = REGNO_REG_CLASS (regno);
12830 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12831 addr = XEXP (mem, 0);
12833 if (GET_CODE (addr) == PRE_MODIFY)
12835 gcc_assert (REG_P (XEXP (addr, 0))
12836 && GET_CODE (XEXP (addr, 1)) == PLUS
12837 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12838 scratch_or_premodify = XEXP (addr, 0);
12839 addr = XEXP (addr, 1);
12841 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12843 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12845 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12847 /* Now create the move. */
12848 if (store_p)
12849 emit_insn (gen_rtx_SET (mem, reg));
12850 else
12851 emit_insn (gen_rtx_SET (reg, mem));
12853 return;
12856 /* Given an rtx X being reloaded into a reg required to be
12857 in class CLASS, return the class of reg to actually use.
12858 In general this is just CLASS; but on some machines
12859 in some cases it is preferable to use a more restrictive class.
12861 On the RS/6000, we have to return NO_REGS when we want to reload a
12862 floating-point CONST_DOUBLE to force it to be copied to memory.
12864 We also don't want to reload integer values into floating-point
12865 registers if we can at all help it. In fact, this can
12866 cause reload to die, if it tries to generate a reload of CTR
12867 into a FP register and discovers it doesn't have the memory location
12868 required.
12870 ??? Would it be a good idea to have reload do the converse, that is
12871 try to reload floating modes into FP registers if possible? */
12874 static enum reg_class
12875 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12877 machine_mode mode = GET_MODE (x);
12878 bool is_constant = CONSTANT_P (x);
12880 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12881 reload class for it. */
12882 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12883 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12884 return NO_REGS;
12886 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12887 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12888 return NO_REGS;
12890 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12891 the reloading of address expressions using PLUS into floating point
12892 registers. */
12893 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12895 if (is_constant)
12897 /* Zero is always allowed in all VSX registers. */
12898 if (x == CONST0_RTX (mode))
12899 return rclass;
12901 /* If this is a vector constant that can be formed with a few Altivec
12902 instructions, we want altivec registers. */
12903 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12904 return ALTIVEC_REGS;
12906 /* If this is an integer constant that can easily be loaded into
12907 vector registers, allow it. */
12908 if (CONST_INT_P (x))
12910 HOST_WIDE_INT value = INTVAL (x);
12912 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12913 2.06 can generate it in the Altivec registers with
12914 VSPLTI<x>. */
12915 if (value == -1)
12917 if (TARGET_P8_VECTOR)
12918 return rclass;
12919 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12920 return ALTIVEC_REGS;
12921 else
12922 return NO_REGS;
12925 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12926 a sign extend in the Altivec registers. */
12927 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12928 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12929 return ALTIVEC_REGS;
12932 /* Force constant to memory. */
12933 return NO_REGS;
12936 /* D-form addressing can easily reload the value. */
12937 if (mode_supports_vmx_dform (mode)
12938 || mode_supports_dq_form (mode))
12939 return rclass;
12941 /* If this is a scalar floating point value and we don't have D-form
12942 addressing, prefer the traditional floating point registers so that we
12943 can use D-form (register+offset) addressing. */
12944 if (rclass == VSX_REGS
12945 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12946 return FLOAT_REGS;
12948 /* Prefer the Altivec registers if Altivec is handling the vector
12949 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12950 loads. */
12951 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12952 || mode == V1TImode)
12953 return ALTIVEC_REGS;
12955 return rclass;
12958 if (is_constant || GET_CODE (x) == PLUS)
12960 if (reg_class_subset_p (GENERAL_REGS, rclass))
12961 return GENERAL_REGS;
12962 if (reg_class_subset_p (BASE_REGS, rclass))
12963 return BASE_REGS;
12964 return NO_REGS;
12967 /* For the vector pair and vector quad modes, prefer their natural register
12968 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12969 the GPR registers. */
12970 if (rclass == GEN_OR_FLOAT_REGS)
12972 if (mode == OOmode)
12973 return VSX_REGS;
12975 if (mode == XOmode)
12976 return FLOAT_REGS;
12978 if (GET_MODE_CLASS (mode) == MODE_INT)
12979 return GENERAL_REGS;
12982 return rclass;
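/* A few representative decisions made above, summarized for illustration
   rather than exhaustively:

     CONST0_RTX (V2DImode), rclass VSX_REGS          -> VSX_REGS (zero is easy)
     const -1 (V4SImode), rclass VSX_REGS, ISA 2.07  -> VSX_REGS (xxlorc)
     SFmode value, rclass VSX_REGS, no vector D-form -> FLOAT_REGS (reg+offset)
     (plus ...) address, rclass GEN_OR_FLOAT_REGS    -> GENERAL_REGS */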
12985 /* Debug version of rs6000_preferred_reload_class. */
12986 static enum reg_class
12987 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12989 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12991 fprintf (stderr,
12992 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12993 "mode = %s, x:\n",
12994 reg_class_names[ret], reg_class_names[rclass],
12995 GET_MODE_NAME (GET_MODE (x)));
12996 debug_rtx (x);
12998 return ret;
13001 /* If we are copying between FP or AltiVec registers and anything else, we need
13002 a memory location. The exception is when we are targeting ppc64 and the
13003 direct move instructions between FPRs and GPRs are available. Also, under
13004 VSX, you can copy vector registers from the FP register set to the Altivec
13005 register set and vice versa. */
13007 static bool
13008 rs6000_secondary_memory_needed (machine_mode mode,
13009 reg_class_t from_class,
13010 reg_class_t to_class)
13012 enum rs6000_reg_type from_type, to_type;
13013 bool altivec_p = ((from_class == ALTIVEC_REGS)
13014 || (to_class == ALTIVEC_REGS));
13016 /* If a simple/direct move is available, we don't need secondary memory. */
13017 from_type = reg_class_to_reg_type[(int)from_class];
13018 to_type = reg_class_to_reg_type[(int)to_class];
13020 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13021 (secondary_reload_info *)0, altivec_p))
13022 return false;
13024 /* If we have a floating point or vector register class, we need to use
13025 memory to transfer the data. */
13026 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13027 return true;
13029 return false;
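/* An illustrative example. Without the ISA 2.07 direct-move instructions,
   copying a DImode value from a GPR to a floating-point register must
   bounce through memory, conceptually (SLOT is an invented stack offset):

     std 3,SLOT(1)   # spill the gpr to a stack slot
     lfd 0,SLOT(1)   # reload it into the fpr

   Once direct moves are available, rs6000_secondary_reload_move succeeds
   and the copy becomes a single mtvsrd, so no secondary memory is needed. */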
13032 /* Debug version of rs6000_secondary_memory_needed. */
13033 static bool
13034 rs6000_debug_secondary_memory_needed (machine_mode mode,
13035 reg_class_t from_class,
13036 reg_class_t to_class)
13038 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13040 fprintf (stderr,
13041 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13042 "to_class = %s, mode = %s\n",
13043 ret ? "true" : "false",
13044 reg_class_names[from_class],
13045 reg_class_names[to_class],
13046 GET_MODE_NAME (mode));
13048 return ret;
13051 /* Return the register class of a scratch register needed to copy IN into
13052 or out of a register in RCLASS in MODE. If it can be done directly,
13053 NO_REGS is returned. */
13055 static enum reg_class
13056 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13057 rtx in)
13059 int regno;
13061 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13062 #if TARGET_MACHO
13063 && MACHOPIC_INDIRECT
13064 #endif
13067 /* We cannot copy a symbolic operand directly into anything
13068 other than BASE_REGS for TARGET_ELF. So indicate that a
13069 register from BASE_REGS is needed as an intermediate
13070 register.
13072 On Darwin, pic addresses require a load from memory, which
13073 needs a base register. */
13074 if (rclass != BASE_REGS
13075 && (SYMBOL_REF_P (in)
13076 || GET_CODE (in) == HIGH
13077 || GET_CODE (in) == LABEL_REF
13078 || GET_CODE (in) == CONST))
13079 return BASE_REGS;
13082 if (REG_P (in))
13084 regno = REGNO (in);
13085 if (!HARD_REGISTER_NUM_P (regno))
13087 regno = true_regnum (in);
13088 if (!HARD_REGISTER_NUM_P (regno))
13089 regno = -1;
13092 else if (SUBREG_P (in))
13094 regno = true_regnum (in);
13095 if (!HARD_REGISTER_NUM_P (regno))
13096 regno = -1;
13098 else
13099 regno = -1;
13101 /* If we have VSX register moves, prefer moving scalar values between
13102 Altivec registers and GPRs by going via an FPR (and then via memory)
13103 instead of reloading the secondary memory address for Altivec moves. */
13104 if (TARGET_VSX
13105 && GET_MODE_SIZE (mode) < 16
13106 && !mode_supports_vmx_dform (mode)
13107 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13108 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13109 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13110 && (regno >= 0 && INT_REGNO_P (regno)))))
13111 return FLOAT_REGS;
13113 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13114 into anything. */
13115 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13116 || (regno >= 0 && INT_REGNO_P (regno)))
13117 return NO_REGS;
13119 /* Constants, memory, and VSX registers can go into VSX registers (both the
13120 traditional floating point and the altivec registers). */
13121 if (rclass == VSX_REGS
13122 && (regno == -1 || VSX_REGNO_P (regno)))
13123 return NO_REGS;
13125 /* Constants, memory, and FP registers can go into FP registers. */
13126 if ((regno == -1 || FP_REGNO_P (regno))
13127 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13128 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13130 /* Memory and AltiVec registers can go into AltiVec registers. */
13131 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13132 && rclass == ALTIVEC_REGS)
13133 return NO_REGS;
13135 /* We can copy among the CR registers. */
13136 if ((rclass == CR_REGS || rclass == CR0_REGS)
13137 && regno >= 0 && CR_REGNO_P (regno))
13138 return NO_REGS;
13140 /* Otherwise, we need GENERAL_REGS. */
13141 return GENERAL_REGS;
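/* For example, reloading a 4- or 8-byte value that lives in an Altivec
   register into a GPR (without D-form Altivec addressing) returns
   FLOAT_REGS above, so the value travels Altivec -> FPR -> memory -> GPR
   instead of forcing a secondary-memory reload straight from the Altivec
   register. Symbolic operands on ELF instead get BASE_REGS, since only a
   base register can materialize them. */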
13144 /* Debug version of rs6000_secondary_reload_class. */
13145 static enum reg_class
13146 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13147 machine_mode mode, rtx in)
13149 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13150 fprintf (stderr,
13151 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13152 "mode = %s, input rtx:\n",
13153 reg_class_names[ret], reg_class_names[rclass],
13154 GET_MODE_NAME (mode));
13155 debug_rtx (in);
13157 return ret;
13160 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13162 static bool
13163 rs6000_can_change_mode_class (machine_mode from,
13164 machine_mode to,
13165 reg_class_t rclass)
13167 unsigned from_size = GET_MODE_SIZE (from);
13168 unsigned to_size = GET_MODE_SIZE (to);
13170 if (from_size != to_size)
13172 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13174 if (reg_classes_intersect_p (xclass, rclass))
13176 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13177 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13178 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13179 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13181 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13182 single register under VSX because the scalar part of the register
13183 is in the upper 64-bits, and not the lower 64-bits. Types like
13184 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13185 IEEE floating point can't overlap, and neither can small
13186 values. */
13188 if (to_float128_vector_p && from_float128_vector_p)
13189 return true;
13191 else if (to_float128_vector_p || from_float128_vector_p)
13192 return false;
13194 /* TDmode in floating-mode registers must always go into a register
13195 pair with the most significant word in the even-numbered register
13196 to match ISA requirements. In little-endian mode, this does not
13197 match subreg numbering, so we cannot allow subregs. */
13198 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13199 return false;
13201 /* Allow SD<->DD changes, since SDmode values are stored in
13202 the low half of the DDmode, just like target-independent
13203 code expects. We need to allow at least SD->DD since
13204 rs6000_secondary_memory_needed_mode asks for that change
13205 to be made for SD reloads. */
13206 if ((to == DDmode && from == SDmode)
13207 || (to == SDmode && from == DDmode))
13208 return true;
13210 if (from_size < 8 || to_size < 8)
13211 return false;
13213 if (from_size == 8 && (8 * to_nregs) != to_size)
13214 return false;
13216 if (to_size == 8 && (8 * from_nregs) != from_size)
13217 return false;
13219 return true;
13221 else
13222 return true;
13225 /* Since the VSX register set includes traditional floating point registers
13226 and altivec registers, just check for the size being different instead of
13227 trying to check whether the modes are vector modes. Otherwise it won't
13228 allow say DF and DI to change classes. For types like TFmode and TDmode
13229 that take 2 64-bit registers, rather than a single 128-bit register, don't
13230 allow subregs of those types to other 128 bit types. */
13231 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13233 unsigned num_regs = (from_size + 15) / 16;
13234 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13235 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13236 return false;
13238 return (from_size == 8 || from_size == 16);
13241 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13242 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13243 return false;
13245 return true;
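/* A minimal standalone model, not part of this file, of the final VSX size
   test above, assuming 16-byte VSX registers. The names are invented for
   illustration. */
#include <stdbool.h>

/* Number of 16-byte VSX registers a mode of SIZE bytes occupies. */
static unsigned
example_vsx_nregs (unsigned size)
{
  return (size + 15) / 16;
}

/* Mirror of the test above: the mode change is allowed only when both
   modes fit in the same number of VSX registers and the source mode is 8
   or 16 bytes wide. */
static bool
example_vsx_mode_change_ok (unsigned from_size,
                            unsigned from_nregs, unsigned to_nregs)
{
  unsigned num_regs = example_vsx_nregs (from_size);
  if (to_nregs > num_regs || from_nregs > num_regs)
    return false;
  return from_size == 8 || from_size == 16;
}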
13248 /* Debug version of rs6000_can_change_mode_class. */
13249 static bool
13250 rs6000_debug_can_change_mode_class (machine_mode from,
13251 machine_mode to,
13252 reg_class_t rclass)
13254 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13256 fprintf (stderr,
13257 "rs6000_can_change_mode_class, return %s, from = %s, "
13258 "to = %s, rclass = %s\n",
13259 ret ? "true" : "false",
13260 GET_MODE_NAME (from), GET_MODE_NAME (to),
13261 reg_class_names[rclass]);
13263 return ret;
13266 /* Return a string to do a move operation of 128 bits of data. */
13268 const char *
13269 rs6000_output_move_128bit (rtx operands[])
13271 rtx dest = operands[0];
13272 rtx src = operands[1];
13273 machine_mode mode = GET_MODE (dest);
13274 int dest_regno;
13275 int src_regno;
13276 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13277 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13279 if (REG_P (dest))
13281 dest_regno = REGNO (dest);
13282 dest_gpr_p = INT_REGNO_P (dest_regno);
13283 dest_fp_p = FP_REGNO_P (dest_regno);
13284 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13285 dest_vsx_p = dest_fp_p | dest_vmx_p;
13287 else
13289 dest_regno = -1;
13290 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13293 if (REG_P (src))
13295 src_regno = REGNO (src);
13296 src_gpr_p = INT_REGNO_P (src_regno);
13297 src_fp_p = FP_REGNO_P (src_regno);
13298 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13299 src_vsx_p = src_fp_p | src_vmx_p;
13301 else
13303 src_regno = -1;
13304 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13307 /* Register moves. */
13308 if (dest_regno >= 0 && src_regno >= 0)
13310 if (dest_gpr_p)
13312 if (src_gpr_p)
13313 return "#";
13315 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13316 return (WORDS_BIG_ENDIAN
13317 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13318 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13320 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13321 return "#";
13324 else if (TARGET_VSX && dest_vsx_p)
13326 if (src_vsx_p)
13327 return "xxlor %x0,%x1,%x1";
13329 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13330 return (WORDS_BIG_ENDIAN
13331 ? "mtvsrdd %x0,%1,%L1"
13332 : "mtvsrdd %x0,%L1,%1");
13334 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13335 return "#";
13338 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13339 return "vor %0,%1,%1";
13341 else if (dest_fp_p && src_fp_p)
13342 return "#";
13345 /* Loads. */
13346 else if (dest_regno >= 0 && MEM_P (src))
13348 if (dest_gpr_p)
13350 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13351 return "lq %0,%1";
13352 else
13353 return "#";
13356 else if (TARGET_ALTIVEC && dest_vmx_p
13357 && altivec_indexed_or_indirect_operand (src, mode))
13358 return "lvx %0,%y1";
13360 else if (TARGET_VSX && dest_vsx_p)
13362 if (mode_supports_dq_form (mode)
13363 && quad_address_p (XEXP (src, 0), mode, true))
13364 return "lxv %x0,%1";
13366 else if (TARGET_P9_VECTOR)
13367 return "lxvx %x0,%y1";
13369 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13370 return "lxvw4x %x0,%y1";
13372 else
13373 return "lxvd2x %x0,%y1";
13376 else if (TARGET_ALTIVEC && dest_vmx_p)
13377 return "lvx %0,%y1";
13379 else if (dest_fp_p)
13380 return "#";
13383 /* Stores. */
13384 else if (src_regno >= 0 && MEM_P (dest))
13386 if (src_gpr_p)
13388 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13389 return "stq %1,%0";
13390 else
13391 return "#";
13394 else if (TARGET_ALTIVEC && src_vmx_p
13395 && altivec_indexed_or_indirect_operand (dest, mode))
13396 return "stvx %1,%y0";
13398 else if (TARGET_VSX && src_vsx_p)
13400 if (mode_supports_dq_form (mode)
13401 && quad_address_p (XEXP (dest, 0), mode, true))
13402 return "stxv %x1,%0";
13404 else if (TARGET_P9_VECTOR)
13405 return "stxvx %x1,%y0";
13407 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13408 return "stxvw4x %x1,%y0";
13410 else
13411 return "stxvd2x %x1,%y0";
13414 else if (TARGET_ALTIVEC && src_vmx_p)
13415 return "stvx %1,%y0";
13417 else if (src_fp_p)
13418 return "#";
13421 /* Constants. */
13422 else if (dest_regno >= 0
13423 && (CONST_INT_P (src)
13424 || CONST_WIDE_INT_P (src)
13425 || CONST_DOUBLE_P (src)
13426 || GET_CODE (src) == CONST_VECTOR))
13428 if (dest_gpr_p)
13429 return "#";
13431 else if ((dest_vmx_p && TARGET_ALTIVEC)
13432 || (dest_vsx_p && TARGET_VSX))
13433 return output_vec_const_move (operands);
13436 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
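/* Representative selections made above, with operand modifiers as they
   appear in the templates:

     VSX <- VSX                                  xxlor %x0,%x1,%x1
     GPR pair <- VSX (ISA 3.0 direct move, BE)   mfvsrd %0,%x1 ; mfvsrld %L0,%x1
     VSX <- GPR pair (ISA 3.0 direct move, BE)   mtvsrdd %x0,%1,%L1
     VSX <- quad-aligned memory (ISA 3.0)        lxv %x0,%1
     GPR pair <- memory (TARGET_QUAD_MEMORY)     lq %0,%1

   "#" means the move must be split into smaller moves after reload. */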
13439 /* Validate a 128-bit move. */
13440 bool
13441 rs6000_move_128bit_ok_p (rtx operands[])
13443 machine_mode mode = GET_MODE (operands[0]);
13444 return (gpc_reg_operand (operands[0], mode)
13445 || gpc_reg_operand (operands[1], mode));
13448 /* Return true if a 128-bit move needs to be split. */
13449 bool
13450 rs6000_split_128bit_ok_p (rtx operands[])
13452 if (!reload_completed)
13453 return false;
13455 if (!gpr_or_gpr_p (operands[0], operands[1]))
13456 return false;
13458 if (quad_load_store_p (operands[0], operands[1]))
13459 return false;
13461 return true;
13465 /* Given a comparison operation, return the bit number in CCR to test. We
13466 know this is a valid comparison.
13468 SCC_P is 1 if this is for an scc. That means that %D will have been
13469 used instead of %C, so the bits will be in different places.
13471 Return -1 if OP isn't a valid comparison for some reason. */
13473 int
13474 ccr_bit (rtx op, int scc_p)
13476 enum rtx_code code = GET_CODE (op);
13477 machine_mode cc_mode;
13478 int cc_regnum;
13479 int base_bit;
13480 rtx reg;
13482 if (!COMPARISON_P (op))
13483 return -1;
13485 reg = XEXP (op, 0);
13487 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13488 return -1;
13490 cc_mode = GET_MODE (reg);
13491 cc_regnum = REGNO (reg);
13492 base_bit = 4 * (cc_regnum - CR0_REGNO);
13494 validate_condition_mode (code, cc_mode);
13496 /* When generating a sCOND operation, only positive conditions are
13497 allowed. */
13498 if (scc_p)
13499 switch (code)
13501 case EQ:
13502 case GT:
13503 case LT:
13504 case UNORDERED:
13505 case GTU:
13506 case LTU:
13507 break;
13508 default:
13509 return -1;
13512 switch (code)
13514 case NE:
13515 return scc_p ? base_bit + 3 : base_bit + 2;
13516 case EQ:
13517 return base_bit + 2;
13518 case GT: case GTU: case UNLE:
13519 return base_bit + 1;
13520 case LT: case LTU: case UNGE:
13521 return base_bit;
13522 case ORDERED: case UNORDERED:
13523 return base_bit + 3;
13525 case GE: case GEU:
13526 /* If scc, we will have done a cror to put the bit in the
13527 unordered position. So test that bit. For integer, this is ! LT
13528 unless this is an scc insn. */
13529 return scc_p ? base_bit + 3 : base_bit;
13531 case LE: case LEU:
13532 return scc_p ? base_bit + 3 : base_bit + 1;
13534 default:
13535 return -1;
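/* A minimal standalone sketch, not part of this file, of the bit
   arithmetic above. Each 4-bit CR field holds LT, GT, EQ and SO/UN in that
   order, so a condition in field N lives at bit 4*N + offset. The names
   below are invented for illustration. */

enum example_cr_bit { EX_CR_LT = 0, EX_CR_GT = 1, EX_CR_EQ = 2, EX_CR_SO = 3 };

/* Bit number within the 32-bit CR for FIELD (0..7) and BIT, mirroring
   base_bit = 4 * (cc_regnum - CR0_REGNO) above. For example, EQ in CR
   field 6 (the field vector compares set) is bit 4*6 + 2 == 26. */
static int
example_ccr_bit (int field, enum example_cr_bit bit)
{
  return 4 * field + (int) bit;
}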
13539 /* Return the GOT register. */
13541 rtx
13542 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13544 /* The second flow pass currently (June 1999) can't update
13545 regs_ever_live without disturbing other parts of the compiler, so
13546 update it here to make the prolog/epilogue code happy. */
13547 if (!can_create_pseudo_p ()
13548 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13549 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13551 crtl->uses_pic_offset_table = 1;
13553 return pic_offset_table_rtx;
13556 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13558 /* Write out a function code label. */
13560 void
13561 rs6000_output_function_entry (FILE *file, const char *fname)
13563 if (fname[0] != '.')
13565 switch (DEFAULT_ABI)
13567 default:
13568 gcc_unreachable ();
13570 case ABI_AIX:
13571 if (DOT_SYMBOLS)
13572 putc ('.', file);
13573 else
13574 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13575 break;
13577 case ABI_ELFv2:
13578 case ABI_V4:
13579 case ABI_DARWIN:
13580 break;
13584 RS6000_OUTPUT_BASENAME (file, fname);
13587 /* Print an operand. Recognize special options, documented below. */
13589 #if TARGET_ELF
13590 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13591 only introduced by the linker, when applying the sda21
13592 relocation. */
13593 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13594 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13595 #else
13596 #define SMALL_DATA_RELOC "sda21"
13597 #define SMALL_DATA_REG 0
13598 #endif
13600 void
13601 print_operand (FILE *file, rtx x, int code)
13603 int i;
13604 unsigned HOST_WIDE_INT uval;
13606 switch (code)
13608 /* %a is output_address. */
13610 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13611 output_operand. */
13613 case 'A':
13614 /* Write the MMA accumulator number associated with VSX register X. */
13615 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13616 output_operand_lossage ("invalid %%A value");
13617 else
13618 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13619 return;
13621 case 'D':
13622 /* Like 'J' but get to the GT bit only. */
13623 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13625 output_operand_lossage ("invalid %%D value");
13626 return;
13629 /* Bit 1 is GT bit. */
13630 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13632 /* Add one for shift count in rlinm for scc. */
13633 fprintf (file, "%d", i + 1);
13634 return;
13636 case 'e':
13637 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13638 if (! INT_P (x))
13640 output_operand_lossage ("invalid %%e value");
13641 return;
13644 uval = INTVAL (x);
13645 if ((uval & 0xffff) == 0 && uval != 0)
13646 putc ('s', file);
13647 return;
13649 case 'E':
13650 /* X is a CR register. Print the number of the EQ bit of the CR */
13651 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13652 output_operand_lossage ("invalid %%E value");
13653 else
13654 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13655 return;
13657 case 'f':
13658 /* X is a CR register. Print the shift count needed to move it
13659 to the high-order four bits. */
13660 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13661 output_operand_lossage ("invalid %%f value");
13662 else
13663 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13664 return;
13666 case 'F':
13667 /* Similar, but print the count for the rotate in the opposite
13668 direction. */
13669 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13670 output_operand_lossage ("invalid %%F value");
13671 else
13672 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13673 return;
13675 case 'G':
13676 /* X is a constant integer. If it is negative, print "m",
13677 otherwise print "z". This is to make an aze or ame insn. */
13678 if (!CONST_INT_P (x))
13679 output_operand_lossage ("invalid %%G value");
13680 else if (INTVAL (x) >= 0)
13681 putc ('z', file);
13682 else
13683 putc ('m', file);
13684 return;
13686 case 'h':
13687 /* If constant, output low-order five bits. Otherwise, write
13688 normally. */
13689 if (INT_P (x))
13690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13691 else
13692 print_operand (file, x, 0);
13693 return;
13695 case 'H':
13696 /* If constant, output low-order six bits. Otherwise, write
13697 normally. */
13698 if (INT_P (x))
13699 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13700 else
13701 print_operand (file, x, 0);
13702 return;
13704 case 'I':
13705 /* Print `i' if this is a constant, else nothing. */
13706 if (INT_P (x))
13707 putc ('i', file);
13708 return;
13710 case 'j':
13711 /* Write the bit number in CCR for jump. */
13712 i = ccr_bit (x, 0);
13713 if (i == -1)
13714 output_operand_lossage ("invalid %%j code");
13715 else
13716 fprintf (file, "%d", i);
13717 return;
13719 case 'J':
13720 /* Similar, but add one for shift count in rlinm for scc and pass
13721 scc flag to `ccr_bit'. */
13722 i = ccr_bit (x, 1);
13723 if (i == -1)
13724 output_operand_lossage ("invalid %%J code");
13725 else
13726 /* If we want bit 31, write a shift count of zero, not 32. */
13727 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13728 return;
13730 case 'k':
13731 /* X must be a constant. Write the 1's complement of the
13732 constant. */
13733 if (! INT_P (x))
13734 output_operand_lossage ("invalid %%k value");
13735 else
13736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13737 return;
13739 case 'K':
13740 /* X must be a symbolic constant on ELF. Write an
13741 expression suitable for an 'addi' that adds in the low 16
13742 bits of the MEM. */
13743 if (GET_CODE (x) == CONST)
13745 if (GET_CODE (XEXP (x, 0)) != PLUS
13746 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13747 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13748 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13749 output_operand_lossage ("invalid %%K value");
13751 print_operand_address (file, x);
13752 fputs ("@l", file);
13753 return;
13755 /* %l is output_asm_label. */
13757 case 'L':
13758 /* Write second word of DImode or DFmode reference. Works on register
13759 or non-indexed memory only. */
13760 if (REG_P (x))
13761 fputs (reg_names[REGNO (x) + 1], file);
13762 else if (MEM_P (x))
13764 machine_mode mode = GET_MODE (x);
13765 /* Handle possible auto-increment. Since it is pre-increment and
13766 we have already done it, we can just use an offset of one word. */
13767 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13768 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13769 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13770 UNITS_PER_WORD));
13771 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13772 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13773 UNITS_PER_WORD));
13774 else
13775 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13776 UNITS_PER_WORD),
13777 0));
13779 if (small_data_operand (x, GET_MODE (x)))
13780 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13781 reg_names[SMALL_DATA_REG]);
13783 return;
13785 case 'N': /* Unused */
13786 /* Write the number of elements in the vector times 4. */
13787 if (GET_CODE (x) != PARALLEL)
13788 output_operand_lossage ("invalid %%N value");
13789 else
13790 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13791 return;
13793 case 'O': /* Unused */
13794 /* Similar, but subtract 1 first. */
13795 if (GET_CODE (x) != PARALLEL)
13796 output_operand_lossage ("invalid %%O value");
13797 else
13798 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13799 return;
13801 case 'p':
13802 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13803 if (! INT_P (x)
13804 || INTVAL (x) < 0
13805 || (i = exact_log2 (INTVAL (x))) < 0)
13806 output_operand_lossage ("invalid %%p value");
13807 else
13808 fprintf (file, "%d", i);
13809 return;
13811 case 'P':
13812 /* The operand must be an indirect memory reference. The result
13813 is the register name. */
13814 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13815 || REGNO (XEXP (x, 0)) >= 32)
13816 output_operand_lossage ("invalid %%P value");
13817 else
13818 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13819 return;
13821 case 'q':
13822 /* This outputs the logical code corresponding to a boolean
13823 expression. The expression may have one or both operands
13824 negated (if one, only the first one). For condition register
13825 logical operations, it will also treat the negated
13826 CR codes as NOTs, but not handle NOTs of them. */
13828 const char *const *t = 0;
13829 const char *s;
13830 enum rtx_code code = GET_CODE (x);
13831 static const char * const tbl[3][3] = {
13832 { "and", "andc", "nor" },
13833 { "or", "orc", "nand" },
13834 { "xor", "eqv", "xor" } };
13836 if (code == AND)
13837 t = tbl[0];
13838 else if (code == IOR)
13839 t = tbl[1];
13840 else if (code == XOR)
13841 t = tbl[2];
13842 else
13843 output_operand_lossage ("invalid %%q value");
13845 if (GET_CODE (XEXP (x, 0)) != NOT)
13846 s = t[0];
13847 else
13849 if (GET_CODE (XEXP (x, 1)) == NOT)
13850 s = t[2];
13851 else
13852 s = t[1];
13855 fputs (s, file);
13857 return;
13859 case 'Q':
13860 if (! TARGET_MFCRF)
13861 return;
13862 fputc (',', file);
13863 /* FALLTHRU */
13865 case 'R':
13866 /* X is a CR register. Print the mask for `mtcrf'. */
13867 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13868 output_operand_lossage ("invalid %%R value");
13869 else
13870 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13871 return;
13873 case 's':
13874 /* Low 5 bits of 32 - value. */
13875 if (! INT_P (x))
13876 output_operand_lossage ("invalid %%s value");
13877 else
13878 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13879 return;
13881 case 't':
13882 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13883 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13885 output_operand_lossage ("invalid %%t value");
13886 return;
13889 /* Bit 3 is OV bit. */
13890 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13892 /* If we want bit 31, write a shift count of zero, not 32. */
13893 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13894 return;
13896 case 'T':
13897 /* Print the symbolic name of a branch target register. */
13898 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13899 x = XVECEXP (x, 0, 0);
13900 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13901 && REGNO (x) != CTR_REGNO))
13902 output_operand_lossage ("invalid %%T value");
13903 else if (REGNO (x) == LR_REGNO)
13904 fputs ("lr", file);
13905 else
13906 fputs ("ctr", file);
13907 return;
13909 case 'u':
13910 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13911 for use in unsigned operand. */
13912 if (! INT_P (x))
13914 output_operand_lossage ("invalid %%u value");
13915 return;
13918 uval = INTVAL (x);
13919 if ((uval & 0xffff) == 0)
13920 uval >>= 16;
13922 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13923 return;
13925 case 'v':
13926 /* High-order 16 bits of constant for use in signed operand. */
13927 if (! INT_P (x))
13928 output_operand_lossage ("invalid %%v value");
13929 else
13930 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13931 (INTVAL (x) >> 16) & 0xffff);
13932 return;
13934 case 'U':
13935 /* Print `u' if this has an auto-increment or auto-decrement. */
13936 if (MEM_P (x)
13937 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13938 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13939 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13940 putc ('u', file);
13941 return;
13943 case 'V':
13944 /* Print the trap code for this operand. */
13945 switch (GET_CODE (x))
13947 case EQ:
13948 fputs ("eq", file); /* 4 */
13949 break;
13950 case NE:
13951 fputs ("ne", file); /* 24 */
13952 break;
13953 case LT:
13954 fputs ("lt", file); /* 16 */
13955 break;
13956 case LE:
13957 fputs ("le", file); /* 20 */
13958 break;
13959 case GT:
13960 fputs ("gt", file); /* 8 */
13961 break;
13962 case GE:
13963 fputs ("ge", file); /* 12 */
13964 break;
13965 case LTU:
13966 fputs ("llt", file); /* 2 */
13967 break;
13968 case LEU:
13969 fputs ("lle", file); /* 6 */
13970 break;
13971 case GTU:
13972 fputs ("lgt", file); /* 1 */
13973 break;
13974 case GEU:
13975 fputs ("lge", file); /* 5 */
13976 break;
13977 default:
13978 output_operand_lossage ("invalid %%V value");
13980 break;
13982 case 'w':
13983 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13984 normally. */
13985 if (INT_P (x))
13986 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13987 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13988 else
13989 print_operand (file, x, 0);
13990 return;
13992 case 'x':
13993 /* X is a FPR or Altivec register used in a VSX context. */
13994 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13995 output_operand_lossage ("invalid %%x value");
13996 else
13998 int reg = REGNO (x);
13999 int vsx_reg = (FP_REGNO_P (reg)
14000 ? reg - 32
14001 : reg - FIRST_ALTIVEC_REGNO + 32);
14003 #ifdef TARGET_REGNAMES
14004 if (TARGET_REGNAMES)
14005 fprintf (file, "%%vs%d", vsx_reg);
14006 else
14007 #endif
14008 fprintf (file, "%d", vsx_reg);
14010 return;
14012 case 'X':
14013 if (MEM_P (x)
14014 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14015 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14016 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14017 putc ('x', file);
14018 return;
14020 case 'Y':
14021 /* Like 'L', for third word of TImode/PTImode */
14022 if (REG_P (x))
14023 fputs (reg_names[REGNO (x) + 2], file);
14024 else if (MEM_P (x))
14026 machine_mode mode = GET_MODE (x);
14027 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14028 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14029 output_address (mode, plus_constant (Pmode,
14030 XEXP (XEXP (x, 0), 0), 8));
14031 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14032 output_address (mode, plus_constant (Pmode,
14033 XEXP (XEXP (x, 0), 0), 8));
14034 else
14035 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14036 if (small_data_operand (x, GET_MODE (x)))
14037 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14038 reg_names[SMALL_DATA_REG]);
14040 return;
14042 case 'z':
14043 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14044 x = XVECEXP (x, 0, 1);
14045 /* X is a SYMBOL_REF. Write out the name preceded by a
14046 period and without any trailing data in brackets. Used for function
14047 names. If we are configured for System V (or the embedded ABI) on
14048 the PowerPC, do not emit the period, since those systems do not use
14049 TOCs and the like. */
14050 if (!SYMBOL_REF_P (x))
14052 output_operand_lossage ("invalid %%z value");
14053 return;
14056 /* For macho, check to see if we need a stub. */
14057 if (TARGET_MACHO)
14059 const char *name = XSTR (x, 0);
14060 #if TARGET_MACHO
14061 if (darwin_symbol_stubs
14062 && MACHOPIC_INDIRECT
14063 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14064 name = machopic_indirection_name (x, /*stub_p=*/true);
14065 #endif
14066 assemble_name (file, name);
14068 else if (!DOT_SYMBOLS)
14069 assemble_name (file, XSTR (x, 0));
14070 else
14071 rs6000_output_function_entry (file, XSTR (x, 0));
14072 return;
14074 case 'Z':
14075 /* Like 'L', for last word of TImode/PTImode. */
14076 if (REG_P (x))
14077 fputs (reg_names[REGNO (x) + 3], file);
14078 else if (MEM_P (x))
14080 machine_mode mode = GET_MODE (x);
14081 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14082 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14083 output_address (mode, plus_constant (Pmode,
14084 XEXP (XEXP (x, 0), 0), 12));
14085 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14086 output_address (mode, plus_constant (Pmode,
14087 XEXP (XEXP (x, 0), 0), 12));
14088 else
14089 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14090 if (small_data_operand (x, GET_MODE (x)))
14091 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14092 reg_names[SMALL_DATA_REG]);
14094 return;
14096 /* Print AltiVec memory operand. */
14097 case 'y':
14099 rtx tmp;
14101 gcc_assert (MEM_P (x));
14103 tmp = XEXP (x, 0);
14105 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14106 && GET_CODE (tmp) == AND
14107 && CONST_INT_P (XEXP (tmp, 1))
14108 && INTVAL (XEXP (tmp, 1)) == -16)
14109 tmp = XEXP (tmp, 0);
14110 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14111 && GET_CODE (tmp) == PRE_MODIFY)
14112 tmp = XEXP (tmp, 1);
14113 if (REG_P (tmp))
14114 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14115 else
14117 if (GET_CODE (tmp) != PLUS
14118 || !REG_P (XEXP (tmp, 0))
14119 || !REG_P (XEXP (tmp, 1)))
14121 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14122 break;
14125 if (REGNO (XEXP (tmp, 0)) == 0)
14126 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14127 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14128 else
14129 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14130 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14132 break;
14135 case 0:
14136 if (REG_P (x))
14137 fprintf (file, "%s", reg_names[REGNO (x)]);
14138 else if (MEM_P (x))
14140 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14141 know the width from the mode. */
14142 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14143 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14144 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14145 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14146 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14147 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14148 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14149 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14150 else
14151 output_address (GET_MODE (x), XEXP (x, 0));
14153 else if (toc_relative_expr_p (x, false,
14154 &tocrel_base_oac, &tocrel_offset_oac))
14155 /* This hack along with a corresponding hack in
14156 rs6000_output_addr_const_extra arranges to output addends
14157 where the assembler expects to find them. eg.
14158 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14159 without this hack would be output as "x@toc+4". We
14160 want "x+4@toc". */
14161 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14162 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14163 output_addr_const (file, XVECEXP (x, 0, 0));
14164 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14165 output_addr_const (file, XVECEXP (x, 0, 1));
14166 else
14167 output_addr_const (file, x);
14168 return;
14170 case '&':
14171 if (const char *name = get_some_local_dynamic_name ())
14172 assemble_name (file, name);
14173 else
14174 output_operand_lossage ("'%%&' used without any "
14175 "local dynamic TLS references");
14176 return;
14178 default:
14179 output_operand_lossage ("invalid %%xn code");
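/* A standalone illustration, not part of this file, of the sign-extension
   idiom in the 'w' case above. XORing the low 16 bits with 0x8000 and then
   subtracting 0x8000 sign-extends them without relying on
   implementation-defined right shifts of negative values. */
static long
example_low16_signed (unsigned long val)
{
  long low = (long) ((val & 0xffff) ^ 0x8000);  /* in [0, 0xffff] */
  return low - 0x8000;                          /* in [-0x8000, 0x7fff] */
}

/* example_low16_signed (0x12345678) == 0x5678, and
   example_low16_signed (0x1234fffe) == -2. */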
14183 /* Print the address of an operand. */
14185 void
14186 print_operand_address (FILE *file, rtx x)
14188 if (REG_P (x))
14189 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14191 /* Is it a PC-relative address? */
14192 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14194 HOST_WIDE_INT offset;
14196 if (GET_CODE (x) == CONST)
14197 x = XEXP (x, 0);
14199 if (GET_CODE (x) == PLUS)
14201 offset = INTVAL (XEXP (x, 1));
14202 x = XEXP (x, 0);
14204 else
14205 offset = 0;
14207 output_addr_const (file, x);
14209 if (offset)
14210 fprintf (file, "%+" PRId64, offset);
14212 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14213 fprintf (file, "@got");
14215 fprintf (file, "@pcrel");
14217 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14218 || GET_CODE (x) == LABEL_REF)
14220 output_addr_const (file, x);
14221 if (small_data_operand (x, GET_MODE (x)))
14222 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14223 reg_names[SMALL_DATA_REG]);
14224 else
14225 gcc_assert (!TARGET_TOC);
14227 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14228 && REG_P (XEXP (x, 1)))
14230 if (REGNO (XEXP (x, 0)) == 0)
14231 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14232 reg_names[ REGNO (XEXP (x, 0)) ]);
14233 else
14234 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14235 reg_names[ REGNO (XEXP (x, 1)) ]);
14237 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14238 && CONST_INT_P (XEXP (x, 1)))
14239 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14240 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14241 #if TARGET_MACHO
14242 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14243 && CONSTANT_P (XEXP (x, 1)))
14245 fprintf (file, "lo16(");
14246 output_addr_const (file, XEXP (x, 1));
14247 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14249 #endif
14250 #if TARGET_ELF
14251 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14252 && CONSTANT_P (XEXP (x, 1)))
14254 output_addr_const (file, XEXP (x, 1));
14255 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14257 #endif
14258 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14260 /* This hack along with a corresponding hack in
14261 rs6000_output_addr_const_extra arranges to output addends
14262 where the assembler expects to find them. eg.
14263 (lo_sum (reg 9)
14264 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14265 without this hack would be output as "x@toc+8@l(9)". We
14266 want "x+8@toc@l(9)". */
14267 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14268 if (GET_CODE (x) == LO_SUM)
14269 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14270 else
14271 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14273 else
14274 output_addr_const (file, x);
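/* The concrete syntaxes produced above, with register numbers invented for
   illustration:

     (reg 9)                          ->  0(9)
     (plus (reg 9) (const_int 16))    ->  16(9)
     (plus (reg 9) (reg 10))          ->  9,10   (operands swapped if op0 is r0)
     (lo_sum (reg 9) (symbol_ref x))  ->  x@l(9)        [ELF]
     pc-relative symbol_ref x         ->  x@pcrel, or x@got@pcrel if non-local */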
14277 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14279 bool
14280 rs6000_output_addr_const_extra (FILE *file, rtx x)
14282 if (GET_CODE (x) == UNSPEC)
14283 switch (XINT (x, 1))
14285 case UNSPEC_TOCREL:
14286 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14287 && REG_P (XVECEXP (x, 0, 1))
14288 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14289 output_addr_const (file, XVECEXP (x, 0, 0));
14290 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14292 if (INTVAL (tocrel_offset_oac) >= 0)
14293 fprintf (file, "+");
14294 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14296 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14298 putc ('-', file);
14299 assemble_name (file, toc_label_name);
14300 need_toc_init = 1;
14302 else if (TARGET_ELF)
14303 fputs ("@toc", file);
14304 return true;
14306 #if TARGET_MACHO
14307 case UNSPEC_MACHOPIC_OFFSET:
14308 output_addr_const (file, XVECEXP (x, 0, 0));
14309 putc ('-', file);
14310 machopic_output_function_base_name (file);
14311 return true;
14312 #endif
14314 return false;
14317 /* Target hook for assembling integer objects. The PowerPC version has
14318 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14319 is defined. It also needs to handle DI-mode objects on 64-bit
14320 targets. */
14322 static bool
14323 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14325 #ifdef RELOCATABLE_NEEDS_FIXUP
14326 /* Special handling for SI values. */
14327 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14329 static int recurse = 0;
14331 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14332 the .fixup section. Since the TOC section is already relocated, we
14333 don't need to mark it here. We used to skip the text section, but it
14334 should never be valid for relocated addresses to be placed in the text
14335 section. */
14336 if (DEFAULT_ABI == ABI_V4
14337 && (TARGET_RELOCATABLE || flag_pic > 1)
14338 && in_section != toc_section
14339 && !recurse
14340 && !CONST_SCALAR_INT_P (x)
14341 && CONSTANT_P (x))
14343 char buf[256];
14345 recurse = 1;
14346 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14347 fixuplabelno++;
14348 ASM_OUTPUT_LABEL (asm_out_file, buf);
14349 fprintf (asm_out_file, "\t.long\t(");
14350 output_addr_const (asm_out_file, x);
14351 fprintf (asm_out_file, ")@fixup\n");
14352 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14353 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14354 fprintf (asm_out_file, "\t.long\t");
14355 assemble_name (asm_out_file, buf);
14356 fprintf (asm_out_file, "\n\t.previous\n");
14357 recurse = 0;
14358 return true;
14360 /* Remove initial .'s to turn a -mcall-aixdesc function
14361 address into the address of the descriptor, not the function
14362 itself. */
14363 else if (SYMBOL_REF_P (x)
14364 && XSTR (x, 0)[0] == '.'
14365 && DEFAULT_ABI == ABI_AIX)
14367 const char *name = XSTR (x, 0);
14368 while (*name == '.')
14369 name++;
14371 fprintf (asm_out_file, "\t.long\t%s\n", name);
14372 return true;
14375 #endif /* RELOCATABLE_NEEDS_FIXUP */
14376 return default_assemble_integer (x, size, aligned_p);
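/* For -mrelocatable, the fixup path above emits assembly of roughly this
   shape for each marked word (the label number is invented):

     .LCP42:
             .long (sym)@fixup
             .section ".fixup","aw"
             .align 2
             .long .LCP42
             .previous

   so the startup code can find and relocate the word at .LCP42. */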
14379 /* Return a template string for assembly to emit when making an
14380 external call. FUNOP is the call mem argument operand number. */
14382 static const char *
14383 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14385 /* -Wformat-overflow workaround, without which gcc thinks that %u
14386 might produce 10 digits. */
14387 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14389 char arg[12];
14390 arg[0] = 0;
14391 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14393 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14394 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14395 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14396 sprintf (arg, "(%%&@tlsld)");
14399 /* The magic 32768 offset here corresponds to the offset of
14400 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14401 char z[11];
14402 sprintf (z, "%%z%u%s", funop,
14403 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14404 ? "+32768" : ""));
14406 static char str[32]; /* 1 spare */
14407 if (rs6000_pcrel_p ())
14408 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14409 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14410 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14411 sibcall ? "" : "\n\tnop");
14412 else if (DEFAULT_ABI == ABI_V4)
14413 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14414 flag_pic ? "@plt" : "");
14415 #if TARGET_MACHO
14416 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14417 else if (DEFAULT_ABI == ABI_DARWIN)
14419 /* The cookie is in operand func+2. */
14420 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14421 int cookie = INTVAL (operands[funop + 2]);
14422 if (cookie & CALL_LONG)
14424 tree funname = get_identifier (XSTR (operands[funop], 0));
14425 tree labelname = get_prev_label (funname);
14426 gcc_checking_assert (labelname && !sibcall);
14428 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14429 instruction will reach 'foo', otherwise link as 'bl L42'".
14430 "L42" should be a 'branch island', that will do a far jump to
14431 'foo'. Branch islands are generated in
14432 macho_branch_islands(). */
14433 sprintf (str, "jbsr %%z%u,%.10s", funop,
14434 IDENTIFIER_POINTER (labelname));
14436 else
14437 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14438 after the call. */
14439 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14441 #endif
14442 else
14443 gcc_unreachable ();
14444 return str;
14447 const char *
14448 rs6000_call_template (rtx *operands, unsigned int funop)
14450 return rs6000_call_template_1 (operands, funop, false);
14453 const char *
14454 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14456 return rs6000_call_template_1 (operands, funop, true);
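/* Representative expansions of the call templates above for a direct call
   to "foo":

     AIX / ELFv2:              bl foo
                               nop            # patched to a TOC restore by
                                              # the linker when needed
     pcrel (power10 style):    bl foo@notoc
     SysV, -fpic/-fPIC:        bl foo@plt
     SysV secure PLT, -fPIC:   bl foo+32768@plt
     Darwin -mlongcall:        jbsr foo,L42   # L42 names a branch island */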
14459 /* As above, for indirect calls. */
14461 static const char *
14462 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14463 bool sibcall)
14465 /* -Wformat-overflow workaround, without which gcc thinks that %u
14466 might produce 10 digits. Note that -Wformat-overflow will not
14467 currently warn here for str[], so do not rely on a warning to
14468 ensure str[] is correctly sized. */
14469 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14471 /* Currently, funop is either 0 or 1. The maximum string is always
14472 a !speculate 64-bit __tls_get_addr call.
14474 ABI_ELFv2, pcrel:
14475 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14476 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14477 . 9 crset 2\n\t
14478 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14479 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14480 . 8 beq%T1l-
14481 .---
14482 .142
14484 ABI_AIX:
14485 . 9 ld 2,%3\n\t
14486 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14487 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14488 . 9 crset 2\n\t
14489 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14490 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14491 . 10 beq%T1l-\n\t
14492 . 10 ld 2,%4(1)
14493 .---
14494 .151
14496 ABI_ELFv2:
14497 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14498 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14499 . 9 crset 2\n\t
14500 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14501 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14502 . 10 beq%T1l-\n\t
14503 . 10 ld 2,%3(1)
14504 .---
14505 .142
14507 ABI_V4:
14508 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14509 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14510 . 9 crset 2\n\t
14511 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14512 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14513 . 8 beq%T1l-
14514 .---
14515 .141 */
14516 static char str[160]; /* 8 spare */
14517 char *s = str;
14518 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14520 if (DEFAULT_ABI == ABI_AIX)
14521 s += sprintf (s,
14522 "l%s 2,%%%u\n\t",
14523 ptrload, funop + 3);
14525 /* We don't need the extra code to stop indirect call speculation if
14526 calling via LR. */
14527 bool speculate = (TARGET_MACHO
14528 || rs6000_speculate_indirect_jumps
14529 || (REG_P (operands[funop])
14530 && REGNO (operands[funop]) == LR_REGNO));
14532 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14534 const char *rel64 = TARGET_64BIT ? "64" : "";
14535 char tls[29];
14536 tls[0] = 0;
14537 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14539 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14540 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14541 rel64, funop + 1);
14542 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14543 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14544 rel64);
14547 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14548 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14549 && flag_pic == 2 ? "+32768" : "");
14550 if (!speculate)
14552 s += sprintf (s,
14553 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14554 tls, rel64, notoc, funop, addend);
14555 s += sprintf (s, "crset 2\n\t");
14557 s += sprintf (s,
14558 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14559 tls, rel64, notoc, funop, addend);
14561 else if (!speculate)
14562 s += sprintf (s, "crset 2\n\t");
14564 if (rs6000_pcrel_p ())
14566 if (speculate)
14567 sprintf (s, "b%%T%ul", funop);
14568 else
14569 sprintf (s, "beq%%T%ul-", funop);
14571 else if (DEFAULT_ABI == ABI_AIX)
14573 if (speculate)
14574 sprintf (s,
14575 "b%%T%ul\n\t"
14576 "l%s 2,%%%u(1)",
14577 funop, ptrload, funop + 4);
14578 else
14579 sprintf (s,
14580 "beq%%T%ul-\n\t"
14581 "l%s 2,%%%u(1)",
14582 funop, ptrload, funop + 4);
14584 else if (DEFAULT_ABI == ABI_ELFv2)
14586 if (speculate)
14587 sprintf (s,
14588 "b%%T%ul\n\t"
14589 "l%s 2,%%%u(1)",
14590 funop, ptrload, funop + 3);
14591 else
14592 sprintf (s,
14593 "beq%%T%ul-\n\t"
14594 "l%s 2,%%%u(1)",
14595 funop, ptrload, funop + 3);
14597 else
14599 if (speculate)
14600 sprintf (s,
14601 "b%%T%u%s",
14602 funop, sibcall ? "" : "l");
14603 else
14604 sprintf (s,
14605 "beq%%T%u%s-%s",
14606 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14608 return str;
14611 const char *
14612 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14614 return rs6000_indirect_call_template_1 (operands, funop, false);
14617 const char *
14618 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14620 return rs6000_indirect_call_template_1 (operands, funop, true);
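/* On ELFv2 with indirect-branch speculation allowed, the template above
   expands to roughly

     bctrl
     ld 2,24(1)     # reload our TOC pointer from its stack save slot

   whereas the !speculate variant emits "crset 2" plus the PLTSEQ relocs
   first and uses the hinted conditional form "beqctrl-" so the branch
   target is not speculated. */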
14623 #if HAVE_AS_PLTSEQ
14624 /* Output indirect call insns. WHICH identifies the type of sequence. */
14625 const char *
14626 rs6000_pltseq_template (rtx *operands, int which)
14628 const char *rel64 = TARGET_64BIT ? "64" : "";
14629 char tls[30];
14630 tls[0] = 0;
14631 if (GET_CODE (operands[3]) == UNSPEC)
14633 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14634 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14635 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14636 off, rel64);
14637 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14638 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14639 off, rel64);
14642 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14643 static char str[96]; /* 10 spare */
14644 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14645 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14646 && flag_pic == 2 ? "+32768" : "");
14647 switch (which)
14649 case RS6000_PLTSEQ_TOCSAVE:
14650 sprintf (str,
14651 "st%s\n\t"
14652 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14653 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14654 tls, rel64);
14655 break;
14656 case RS6000_PLTSEQ_PLT16_HA:
14657 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14658 sprintf (str,
14659 "lis %%0,0\n\t"
14660 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14661 tls, off, rel64);
14662 else
14663 sprintf (str,
14664 "addis %%0,%%1,0\n\t"
14665 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14666 tls, off, rel64, addend);
14667 break;
14668 case RS6000_PLTSEQ_PLT16_LO:
14669 sprintf (str,
14670 "l%s %%0,0(%%1)\n\t"
14671 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14672 TARGET_64BIT ? "d" : "wz",
14673 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14674 break;
14675 case RS6000_PLTSEQ_MTCTR:
14676 sprintf (str,
14677 "mtctr %%1\n\t"
14678 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14679 tls, rel64, addend);
14680 break;
14681 case RS6000_PLTSEQ_PLT_PCREL34:
14682 sprintf (str,
14683 "pl%s %%0,0(0),1\n\t"
14684 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14685 TARGET_64BIT ? "d" : "wz",
14686 tls, rel64);
14687 break;
14688 default:
14689 gcc_unreachable ();
14691 return str;
14693 #endif
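/* Taken together, the PLT16_HA/PLT16_LO/MTCTR templates above let 32-bit
   secure-PLT code inline a PLT call roughly as follows (register numbers
   invented; each instruction carries the .reloc shown in its template):

     addis 12,30,0      # R_PPC_PLT16_HA
     lwz   12,0(12)     # R_PPC_PLT16_LO
     mtctr 12           # R_PPC_PLTSEQ
     bctrl              # R_PPC_PLTCALL

   which the linker can then keep, or relax to a direct call. */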
14695 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14696 /* Emit an assembler directive to set symbol visibility for DECL to
14697 VISIBILITY_TYPE. */
14699 static void
14700 rs6000_assemble_visibility (tree decl, int vis)
14702 if (TARGET_XCOFF)
14703 return;
14705 /* Functions need to have their entry point symbol visibility set as
14706 well as their descriptor symbol visibility. */
14707 if (DEFAULT_ABI == ABI_AIX
14708 && DOT_SYMBOLS
14709 && TREE_CODE (decl) == FUNCTION_DECL)
14711 static const char * const visibility_types[] = {
14712 NULL, "protected", "hidden", "internal"
14715 const char *name, *type;
14717 name = ((* targetm.strip_name_encoding)
14718 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14719 type = visibility_types[vis];
14721 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14722 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14724 else
14725 default_assemble_visibility (decl, vis);
14727 #endif
14729 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14730 entry. If RECORD_P is true and the target supports named sections,
14731 the location of the NOPs will be recorded in a special object section
14732 called "__patchable_function_entries". This routine may be called
14733 twice per function to put NOPs before and after the function
14734 entry. */
14736 void
14737 rs6000_print_patchable_function_entry (FILE *file,
14738 unsigned HOST_WIDE_INT patch_area_size,
14739 bool record_p)
14741 unsigned int flags = SECTION_WRITE | SECTION_RELRO;
14742 /* When the .opd section is emitted, the function symbol is emitted into
14743 the .opd section, while the patchable area emitted by
14744 default_print_patchable_function_entry_1 goes into the function section.
14745 Don't use SECTION_LINK_ORDER in that case. */
14746 if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
14747 && HAVE_GAS_SECTION_LINK_ORDER)
14748 flags |= SECTION_LINK_ORDER;
14749 default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
14750 flags);
14753 enum rtx_code
14754 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14756 /* Reversal of FP compares requires care -- an ordered compare
14757 becomes an unordered compare and vice versa. */
14758 if (mode == CCFPmode
14759 && (!flag_finite_math_only
14760 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14761 || code == UNEQ || code == LTGT))
14762 return reverse_condition_maybe_unordered (code);
14763 else
14764 return reverse_condition (code);
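/* Illustrative reversals. For CCFPmode with NaNs possible, GE becomes
   UNLT and LT becomes UNGE, so the reversed branch is also taken when the
   operands compare unordered. Integer compares, and FP compares under
   -ffinite-math-only (with none of the explicitly unordered codes),
   reverse plainly, e.g. GE -> LT. */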
14767 /* Generate a compare for CODE. Return a brand-new rtx that
14768 represents the result of the compare. */
14770 static rtx
14771 rs6000_generate_compare (rtx cmp, machine_mode mode)
14773 machine_mode comp_mode;
14774 rtx compare_result;
14775 enum rtx_code code = GET_CODE (cmp);
14776 rtx op0 = XEXP (cmp, 0);
14777 rtx op1 = XEXP (cmp, 1);
14779 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14780 comp_mode = CCmode;
14781 else if (FLOAT_MODE_P (mode))
14782 comp_mode = CCFPmode;
14783 else if (code == GTU || code == LTU
14784 || code == GEU || code == LEU)
14785 comp_mode = CCUNSmode;
14786 else if ((code == EQ || code == NE)
14787 && unsigned_reg_p (op0)
14788 && (unsigned_reg_p (op1)
14789 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14790 /* These are unsigned values, perhaps there will be a later
14791 ordering compare that can be shared with this one. */
14792 comp_mode = CCUNSmode;
14793 else
14794 comp_mode = CCmode;
14796 /* If we have an unsigned compare, make sure we don't have a signed value as
14797 an immediate. */
14798 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14799 && INTVAL (op1) < 0)
14801 op0 = copy_rtx_if_shared (op0);
14802 op1 = force_reg (GET_MODE (op0), op1);
14803 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14806 /* First, the compare. */
14807 compare_result = gen_reg_rtx (comp_mode);
14809 /* IEEE 128-bit support in VSX registers when we do not have hardware
14810 support. */
14811 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14813 rtx libfunc = NULL_RTX;
14814 bool check_nan = false;
14815 rtx dest;
14817 switch (code)
14819 case EQ:
14820 case NE:
14821 libfunc = optab_libfunc (eq_optab, mode);
14822 break;
14824 case GT:
14825 case GE:
14826 libfunc = optab_libfunc (ge_optab, mode);
14827 break;
14829 case LT:
14830 case LE:
14831 libfunc = optab_libfunc (le_optab, mode);
14832 break;
14834 case UNORDERED:
14835 case ORDERED:
14836 libfunc = optab_libfunc (unord_optab, mode);
14837 code = (code == UNORDERED) ? NE : EQ;
14838 break;
14840 case UNGE:
14841 case UNGT:
14842 check_nan = true;
14843 libfunc = optab_libfunc (ge_optab, mode);
14844 code = (code == UNGE) ? GE : GT;
14845 break;
14847 case UNLE:
14848 case UNLT:
14849 check_nan = true;
14850 libfunc = optab_libfunc (le_optab, mode);
14851 code = (code == UNLE) ? LE : LT;
14852 break;
14854 case UNEQ:
14855 case LTGT:
14856 check_nan = true;
14857 libfunc = optab_libfunc (eq_optab, mode);
14858 code = (code == UNEQ) ? EQ : NE;
14859 break;
14861 default:
14862 gcc_unreachable ();
14865 gcc_assert (libfunc);
14867 if (!check_nan)
14868 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14869 SImode, op0, mode, op1, mode);
14871 /* The library signals an exception for signalling NaNs, so we need to
14872 handle isgreater, etc. by first checking isordered. */
14873 else
14875 rtx ne_rtx, normal_dest, unord_dest;
14876 rtx unord_func = optab_libfunc (unord_optab, mode);
14877 rtx join_label = gen_label_rtx ();
14878 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14879 rtx unord_cmp = gen_reg_rtx (comp_mode);
14882 /* Test for either value being a NaN. */
14883 gcc_assert (unord_func);
14884 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14885 SImode, op0, mode, op1, mode);
14887 /* Set the value to 1 if either input is a NaN, and jump to the join
14888 label. */
14889 dest = gen_reg_rtx (SImode);
14890 emit_move_insn (dest, const1_rtx);
14891 emit_insn (gen_rtx_SET (unord_cmp,
14892 gen_rtx_COMPARE (comp_mode, unord_dest,
14893 const0_rtx)));
14895 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14896 emit_jump_insn (gen_rtx_SET (pc_rtx,
14897 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14898 join_ref,
14899 pc_rtx)));
14901 /* Do the normal comparison, knowing that the values are not
14902 NaNs. */
14903 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14904 SImode, op0, mode, op1, mode);
14906 emit_insn (gen_cstoresi4 (dest,
14907 gen_rtx_fmt_ee (code, SImode, normal_dest,
14908 const0_rtx),
14909 normal_dest, const0_rtx));
14911 /* Join the NaN and non-NaN paths. Compare dest against 0. */
14912 emit_label (join_label);
14913 code = NE;
14916 emit_insn (gen_rtx_SET (compare_result,
14917 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14920 else
14922 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14923 CLOBBERs to match cmptf_internal2 pattern. */
14924 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14925 && FLOAT128_IBM_P (GET_MODE (op0))
14926 && TARGET_HARD_FLOAT)
14927 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14928 gen_rtvec (10,
14929 gen_rtx_SET (compare_result,
14930 gen_rtx_COMPARE (comp_mode, op0, op1)),
14931 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14932 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14933 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14934 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14935 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14936 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14937 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14938 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14939 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14940 else if (GET_CODE (op1) == UNSPEC
14941 && XINT (op1, 1) == UNSPEC_SP_TEST)
14943 rtx op1b = XVECEXP (op1, 0, 0);
14944 comp_mode = CCEQmode;
14945 compare_result = gen_reg_rtx (CCEQmode);
14946 if (TARGET_64BIT)
14947 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14948 else
14949 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14951 else
14952 emit_insn (gen_rtx_SET (compare_result,
14953 gen_rtx_COMPARE (comp_mode, op0, op1)));
14956 validate_condition_mode (code, GET_MODE (compare_result));
14958 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
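/* For illustration (a sketch; the libcall names are libgcc's, cited as
   examples): without -mfloat128-hardware, a KFmode 'a <= b' becomes a
   call such as __lekf2 (a, b) whose integer result is compared against
   zero, while the unordered codes (UNGE, UNLE, UNEQ, LTGT) first call
   __unordkf2 so that the ordered libcall, which may raise an exception
   on a signalling NaN, is skipped whenever a NaN is present.  */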
14962 /* Return the diagnostic message string if the binary operation OP is
14963 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14965 static const char*
14966 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14967 const_tree type1,
14968 const_tree type2)
14970 machine_mode mode1 = TYPE_MODE (type1);
14971 machine_mode mode2 = TYPE_MODE (type2);
14973 /* For complex modes, use the inner type. */
14974 if (COMPLEX_MODE_P (mode1))
14975 mode1 = GET_MODE_INNER (mode1);
14977 if (COMPLEX_MODE_P (mode2))
14978 mode2 = GET_MODE_INNER (mode2);
14980 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14981 double to intermix unless -mfloat128-convert. */
14982 if (mode1 == mode2)
14983 return NULL;
14985 if (!TARGET_FLOAT128_CVT)
14987 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
14988 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
14989 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
14990 "point types");
14993 return NULL;
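/* For illustration, the mix rejected above (hypothetical user code):

     __float128 a;          (IEEE 128-bit binary float)
     __ibm128   b;          (IBM extended double-double)
     ... a + b ...          (error unless -mfloat128-convert)

   With -mfloat128-convert, one operand is converted first so the
   operation happens in a single 128-bit format.  */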
14997 /* Expand floating point conversion to/from __float128 and __ibm128. */
14999 void
15000 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15002 machine_mode dest_mode = GET_MODE (dest);
15003 machine_mode src_mode = GET_MODE (src);
15004 convert_optab cvt = unknown_optab;
15005 bool do_move = false;
15006 rtx libfunc = NULL_RTX;
15007 rtx dest2;
15008 typedef rtx (*rtx_2func_t) (rtx, rtx);
15009 rtx_2func_t hw_convert = (rtx_2func_t)0;
15010 size_t kf_or_tf;
15012 struct hw_conv_t {
15013 rtx_2func_t from_df;
15014 rtx_2func_t from_sf;
15015 rtx_2func_t from_si_sign;
15016 rtx_2func_t from_si_uns;
15017 rtx_2func_t from_di_sign;
15018 rtx_2func_t from_di_uns;
15019 rtx_2func_t to_df;
15020 rtx_2func_t to_sf;
15021 rtx_2func_t to_si_sign;
15022 rtx_2func_t to_si_uns;
15023 rtx_2func_t to_di_sign;
15024 rtx_2func_t to_di_uns;
15025 } hw_conversions[2] = {
15026 /* conversions to/from KFmode */
15028 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15029 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15030 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15031 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15032 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15033 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15034 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15035 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15036 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15037 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15038 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15039 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15044 /* conversions to/from TFmode */
15044 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15045 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15046 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15047 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15048 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15049 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15050 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15051 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15052 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15053 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15054 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15055 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15059 if (dest_mode == src_mode)
15060 gcc_unreachable ();
15062 /* Eliminate memory operations. */
15063 if (MEM_P (src))
15064 src = force_reg (src_mode, src);
15066 if (MEM_P (dest))
15068 rtx tmp = gen_reg_rtx (dest_mode);
15069 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15070 rs6000_emit_move (dest, tmp, dest_mode);
15071 return;
15074 /* Convert to IEEE 128-bit floating point. */
15075 if (FLOAT128_IEEE_P (dest_mode))
15077 if (dest_mode == KFmode)
15078 kf_or_tf = 0;
15079 else if (dest_mode == TFmode)
15080 kf_or_tf = 1;
15081 else
15082 gcc_unreachable ();
15084 switch (src_mode)
15086 case E_DFmode:
15087 cvt = sext_optab;
15088 hw_convert = hw_conversions[kf_or_tf].from_df;
15089 break;
15091 case E_SFmode:
15092 cvt = sext_optab;
15093 hw_convert = hw_conversions[kf_or_tf].from_sf;
15094 break;
15096 case E_KFmode:
15097 case E_IFmode:
15098 case E_TFmode:
15099 if (FLOAT128_IBM_P (src_mode))
15100 cvt = sext_optab;
15101 else
15102 do_move = true;
15103 break;
15105 case E_SImode:
15106 if (unsigned_p)
15108 cvt = ufloat_optab;
15109 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15111 else
15113 cvt = sfloat_optab;
15114 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15116 break;
15118 case E_DImode:
15119 if (unsigned_p)
15121 cvt = ufloat_optab;
15122 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15124 else
15126 cvt = sfloat_optab;
15127 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15129 break;
15131 default:
15132 gcc_unreachable ();
15136 /* Convert from IEEE 128-bit floating point. */
15137 else if (FLOAT128_IEEE_P (src_mode))
15139 if (src_mode == KFmode)
15140 kf_or_tf = 0;
15141 else if (src_mode == TFmode)
15142 kf_or_tf = 1;
15143 else
15144 gcc_unreachable ();
15146 switch (dest_mode)
15148 case E_DFmode:
15149 cvt = trunc_optab;
15150 hw_convert = hw_conversions[kf_or_tf].to_df;
15151 break;
15153 case E_SFmode:
15154 cvt = trunc_optab;
15155 hw_convert = hw_conversions[kf_or_tf].to_sf;
15156 break;
15158 case E_KFmode:
15159 case E_IFmode:
15160 case E_TFmode:
15161 if (FLOAT128_IBM_P (dest_mode))
15162 cvt = trunc_optab;
15163 else
15164 do_move = true;
15165 break;
15167 case E_SImode:
15168 if (unsigned_p)
15170 cvt = ufix_optab;
15171 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15173 else
15175 cvt = sfix_optab;
15176 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15178 break;
15180 case E_DImode:
15181 if (unsigned_p)
15183 cvt = ufix_optab;
15184 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15186 else
15188 cvt = sfix_optab;
15189 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15191 break;
15193 default:
15194 gcc_unreachable ();
15198 /* Both IBM format. */
15199 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15200 do_move = true;
15202 else
15203 gcc_unreachable ();
15205 /* Handle conversion between TFmode/KFmode/IFmode. */
15206 if (do_move)
15207 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15209 /* Handle conversion if we have hardware support. */
15210 else if (TARGET_FLOAT128_HW && hw_convert)
15211 emit_insn ((hw_convert) (dest, src));
15213 /* Call an external function to do the conversion. */
15214 else if (cvt != unknown_optab)
15216 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15217 gcc_assert (libfunc != NULL_RTX);
15219 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15220 src, src_mode);
15222 gcc_assert (dest2 != NULL_RTX);
15223 if (!rtx_equal_p (dest, dest2))
15224 emit_move_insn (dest, dest2);
15227 else
15228 gcc_unreachable ();
15230 return;
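/* For illustration (a sketch): converting a signed DImode value to
   KFmode picks hw_conversions[0].from_di_sign, i.e. gen_float_kfdi2_hw,
   when TARGET_FLOAT128_HW; otherwise it falls back to the sfloat_optab
   libcall (e.g. __floatdikf from libgcc).  */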
15234 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15235 can be used as that dest register. Return the dest register. */
15238 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15240 if (op2 == const0_rtx)
15241 return op1;
15243 if (GET_CODE (scratch) == SCRATCH)
15244 scratch = gen_reg_rtx (mode);
15246 if (logical_operand (op2, mode))
15247 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15248 else
15249 emit_insn (gen_rtx_SET (scratch,
15250 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15252 return scratch;
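/* For illustration (a sketch, not from the original source): the helper
   above turns OP1 == OP2 into a test of SCRATCH against zero,

     scratch = op1 ^ op2        (OP2 a register or logical immediate)
     scratch = op1 + (-op2)     (otherwise)

   and the caller then compares SCRATCH with zero instead of comparing
   OP1 with OP2 directly.  */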
15255 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15256 requires this. The result is mode MODE. */
15258 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15260 rtx cond[2];
15261 int n = 0;
15262 if (code == LTGT || code == LE || code == UNLT)
15263 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15264 if (code == LTGT || code == GE || code == UNGT)
15265 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15266 if (code == LE || code == GE || code == UNEQ)
15267 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15268 if (code == UNLT || code == UNGT || code == UNEQ)
15269 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15271 gcc_assert (n == 2);
15273 rtx cc = gen_reg_rtx (CCEQmode);
15274 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15275 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15277 return cc;
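/* For illustration (a rough sketch of the resulting assembly): LE with
   NaNs honored is LT | EQ, so the code above ORs two bits of the CR
   field set by the compare,

     fcmpu cr0,f1,f2
     cror  2,0,2          (eq-bit := lt-bit | eq-bit)

   after which a single beq/bne tests the combined CCEQ result.  */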
15280 void
15281 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15283 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15284 rtx_code cond_code = GET_CODE (condition_rtx);
15286 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15287 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15289 else if (cond_code == NE
15290 || cond_code == GE || cond_code == LE
15291 || cond_code == GEU || cond_code == LEU
15292 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15294 rtx not_result = gen_reg_rtx (CCEQmode);
15295 rtx not_op, rev_cond_rtx;
15296 machine_mode cc_mode;
15298 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15300 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15301 SImode, XEXP (condition_rtx, 0), const0_rtx);
15302 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15303 emit_insn (gen_rtx_SET (not_result, not_op));
15304 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15307 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15308 if (op_mode == VOIDmode)
15309 op_mode = GET_MODE (XEXP (operands[1], 1));
15311 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15313 PUT_MODE (condition_rtx, DImode);
15314 convert_move (operands[0], condition_rtx, 0);
15316 else
15318 PUT_MODE (condition_rtx, SImode);
15319 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15323 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison given by OPERANDS[0]. */
15325 void
15326 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15328 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15329 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15330 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15331 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15334 /* Return the string to output a conditional branch to LABEL, which is
15335 the operand template of the label, or NULL if the branch is really a
15336 conditional return.
15338 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15339 condition code register and its mode specifies what kind of
15340 comparison we made.
15342 REVERSED is nonzero if we should reverse the sense of the comparison.
15344 INSN is the insn. */
15346 char *
15347 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15349 static char string[64];
15350 enum rtx_code code = GET_CODE (op);
15351 rtx cc_reg = XEXP (op, 0);
15352 machine_mode mode = GET_MODE (cc_reg);
15353 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15354 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15355 int really_reversed = reversed ^ need_longbranch;
15356 char *s = string;
15357 const char *ccode;
15358 const char *pred;
15359 rtx note;
15361 validate_condition_mode (code, mode);
15363 /* Work out which way this really branches. We could use
15364 reverse_condition_maybe_unordered here always but this
15365 makes the resulting assembler clearer. */
15366 if (really_reversed)
15368 /* Reversal of FP compares needs care -- an ordered compare
15369 becomes an unordered compare and vice versa. */
15370 if (mode == CCFPmode)
15371 code = reverse_condition_maybe_unordered (code);
15372 else
15373 code = reverse_condition (code);
15376 switch (code)
15378 /* Not all of these are actually distinct opcodes, but
15379 we distinguish them for clarity of the resulting assembler. */
15380 case NE: case LTGT:
15381 ccode = "ne"; break;
15382 case EQ: case UNEQ:
15383 ccode = "eq"; break;
15384 case GE: case GEU:
15385 ccode = "ge"; break;
15386 case GT: case GTU: case UNGT:
15387 ccode = "gt"; break;
15388 case LE: case LEU:
15389 ccode = "le"; break;
15390 case LT: case LTU: case UNLT:
15391 ccode = "lt"; break;
15392 case UNORDERED: ccode = "un"; break;
15393 case ORDERED: ccode = "nu"; break;
15394 case UNGE: ccode = "nl"; break;
15395 case UNLE: ccode = "ng"; break;
15396 default:
15397 gcc_unreachable ();
15400 /* Maybe we have a guess as to how likely the branch is. */
15401 pred = "";
15402 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15403 if (note != NULL_RTX)
15405 /* PROB is the difference from 50%. */
15406 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15407 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15409 /* Only hint for highly probable/improbable branches on newer cpus when
15410 we have real profile data, as static prediction overrides processor
15411 dynamic prediction. For older cpus we may as well always hint, but
15412 assume not taken for branches that are very close to 50% as a
15413 mispredicted taken branch is more expensive than a
15414 mispredicted not-taken branch. */
15415 if (rs6000_always_hint
15416 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15417 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15418 && br_prob_note_reliable_p (note)))
15420 if (abs (prob) > REG_BR_PROB_BASE / 20
15421 && ((prob > 0) ^ need_longbranch))
15422 pred = "+";
15423 else
15424 pred = "-";
15428 if (label == NULL)
15429 s += sprintf (s, "b%slr%s ", ccode, pred);
15430 else
15431 s += sprintf (s, "b%s%s ", ccode, pred);
15433 /* We need to escape any '%' characters in the reg_names string.
15434 Assume they'd only be the first character.... */
15435 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15436 *s++ = '%';
15437 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15439 if (label != NULL)
15441 /* If the branch distance was too far, we may have to use an
15442 unconditional branch to go the distance. */
15443 if (need_longbranch)
15444 s += sprintf (s, ",$+8\n\tb %s", label);
15445 else
15446 s += sprintf (s, ",%s", label);
15449 return string;
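/* For illustration, some strings this can build (sketches only):

     "beq 0,.L23"            plain branch on CR0
     "bne- 7,.L8"            hinted not-taken, CR7
     "beqlr 0"               conditional return (LABEL == NULL)
     "bge 0,$+8\n\tb .L99"   long-branch form: a reversed short branch
                             over an unconditional b

   The '+'/'-' suffix is the static prediction hint derived from the
   REG_BR_PROB note.  */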
15452 /* Return insn for VSX or Altivec comparisons. */
15454 static rtx
15455 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15457 rtx mask;
15458 machine_mode mode = GET_MODE (op0);
15460 switch (code)
15462 default:
15463 break;
15465 case GE:
15466 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15467 return NULL_RTX;
15468 /* FALLTHRU */
15470 case EQ:
15471 case GT:
15472 case GTU:
15473 case ORDERED:
15474 case UNORDERED:
15475 case UNEQ:
15476 case LTGT:
15477 mask = gen_reg_rtx (mode);
15478 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15479 return mask;
15482 return NULL_RTX;
15485 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15486 DMODE is expected destination mode. This is a recursive function. */
15488 static rtx
15489 rs6000_emit_vector_compare (enum rtx_code rcode,
15490 rtx op0, rtx op1,
15491 machine_mode dmode)
15493 rtx mask;
15494 bool swap_operands = false;
15495 bool try_again = false;
15497 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15498 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15500 /* See if the comparison works as is. */
15501 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15502 if (mask)
15503 return mask;
15505 switch (rcode)
15507 case LT:
15508 rcode = GT;
15509 swap_operands = true;
15510 try_again = true;
15511 break;
15512 case LTU:
15513 rcode = GTU;
15514 swap_operands = true;
15515 try_again = true;
15516 break;
15517 case NE:
15518 case UNLE:
15519 case UNLT:
15520 case UNGE:
15521 case UNGT:
15522 /* Invert condition and try again.
15523 e.g., A != B becomes ~(A==B). */
15525 enum rtx_code rev_code;
15526 enum insn_code nor_code;
15527 rtx mask2;
15529 rev_code = reverse_condition_maybe_unordered (rcode);
15530 if (rev_code == UNKNOWN)
15531 return NULL_RTX;
15533 nor_code = optab_handler (one_cmpl_optab, dmode);
15534 if (nor_code == CODE_FOR_nothing)
15535 return NULL_RTX;
15537 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15538 if (!mask2)
15539 return NULL_RTX;
15541 mask = gen_reg_rtx (dmode);
15542 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15543 return mask;
15545 break;
15546 case GE:
15547 case GEU:
15548 case LE:
15549 case LEU:
15550 /* Try GT/GTU/LT/LTU OR EQ */
15552 rtx c_rtx, eq_rtx;
15553 enum insn_code ior_code;
15554 enum rtx_code new_code;
15556 switch (rcode)
15558 case GE:
15559 new_code = GT;
15560 break;
15562 case GEU:
15563 new_code = GTU;
15564 break;
15566 case LE:
15567 new_code = LT;
15568 break;
15570 case LEU:
15571 new_code = LTU;
15572 break;
15574 default:
15575 gcc_unreachable ();
15578 ior_code = optab_handler (ior_optab, dmode);
15579 if (ior_code == CODE_FOR_nothing)
15580 return NULL_RTX;
15582 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15583 if (!c_rtx)
15584 return NULL_RTX;
15586 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15587 if (!eq_rtx)
15588 return NULL_RTX;
15590 mask = gen_reg_rtx (dmode);
15591 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15592 return mask;
15594 break;
15595 default:
15596 return NULL_RTX;
15599 if (try_again)
15601 if (swap_operands)
15602 std::swap (op0, op1);
15604 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15605 if (mask)
15606 return mask;
15609 /* You only get two chances. */
15610 return NULL_RTX;
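/* For illustration, how the recursion above synthesizes codes that have
   no direct vcmp pattern:

     LT (a, b)   ->  GT (b, a)                    (swap operands)
     NE (a, b)   ->  one_cmpl (EQ (a, b))         (compare, then invert)
     GE (a, b)   ->  IOR (GT (a, b), EQ (a, b))   (integer GE/GEU/LE/LEU)

   so at most two inner compares plus one logical operation result.  */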
15613 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15614 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15615 operands for the relation operation COND. */
15618 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15619 rtx cond, rtx cc_op0, rtx cc_op1)
15621 machine_mode dest_mode = GET_MODE (dest);
15622 machine_mode mask_mode = GET_MODE (cc_op0);
15623 enum rtx_code rcode = GET_CODE (cond);
15624 machine_mode cc_mode = CCmode;
15625 rtx mask;
15626 rtx cond2;
15627 bool invert_move = false;
15629 if (VECTOR_UNIT_NONE_P (dest_mode))
15630 return 0;
15632 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15633 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15635 switch (rcode)
15637 /* Swap operands if we can, and fall back to doing the operation as
15638 specified, and doing a NOR to invert the test. */
15639 case NE:
15640 case UNLE:
15641 case UNLT:
15642 case UNGE:
15643 case UNGT:
15644 /* Invert condition and try again.
15645 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15646 invert_move = true;
15647 rcode = reverse_condition_maybe_unordered (rcode);
15648 if (rcode == UNKNOWN)
15649 return 0;
15650 break;
15652 case GE:
15653 case LE:
15654 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
15656 /* Invert condition to avoid compound test. */
15657 invert_move = true;
15658 rcode = reverse_condition (rcode);
15660 break;
15662 case GTU:
15663 case GEU:
15664 case LTU:
15665 case LEU:
15666 /* Mark unsigned tests with CCUNSmode. */
15667 cc_mode = CCUNSmode;
15669 /* Invert condition to avoid compound test if necessary. */
15670 if (rcode == GEU || rcode == LEU)
15672 invert_move = true;
15673 rcode = reverse_condition (rcode);
15675 break;
15677 default:
15678 break;
15681 /* Get the vector mask for the given relational operations. */
15682 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15684 if (!mask)
15685 return 0;
15687 if (invert_move)
15688 std::swap (op_true, op_false);
15690 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
15691 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15692 && (GET_CODE (op_true) == CONST_VECTOR
15693 || GET_CODE (op_false) == CONST_VECTOR))
15695 rtx constant_0 = CONST0_RTX (dest_mode);
15696 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15698 if (op_true == constant_m1 && op_false == constant_0)
15700 emit_move_insn (dest, mask);
15701 return 1;
15704 else if (op_true == constant_0 && op_false == constant_m1)
15706 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15707 return 1;
15710 /* If we can't use the vector comparison directly, perhaps we can use
15711 the mask for the true or false fields, instead of loading up a
15712 constant. */
15713 if (op_true == constant_m1)
15714 op_true = mask;
15716 if (op_false == constant_0)
15717 op_false = mask;
15720 if (!REG_P (op_true) && !SUBREG_P (op_true))
15721 op_true = force_reg (dest_mode, op_true);
15723 if (!REG_P (op_false) && !SUBREG_P (op_false))
15724 op_false = force_reg (dest_mode, op_false);
15726 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
15727 CONST0_RTX (dest_mode));
15728 emit_insn (gen_rtx_SET (dest,
15729 gen_rtx_IF_THEN_ELSE (dest_mode,
15730 cond2,
15731 op_true,
15732 op_false)));
15733 return 1;
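/* For illustration: with the -1/0 shortcut above, an integer-vector
   'd = (a == b) ? -1 : 0' is just the compare mask itself, and
   'd = (a == b) ? 0 : -1' is a single NOT of the mask, with no select
   instruction needed.  */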
15736 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to compute a
15737 maximum or minimum with "C" semantics.
15739 Unless you use -ffast-math, you can't use these instructions to replace
15740 conditions that implicitly reverse the condition because the comparison
15741 might generate a NaN or signed zero.
15743 I.e. the following can be replaced all of the time
15744 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15745 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15746 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15747 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15749 The following can be replaced only if -ffast-math is used:
15750 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15751 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15752 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15753 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15755 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15756 nonzero/true, FALSE_COND if it is zero/false.
15758 Return false if we can't generate the appropriate minimum or maximum, and
15759 true if we did emit the minimum or maximum. */
15761 static bool
15762 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15764 enum rtx_code code = GET_CODE (op);
15765 rtx op0 = XEXP (op, 0);
15766 rtx op1 = XEXP (op, 1);
15767 machine_mode compare_mode = GET_MODE (op0);
15768 machine_mode result_mode = GET_MODE (dest);
15769 bool max_p = false;
15771 if (result_mode != compare_mode)
15772 return false;
15774 if (code == GE || code == GT)
15775 max_p = true;
15776 else if (code == LE || code == LT)
15777 max_p = false;
15778 else
15779 return false;
15781 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15784 /* When NaNs and signed zeros are not honored, smax can be used for
15785 `op0 < op1 ? op1 : op0`, and smin can be used for
15786 `op0 > op1 ? op1 : op0`. */
15787 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15788 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15789 max_p = !max_p;
15791 else
15792 return false;
15794 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15795 return true;
15798 /* Possibly emit a floating point conditional move by generating a compare that
15799 sets a mask instruction and a XXSEL select instruction.
15801 Move TRUE_COND to DEST if OP of the operands of the last comparison is
15802 nonzero/true, FALSE_COND if it is zero/false.
15804 Return false if the operation cannot be generated, and true if we could
15805 generate the instruction. */
15807 static bool
15808 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15810 enum rtx_code code = GET_CODE (op);
15811 rtx op0 = XEXP (op, 0);
15812 rtx op1 = XEXP (op, 1);
15813 machine_mode compare_mode = GET_MODE (op0);
15814 machine_mode result_mode = GET_MODE (dest);
15815 rtx compare_rtx;
15816 rtx cmove_rtx;
15817 rtx clobber_rtx;
15819 if (!can_create_pseudo_p ())
15820 return false;
15822 /* We allow the comparison to be either SFmode/DFmode and the true/false
15823 condition to be either SFmode/DFmode. I.e. we allow:
15825 float a, b;
15826 double c, d, r;
15828 r = (a == b) ? c : d;
15830 and:
15832 double a, b;
15833 float c, d, r;
15835 r = (a == b) ? c : d;
15837 but we don't allow intermixing the IEEE 128-bit floating point types with
15838 the 32/64-bit scalar types. */
15840 if (!(compare_mode == result_mode
15841 || (compare_mode == SFmode && result_mode == DFmode)
15842 || (compare_mode == DFmode && result_mode == SFmode)))
15843 return false;
15845 switch (code)
15847 case EQ:
15848 case GE:
15849 case GT:
15850 break;
15852 case NE:
15853 case LT:
15854 case LE:
15855 code = swap_condition (code);
15856 std::swap (op0, op1);
15857 break;
15859 default:
15860 return false;
15863 /* Generate: [(parallel [(set (dest)
15864 (if_then_else (op (cmp1) (cmp2))
15865 (true)
15866 (false)))
15867 (clobber (scratch))])]. */
15869 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15870 cmove_rtx = gen_rtx_SET (dest,
15871 gen_rtx_IF_THEN_ELSE (result_mode,
15872 compare_rtx,
15873 true_cond,
15874 false_cond));
15876 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15877 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15878 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15880 return true;
15883 /* Helper function to return true if the target has instructions to do a
15884 compare and set mask instruction that can be used with XXSEL to implement a
15885 conditional move. It is also assumed that such a target also supports the
15886 "C" minimum and maximum instructions. */
15888 static bool
15889 have_compare_and_set_mask (machine_mode mode)
15891 switch (mode)
15893 case E_SFmode:
15894 case E_DFmode:
15895 return TARGET_P9_MINMAX;
15897 case E_KFmode:
15898 case E_TFmode:
15899 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
15901 default:
15902 break;
15905 return false;
15908 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
15909 operands of the last comparison is nonzero/true, FALSE_COND if it
15910 is zero/false. Return false if the hardware has no such operation. */
15912 bool
15913 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15915 enum rtx_code code = GET_CODE (op);
15916 rtx op0 = XEXP (op, 0);
15917 rtx op1 = XEXP (op, 1);
15918 machine_mode compare_mode = GET_MODE (op0);
15919 machine_mode result_mode = GET_MODE (dest);
15920 rtx temp;
15921 bool is_against_zero;
15923 /* These modes should always match. */
15924 if (GET_MODE (op1) != compare_mode
15925 /* In the isel case however, we can use a compare immediate, so
15926 op1 may be a small constant. */
15927 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15928 return false;
15929 if (GET_MODE (true_cond) != result_mode)
15930 return false;
15931 if (GET_MODE (false_cond) != result_mode)
15932 return false;
15934 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15935 instructions. */
15936 if (have_compare_and_set_mask (compare_mode)
15937 && have_compare_and_set_mask (result_mode))
15939 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15940 return true;
15942 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15943 return true;
15946 /* Don't allow using floating point comparisons for integer results for
15947 now. */
15948 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15949 return false;
15951 /* First, work out if the hardware can do this at all, or
15952 if it's too slow.... */
15953 if (!FLOAT_MODE_P (compare_mode))
15955 if (TARGET_ISEL)
15956 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15957 return false;
15960 is_against_zero = op1 == CONST0_RTX (compare_mode);
15962 /* A floating-point subtract might overflow, underflow, or produce
15963 an inexact result, thus changing the floating-point flags, so it
15964 can't be generated if we care about that. It's safe if one side
15965 of the construct is zero, since then no subtract will be
15966 generated. */
15967 if (SCALAR_FLOAT_MODE_P (compare_mode)
15968 && flag_trapping_math && ! is_against_zero)
15969 return false;
15971 /* Eliminate half of the comparisons by switching operands, this
15972 makes the remaining code simpler. */
15973 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15974 || code == LTGT || code == LT || code == UNLE)
15976 code = reverse_condition_maybe_unordered (code);
15977 temp = true_cond;
15978 true_cond = false_cond;
15979 false_cond = temp;
15982 /* UNEQ and LTGT take four instructions for a comparison with zero,
15983 it'll probably be faster to use a branch here too. */
15984 if (code == UNEQ && HONOR_NANS (compare_mode))
15985 return false;
15987 /* We're going to try to implement comparisons by performing
15988 a subtract, then comparing against zero. Unfortunately,
15989 Inf - Inf is NaN which is not zero, and so if we don't
15990 know that the operand is finite and the comparison
15991 would treat EQ different to UNORDERED, we can't do it. */
15992 if (HONOR_INFINITIES (compare_mode)
15993 && code != GT && code != UNGE
15994 && (!CONST_DOUBLE_P (op1)
15995 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15996 /* Constructs of the form (a OP b ? a : b) are safe. */
15997 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15998 || (! rtx_equal_p (op0, true_cond)
15999 && ! rtx_equal_p (op1, true_cond))))
16000 return false;
16002 /* At this point we know we can use fsel. */
16004 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16005 is no fsel instruction. */
16006 if (compare_mode != SFmode && compare_mode != DFmode)
16007 return false;
16009 /* Reduce the comparison to a comparison against zero. */
16010 if (! is_against_zero)
16012 temp = gen_reg_rtx (compare_mode);
16013 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16014 op0 = temp;
16015 op1 = CONST0_RTX (compare_mode);
16018 /* If we don't care about NaNs we can reduce some of the comparisons
16019 down to faster ones. */
16020 if (! HONOR_NANS (compare_mode))
16021 switch (code)
16023 case GT:
16024 code = LE;
16025 temp = true_cond;
16026 true_cond = false_cond;
16027 false_cond = temp;
16028 break;
16029 case UNGE:
16030 code = GE;
16031 break;
16032 case UNEQ:
16033 code = EQ;
16034 break;
16035 default:
16036 break;
16039 /* Now, reduce everything down to a GE. */
16040 switch (code)
16042 case GE:
16043 break;
16045 case LE:
16046 temp = gen_reg_rtx (compare_mode);
16047 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16048 op0 = temp;
16049 break;
16051 case ORDERED:
16052 temp = gen_reg_rtx (compare_mode);
16053 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16054 op0 = temp;
16055 break;
16057 case EQ:
16058 temp = gen_reg_rtx (compare_mode);
16059 emit_insn (gen_rtx_SET (temp,
16060 gen_rtx_NEG (compare_mode,
16061 gen_rtx_ABS (compare_mode, op0))));
16062 op0 = temp;
16063 break;
16065 case UNGE:
16066 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16067 temp = gen_reg_rtx (result_mode);
16068 emit_insn (gen_rtx_SET (temp,
16069 gen_rtx_IF_THEN_ELSE (result_mode,
16070 gen_rtx_GE (VOIDmode,
16071 op0, op1),
16072 true_cond, false_cond)));
16073 false_cond = true_cond;
16074 true_cond = temp;
16076 temp = gen_reg_rtx (compare_mode);
16077 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16078 op0 = temp;
16079 break;
16081 case GT:
16082 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16083 temp = gen_reg_rtx (result_mode);
16084 emit_insn (gen_rtx_SET (temp,
16085 gen_rtx_IF_THEN_ELSE (result_mode,
16086 gen_rtx_GE (VOIDmode,
16087 op0, op1),
16088 true_cond, false_cond)));
16089 true_cond = false_cond;
16090 false_cond = temp;
16092 temp = gen_reg_rtx (compare_mode);
16093 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16094 op0 = temp;
16095 break;
16097 default:
16098 gcc_unreachable ();
16101 emit_insn (gen_rtx_SET (dest,
16102 gen_rtx_IF_THEN_ELSE (result_mode,
16103 gen_rtx_GE (VOIDmode,
16104 op0, op1),
16105 true_cond, false_cond)));
16106 return true;
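/* For illustration (a sketch): after the reductions above, every
   surviving case becomes a GE-against-zero selection,

     dest = (op0 >= 0.0) ? true_cond : false_cond    (one fsel)

   e.g. EQ was rewritten as op0 := -fabs (op0), which is >= 0.0 exactly
   when the original operand compared equal to zero.  */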
16109 /* Same as above, but for ints (isel). */
16111 bool
16112 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16114 rtx condition_rtx, cr;
16115 machine_mode mode = GET_MODE (dest);
16116 enum rtx_code cond_code;
16117 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16118 bool signedp;
16120 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16121 return false;
16123 /* We still have to do the compare, because isel doesn't do a
16124 compare, it just looks at the CRx bits set by a previous compare
16125 instruction. */
16126 condition_rtx = rs6000_generate_compare (op, mode);
16127 cond_code = GET_CODE (condition_rtx);
16128 cr = XEXP (condition_rtx, 0);
16129 signedp = GET_MODE (cr) == CCmode;
16131 isel_func = (mode == SImode
16132 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
16133 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
16135 switch (cond_code)
16137 case LT: case GT: case LTU: case GTU: case EQ:
16138 /* isel handles these directly. */
16139 break;
16141 default:
16142 /* We need to swap the sense of the comparison. */
16144 std::swap (false_cond, true_cond);
16145 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16147 break;
16150 false_cond = force_reg (mode, false_cond);
16151 if (true_cond != const0_rtx)
16152 true_cond = force_reg (mode, true_cond);
16154 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16156 return true;
16159 void
16160 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16162 machine_mode mode = GET_MODE (op0);
16163 enum rtx_code c;
16164 rtx target;
16166 /* VSX/altivec have direct min/max insns. */
16167 if ((code == SMAX || code == SMIN)
16168 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16169 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16170 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16172 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16173 return;
16176 if (code == SMAX || code == SMIN)
16177 c = GE;
16178 else
16179 c = GEU;
16181 if (code == SMAX || code == UMAX)
16182 target = emit_conditional_move (dest, c, op0, op1, mode,
16183 op0, op1, mode, 0);
16184 else
16185 target = emit_conditional_move (dest, c, op0, op1, mode,
16186 op1, op0, mode, 0);
16187 gcc_assert (target);
16188 if (target != dest)
16189 emit_move_insn (dest, target);
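/* For illustration: when no direct min/max instruction applies, SMAX
   above is lowered as a conditional move on GE, i.e.
   dest = (op0 >= op1) ? op0 : op1, via emit_conditional_move, and
   symmetrically for SMIN/UMAX/UMIN with GEU and swapped arms.  */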
16192 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16193 COND is true. Mark the jump as unlikely to be taken. */
16195 static void
16196 emit_unlikely_jump (rtx cond, rtx label)
16198 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16199 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16200 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16203 /* A subroutine of the atomic operation splitters. Emit a load-locked
16204 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16205 the zero_extend operation. */
16207 static void
16208 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16210 rtx (*fn) (rtx, rtx) = NULL;
16212 switch (mode)
16214 case E_QImode:
16215 fn = gen_load_lockedqi;
16216 break;
16217 case E_HImode:
16218 fn = gen_load_lockedhi;
16219 break;
16220 case E_SImode:
16221 if (GET_MODE (mem) == QImode)
16222 fn = gen_load_lockedqi_si;
16223 else if (GET_MODE (mem) == HImode)
16224 fn = gen_load_lockedhi_si;
16225 else
16226 fn = gen_load_lockedsi;
16227 break;
16228 case E_DImode:
16229 fn = gen_load_lockeddi;
16230 break;
16231 case E_TImode:
16232 fn = gen_load_lockedti;
16233 break;
16234 default:
16235 gcc_unreachable ();
16237 emit_insn (fn (reg, mem));
16240 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16241 instruction in MODE. */
16243 static void
16244 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16246 rtx (*fn) (rtx, rtx, rtx) = NULL;
16248 switch (mode)
16250 case E_QImode:
16251 fn = gen_store_conditionalqi;
16252 break;
16253 case E_HImode:
16254 fn = gen_store_conditionalhi;
16255 break;
16256 case E_SImode:
16257 fn = gen_store_conditionalsi;
16258 break;
16259 case E_DImode:
16260 fn = gen_store_conditionaldi;
16261 break;
16262 case E_TImode:
16263 fn = gen_store_conditionalti;
16264 break;
16265 default:
16266 gcc_unreachable ();
16269 /* Emit sync before stwcx. to address PPC405 Erratum. */
16270 if (PPC405_ERRATUM77)
16271 emit_insn (gen_hwsync ());
16273 emit_insn (fn (res, mem, val));
16276 /* Expand barriers before and after a load_locked/store_cond sequence. */
16278 static rtx
16279 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16281 rtx addr = XEXP (mem, 0);
16283 if (!legitimate_indirect_address_p (addr, reload_completed)
16284 && !legitimate_indexed_address_p (addr, reload_completed))
16286 addr = force_reg (Pmode, addr);
16287 mem = replace_equiv_address_nv (mem, addr);
16290 switch (model)
16292 case MEMMODEL_RELAXED:
16293 case MEMMODEL_CONSUME:
16294 case MEMMODEL_ACQUIRE:
16295 break;
16296 case MEMMODEL_RELEASE:
16297 case MEMMODEL_ACQ_REL:
16298 emit_insn (gen_lwsync ());
16299 break;
16300 case MEMMODEL_SEQ_CST:
16301 emit_insn (gen_hwsync ());
16302 break;
16303 default:
16304 gcc_unreachable ();
16306 return mem;
16309 static void
16310 rs6000_post_atomic_barrier (enum memmodel model)
16312 switch (model)
16314 case MEMMODEL_RELAXED:
16315 case MEMMODEL_CONSUME:
16316 case MEMMODEL_RELEASE:
16317 break;
16318 case MEMMODEL_ACQUIRE:
16319 case MEMMODEL_ACQ_REL:
16320 case MEMMODEL_SEQ_CST:
16321 emit_insn (gen_isync ());
16322 break;
16323 default:
16324 gcc_unreachable ();
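/* For illustration, the fence mapping implemented above: lwsync before
   a release or acq_rel access, hwsync before seq_cst, and isync (from
   rs6000_post_atomic_barrier) after acquire, acq_rel and seq_cst;
   relaxed and consume get no fences around the larx/stcx. sequence.  */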
16328 /* A subroutine of the various atomic expanders. For sub-word operations,
16329 we must adjust things to operate on SImode. Given the original MEM,
16330 return a new aligned memory. Also build and return the quantities by
16331 which to shift and mask. */
16333 static rtx
16334 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16336 rtx addr, align, shift, mask, mem;
16337 HOST_WIDE_INT shift_mask;
16338 machine_mode mode = GET_MODE (orig_mem);
16340 /* For smaller modes, we have to implement this via SImode. */
16341 shift_mask = (mode == QImode ? 0x18 : 0x10);
16343 addr = XEXP (orig_mem, 0);
16344 addr = force_reg (GET_MODE (addr), addr);
16346 /* Aligned memory containing subword. Generate a new memory. We
16347 do not want any of the existing MEM_ATTR data, as we're now
16348 accessing memory outside the original object. */
16349 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16350 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16351 mem = gen_rtx_MEM (SImode, align);
16352 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16353 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16354 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16356 /* Shift amount for subword relative to aligned word. */
16357 shift = gen_reg_rtx (SImode);
16358 addr = gen_lowpart (SImode, addr);
16359 rtx tmp = gen_reg_rtx (SImode);
16360 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16361 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16362 if (BYTES_BIG_ENDIAN)
16363 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16364 shift, 1, OPTAB_LIB_WIDEN);
16365 *pshift = shift;
16367 /* Mask for insertion. */
16368 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16369 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16370 *pmask = mask;
16372 return mem;
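/* A worked example (not from the original source): a QImode atomic at
   address A is performed on the aligned SImode word at A & -4.  For
   A % 4 == 2 on a little-endian target,

     shift = (A << 3) & 0x18  =  16
     mask  = 0xff << 16       =  0x00ff0000

   while on big-endian the shift is XORed with 0x18, giving 8, which
   selects the same byte within the big-endian word.  */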
16375 /* A subroutine of the various atomic expanders. For sub-word operands,
16376 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16378 static rtx
16379 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16381 rtx x;
16383 x = gen_reg_rtx (SImode);
16384 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16385 gen_rtx_NOT (SImode, mask),
16386 oldval)));
16388 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16390 return x;
16393 /* A subroutine of the various atomic expanders. For sub-word operands,
16394 extract WIDE to NARROW via SHIFT. */
16396 static void
16397 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16399 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16400 wide, 1, OPTAB_LIB_WIDEN);
16401 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16404 /* Expand an atomic compare and swap operation. */
16406 void
16407 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16409 rtx boolval, retval, mem, oldval, newval, cond;
16410 rtx label1, label2, x, mask, shift;
16411 machine_mode mode, orig_mode;
16412 enum memmodel mod_s, mod_f;
16413 bool is_weak;
16415 boolval = operands[0];
16416 retval = operands[1];
16417 mem = operands[2];
16418 oldval = operands[3];
16419 newval = operands[4];
16420 is_weak = (INTVAL (operands[5]) != 0);
16421 mod_s = memmodel_base (INTVAL (operands[6]));
16422 mod_f = memmodel_base (INTVAL (operands[7]));
16423 orig_mode = mode = GET_MODE (mem);
16425 mask = shift = NULL_RTX;
16426 if (mode == QImode || mode == HImode)
16428 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16429 lwarx and use shift/mask operations. With power8, we need to do the
16430 comparison in SImode, but the store is still done in QI/HImode. */
16431 oldval = convert_modes (SImode, mode, oldval, 1);
16433 if (!TARGET_SYNC_HI_QI)
16435 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16437 /* Shift and mask OLDVAL into position within the word. */
16438 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16439 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16441 /* Shift and mask NEWVAL into position within the word. */
16442 newval = convert_modes (SImode, mode, newval, 1);
16443 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16444 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16447 /* Prepare to adjust the return value. */
16448 retval = gen_reg_rtx (SImode);
16449 mode = SImode;
16451 else if (reg_overlap_mentioned_p (retval, oldval))
16452 oldval = copy_to_reg (oldval);
16454 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16455 oldval = copy_to_mode_reg (mode, oldval);
16457 if (reg_overlap_mentioned_p (retval, newval))
16458 newval = copy_to_reg (newval);
16460 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16462 label1 = NULL_RTX;
16463 if (!is_weak)
16465 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16466 emit_label (XEXP (label1, 0));
16468 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16470 emit_load_locked (mode, retval, mem);
16472 x = retval;
16473 if (mask)
16474 x = expand_simple_binop (SImode, AND, retval, mask,
16475 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16477 cond = gen_reg_rtx (CCmode);
16478 /* If we have TImode, synthesize a comparison. */
16479 if (mode != TImode)
16480 x = gen_rtx_COMPARE (CCmode, x, oldval);
16481 else
16483 rtx xor1_result = gen_reg_rtx (DImode);
16484 rtx xor2_result = gen_reg_rtx (DImode);
16485 rtx or_result = gen_reg_rtx (DImode);
16486 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16487 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16488 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16489 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16491 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16492 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16493 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16494 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16497 emit_insn (gen_rtx_SET (cond, x));
16499 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16500 emit_unlikely_jump (x, label2);
16502 x = newval;
16503 if (mask)
16504 x = rs6000_mask_atomic_subword (retval, newval, mask);
16506 emit_store_conditional (orig_mode, cond, mem, x);
16508 if (!is_weak)
16510 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16511 emit_unlikely_jump (x, label1);
16514 if (!is_mm_relaxed (mod_f))
16515 emit_label (XEXP (label2, 0));
16517 rs6000_post_atomic_barrier (mod_s);
16519 if (is_mm_relaxed (mod_f))
16520 emit_label (XEXP (label2, 0));
16522 if (shift)
16523 rs6000_finish_atomic_subword (operands[1], retval, shift);
16524 else if (mode != GET_MODE (operands[1]))
16525 convert_move (operands[1], retval, 1);
16527 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16528 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16529 emit_insn (gen_rtx_SET (boolval, x));
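/* For illustration (a rough sketch; exact registers and branch forms
   vary): a strong SImode compare-and-swap with seq_cst ordering expands
   to

       hwsync
     1: lwarx  r9,0,r3        load-reserved RETVAL
        cmpw   0,r9,r4        compare with OLDVAL
        bne-   0,2f           mismatch: fail
        stwcx. r5,0,r3        try to store NEWVAL
        bne-   0,1b           reservation lost: retry
     2: isync

   with BOOLVAL then set from the EQ bit of CR0.  A weak CAS omits the
   retry branch back to 1.  */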
16532 /* Expand an atomic exchange operation. */
16534 void
16535 rs6000_expand_atomic_exchange (rtx operands[])
16537 rtx retval, mem, val, cond;
16538 machine_mode mode;
16539 enum memmodel model;
16540 rtx label, x, mask, shift;
16542 retval = operands[0];
16543 mem = operands[1];
16544 val = operands[2];
16545 model = memmodel_base (INTVAL (operands[3]));
16546 mode = GET_MODE (mem);
16548 mask = shift = NULL_RTX;
16549 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16551 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16553 /* Shift and mask VAL into position within the word. */
16554 val = convert_modes (SImode, mode, val, 1);
16555 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16556 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16558 /* Prepare to adjust the return value. */
16559 retval = gen_reg_rtx (SImode);
16560 mode = SImode;
16563 mem = rs6000_pre_atomic_barrier (mem, model);
16565 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16566 emit_label (XEXP (label, 0));
16568 emit_load_locked (mode, retval, mem);
16570 x = val;
16571 if (mask)
16572 x = rs6000_mask_atomic_subword (retval, val, mask);
16574 cond = gen_reg_rtx (CCmode);
16575 emit_store_conditional (mode, cond, mem, x);
16577 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16578 emit_unlikely_jump (x, label);
16580 rs6000_post_atomic_barrier (model);
16582 if (shift)
16583 rs6000_finish_atomic_subword (operands[0], retval, shift);
16586 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16587 to perform. MEM is the memory on which to operate. VAL is the second
16588 operand of the binary operator. BEFORE and AFTER are optional locations to
16589 return the value of MEM either before or after the operation. MODEL_RTX
16590 is a CONST_INT containing the memory model to use. */
16592 void
16593 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16594 rtx orig_before, rtx orig_after, rtx model_rtx)
16596 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16597 machine_mode mode = GET_MODE (mem);
16598 machine_mode store_mode = mode;
16599 rtx label, x, cond, mask, shift;
16600 rtx before = orig_before, after = orig_after;
16602 mask = shift = NULL_RTX;
16603 /* On power8, we want to use SImode for the operation. On previous systems,
16604 use the operation in a subword and shift/mask to get the proper byte or
16605 halfword. */
16606 if (mode == QImode || mode == HImode)
16608 if (TARGET_SYNC_HI_QI)
16610 val = convert_modes (SImode, mode, val, 1);
16612 /* Prepare to adjust the return value. */
16613 before = gen_reg_rtx (SImode);
16614 if (after)
16615 after = gen_reg_rtx (SImode);
16616 mode = SImode;
16618 else
16620 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16622 /* Shift and mask VAL into position within the word. */
16623 val = convert_modes (SImode, mode, val, 1);
16624 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16627 switch (code)
16629 case IOR:
16630 case XOR:
16631 /* We've already zero-extended VAL. That is sufficient to
16632 make certain that it does not affect other bits. */
16633 mask = NULL;
16634 break;
16636 case AND:
16637 /* If we make certain that all of the other bits in VAL are
16638 set, that will be sufficient to not affect other bits. */
16639 x = gen_rtx_NOT (SImode, mask);
16640 x = gen_rtx_IOR (SImode, x, val);
16641 emit_insn (gen_rtx_SET (val, x));
16642 mask = NULL;
16643 break;
16645 case NOT:
16646 case PLUS:
16647 case MINUS:
16648 /* These will all affect bits outside the field and need
16649 adjustment via MASK within the loop. */
16650 break;
16652 default:
16653 gcc_unreachable ();
16656 /* Prepare to adjust the return value. */
16657 before = gen_reg_rtx (SImode);
16658 if (after)
16659 after = gen_reg_rtx (SImode);
16660 store_mode = mode = SImode;
16664 mem = rs6000_pre_atomic_barrier (mem, model);
16666 label = gen_label_rtx ();
16667 emit_label (label);
16668 label = gen_rtx_LABEL_REF (VOIDmode, label);
16670 if (before == NULL_RTX)
16671 before = gen_reg_rtx (mode);
16673 emit_load_locked (mode, before, mem);
16675 if (code == NOT)
16677 x = expand_simple_binop (mode, AND, before, val,
16678 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16679 after = expand_simple_unop (mode, NOT, x, after, 1);
16681 else
16683 after = expand_simple_binop (mode, code, before, val,
16684 after, 1, OPTAB_LIB_WIDEN);
16687 x = after;
16688 if (mask)
16690 x = expand_simple_binop (SImode, AND, after, mask,
16691 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16692 x = rs6000_mask_atomic_subword (before, x, mask);
16694 else if (store_mode != mode)
16695 x = convert_modes (store_mode, mode, x, 1);
16697 cond = gen_reg_rtx (CCmode);
16698 emit_store_conditional (store_mode, cond, mem, x);
16700 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16701 emit_unlikely_jump (x, label);
16703 rs6000_post_atomic_barrier (model);
16705 if (shift)
16707 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16708 then do the calculations in a SImode register. */
16709 if (orig_before)
16710 rs6000_finish_atomic_subword (orig_before, before, shift);
16711 if (orig_after)
16712 rs6000_finish_atomic_subword (orig_after, after, shift);
16714 else if (store_mode != mode)
16716 /* QImode/HImode on machines with lbarx/lharx where we do the native
16717 operation and then do the calculations in a SImode register. */
16718 if (orig_before)
16719 convert_move (orig_before, before, 1);
16720 if (orig_after)
16721 convert_move (orig_after, after, 1);
16723 else if (orig_after && after != orig_after)
16724 emit_move_insn (orig_after, after);
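/* For illustration (a rough sketch): a relaxed SImode __atomic_fetch_add
   expands to the classic reservation loop

     1: lwarx  r9,0,r3
        add    r10,r9,r4
        stwcx. r10,0,r3
        bne-   0,1b

   with BEFORE available in r9 and AFTER in r10; the sub-word cases run
   the same loop on the containing word, using the shift/mask fixups
   above.  */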
16727 static GTY(()) alias_set_type TOC_alias_set = -1;
16729 alias_set_type
16730 get_TOC_alias_set (void)
16732 if (TOC_alias_set == -1)
16733 TOC_alias_set = new_alias_set ();
16734 return TOC_alias_set;
16737 /* The mode the ABI uses for a word. This is not the same as word_mode
16738 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16740 static scalar_int_mode
16741 rs6000_abi_word_mode (void)
16743 return TARGET_32BIT ? SImode : DImode;
16746 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16747 static char *
16748 rs6000_offload_options (void)
16750 if (TARGET_64BIT)
16751 return xstrdup ("-foffload-abi=lp64");
16752 else
16753 return xstrdup ("-foffload-abi=ilp32");
16757 /* A quick summary of the various types of 'constant-pool tables'
16758 under PowerPC:
16760 Target       Flags           Name             One table per
16761 AIX          (none)          AIX TOC          object file
16762 AIX          -mfull-toc      AIX TOC          object file
16763 AIX          -mminimal-toc   AIX minimal TOC  translation unit
16764 SVR4/EABI    (none)          SVR4 SDATA       object file
16765 SVR4/EABI    -fpic           SVR4 pic         object file
16766 SVR4/EABI    -fPIC           SVR4 PIC         translation unit
16767 SVR4/EABI    -mrelocatable   EABI TOC         function
16768 SVR4/EABI    -maix           AIX TOC          object file
16769 SVR4/EABI    -maix -mminimal-toc
16770                              AIX minimal TOC  translation unit
16772 Name             Reg.  Set by  entries  contains:
16773                                made by  addrs?   fp?      sum?
16775 AIX TOC           2    crt0    as       Y        option   option
16776 AIX minimal TOC  30    prolog  gcc      Y        Y        option
16777 SVR4 SDATA       13    crt0    gcc      N        Y        N
16778 SVR4 pic         30    prolog  ld       Y        not yet  N
16779 SVR4 PIC         30    prolog  gcc      Y        option   option
16780 EABI TOC         30    prolog  gcc      Y        option   option
16784 /* Hash functions for the hash table. */
16786 static unsigned
16787 rs6000_hash_constant (rtx k)
16789 enum rtx_code code = GET_CODE (k);
16790 machine_mode mode = GET_MODE (k);
16791 unsigned result = (code << 3) ^ mode;
16792 const char *format;
16793 int flen, fidx;
16795 format = GET_RTX_FORMAT (code);
16796 flen = strlen (format);
16797 fidx = 0;
16799 switch (code)
16801 case LABEL_REF:
16802 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16804 case CONST_WIDE_INT:
16806 int i;
16807 flen = CONST_WIDE_INT_NUNITS (k);
16808 for (i = 0; i < flen; i++)
16809 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16810 return result;
16813 case CONST_DOUBLE:
16814 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16816 case CODE_LABEL:
16817 fidx = 3;
16818 break;
16820 default:
16821 break;
16824 for (; fidx < flen; fidx++)
16825 switch (format[fidx])
16827 case 's':
16829 unsigned i, len;
16830 const char *str = XSTR (k, fidx);
16831 len = strlen (str);
16832 result = result * 613 + len;
16833 for (i = 0; i < len; i++)
16834 result = result * 613 + (unsigned) str[i];
16835 break;
16837 case 'u':
16838 case 'e':
16839 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16840 break;
16841 case 'i':
16842 case 'n':
16843 result = result * 613 + (unsigned) XINT (k, fidx);
16844 break;
16845 case 'w':
16846 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16847 result = result * 613 + (unsigned) XWINT (k, fidx);
16848 else
16850 size_t i;
16851 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16852 result = result * 613 + (unsigned) (XWINT (k, fidx)
16853 >> CHAR_BIT * i);
16855 break;
16856 case '0':
16857 break;
16858 default:
16859 gcc_unreachable ();
16862 return result;
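/* To make the folding concrete: hashing an rtx whose only operand is
   the string "foo" takes the 's' case above and, after the initial
   (code << 3) ^ mode seed, computes

       result = result * 613 + 3;        (strlen of "foo")
       result = result * 613 + 'f';
       result = result * 613 + 'o';
       result = result * 613 + 'o';

   613 and 1231 are simply odd multipliers used to spread bits; nothing
   here appears to depend on their exact values.  */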
16865 hashval_t
16866 toc_hasher::hash (toc_hash_struct *thc)
16868 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16871 /* Compare H1 and H2 for equivalence. */
16873 bool
16874 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16876 rtx r1 = h1->key;
16877 rtx r2 = h2->key;
16879 if (h1->key_mode != h2->key_mode)
16880 return 0;
16882 return rtx_equal_p (r1, r2);
16885 /* These are the names given by the C++ front-end to vtables, and
16886 vtable-like objects. Ideally, this logic should not be here;
16887 instead, there should be some programmatic way of inquiring as
16888 to whether or not an object is a vtable. */
16890 #define VTABLE_NAME_P(NAME) \
16891 (startswith (name, "_vt.") \
16892 || startswith (name, "_ZTV") \
16893 || startswith (name, "_ZTT") \
16894 || startswith (name, "_ZTI") \
16895 || startswith (name, "_ZTC"))
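/* For example, under the Itanium C++ ABI the vtable for a class Foo is
   named "_ZTV3Foo", its VTT "_ZTT3Foo", its typeinfo "_ZTI3Foo", and
   its construction vtables "_ZTC3Foo..." variants, so all of them
   satisfy VTABLE_NAME_P; "_vt." covers the older GNU (pre-Itanium)
   mangling.  */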
16897 #ifdef NO_DOLLAR_IN_LABEL
16898 /* Return a GGC-allocated character string translating dollar signs in
16899 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16901 const char *
16902 rs6000_xcoff_strip_dollar (const char *name)
16904 char *strip, *p;
16905 const char *q;
16906 size_t len;
16908 q = (const char *) strchr (name, '$');
16910 if (q == 0 || q == name)
16911 return name;
16913 len = strlen (name);
16914 strip = XALLOCAVEC (char, len + 1);
16915 strcpy (strip, name);
16916 p = strip + (q - name);
16917 while (p)
16919 *p = '_';
16920 p = strchr (p + 1, '$');
16923 return ggc_alloc_string (strip, len);
16925 #endif
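/* Example of rs6000_xcoff_strip_dollar on a hypothetical mangled name:
   "foo$bar$baz" becomes "foo_bar_baz", every '$' after the first
   character being rewritten, while a name without '$' (or whose only
   '$' is the leading character) is returned unchanged.  */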
16927 void
16928 rs6000_output_symbol_ref (FILE *file, rtx x)
16930 const char *name = XSTR (x, 0);
16932 /* Currently C++ toc references to vtables can be emitted before it
16933 is decided whether the vtable is public or private. If this is
16934 the case, then the linker will eventually complain that there is
16935 a reference to an unknown section. Thus, for vtables only,
16936 we emit the TOC reference to reference the identifier and not the
16937 symbol. */
16938 if (VTABLE_NAME_P (name))
16940 RS6000_OUTPUT_BASENAME (file, name);
16942 else
16943 assemble_name (file, name);
16946 /* Output a TOC entry. We derive the entry name from what is being
16947 written. */
16949 void
16950 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16952 char buf[256];
16953 const char *name = buf;
16954 rtx base = x;
16955 HOST_WIDE_INT offset = 0;
16957 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16959 /* When the linker won't eliminate them, don't output duplicate
16960 TOC entries (this happens on AIX if there is any kind of TOC,
16961 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16962 CODE_LABELs. */
16963 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16965 struct toc_hash_struct *h;
16967 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16968 time because GGC is not initialized at that point. */
16969 if (toc_hash_table == NULL)
16970 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16972 h = ggc_alloc<toc_hash_struct> ();
16973 h->key = x;
16974 h->key_mode = mode;
16975 h->labelno = labelno;
16977 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16978 if (*found == NULL)
16979 *found = h;
16980 else /* This is indeed a duplicate.
16981 Set this label equal to that label. */
16983 fputs ("\t.set ", file);
16984 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16985 fprintf (file, "%d,", labelno);
16986 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16987 fprintf (file, "%d\n", ((*found)->labelno));
16989 #ifdef HAVE_AS_TLS
16990 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16991 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16992 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16994 fputs ("\t.set ", file);
16995 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16996 fprintf (file, "%d,", labelno);
16997 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16998 fprintf (file, "%d\n", ((*found)->labelno));
17000 #endif
17001 return;
17005 /* If we're going to put a double constant in the TOC, make sure it's
17006 aligned properly when strict alignment is on. */
17007 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17008 && STRICT_ALIGNMENT
17009 && GET_MODE_BITSIZE (mode) >= 64
17010 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
17011 ASM_OUTPUT_ALIGN (file, 3);
17014 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17016 /* Handle FP constants specially. Note that if we have a minimal
17017 TOC, things we put here aren't actually in the TOC, so we can allow
17018 FP constants. */
17019 if (CONST_DOUBLE_P (x)
17020 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17021 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17023 long k[4];
17025 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17026 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17027 else
17028 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17030 if (TARGET_64BIT)
17032 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17033 fputs (DOUBLE_INT_ASM_OP, file);
17034 else
17035 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17036 k[0] & 0xffffffff, k[1] & 0xffffffff,
17037 k[2] & 0xffffffff, k[3] & 0xffffffff);
17038 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17039 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17040 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17041 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17042 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17043 return;
17045 else
17047 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17048 fputs ("\t.long ", file);
17049 else
17050 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17051 k[0] & 0xffffffff, k[1] & 0xffffffff,
17052 k[2] & 0xffffffff, k[3] & 0xffffffff);
17053 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17054 k[0] & 0xffffffff, k[1] & 0xffffffff,
17055 k[2] & 0xffffffff, k[3] & 0xffffffff);
17056 return;
17059 else if (CONST_DOUBLE_P (x)
17060 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17062 long k[2];
17064 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17065 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17066 else
17067 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17069 if (TARGET_64BIT)
17071 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17072 fputs (DOUBLE_INT_ASM_OP, file);
17073 else
17074 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17075 k[0] & 0xffffffff, k[1] & 0xffffffff);
17076 fprintf (file, "0x%lx%08lx\n",
17077 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17078 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17079 return;
17081 else
17083 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17084 fputs ("\t.long ", file);
17085 else
17086 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17087 k[0] & 0xffffffff, k[1] & 0xffffffff);
17088 fprintf (file, "0x%lx,0x%lx\n",
17089 k[0] & 0xffffffff, k[1] & 0xffffffff);
17090 return;
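/* As a concrete illustration of the DFmode case just above: for the
   constant 1.0, REAL_VALUE_TO_TARGET_DOUBLE produces k[0] = 0x3ff00000
   and k[1] = 0, so the 64-bit AIX-style entry is emitted as

       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   while the 32-bit form writes the same two words as separate
   comma-separated operands.  */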
17093 else if (CONST_DOUBLE_P (x)
17094 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17096 long l;
17098 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17099 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17100 else
17101 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17103 if (TARGET_64BIT)
17105 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17106 fputs (DOUBLE_INT_ASM_OP, file);
17107 else
17108 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17109 if (WORDS_BIG_ENDIAN)
17110 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17111 else
17112 fprintf (file, "0x%lx\n", l & 0xffffffff);
17113 return;
17115 else
17117 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17118 fputs ("\t.long ", file);
17119 else
17120 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17121 fprintf (file, "0x%lx\n", l & 0xffffffff);
17122 return;
17125 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17127 unsigned HOST_WIDE_INT low;
17128 HOST_WIDE_INT high;
17130 low = INTVAL (x) & 0xffffffff;
17131 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17133 /* TOC entries are always Pmode-sized, so when big-endian
17134 smaller integer constants in the TOC need to be padded.
17135 (This is still a win over putting the constants in
17136 a separate constant pool, because then we'd have
17137 to have both a TOC entry _and_ the actual constant.)
17139 For a 32-bit target, CONST_INT values are loaded and shifted
17140 entirely within `low' and can be stored in one TOC entry. */
17142 /* It would be easy to make this work, but it doesn't now. */
17143 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17145 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17147 low |= high << 32;
17148 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17149 high = (HOST_WIDE_INT) low >> 32;
17150 low &= 0xffffffff;
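/* Worked example of the padding above: an SImode constant 0x12345678
   on a 64-bit big-endian target has POINTER_SIZE (64) greater than
   GET_MODE_BITSIZE (32), so the value is shifted into the high word,
   leaving high = 0x12345678 and low = 0, and the TOC word is emitted
   as 0x1234567800000000.  */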
17153 if (TARGET_64BIT)
17155 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17156 fputs (DOUBLE_INT_ASM_OP, file);
17157 else
17158 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17159 (long) high & 0xffffffff, (long) low & 0xffffffff);
17160 fprintf (file, "0x%lx%08lx\n",
17161 (long) high & 0xffffffff, (long) low & 0xffffffff);
17162 return;
17164 else
17166 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17168 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17169 fputs ("\t.long ", file);
17170 else
17171 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17172 (long) high & 0xffffffff, (long) low & 0xffffffff);
17173 fprintf (file, "0x%lx,0x%lx\n",
17174 (long) high & 0xffffffff, (long) low & 0xffffffff);
17176 else
17178 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17179 fputs ("\t.long ", file);
17180 else
17181 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17182 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17184 return;
17188 if (GET_CODE (x) == CONST)
17190 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17191 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17193 base = XEXP (XEXP (x, 0), 0);
17194 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17197 switch (GET_CODE (base))
17199 case SYMBOL_REF:
17200 name = XSTR (base, 0);
17201 break;
17203 case LABEL_REF:
17204 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17205 CODE_LABEL_NUMBER (XEXP (base, 0)));
17206 break;
17208 case CODE_LABEL:
17209 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17210 break;
17212 default:
17213 gcc_unreachable ();
17216 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17217 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17218 else
17220 fputs ("\t.tc ", file);
17221 RS6000_OUTPUT_BASENAME (file, name);
17223 if (offset < 0)
17224 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17225 else if (offset)
17226 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17228 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17229 after other TOC symbols, reducing overflow of small TOC access
17230 to [TC] symbols. */
17231 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17232 ? "[TE]," : "[TC],", file);
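/* For a hypothetical symbol "bar" referenced at offset -8 on AIX with
   the small code model, the name part built above would come out as

       .tc bar.N8[TC],

   with the actual address expression ("bar-8") appended below.  */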
17235 /* Currently C++ toc references to vtables can be emitted before it
17236 is decided whether the vtable is public or private. If this is
17237 the case, then the linker will eventually complain that there is
17238 a TOC reference to an unknown section. Thus, for vtables only,
17239 we emit the TOC reference to reference the symbol and not the
17240 section. */
17241 if (VTABLE_NAME_P (name))
17243 RS6000_OUTPUT_BASENAME (file, name);
17244 if (offset < 0)
17245 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17246 else if (offset > 0)
17247 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17249 else
17250 output_addr_const (file, x);
17252 #if HAVE_AS_TLS
17253 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17255 switch (SYMBOL_REF_TLS_MODEL (base))
17257 case 0:
17258 break;
17259 case TLS_MODEL_LOCAL_EXEC:
17260 fputs ("@le", file);
17261 break;
17262 case TLS_MODEL_INITIAL_EXEC:
17263 fputs ("@ie", file);
17264 break;
17265 /* Use global-dynamic for local-dynamic. */
17266 case TLS_MODEL_GLOBAL_DYNAMIC:
17267 case TLS_MODEL_LOCAL_DYNAMIC:
17268 putc ('\n', file);
17269 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17270 fputs ("\t.tc .", file);
17271 RS6000_OUTPUT_BASENAME (file, name);
17272 fputs ("[TC],", file);
17273 output_addr_const (file, x);
17274 fputs ("@m", file);
17275 break;
17276 default:
17277 gcc_unreachable ();
17280 #endif
17282 putc ('\n', file);
17285 /* Output an assembler pseudo-op to write an ASCII string of N characters
17286 starting at P to FILE.
17288 On the RS/6000, we have to do this using the .byte operation and
17289 write out special characters outside the quoted string.
17290 Also, the assembler is broken; very long strings are truncated,
17291 so we must artificially break them up early. */
17293 void
17294 output_ascii (FILE *file, const char *p, int n)
17296 char c;
17297 int i, count_string;
17298 const char *for_string = "\t.byte \"";
17299 const char *for_decimal = "\t.byte ";
17300 const char *to_close = NULL;
17302 count_string = 0;
17303 for (i = 0; i < n; i++)
17305 c = *p++;
17306 if (c >= ' ' && c < 0177)
17308 if (for_string)
17309 fputs (for_string, file);
17310 putc (c, file);
17312 /* Write two quotes to get one. */
17313 if (c == '"')
17315 putc (c, file);
17316 ++count_string;
17319 for_string = NULL;
17320 for_decimal = "\"\n\t.byte ";
17321 to_close = "\"\n";
17322 ++count_string;
17324 if (count_string >= 512)
17326 fputs (to_close, file);
17328 for_string = "\t.byte \"";
17329 for_decimal = "\t.byte ";
17330 to_close = NULL;
17331 count_string = 0;
17334 else
17336 if (for_decimal)
17337 fputs (for_decimal, file);
17338 fprintf (file, "%d", c);
17340 for_string = "\n\t.byte \"";
17341 for_decimal = ", ";
17342 to_close = "\n";
17343 count_string = 0;
17347 /* Now close the string if we have written one. Then end the line. */
17348 if (to_close)
17349 fputs (to_close, file);
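/* For example, output_ascii (file, "A\nB", 3) emits

       .byte "A"
       .byte 10
       .byte "B"

   printable runs are batched inside one quoted string, non-printable
   bytes are written in decimal, and the 512-character check above
   restarts the quoted string before the assembler's limit is reached.  */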
17352 /* Generate a unique section name for FILENAME for a section type
17353 represented by SECTION_DESC. Output goes into BUF.
17355 SECTION_DESC can be any string, as long as it is different for each
17356 possible section type.
17358 We name the section in the same manner as xlc. The name begins with an
17359 underscore followed by the filename (after stripping any leading directory
17360 names) with the last period replaced by the string SECTION_DESC. If
17361 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17362 the name. */
17364 void
17365 rs6000_gen_section_name (char **buf, const char *filename,
17366 const char *section_desc)
17368 const char *q, *after_last_slash, *last_period = 0;
17369 char *p;
17370 int len;
17372 after_last_slash = filename;
17373 for (q = filename; *q; q++)
17375 if (*q == '/')
17376 after_last_slash = q + 1;
17377 else if (*q == '.')
17378 last_period = q;
17381 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17382 *buf = (char *) xmalloc (len);
17384 p = *buf;
17385 *p++ = '_';
17387 for (q = after_last_slash; *q; q++)
17389 if (q == last_period)
17391 strcpy (p, section_desc);
17392 p += strlen (section_desc);
17393 break;
17396 else if (ISALNUM (*q))
17397 *p++ = *q;
17400 if (last_period == 0)
17401 strcpy (p, section_desc);
17402 else
17403 *p = '\0';
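/* Example with hypothetical arguments: FILENAME "src/foo.c" and
   SECTION_DESC "data_" yield "_foodata_" -- the directory prefix and
   non-alphanumeric characters are dropped and the last period is
   replaced by SECTION_DESC.  A period-less FILENAME "foo" gives the
   same "_foodata_" by plain concatenation.  */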
17406 /* Emit profile function. */
17408 void
17409 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17411 /* Non-standard profiling for kernels, which just saves LR then calls
17412 _mcount without worrying about arg saves. The idea is to change
17413 the function prologue as little as possible as it isn't easy to
17414 account for arg save/restore code added just for _mcount. */
17415 if (TARGET_PROFILE_KERNEL)
17416 return;
17418 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17420 #ifndef NO_PROFILE_COUNTERS
17421 # define NO_PROFILE_COUNTERS 0
17422 #endif
17423 if (NO_PROFILE_COUNTERS)
17424 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17425 LCT_NORMAL, VOIDmode);
17426 else
17428 char buf[30];
17429 const char *label_name;
17430 rtx fun;
17432 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17433 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17434 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17436 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17437 LCT_NORMAL, VOIDmode, fun, Pmode);
17440 else if (DEFAULT_ABI == ABI_DARWIN)
17442 const char *mcount_name = RS6000_MCOUNT;
17443 int caller_addr_regno = LR_REGNO;
17445 /* Be conservative and always set this, at least for now. */
17446 crtl->uses_pic_offset_table = 1;
17448 #if TARGET_MACHO
17449 /* For PIC code, set up a stub and collect the caller's address
17450 from r0, which is where the prologue puts it. */
17451 if (MACHOPIC_INDIRECT
17452 && crtl->uses_pic_offset_table)
17453 caller_addr_regno = 0;
17454 #endif
17455 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17456 LCT_NORMAL, VOIDmode,
17457 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17461 /* Write function profiler code. */
17463 void
17464 output_function_profiler (FILE *file, int labelno)
17466 char buf[100];
17468 switch (DEFAULT_ABI)
17470 default:
17471 gcc_unreachable ();
17473 case ABI_V4:
17474 if (!TARGET_32BIT)
17476 warning (0, "no profiling of 64-bit code for this ABI");
17477 return;
17479 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17480 fprintf (file, "\tmflr %s\n", reg_names[0]);
17481 if (NO_PROFILE_COUNTERS)
17483 asm_fprintf (file, "\tstw %s,4(%s)\n",
17484 reg_names[0], reg_names[1]);
17486 else if (TARGET_SECURE_PLT && flag_pic)
17488 if (TARGET_LINK_STACK)
17490 char name[32];
17491 get_ppc476_thunk_name (name);
17492 asm_fprintf (file, "\tbl %s\n", name);
17494 else
17495 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17496 asm_fprintf (file, "\tstw %s,4(%s)\n",
17497 reg_names[0], reg_names[1]);
17498 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17499 asm_fprintf (file, "\taddis %s,%s,",
17500 reg_names[12], reg_names[12]);
17501 assemble_name (file, buf);
17502 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17503 assemble_name (file, buf);
17504 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17506 else if (flag_pic == 1)
17508 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17509 asm_fprintf (file, "\tstw %s,4(%s)\n",
17510 reg_names[0], reg_names[1]);
17511 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17512 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17513 assemble_name (file, buf);
17514 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17516 else if (flag_pic > 1)
17518 asm_fprintf (file, "\tstw %s,4(%s)\n",
17519 reg_names[0], reg_names[1]);
17520 /* Now, we need to get the address of the label. */
17521 if (TARGET_LINK_STACK)
17523 char name[32];
17524 get_ppc476_thunk_name (name);
17525 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17526 assemble_name (file, buf);
17527 fputs ("-.\n1:", file);
17528 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17529 asm_fprintf (file, "\taddi %s,%s,4\n",
17530 reg_names[11], reg_names[11]);
17532 else
17534 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17535 assemble_name (file, buf);
17536 fputs ("-.\n1:", file);
17537 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17539 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17540 reg_names[0], reg_names[11]);
17541 asm_fprintf (file, "\tadd %s,%s,%s\n",
17542 reg_names[0], reg_names[0], reg_names[11]);
17544 else
17546 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17547 assemble_name (file, buf);
17548 fputs ("@ha\n", file);
17549 asm_fprintf (file, "\tstw %s,4(%s)\n",
17550 reg_names[0], reg_names[1]);
17551 asm_fprintf (file, "\tla %s,", reg_names[0]);
17552 assemble_name (file, buf);
17553 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17556 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17557 fprintf (file, "\tbl %s%s\n",
17558 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17559 break;
17561 case ABI_AIX:
17562 case ABI_ELFv2:
17563 case ABI_DARWIN:
17564 /* Don't do anything, done in output_profile_hook (). */
17565 break;
17571 /* The following variable value is the last issued insn. */
17573 static rtx_insn *last_scheduled_insn;
17575 /* The following variable helps to balance issuing of load and
17576 store instructions. */
17578 static int load_store_pendulum;
17580 /* The following variable helps pair divide insns during scheduling. */
17581 static int divide_cnt;
17582 /* The following variable helps pair and alternate vector and vector load
17583 insns during scheduling. */
17584 static int vec_pairing;
17587 /* Power4 load update and store update instructions are cracked into a
17588 load or store and an integer insn which are executed in the same cycle.
17589 Branches have their own dispatch slot which does not count against the
17590 GCC issue rate, but it changes the program flow so there are no other
17591 instructions to issue in this cycle. */
17593 static int
17594 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17596 last_scheduled_insn = insn;
17597 if (GET_CODE (PATTERN (insn)) == USE
17598 || GET_CODE (PATTERN (insn)) == CLOBBER)
17600 cached_can_issue_more = more;
17601 return cached_can_issue_more;
17604 if (insn_terminates_group_p (insn, current_group))
17606 cached_can_issue_more = 0;
17607 return cached_can_issue_more;
17610 /* If the insn could not be recognized (so has no reservation), leave MORE unchanged. */
17611 if (recog_memoized (insn) < 0)
17612 return more;
17614 if (rs6000_sched_groups)
17616 if (is_microcoded_insn (insn))
17617 cached_can_issue_more = 0;
17618 else if (is_cracked_insn (insn))
17619 cached_can_issue_more = more > 2 ? more - 2 : 0;
17620 else
17621 cached_can_issue_more = more - 1;
17623 return cached_can_issue_more;
17626 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17627 return 0;
17629 cached_can_issue_more = more - 1;
17630 return cached_can_issue_more;
17633 static int
17634 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17636 int r = rs6000_variable_issue_1 (insn, more);
17637 if (verbose)
17638 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17639 return r;
17642 /* Adjust the cost of a scheduling dependency. Return the new cost of
17643 a dependence of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
17645 static int
17646 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17647 unsigned int)
17649 enum attr_type attr_type;
17651 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17652 return cost;
17654 switch (dep_type)
17656 case REG_DEP_TRUE:
17658 /* Data dependency; DEP_INSN writes a register that INSN reads
17659 some cycles later. */
17661 /* Separate a load from a narrower, dependent store. */
17662 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17663 || rs6000_tune == PROCESSOR_POWER10)
17664 && GET_CODE (PATTERN (insn)) == SET
17665 && GET_CODE (PATTERN (dep_insn)) == SET
17666 && MEM_P (XEXP (PATTERN (insn), 1))
17667 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17668 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17669 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17670 return cost + 14;
17672 attr_type = get_attr_type (insn);
17674 switch (attr_type)
17676 case TYPE_JMPREG:
17677 /* Tell the first scheduling pass about the latency between
17678 a mtctr and bctr (and mtlr and br/blr). The first
17679 scheduling pass will not know about this latency since
17680 the mtctr instruction, which has the latency associated
17681 to it, will be generated by reload. */
17682 return 4;
17683 case TYPE_BRANCH:
17684 /* Leave some extra cycles between a compare and its
17685 dependent branch, to inhibit expensive mispredicts. */
17686 if ((rs6000_tune == PROCESSOR_PPC603
17687 || rs6000_tune == PROCESSOR_PPC604
17688 || rs6000_tune == PROCESSOR_PPC604e
17689 || rs6000_tune == PROCESSOR_PPC620
17690 || rs6000_tune == PROCESSOR_PPC630
17691 || rs6000_tune == PROCESSOR_PPC750
17692 || rs6000_tune == PROCESSOR_PPC7400
17693 || rs6000_tune == PROCESSOR_PPC7450
17694 || rs6000_tune == PROCESSOR_PPCE5500
17695 || rs6000_tune == PROCESSOR_PPCE6500
17696 || rs6000_tune == PROCESSOR_POWER4
17697 || rs6000_tune == PROCESSOR_POWER5
17698 || rs6000_tune == PROCESSOR_POWER7
17699 || rs6000_tune == PROCESSOR_POWER8
17700 || rs6000_tune == PROCESSOR_POWER9
17701 || rs6000_tune == PROCESSOR_POWER10
17702 || rs6000_tune == PROCESSOR_CELL)
17703 && recog_memoized (dep_insn)
17704 && (INSN_CODE (dep_insn) >= 0))
17706 switch (get_attr_type (dep_insn))
17708 case TYPE_CMP:
17709 case TYPE_FPCOMPARE:
17710 case TYPE_CR_LOGICAL:
17711 return cost + 2;
17712 case TYPE_EXTS:
17713 case TYPE_MUL:
17714 if (get_attr_dot (dep_insn) == DOT_YES)
17715 return cost + 2;
17716 else
17717 break;
17718 case TYPE_SHIFT:
17719 if (get_attr_dot (dep_insn) == DOT_YES
17720 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17721 return cost + 2;
17722 else
17723 break;
17724 default:
17725 break;
17727 break;
17729 case TYPE_STORE:
17730 case TYPE_FPSTORE:
17731 if ((rs6000_tune == PROCESSOR_POWER6)
17732 && recog_memoized (dep_insn)
17733 && (INSN_CODE (dep_insn) >= 0))
17736 if (GET_CODE (PATTERN (insn)) != SET)
17737 /* If this happens, we have to extend this to schedule
17738 optimally. Return default for now. */
17739 return cost;
17741 /* Adjust the cost for the case where the value written
17742 by a fixed point operation is used as the address
17743 gen value on a store. */
17744 switch (get_attr_type (dep_insn))
17746 case TYPE_LOAD:
17747 case TYPE_CNTLZ:
17749 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17750 return get_attr_sign_extend (dep_insn)
17751 == SIGN_EXTEND_YES ? 6 : 4;
17752 break;
17754 case TYPE_SHIFT:
17756 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17757 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17758 6 : 3;
17759 break;
17761 case TYPE_INTEGER:
17762 case TYPE_ADD:
17763 case TYPE_LOGICAL:
17764 case TYPE_EXTS:
17765 case TYPE_INSERT:
17767 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17768 return 3;
17769 break;
17771 case TYPE_STORE:
17772 case TYPE_FPLOAD:
17773 case TYPE_FPSTORE:
17775 if (get_attr_update (dep_insn) == UPDATE_YES
17776 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17777 return 3;
17778 break;
17780 case TYPE_MUL:
17782 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17783 return 17;
17784 break;
17786 case TYPE_DIV:
17788 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17789 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17790 break;
17792 default:
17793 break;
17796 break;
17798 case TYPE_LOAD:
17799 if ((rs6000_tune == PROCESSOR_POWER6)
17800 && recog_memoized (dep_insn)
17801 && (INSN_CODE (dep_insn) >= 0))
17804 /* Adjust the cost for the case where the value written
17805 by a fixed point instruction is used within the address
17806 gen portion of a subsequent load(u)(x) */
17807 switch (get_attr_type (dep_insn))
17809 case TYPE_LOAD:
17810 case TYPE_CNTLZ:
17812 if (set_to_load_agen (dep_insn, insn))
17813 return get_attr_sign_extend (dep_insn)
17814 == SIGN_EXTEND_YES ? 6 : 4;
17815 break;
17817 case TYPE_SHIFT:
17819 if (set_to_load_agen (dep_insn, insn))
17820 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17821 6 : 3;
17822 break;
17824 case TYPE_INTEGER:
17825 case TYPE_ADD:
17826 case TYPE_LOGICAL:
17827 case TYPE_EXTS:
17828 case TYPE_INSERT:
17830 if (set_to_load_agen (dep_insn, insn))
17831 return 3;
17832 break;
17834 case TYPE_STORE:
17835 case TYPE_FPLOAD:
17836 case TYPE_FPSTORE:
17838 if (get_attr_update (dep_insn) == UPDATE_YES
17839 && set_to_load_agen (dep_insn, insn))
17840 return 3;
17841 break;
17843 case TYPE_MUL:
17845 if (set_to_load_agen (dep_insn, insn))
17846 return 17;
17847 break;
17849 case TYPE_DIV:
17851 if (set_to_load_agen (dep_insn, insn))
17852 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17853 break;
17855 default:
17856 break;
17859 break;
17861 default:
17862 break;
17865 /* Fall out to return default cost. */
17867 break;
17869 case REG_DEP_OUTPUT:
17870 /* Output dependency; DEP_INSN writes a register that INSN writes some
17871 cycles later. */
17872 if ((rs6000_tune == PROCESSOR_POWER6)
17873 && recog_memoized (dep_insn)
17874 && (INSN_CODE (dep_insn) >= 0))
17876 attr_type = get_attr_type (insn);
17878 switch (attr_type)
17880 case TYPE_FP:
17881 case TYPE_FPSIMPLE:
17882 if (get_attr_type (dep_insn) == TYPE_FP
17883 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17884 return 1;
17885 break;
17886 default:
17887 break;
17890 /* Fall through, no cost for output dependency. */
17891 /* FALLTHRU */
17893 case REG_DEP_ANTI:
17894 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17895 cycles later. */
17896 return 0;
17898 default:
17899 gcc_unreachable ();
17902 return cost;
17905 /* Debug version of rs6000_adjust_cost. */
17907 static int
17908 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17909 int cost, unsigned int dw)
17911 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17913 if (ret != cost)
17915 const char *dep;
17917 switch (dep_type)
17919 default: dep = "unknown dependency"; break;
17920 case REG_DEP_TRUE: dep = "data dependency"; break;
17921 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17922 case REG_DEP_ANTI: dep = "anti dependency"; break;
17925 fprintf (stderr,
17926 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17927 "%s, insn:\n", ret, cost, dep);
17929 debug_rtx (insn);
17932 return ret;
17935 /* The function returns true if INSN is microcoded.
17936 Return false otherwise. */
17938 static bool
17939 is_microcoded_insn (rtx_insn *insn)
17941 if (!insn || !NONDEBUG_INSN_P (insn)
17942 || GET_CODE (PATTERN (insn)) == USE
17943 || GET_CODE (PATTERN (insn)) == CLOBBER)
17944 return false;
17946 if (rs6000_tune == PROCESSOR_CELL)
17947 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17949 if (rs6000_sched_groups
17950 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17952 enum attr_type type = get_attr_type (insn);
17953 if ((type == TYPE_LOAD
17954 && get_attr_update (insn) == UPDATE_YES
17955 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17956 || ((type == TYPE_LOAD || type == TYPE_STORE)
17957 && get_attr_update (insn) == UPDATE_YES
17958 && get_attr_indexed (insn) == INDEXED_YES)
17959 || type == TYPE_MFCR)
17960 return true;
17963 return false;
17966 /* The function returns true if INSN is cracked into 2 instructions
17967 by the processor (and therefore occupies 2 issue slots). */
17969 static bool
17970 is_cracked_insn (rtx_insn *insn)
17972 if (!insn || !NONDEBUG_INSN_P (insn)
17973 || GET_CODE (PATTERN (insn)) == USE
17974 || GET_CODE (PATTERN (insn)) == CLOBBER)
17975 return false;
17977 if (rs6000_sched_groups
17978 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17980 enum attr_type type = get_attr_type (insn);
17981 if ((type == TYPE_LOAD
17982 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17983 && get_attr_update (insn) == UPDATE_NO)
17984 || (type == TYPE_LOAD
17985 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17986 && get_attr_update (insn) == UPDATE_YES
17987 && get_attr_indexed (insn) == INDEXED_NO)
17988 || (type == TYPE_STORE
17989 && get_attr_update (insn) == UPDATE_YES
17990 && get_attr_indexed (insn) == INDEXED_NO)
17991 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17992 && get_attr_update (insn) == UPDATE_YES)
17993 || (type == TYPE_CR_LOGICAL
17994 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17995 || (type == TYPE_EXTS
17996 && get_attr_dot (insn) == DOT_YES)
17997 || (type == TYPE_SHIFT
17998 && get_attr_dot (insn) == DOT_YES
17999 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18000 || (type == TYPE_MUL
18001 && get_attr_dot (insn) == DOT_YES)
18002 || type == TYPE_DIV
18003 || (type == TYPE_INSERT
18004 && get_attr_size (insn) == SIZE_32))
18005 return true;
18008 return false;
18011 /* The function returns true if INSN can be issued only from
18012 the branch slot. */
18014 static bool
18015 is_branch_slot_insn (rtx_insn *insn)
18017 if (!insn || !NONDEBUG_INSN_P (insn)
18018 || GET_CODE (PATTERN (insn)) == USE
18019 || GET_CODE (PATTERN (insn)) == CLOBBER)
18020 return false;
18022 if (rs6000_sched_groups)
18024 enum attr_type type = get_attr_type (insn);
18025 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18026 return true;
18027 return false;
18030 return false;
18033 /* The function returns true if OUT_INSN sets a value that is
18034 used in the address generation computation of IN_INSN. */
18035 static bool
18036 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18038 rtx out_set, in_set;
18040 /* For performance reasons, only handle the simple case where
18041 both loads are a single_set. */
18042 out_set = single_set (out_insn);
18043 if (out_set)
18045 in_set = single_set (in_insn);
18046 if (in_set)
18047 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18050 return false;
18053 /* Try to determine base/offset/size parts of the given MEM.
18054 Return true if successful, false if all the values couldn't
18055 be determined.
18057 This function only looks for REG or REG+CONST address forms.
18058 REG+REG address form will return false. */
18060 static bool
18061 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18062 HOST_WIDE_INT *size)
18064 rtx addr_rtx;
18065 if (MEM_SIZE_KNOWN_P (mem))
18066 *size = MEM_SIZE (mem);
18067 else
18068 return false;
18070 addr_rtx = (XEXP (mem, 0));
18071 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18072 addr_rtx = XEXP (addr_rtx, 1);
18074 *offset = 0;
18075 while (GET_CODE (addr_rtx) == PLUS
18076 && CONST_INT_P (XEXP (addr_rtx, 1)))
18078 *offset += INTVAL (XEXP (addr_rtx, 1));
18079 addr_rtx = XEXP (addr_rtx, 0);
18081 if (!REG_P (addr_rtx))
18082 return false;
18084 *base = addr_rtx;
18085 return true;
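/* Sketch of what get_memref_parts accepts: for a MEM with a known
   8-byte size whose address is

       (plus:DI (plus:DI (reg:DI 3) (const_int 16)) (const_int 8))

   it returns *base = (reg:DI 3), *offset = 24, *size = 8.  An indexed
   (REG+REG) address makes it return false instead.  */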
18088 /* If the target storage locations of arguments MEM1 and MEM2 are
18089 adjacent, then return the argument that has the lower address.
18090 Otherwise, return NULL_RTX. */
18092 static rtx
18093 adjacent_mem_locations (rtx mem1, rtx mem2)
18095 rtx reg1, reg2;
18096 HOST_WIDE_INT off1, size1, off2, size2;
18098 if (MEM_P (mem1)
18099 && MEM_P (mem2)
18100 && get_memref_parts (mem1, &reg1, &off1, &size1)
18101 && get_memref_parts (mem2, &reg2, &off2, &size2)
18102 && REGNO (reg1) == REGNO (reg2))
18104 if (off1 + size1 == off2)
18105 return mem1;
18106 else if (off2 + size2 == off1)
18107 return mem2;
18110 return NULL_RTX;
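/* For example, an 8-byte MEM at (reg 1) and an 8-byte MEM at
   (plus (reg 1) (const_int 8)) are adjacent and the first is returned,
   since off1 + size1 == off2.  The bases must be the same register;
   nothing is inferred across distinct base registers.  */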
18113 /* This function returns true if it can be determined that the two MEM
18114 locations overlap by at least 1 byte based on base reg/offset/size. */
18116 static bool
18117 mem_locations_overlap (rtx mem1, rtx mem2)
18119 rtx reg1, reg2;
18120 HOST_WIDE_INT off1, size1, off2, size2;
18122 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18123 && get_memref_parts (mem2, &reg2, &off2, &size2))
18124 return ((REGNO (reg1) == REGNO (reg2))
18125 && (((off1 <= off2) && (off1 + size1 > off2))
18126 || ((off2 <= off1) && (off2 + size2 > off1))));
18128 return false;
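/* This is the usual half-open interval test: [off1, off1 + size1) and
   [off2, off2 + size2) overlap iff each starts before the other ends.
   For instance, an 8-byte access at offset 0 and a 4-byte access at
   offset 4 from the same base register overlap in bytes 4 through 7.  */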
18131 /* A C statement (sans semicolon) to update the integer scheduling
18132 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18133 INSN earlier, reduce the priority to execute INSN later. Do not
18134 define this macro if you do not need to adjust the scheduling
18135 priorities of insns. */
18137 static int
18138 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18140 rtx load_mem, str_mem;
18141 /* On machines (like the 750) which have asymmetric integer units,
18142 where one integer unit can do multiply and divides and the other
18143 can't, reduce the priority of multiply/divide so it is scheduled
18144 before other integer operations. */
18146 #if 0
18147 if (! INSN_P (insn))
18148 return priority;
18150 if (GET_CODE (PATTERN (insn)) == USE)
18151 return priority;
18153 switch (rs6000_tune) {
18154 case PROCESSOR_PPC750:
18155 switch (get_attr_type (insn))
18157 default:
18158 break;
18160 case TYPE_MUL:
18161 case TYPE_DIV:
18162 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18163 priority, priority);
18164 if (priority >= 0 && priority < 0x01000000)
18165 priority >>= 3;
18166 break;
18169 #endif
18171 if (insn_must_be_first_in_group (insn)
18172 && reload_completed
18173 && current_sched_info->sched_max_insns_priority
18174 && rs6000_sched_restricted_insns_priority)
18177 /* Prioritize insns that can be dispatched only in the first
18178 dispatch slot. */
18179 if (rs6000_sched_restricted_insns_priority == 1)
18180 /* Attach highest priority to insn. This means that in
18181 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
18182 precede 'priority' (critical path) considerations. */
18183 return current_sched_info->sched_max_insns_priority;
18184 else if (rs6000_sched_restricted_insns_priority == 2)
18185 /* Increase priority of insn by a minimal amount. This means that in
18186 haifa-sched.c:ready_sort(), only 'priority' (critical path)
18187 considerations precede dispatch-slot restriction considerations. */
18188 return (priority + 1);
18191 if (rs6000_tune == PROCESSOR_POWER6
18192 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18193 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18194 /* Attach highest priority to insn if the scheduler has just issued two
18195 stores and this instruction is a load, or two loads and this instruction
18196 is a store. Power6 wants loads and stores scheduled alternately
18197 when possible */
18198 return current_sched_info->sched_max_insns_priority;
18200 return priority;
18203 /* Return true if the instruction is nonpipelined on the Cell. */
18204 static bool
18205 is_nonpipeline_insn (rtx_insn *insn)
18207 enum attr_type type;
18208 if (!insn || !NONDEBUG_INSN_P (insn)
18209 || GET_CODE (PATTERN (insn)) == USE
18210 || GET_CODE (PATTERN (insn)) == CLOBBER)
18211 return false;
18213 type = get_attr_type (insn);
18214 if (type == TYPE_MUL
18215 || type == TYPE_DIV
18216 || type == TYPE_SDIV
18217 || type == TYPE_DDIV
18218 || type == TYPE_SSQRT
18219 || type == TYPE_DSQRT
18220 || type == TYPE_MFCR
18221 || type == TYPE_MFCRF
18222 || type == TYPE_MFJMPR)
18224 return true;
18226 return false;
18230 /* Return how many instructions the machine can issue per cycle. */
18232 static int
18233 rs6000_issue_rate (void)
18235 /* Unless scheduling for register pressure, use issue rate of 1 for
18236 first scheduling pass to decrease degradation. */
18237 if (!reload_completed && !flag_sched_pressure)
18238 return 1;
18240 switch (rs6000_tune) {
18241 case PROCESSOR_RS64A:
18242 case PROCESSOR_PPC601: /* ? */
18243 case PROCESSOR_PPC7450:
18244 return 3;
18245 case PROCESSOR_PPC440:
18246 case PROCESSOR_PPC603:
18247 case PROCESSOR_PPC750:
18248 case PROCESSOR_PPC7400:
18249 case PROCESSOR_PPC8540:
18250 case PROCESSOR_PPC8548:
18251 case PROCESSOR_CELL:
18252 case PROCESSOR_PPCE300C2:
18253 case PROCESSOR_PPCE300C3:
18254 case PROCESSOR_PPCE500MC:
18255 case PROCESSOR_PPCE500MC64:
18256 case PROCESSOR_PPCE5500:
18257 case PROCESSOR_PPCE6500:
18258 case PROCESSOR_TITAN:
18259 return 2;
18260 case PROCESSOR_PPC476:
18261 case PROCESSOR_PPC604:
18262 case PROCESSOR_PPC604e:
18263 case PROCESSOR_PPC620:
18264 case PROCESSOR_PPC630:
18265 return 4;
18266 case PROCESSOR_POWER4:
18267 case PROCESSOR_POWER5:
18268 case PROCESSOR_POWER6:
18269 case PROCESSOR_POWER7:
18270 return 5;
18271 case PROCESSOR_POWER8:
18272 return 7;
18273 case PROCESSOR_POWER9:
18274 return 6;
18275 case PROCESSOR_POWER10:
18276 return 8;
18277 default:
18278 return 1;
18282 /* Return how many instructions to look ahead for better insn
18283 scheduling. */
18285 static int
18286 rs6000_use_sched_lookahead (void)
18288 switch (rs6000_tune)
18290 case PROCESSOR_PPC8540:
18291 case PROCESSOR_PPC8548:
18292 return 4;
18294 case PROCESSOR_CELL:
18295 return (reload_completed ? 8 : 0);
18297 default:
18298 return 0;
18302 /* We are choosing insn from the ready queue. Return zero if INSN can be
18303 chosen. */
18304 static int
18305 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18307 if (ready_index == 0)
18308 return 0;
18310 if (rs6000_tune != PROCESSOR_CELL)
18311 return 0;
18313 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18315 if (!reload_completed
18316 || is_nonpipeline_insn (insn)
18317 || is_microcoded_insn (insn))
18318 return 1;
18320 return 0;
18323 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18324 and return true. */
18326 static bool
18327 find_mem_ref (rtx pat, rtx *mem_ref)
18329 const char * fmt;
18330 int i, j;
18332 /* stack_tie does not produce any real memory traffic. */
18333 if (tie_operand (pat, VOIDmode))
18334 return false;
18336 if (MEM_P (pat))
18338 *mem_ref = pat;
18339 return true;
18342 /* Recursively process the pattern. */
18343 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18345 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18347 if (fmt[i] == 'e')
18349 if (find_mem_ref (XEXP (pat, i), mem_ref))
18350 return true;
18352 else if (fmt[i] == 'E')
18353 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18355 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18356 return true;
18360 return false;
18363 /* Determine if PAT is a PATTERN of a load insn. */
18365 static bool
18366 is_load_insn1 (rtx pat, rtx *load_mem)
18368 if (!pat || pat == NULL_RTX)
18369 return false;
18371 if (GET_CODE (pat) == SET)
18373 if (REG_P (SET_DEST (pat)))
18374 return find_mem_ref (SET_SRC (pat), load_mem);
18375 else
18376 return false;
18379 if (GET_CODE (pat) == PARALLEL)
18381 int i;
18383 for (i = 0; i < XVECLEN (pat, 0); i++)
18384 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18385 return true;
18388 return false;
18391 /* Determine if INSN loads from memory. */
18393 static bool
18394 is_load_insn (rtx insn, rtx *load_mem)
18396 if (!insn || !INSN_P (insn))
18397 return false;
18399 if (CALL_P (insn))
18400 return false;
18402 return is_load_insn1 (PATTERN (insn), load_mem);
18405 /* Determine if PAT is a PATTERN of a store insn. */
18407 static bool
18408 is_store_insn1 (rtx pat, rtx *str_mem)
18410 if (!pat || pat == NULL_RTX)
18411 return false;
18413 if (GET_CODE (pat) == SET)
18415 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18416 return find_mem_ref (SET_DEST (pat), str_mem);
18417 else
18418 return false;
18421 if (GET_CODE (pat) == PARALLEL)
18423 int i;
18425 for (i = 0; i < XVECLEN (pat, 0); i++)
18426 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18427 return true;
18430 return false;
18433 /* Determine if INSN stores to memory. */
18435 static bool
18436 is_store_insn (rtx insn, rtx *str_mem)
18438 if (!insn || !INSN_P (insn))
18439 return false;
18441 return is_store_insn1 (PATTERN (insn), str_mem);
18444 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18446 static bool
18447 is_power9_pairable_vec_type (enum attr_type type)
18449 switch (type)
18451 case TYPE_VECSIMPLE:
18452 case TYPE_VECCOMPLEX:
18453 case TYPE_VECDIV:
18454 case TYPE_VECCMP:
18455 case TYPE_VECPERM:
18456 case TYPE_VECFLOAT:
18457 case TYPE_VECFDIV:
18458 case TYPE_VECDOUBLE:
18459 return true;
18460 default:
18461 break;
18463 return false;
18466 /* Returns whether the dependence between INSN and NEXT is considered
18467 costly by the given target. */
18469 static bool
18470 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18472 rtx insn;
18473 rtx next;
18474 rtx load_mem, str_mem;
18476 /* If the flag is not enabled - no dependence is considered costly;
18477 allow all dependent insns in the same group.
18478 This is the most aggressive option. */
18479 if (rs6000_sched_costly_dep == no_dep_costly)
18480 return false;
18482 /* If the flag is set to 1 - a dependence is always considered costly;
18483 do not allow dependent instructions in the same group.
18484 This is the most conservative option. */
18485 if (rs6000_sched_costly_dep == all_deps_costly)
18486 return true;
18488 insn = DEP_PRO (dep);
18489 next = DEP_CON (dep);
18491 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18492 && is_load_insn (next, &load_mem)
18493 && is_store_insn (insn, &str_mem))
18494 /* Prevent load after store in the same group. */
18495 return true;
18497 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18498 && is_load_insn (next, &load_mem)
18499 && is_store_insn (insn, &str_mem)
18500 && DEP_TYPE (dep) == REG_DEP_TRUE
18501 && mem_locations_overlap(str_mem, load_mem))
18502 /* Prevent load after store in the same group if it is a true
18503 dependence. */
18504 return true;
18506 /* The flag is set to X; dependences with latency >= X are considered costly,
18507 and will not be scheduled in the same group. */
18508 if (rs6000_sched_costly_dep <= max_dep_latency
18509 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18510 return true;
18512 return false;
18515 /* Return the next insn after INSN that is found before TAIL is reached,
18516 skipping any "non-active" insns - insns that will not actually occupy
18517 an issue slot. Return NULL_RTX if such an insn is not found. */
18519 static rtx_insn *
18520 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18522 if (insn == NULL_RTX || insn == tail)
18523 return NULL;
18525 while (1)
18527 insn = NEXT_INSN (insn);
18528 if (insn == NULL_RTX || insn == tail)
18529 return NULL;
18531 if (CALL_P (insn)
18532 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18533 || (NONJUMP_INSN_P (insn)
18534 && GET_CODE (PATTERN (insn)) != USE
18535 && GET_CODE (PATTERN (insn)) != CLOBBER
18536 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18537 break;
18539 return insn;
18542 /* Move instruction at POS to the end of the READY list. */
18544 static void
18545 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18547 rtx_insn *tmp;
18548 int i;
18550 tmp = ready[pos];
18551 for (i = pos; i < lastpos; i++)
18552 ready[i] = ready[i + 1];
18553 ready[lastpos] = tmp;
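/* For example, with ready = {A, B, C, D}, pos = 1 and lastpos = 3, the
   loop slides C and D down one slot and the list becomes {A, C, D, B}.
   The ready list is consumed from the end, so moving an insn to the end
   makes it the next one chosen.  */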
18556 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18558 static int
18559 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18561 /* For Power6, we need to handle some special cases to try to keep the
18562 store queue from overflowing and triggering expensive flushes.
18564 This code monitors how load and store instructions are being issued
18565 and skews the ready list one way or the other to increase the likelihood
18566 that a desired instruction is issued at the proper time.
18568 A couple of things are done. First, we maintain a "load_store_pendulum"
18569 to track the current state of load/store issue.
18571 - If the pendulum is at zero, then no loads or stores have been
18572 issued in the current cycle so we do nothing.
18574 - If the pendulum is 1, then a single load has been issued in this
18575 cycle and we attempt to locate another load in the ready list to
18576 issue with it.
18578 - If the pendulum is -2, then two stores have already been
18579 issued in this cycle, so we increase the priority of the first load
18580 in the ready list to increase its likelihood of being chosen first
18581 in the next cycle.
18583 - If the pendulum is -1, then a single store has been issued in this
18584 cycle and we attempt to locate another store in the ready list to
18585 issue with it, preferring a store to an adjacent memory location to
18586 facilitate store pairing in the store queue.
18588 - If the pendulum is 2, then two loads have already been
18589 issued in this cycle, so we increase the priority of the first store
18590 in the ready list to increase its likelihood of being chosen first
18591 in the next cycle.
18593 - If the pendulum < -2 or > 2, then do nothing.
18595 Note: This code covers the most common scenarios. There exist non
18596 load/store instructions which make use of the LSU and which
18597 would need to be accounted for to strictly model the behavior
18598 of the machine. Those instructions are currently unaccounted
18599 for to help minimize compile time overhead of this code.
18601 int pos;
18602 rtx load_mem, str_mem;
18604 if (is_store_insn (last_scheduled_insn, &str_mem))
18605 /* Issuing a store, swing the load_store_pendulum to the left */
18606 load_store_pendulum--;
18607 else if (is_load_insn (last_scheduled_insn, &load_mem))
18608 /* Issuing a load, swing the load_store_pendulum to the right */
18609 load_store_pendulum++;
18610 else
18611 return cached_can_issue_more;
18613 /* If the pendulum is balanced, or there is only one instruction on
18614 the ready list, then all is well, so return. */
18615 if ((load_store_pendulum == 0) || (lastpos <= 0))
18616 return cached_can_issue_more;
18618 if (load_store_pendulum == 1)
18620 /* A load has been issued in this cycle. Scan the ready list
18621 for another load to issue with it */
18622 pos = lastpos;
18624 while (pos >= 0)
18626 if (is_load_insn (ready[pos], &load_mem))
18628 /* Found a load. Move it to the head of the ready list,
18629 and adjust its priority so that it is more likely to
18630 stay there */
18631 move_to_end_of_ready (ready, pos, lastpos);
18633 if (!sel_sched_p ()
18634 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18635 INSN_PRIORITY (ready[lastpos])++;
18636 break;
18638 pos--;
18641 else if (load_store_pendulum == -2)
18643 /* Two stores have been issued in this cycle. Increase the
18644 priority of the first load in the ready list to favor it for
18645 issuing in the next cycle. */
18646 pos = lastpos;
18648 while (pos >= 0)
18650 if (is_load_insn (ready[pos], &load_mem)
18651 && !sel_sched_p ()
18652 && INSN_PRIORITY_KNOWN (ready[pos]))
18654 INSN_PRIORITY (ready[pos])++;
18656 /* Adjust the pendulum to account for the fact that a load
18657 was found and increased in priority. This is to prevent
18658 increasing the priority of multiple loads */
18659 load_store_pendulum--;
18661 break;
18663 pos--;
18666 else if (load_store_pendulum == -1)
18668 /* A store has been issued in this cycle. Scan the ready list for
18669 another store to issue with it, preferring a store to an adjacent
18670 memory location */
18671 int first_store_pos = -1;
18673 pos = lastpos;
18675 while (pos >= 0)
18677 if (is_store_insn (ready[pos], &str_mem))
18679 rtx str_mem2;
18680 /* Maintain the index of the first store found on the
18681 list */
18682 if (first_store_pos == -1)
18683 first_store_pos = pos;
18685 if (is_store_insn (last_scheduled_insn, &str_mem2)
18686 && adjacent_mem_locations (str_mem, str_mem2))
18688 /* Found an adjacent store. Move it to the head of the
18689 ready list, and adjust its priority so that it is
18690 more likely to stay there */
18691 move_to_end_of_ready (ready, pos, lastpos);
18693 if (!sel_sched_p ()
18694 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18695 INSN_PRIORITY (ready[lastpos])++;
18697 first_store_pos = -1;
18699 break;
18702 pos--;
18705 if (first_store_pos >= 0)
18707 /* An adjacent store wasn't found, but a non-adjacent store was,
18708 so move the non-adjacent store to the front of the ready
18709 list, and adjust its priority so that it is more likely to
18710 stay there. */
18711 move_to_end_of_ready (ready, first_store_pos, lastpos);
18712 if (!sel_sched_p ()
18713 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18714 INSN_PRIORITY (ready[lastpos])++;
18717 else if (load_store_pendulum == 2)
18719 /* Two loads have been issued in this cycle. Increase the priority
18720 of the first store in the ready list to favor it for issuing in
18721 the next cycle. */
18722 pos = lastpos;
18724 while (pos >= 0)
18726 if (is_store_insn (ready[pos], &str_mem)
18727 && !sel_sched_p ()
18728 && INSN_PRIORITY_KNOWN (ready[pos]))
18730 INSN_PRIORITY (ready[pos])++;
18732 /* Adjust the pendulum to account for the fact that a store
18733 was found and increased in priority. This is to prevent
18734 increasing the priority of multiple stores */
18735 load_store_pendulum++;
18737 break;
18739 pos--;
18743 return cached_can_issue_more;
18746 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18748 static int
18749 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18751 int pos;
18752 enum attr_type type, type2;
18754 type = get_attr_type (last_scheduled_insn);
18756 /* Try to issue fixed point divides back-to-back in pairs so they will be
18757 routed to separate execution units and execute in parallel. */
18758 if (type == TYPE_DIV && divide_cnt == 0)
18760 /* First divide has been scheduled. */
18761 divide_cnt = 1;
18763 /* Scan the ready list looking for another divide, if found move it
18764 to the end of the list so it is chosen next. */
18765 pos = lastpos;
18766 while (pos >= 0)
18768 if (recog_memoized (ready[pos]) >= 0
18769 && get_attr_type (ready[pos]) == TYPE_DIV)
18771 move_to_end_of_ready (ready, pos, lastpos);
18772 break;
18774 pos--;
18777 else
18779 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18780 divide_cnt = 0;
18782 /* The best dispatch throughput for vector and vector load insns can be
18783 achieved by interleaving a vector and vector load such that they'll
18784 dispatch to the same superslice. If this pairing cannot be achieved
18785 then it is best to pair vector insns together and vector load insns
18786 together.
18788 To aid in this pairing, vec_pairing maintains the current state with
18789 the following values:
18791 0 : Initial state, no vecload/vector pairing has been started.
18793 1 : A vecload or vector insn has been issued and a candidate for
18794 pairing has been found and moved to the end of the ready
18795 list. */
18796 if (type == TYPE_VECLOAD)
18798 /* Issued a vecload. */
18799 if (vec_pairing == 0)
18801 int vecload_pos = -1;
18802 /* We issued a single vecload, look for a vector insn to pair it
18803 with. If one isn't found, try to pair another vecload. */
18804 pos = lastpos;
18805 while (pos >= 0)
18807 if (recog_memoized (ready[pos]) >= 0)
18809 type2 = get_attr_type (ready[pos]);
18810 if (is_power9_pairable_vec_type (type2))
18812 /* Found a vector insn to pair with, move it to the
18813 end of the ready list so it is scheduled next. */
18814 move_to_end_of_ready (ready, pos, lastpos);
18815 vec_pairing = 1;
18816 return cached_can_issue_more;
18818 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18819 /* Remember position of first vecload seen. */
18820 vecload_pos = pos;
18822 pos--;
18824 if (vecload_pos >= 0)
18826 /* Didn't find a vector to pair with but did find a vecload,
18827 move it to the end of the ready list. */
18828 move_to_end_of_ready (ready, vecload_pos, lastpos);
18829 vec_pairing = 1;
18830 return cached_can_issue_more;
18834 else if (is_power9_pairable_vec_type (type))
18836 /* Issued a vector operation. */
18837 if (vec_pairing == 0)
18839 int vec_pos = -1;
18840 /* We issued a single vector insn, look for a vecload to pair it
18841 with. If one isn't found, try to pair another vector. */
18842 pos = lastpos;
18843 while (pos >= 0)
18845 if (recog_memoized (ready[pos]) >= 0)
18847 type2 = get_attr_type (ready[pos]);
18848 if (type2 == TYPE_VECLOAD)
18850 /* Found a vecload insn to pair with, move it to the
18851 end of the ready list so it is scheduled next. */
18852 move_to_end_of_ready (ready, pos, lastpos);
18853 vec_pairing = 1;
18854 return cached_can_issue_more;
18856 else if (is_power9_pairable_vec_type (type2)
18857 && vec_pos == -1)
18858 /* Remember position of first vector insn seen. */
18859 vec_pos = pos;
18861 pos--;
18863 if (vec_pos >= 0)
18865 /* Didn't find a vecload to pair with but did find a vector
18866 insn, move it to the end of the ready list. */
18867 move_to_end_of_ready (ready, vec_pos, lastpos);
18868 vec_pairing = 1;
18869 return cached_can_issue_more;
18874 /* We've either finished a vec/vecload pair, couldn't find an insn to
18875 continue the current pair, or the last insn had nothing to do
18876 with pairing.  In any case, reset the state. */
18877 vec_pairing = 0;
18880 return cached_can_issue_more;
18883 /* We are about to begin issuing insns for this clock cycle. */
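   This implements TARGET_SCHED_REORDER; the value returned is the
   number of insns the scheduler may issue in this cycle.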
18885 static int
18886 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18887 rtx_insn **ready ATTRIBUTE_UNUSED,
18888 int *pn_ready ATTRIBUTE_UNUSED,
18889 int clock_var ATTRIBUTE_UNUSED)
18891 int n_ready = *pn_ready;
18893 if (sched_verbose)
18894 fprintf (dump, "// rs6000_sched_reorder :\n");
18896 /* Reorder the ready list, if the insn due to issue next
18897 is a non-pipelined insn. */
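  /* The ready list is kept with the most desirable insn last, so
     ready[n_ready - 1] is the insn the scheduler will issue next;
     swapping the last two entries defers the non-pipelined insn.  */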
18898 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18900 if (is_nonpipeline_insn (ready[n_ready - 1])
18901 && (recog_memoized (ready[n_ready - 2]) > 0))
18902 /* Simply swap first two insns. */
18903 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18906 if (rs6000_tune == PROCESSOR_POWER6)
18907 load_store_pendulum = 0;
18909 return rs6000_issue_rate ();
18912 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18914 static int
18915 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18916 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18918 if (sched_verbose)
18919 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18921 /* Do Power6 dependent reordering if necessary. */
18922 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18923 return power6_sched_reorder2 (ready, *pn_ready - 1);
18925 /* Do Power9 dependent reordering if necessary. */
18926 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18927 && recog_memoized (last_scheduled_insn) >= 0)
18928 return power9_sched_reorder2 (ready, *pn_ready - 1);
18930 return cached_can_issue_more;
18933 /* Return whether the presence of INSN causes a dispatch group termination
18934 of group WHICH_GROUP.
18936 If WHICH_GROUP == current_group, this function will return true if INSN
18937 causes the termination of the current group (i.e., the dispatch group to
18938 which INSN belongs). This means that INSN will be the last insn in the
18939 group it belongs to.
18941 If WHICH_GROUP == previous_group, this function will return true if INSN
18942 causes the termination of the previous group (i.e., the dispatch group that
18943 precedes the group to which INSN belongs). This means that INSN will be
18944 the first insn in the group it belongs to. */
18946 static bool
18947 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18949 bool first, last;
18951 if (! insn)
18952 return false;
18954 first = insn_must_be_first_in_group (insn);
18955 last = insn_must_be_last_in_group (insn);
18957 if (first && last)
18958 return true;
18960 if (which_group == current_group)
18961 return last;
18962 else if (which_group == previous_group)
18963 return first;
18965 return false;
18969 static bool
18970 insn_must_be_first_in_group (rtx_insn *insn)
18972 enum attr_type type;
18974 if (!insn
18975 || NOTE_P (insn)
18976 || DEBUG_INSN_P (insn)
18977 || GET_CODE (PATTERN (insn)) == USE
18978 || GET_CODE (PATTERN (insn)) == CLOBBER)
18979 return false;
18981 switch (rs6000_tune)
18983 case PROCESSOR_POWER5:
18984 if (is_cracked_insn (insn))
18985 return true;
18986 /* FALLTHRU */
18987 case PROCESSOR_POWER4:
18988 if (is_microcoded_insn (insn))
18989 return true;
18991 if (!rs6000_sched_groups)
18992 return false;
18994 type = get_attr_type (insn);
18996 switch (type)
18998 case TYPE_MFCR:
18999 case TYPE_MFCRF:
19000 case TYPE_MTCR:
19001 case TYPE_CR_LOGICAL:
19002 case TYPE_MTJMPR:
19003 case TYPE_MFJMPR:
19004 case TYPE_DIV:
19005 case TYPE_LOAD_L:
19006 case TYPE_STORE_C:
19007 case TYPE_ISYNC:
19008 case TYPE_SYNC:
19009 return true;
19010 default:
19011 break;
19013 break;
19014 case PROCESSOR_POWER6:
19015 type = get_attr_type (insn);
19017 switch (type)
19019 case TYPE_EXTS:
19020 case TYPE_CNTLZ:
19021 case TYPE_TRAP:
19022 case TYPE_MUL:
19023 case TYPE_INSERT:
19024 case TYPE_FPCOMPARE:
19025 case TYPE_MFCR:
19026 case TYPE_MTCR:
19027 case TYPE_MFJMPR:
19028 case TYPE_MTJMPR:
19029 case TYPE_ISYNC:
19030 case TYPE_SYNC:
19031 case TYPE_LOAD_L:
19032 case TYPE_STORE_C:
19033 return true;
19034 case TYPE_SHIFT:
19035 if (get_attr_dot (insn) == DOT_NO
19036 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19037 return true;
19038 else
19039 break;
19040 case TYPE_DIV:
19041 if (get_attr_size (insn) == SIZE_32)
19042 return true;
19043 else
19044 break;
19045 case TYPE_LOAD:
19046 case TYPE_STORE:
19047 case TYPE_FPLOAD:
19048 case TYPE_FPSTORE:
19049 if (get_attr_update (insn) == UPDATE_YES)
19050 return true;
19051 else
19052 break;
19053 default:
19054 break;
19056 break;
19057 case PROCESSOR_POWER7:
19058 type = get_attr_type (insn);
19060 switch (type)
19062 case TYPE_CR_LOGICAL:
19063 case TYPE_MFCR:
19064 case TYPE_MFCRF:
19065 case TYPE_MTCR:
19066 case TYPE_DIV:
19067 case TYPE_ISYNC:
19068 case TYPE_LOAD_L:
19069 case TYPE_STORE_C:
19070 case TYPE_MFJMPR:
19071 case TYPE_MTJMPR:
19072 return true;
19073 case TYPE_MUL:
19074 case TYPE_SHIFT:
19075 case TYPE_EXTS:
19076 if (get_attr_dot (insn) == DOT_YES)
19077 return true;
19078 else
19079 break;
19080 case TYPE_LOAD:
19081 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19082 || get_attr_update (insn) == UPDATE_YES)
19083 return true;
19084 else
19085 break;
19086 case TYPE_STORE:
19087 case TYPE_FPLOAD:
19088 case TYPE_FPSTORE:
19089 if (get_attr_update (insn) == UPDATE_YES)
19090 return true;
19091 else
19092 break;
19093 default:
19094 break;
19096 break;
19097 case PROCESSOR_POWER8:
19098 type = get_attr_type (insn);
19100 switch (type)
19102 case TYPE_CR_LOGICAL:
19103 case TYPE_MFCR:
19104 case TYPE_MFCRF:
19105 case TYPE_MTCR:
19106 case TYPE_SYNC:
19107 case TYPE_ISYNC:
19108 case TYPE_LOAD_L:
19109 case TYPE_STORE_C:
19110 case TYPE_VECSTORE:
19111 case TYPE_MFJMPR:
19112 case TYPE_MTJMPR:
19113 return true;
19114 case TYPE_SHIFT:
19115 case TYPE_EXTS:
19116 case TYPE_MUL:
19117 if (get_attr_dot (insn) == DOT_YES)
19118 return true;
19119 else
19120 break;
19121 case TYPE_LOAD:
19122 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19123 || get_attr_update (insn) == UPDATE_YES)
19124 return true;
19125 else
19126 break;
19127 case TYPE_STORE:
19128 if (get_attr_update (insn) == UPDATE_YES
19129 && get_attr_indexed (insn) == INDEXED_YES)
19130 return true;
19131 else
19132 break;
19133 default:
19134 break;
19136 break;
19137 default:
19138 break;
19141 return false;
19144 static bool
19145 insn_must_be_last_in_group (rtx_insn *insn)
19147 enum attr_type type;
19149 if (!insn
19150 || NOTE_P (insn)
19151 || DEBUG_INSN_P (insn)
19152 || GET_CODE (PATTERN (insn)) == USE
19153 || GET_CODE (PATTERN (insn)) == CLOBBER)
19154 return false;
19156 switch (rs6000_tune)
19157 case PROCESSOR_POWER4:
19158 case PROCESSOR_POWER5:
19159 if (is_microcoded_insn (insn))
19160 return true;
19162 if (is_branch_slot_insn (insn))
19163 return true;
19165 break;
19166 case PROCESSOR_POWER6:
19167 type = get_attr_type (insn);
19169 switch (type)
19171 case TYPE_EXTS:
19172 case TYPE_CNTLZ:
19173 case TYPE_TRAP:
19174 case TYPE_MUL:
19175 case TYPE_FPCOMPARE:
19176 case TYPE_MFCR:
19177 case TYPE_MTCR:
19178 case TYPE_MFJMPR:
19179 case TYPE_MTJMPR:
19180 case TYPE_ISYNC:
19181 case TYPE_SYNC:
19182 case TYPE_LOAD_L:
19183 case TYPE_STORE_C:
19184 return true;
19185 case TYPE_SHIFT:
19186 if (get_attr_dot (insn) == DOT_NO
19187 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19188 return true;
19189 else
19190 break;
19191 case TYPE_DIV:
19192 if (get_attr_size (insn) == SIZE_32)
19193 return true;
19194 else
19195 break;
19196 default:
19197 break;
19199 break;
19200 case PROCESSOR_POWER7:
19201 type = get_attr_type (insn);
19203 switch (type)
19205 case TYPE_ISYNC:
19206 case TYPE_SYNC:
19207 case TYPE_LOAD_L:
19208 case TYPE_STORE_C:
19209 return true;
19210 case TYPE_LOAD:
19211 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19212 && get_attr_update (insn) == UPDATE_YES)
19213 return true;
19214 else
19215 break;
19216 case TYPE_STORE:
19217 if (get_attr_update (insn) == UPDATE_YES
19218 && get_attr_indexed (insn) == INDEXED_YES)
19219 return true;
19220 else
19221 break;
19222 default:
19223 break;
19225 break;
19226 case PROCESSOR_POWER8:
19227 type = get_attr_type (insn);
19229 switch (type)
19231 case TYPE_MFCR:
19232 case TYPE_MTCR:
19233 case TYPE_ISYNC:
19234 case TYPE_SYNC:
19235 case TYPE_LOAD_L:
19236 case TYPE_STORE_C:
19237 return true;
19238 case TYPE_LOAD:
19239 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19240 && get_attr_update (insn) == UPDATE_YES)
19241 return true;
19242 else
19243 break;
19244 case TYPE_STORE:
19245 if (get_attr_update (insn) == UPDATE_YES
19246 && get_attr_indexed (insn) == INDEXED_YES)
19247 return true;
19248 else
19249 break;
19250 default:
19251 break;
19253 break;
19254 default:
19255 break;
19258 return false;
19261 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19262 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19264 static bool
19265 is_costly_group (rtx *group_insns, rtx next_insn)
19267 int i;
19268 int issue_rate = rs6000_issue_rate ();
19270 for (i = 0; i < issue_rate; i++)
19272 sd_iterator_def sd_it;
19273 dep_t dep;
19274 rtx insn = group_insns[i];
19276 if (!insn)
19277 continue;
19279 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19281 rtx next = DEP_CON (dep);
19283 if (next == next_insn
19284 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19285 return true;
19289 return false;
19292 /* Utility of the function redefine_groups.
19293 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19294 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19295 to keep it "far" (in a separate group) from GROUP_INSNS, using
19296 one of the following schemes, depending on the value of the flag
19297 -minsert-sched-nops = X:
19298 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19299 in order to force NEXT_INSN into a separate group.
19300 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19301 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19302 insertion (has a group just ended, how many vacant issue slots remain in the
19303 last group, and how many dispatch groups were encountered so far). */
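 /* For example, with issue_rate == 4 and one insn already issued in the
    current group (can_issue_more == 3), scheme (1) emits two nops when
    NEXT_INSN is not a branch (the branch slot need not be filled), or a
    single group-ending nop on POWER6/7/8, while scheme (2) emits exactly
    X nops regardless of the group state.  */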
19305 static int
19306 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19307 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19308 int *group_count)
19310 rtx nop;
19311 bool force;
19312 int issue_rate = rs6000_issue_rate ();
19313 bool end = *group_end;
19314 int i;
19316 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19317 return can_issue_more;
19319 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19320 return can_issue_more;
19322 force = is_costly_group (group_insns, next_insn);
19323 if (!force)
19324 return can_issue_more;
19326 if (sched_verbose > 6)
19327 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
19328 *group_count, can_issue_more);
19330 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19332 if (*group_end)
19333 can_issue_more = 0;
19335 /* Since only a branch can be issued in the last issue_slot, it is
19336 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19337 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19338 in this case the last nop will start a new group and the branch
19339 will be forced to the new group. */
19340 if (can_issue_more && !is_branch_slot_insn (next_insn))
19341 can_issue_more--;
19343 /* Do we have a special group ending nop? */
19344 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19345 || rs6000_tune == PROCESSOR_POWER8)
19347 nop = gen_group_ending_nop ();
19348 emit_insn_before (nop, next_insn);
19349 can_issue_more = 0;
19351 else
19352 while (can_issue_more > 0)
19354 nop = gen_nop ();
19355 emit_insn_before (nop, next_insn);
19356 can_issue_more--;
19359 *group_end = true;
19360 return 0;
19363 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19365 int n_nops = rs6000_sched_insert_nops;
19367 /* Nops can't be issued from the branch slot, so the effective
19368 issue_rate for nops is 'issue_rate - 1'. */
19369 if (can_issue_more == 0)
19370 can_issue_more = issue_rate;
19371 can_issue_more--;
19372 if (can_issue_more == 0)
19374 can_issue_more = issue_rate - 1;
19375 (*group_count)++;
19376 end = true;
19377 for (i = 0; i < issue_rate; i++)
19379 group_insns[i] = 0;
19383 while (n_nops > 0)
19385 nop = gen_nop ();
19386 emit_insn_before (nop, next_insn);
19387 if (can_issue_more == issue_rate - 1) /* new group begins */
19388 end = false;
19389 can_issue_more--;
19390 if (can_issue_more == 0)
19392 can_issue_more = issue_rate - 1;
19393 (*group_count)++;
19394 end = true;
19395 for (i = 0; i < issue_rate; i++)
19397 group_insns[i] = 0;
19400 n_nops--;
19403 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19404 can_issue_more++;
19406 /* Is next_insn going to start a new group? */
19407 *group_end
19408 = (end
19409 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19410 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19411 || (can_issue_more < issue_rate
19412 && insn_terminates_group_p (next_insn, previous_group)));
19413 if (*group_end && end)
19414 (*group_count)--;
19416 if (sched_verbose > 6)
19417 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19418 *group_count, can_issue_more);
19419 return can_issue_more;
19422 return can_issue_more;
19425 /* This function tries to synchronize the dispatch groups that the compiler
19426 "sees" with the dispatch groups that the processor dispatcher is expected
19427 to form in practice. It tries to achieve this synchronization by forcing the
19428 estimated processor grouping on the compiler (as opposed to the function
19429 'pad_groups' which tries to force the scheduler's grouping on the processor).
19431 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19432 examines the (estimated) dispatch groups that will be formed by the processor
19433 dispatcher. It marks these group boundaries to reflect the estimated
19434 processor grouping, overriding the grouping that the scheduler had marked.
19435 Depending on the value of the flag '-minsert-sched-nops' this function can
19436 force certain insns into separate groups or force a certain distance between
19437 them by inserting nops, for example, if there exists a "costly dependence"
19438 between the insns.
19440 The function estimates the group boundaries that the processor will form as
19441 follows: It keeps track of how many vacant issue slots are available after
19442 each insn. A subsequent insn will start a new group if one of the following
19443 4 cases applies:
19444 - no more vacant issue slots remain in the current dispatch group.
19445 - only the last issue slot, which is the branch slot, is vacant, but the next
19446 insn is not a branch.
19447 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19448 which means that a cracked insn (which occupies two issue slots) can't be
19449 issued in this group.
19450 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19451 start a new group. */
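 /* For example, with issue_rate == 5 (POWER4/5 dispatch groups have five
    slots, the last reserved for branches), after four non-branch insns
    only the branch slot remains: a branch fills it and ends the group,
    while any other insn falls under the second case above and is pushed
    into a new group.  */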
19453 static int
19454 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19455 rtx_insn *tail)
19457 rtx_insn *insn, *next_insn;
19458 int issue_rate;
19459 int can_issue_more;
19460 int slot, i;
19461 bool group_end;
19462 int group_count = 0;
19463 rtx *group_insns;
19465 /* Initialize. */
19466 issue_rate = rs6000_issue_rate ();
19467 group_insns = XALLOCAVEC (rtx, issue_rate);
19468 for (i = 0; i < issue_rate; i++)
19470 group_insns[i] = 0;
19472 can_issue_more = issue_rate;
19473 slot = 0;
19474 insn = get_next_active_insn (prev_head_insn, tail);
19475 group_end = false;
19477 while (insn != NULL_RTX)
19479 slot = (issue_rate - can_issue_more);
19480 group_insns[slot] = insn;
19481 can_issue_more =
19482 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19483 if (insn_terminates_group_p (insn, current_group))
19484 can_issue_more = 0;
19486 next_insn = get_next_active_insn (insn, tail);
19487 if (next_insn == NULL_RTX)
19488 return group_count + 1;
19490 /* Is next_insn going to start a new group? */
19491 group_end
19492 = (can_issue_more == 0
19493 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19494 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19495 || (can_issue_more < issue_rate
19496 && insn_terminates_group_p (next_insn, previous_group)));
19498 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19499 next_insn, &group_end, can_issue_more,
19500 &group_count);
19502 if (group_end)
19504 group_count++;
19505 can_issue_more = 0;
19506 for (i = 0; i < issue_rate; i++)
19508 group_insns[i] = 0;
19512 if (GET_MODE (next_insn) == TImode && can_issue_more)
19513 PUT_MODE (next_insn, VOIDmode);
19514 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19515 PUT_MODE (next_insn, TImode);
19517 insn = next_insn;
19518 if (can_issue_more == 0)
19519 can_issue_more = issue_rate;
19520 } /* while */
19522 return group_count;
19525 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19526 dispatch group boundaries that the scheduler had marked. Pad with nops
19527 any dispatch groups which have vacant issue slots, in order to force the
19528 scheduler's grouping on the processor dispatcher. The function
19529 returns the number of dispatch groups found. */
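 /* For example, if the scheduler marked a boundary after three insns and
    issue_rate is 5, one nop is emitted when the following insn is not a
    branch (nops cannot occupy the branch-only slot), padding the group so
    the hardware dispatcher sees the boundary in the same place.  */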
19531 static int
19532 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19533 rtx_insn *tail)
19535 rtx_insn *insn, *next_insn;
19536 rtx nop;
19537 int issue_rate;
19538 int can_issue_more;
19539 int group_end;
19540 int group_count = 0;
19542 /* Initialize issue_rate. */
19543 issue_rate = rs6000_issue_rate ();
19544 can_issue_more = issue_rate;
19546 insn = get_next_active_insn (prev_head_insn, tail);
19547 next_insn = get_next_active_insn (insn, tail);
19549 while (insn != NULL_RTX)
19551 can_issue_more =
19552 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19554 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19556 if (next_insn == NULL_RTX)
19557 break;
19559 if (group_end)
19561 /* If the scheduler had marked group termination at this location
19562 (between insn and next_insn), and neither insn nor next_insn will
19563 force group termination, pad the group with nops to force group
19564 termination. */
19565 if (can_issue_more
19566 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19567 && !insn_terminates_group_p (insn, current_group)
19568 && !insn_terminates_group_p (next_insn, previous_group))
19570 if (!is_branch_slot_insn (next_insn))
19571 can_issue_more--;
19573 while (can_issue_more)
19575 nop = gen_nop ();
19576 emit_insn_before (nop, next_insn);
19577 can_issue_more--;
19581 can_issue_more = issue_rate;
19582 group_count++;
19585 insn = next_insn;
19586 next_insn = get_next_active_insn (insn, tail);
19589 return group_count;
19592 /* We're beginning a new block. Initialize data structures as necessary. */
19594 static void
19595 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19596 int sched_verbose ATTRIBUTE_UNUSED,
19597 int max_ready ATTRIBUTE_UNUSED)
19599 last_scheduled_insn = NULL;
19600 load_store_pendulum = 0;
19601 divide_cnt = 0;
19602 vec_pairing = 0;
19605 /* The following function is called at the end of scheduling BB.
19606 After reload, it inserts nops to enforce insn group bundling. */
19608 static void
19609 rs6000_sched_finish (FILE *dump, int sched_verbose)
19611 int n_groups;
19613 if (sched_verbose)
19614 fprintf (dump, "=== Finishing schedule.\n");
19616 if (reload_completed && rs6000_sched_groups)
19618 /* Do not run sched_finish hook when selective scheduling enabled. */
19619 if (sel_sched_p ())
19620 return;
19622 if (rs6000_sched_insert_nops == sched_finish_none)
19623 return;
19625 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19626 n_groups = pad_groups (dump, sched_verbose,
19627 current_sched_info->prev_head,
19628 current_sched_info->next_tail);
19629 else
19630 n_groups = redefine_groups (dump, sched_verbose,
19631 current_sched_info->prev_head,
19632 current_sched_info->next_tail);
19634 if (sched_verbose >= 6)
19636 fprintf (dump, "ngroups = %d\n", n_groups);
19637 print_rtl (dump, current_sched_info->prev_head);
19638 fprintf (dump, "Done finish_sched\n");
19643 struct rs6000_sched_context
19645 short cached_can_issue_more;
19646 rtx_insn *last_scheduled_insn;
19647 int load_store_pendulum;
19648 int divide_cnt;
19649 int vec_pairing;
19652 typedef struct rs6000_sched_context rs6000_sched_context_def;
19653 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19655 /* Allocate store for new scheduling context. */
19656 static void *
19657 rs6000_alloc_sched_context (void)
19659 return xmalloc (sizeof (rs6000_sched_context_def));
19662 /* If CLEAN_P is true, initialize _SC with clean data;
19663 otherwise initialize it from the global context. */
19664 static void
19665 rs6000_init_sched_context (void *_sc, bool clean_p)
19667 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19669 if (clean_p)
19671 sc->cached_can_issue_more = 0;
19672 sc->last_scheduled_insn = NULL;
19673 sc->load_store_pendulum = 0;
19674 sc->divide_cnt = 0;
19675 sc->vec_pairing = 0;
19677 else
19679 sc->cached_can_issue_more = cached_can_issue_more;
19680 sc->last_scheduled_insn = last_scheduled_insn;
19681 sc->load_store_pendulum = load_store_pendulum;
19682 sc->divide_cnt = divide_cnt;
19683 sc->vec_pairing = vec_pairing;
19687 /* Sets the global scheduling context to the one pointed to by _SC. */
19688 static void
19689 rs6000_set_sched_context (void *_sc)
19691 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19693 gcc_assert (sc != NULL);
19695 cached_can_issue_more = sc->cached_can_issue_more;
19696 last_scheduled_insn = sc->last_scheduled_insn;
19697 load_store_pendulum = sc->load_store_pendulum;
19698 divide_cnt = sc->divide_cnt;
19699 vec_pairing = sc->vec_pairing;
19702 /* Free _SC. */
19703 static void
19704 rs6000_free_sched_context (void *_sc)
19706 gcc_assert (_sc != NULL);
19708 free (_sc);
19711 static bool
19712 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19714 switch (get_attr_type (insn))
19716 case TYPE_DIV:
19717 case TYPE_SDIV:
19718 case TYPE_DDIV:
19719 case TYPE_VECDIV:
19720 case TYPE_SSQRT:
19721 case TYPE_DSQRT:
19722 return false;
19724 default:
19725 return true;
19729 /* Length in units of the trampoline for entering a nested function. */
19731 int
19732 rs6000_trampoline_size (void)
19734 int ret = 0;
19736 switch (DEFAULT_ABI)
19738 default:
19739 gcc_unreachable ();
19741 case ABI_AIX:
19742 ret = (TARGET_32BIT) ? 12 : 24;
19743 break;
19745 case ABI_ELFv2:
19746 gcc_assert (!TARGET_32BIT);
19747 ret = 32;
19748 break;
19750 case ABI_DARWIN:
19751 case ABI_V4:
19752 ret = (TARGET_32BIT) ? 40 : 48;
19753 break;
19756 return ret;
19759 /* Emit RTL insns to initialize the variable parts of a trampoline.
19760 FNADDR is an RTX for the address of the function's pure code.
19761 CXT is an RTX for the static chain value for the function. */
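 /* For ABI_AIX the trampoline is filled in as a three-word function
    descriptor { entry address, TOC pointer, static chain }, each word
    register-sized; the other ABIs defer to the __trampoline_setup
    library routine instead.  */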
19763 static void
19764 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19766 int regsize = (TARGET_32BIT) ? 4 : 8;
19767 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19768 rtx ctx_reg = force_reg (Pmode, cxt);
19769 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19771 switch (DEFAULT_ABI)
19773 default:
19774 gcc_unreachable ();
19776 /* Under AIX, just build the 3-word function descriptor.  */
19777 case ABI_AIX:
19779 rtx fnmem, fn_reg, toc_reg;
19781 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19782 error ("you cannot take the address of a nested function if you use "
19783 "the %qs option", "-mno-pointers-to-nested-functions");
19785 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19786 fn_reg = gen_reg_rtx (Pmode);
19787 toc_reg = gen_reg_rtx (Pmode);
19789 /* Macro to shorten the code expansions below. */
19790 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19792 m_tramp = replace_equiv_address (m_tramp, addr);
19794 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19795 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19796 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19797 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19798 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19800 # undef MEM_PLUS
19802 break;
19804 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19805 case ABI_ELFv2:
19806 case ABI_DARWIN:
19807 case ABI_V4:
19808 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19809 LCT_NORMAL, VOIDmode,
19810 addr, Pmode,
19811 GEN_INT (rs6000_trampoline_size ()), SImode,
19812 fnaddr, Pmode,
19813 ctx_reg, Pmode);
19814 break;
19819 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19820 identifier as an argument, so the front end shouldn't look it up. */
19822 static bool
19823 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19825 return is_attribute_p ("altivec", attr_id);
19828 /* Handle the "altivec" attribute. The attribute may have
19829 arguments as follows:
19831 __attribute__((altivec(vector__)))
19832 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19833 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19835 and may appear more than once (e.g., 'vector bool char') in a
19836 given declaration. */
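 /* As an illustration, the AltiVec keywords are expanded into these
    attribute forms (e.g. "__vector" becomes
    __attribute__((altivec(vector__)))), so a declaration like
    "vector bool char" arrives here as unsigned char carrying both the
    bool__ and vector__ attributes, applied one at a time.  */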
19838 static tree
19839 rs6000_handle_altivec_attribute (tree *node,
19840 tree name ATTRIBUTE_UNUSED,
19841 tree args,
19842 int flags ATTRIBUTE_UNUSED,
19843 bool *no_add_attrs)
19845 tree type = *node, result = NULL_TREE;
19846 machine_mode mode;
19847 int unsigned_p;
19848 char altivec_type
19849 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19850 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19851 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19852 : '?');
19854 while (POINTER_TYPE_P (type)
19855 || TREE_CODE (type) == FUNCTION_TYPE
19856 || TREE_CODE (type) == METHOD_TYPE
19857 || TREE_CODE (type) == ARRAY_TYPE)
19858 type = TREE_TYPE (type);
19860 mode = TYPE_MODE (type);
19862 /* Check for invalid AltiVec type qualifiers. */
19863 if (type == long_double_type_node)
19864 error ("use of %<long double%> in AltiVec types is invalid");
19865 else if (type == boolean_type_node)
19866 error ("use of boolean types in AltiVec types is invalid");
19867 else if (TREE_CODE (type) == COMPLEX_TYPE)
19868 error ("use of %<complex%> in AltiVec types is invalid");
19869 else if (DECIMAL_FLOAT_MODE_P (mode))
19870 error ("use of decimal floating-point types in AltiVec types is invalid");
19871 else if (!TARGET_VSX)
19873 if (type == long_unsigned_type_node || type == long_integer_type_node)
19875 if (TARGET_64BIT)
19876 error ("use of %<long%> in AltiVec types is invalid for "
19877 "64-bit code without %qs", "-mvsx");
19878 else if (rs6000_warn_altivec_long)
19879 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19880 "use %<int%>");
19882 else if (type == long_long_unsigned_type_node
19883 || type == long_long_integer_type_node)
19884 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19885 "-mvsx");
19886 else if (type == double_type_node)
19887 error ("use of %<double%> in AltiVec types is invalid without %qs",
19888 "-mvsx");
19891 switch (altivec_type)
19893 case 'v':
19894 unsigned_p = TYPE_UNSIGNED (type);
19895 switch (mode)
19897 case E_TImode:
19898 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19899 break;
19900 case E_DImode:
19901 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19902 break;
19903 case E_SImode:
19904 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19905 break;
19906 case E_HImode:
19907 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19908 break;
19909 case E_QImode:
19910 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19911 break;
19912 case E_SFmode: result = V4SF_type_node; break;
19913 case E_DFmode: result = V2DF_type_node; break;
19914 /* If the user says 'vector int bool', we may be handed the 'bool'
19915 attribute _before_ the 'vector' attribute, and so select the
19916 proper type in the 'b' case below. */
19917 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19918 case E_V2DImode: case E_V2DFmode:
19919 result = type;
19920 default: break;
19922 break;
19923 case 'b':
19924 switch (mode)
19926 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
19927 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19928 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19929 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19930 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19931 default: break;
19933 break;
19934 case 'p':
19935 switch (mode)
19937 case E_V8HImode: result = pixel_V8HI_type_node;
19938 default: break;
19940 default: break;
19943 /* Propagate qualifiers attached to the element type
19944 onto the vector type. */
19945 if (result && result != type && TYPE_QUALS (type))
19946 result = build_qualified_type (result, TYPE_QUALS (type));
19948 *no_add_attrs = true; /* No need to hang on to the attribute. */
19950 if (result)
19951 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19953 return NULL_TREE;
19956 /* AltiVec defines five built-in scalar types that serve as vector
19957 elements; we must teach the compiler how to mangle them. The 128-bit
19958 floating-point mangling is target-specific as well. MMA defines
19959 two built-in types to be used as opaque vector types. */
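 /* The strings returned below follow the Itanium C++ ABI grammar:
    "U<len><name>" is a vendor-extended qualifier applied to the base
    type (so "U6__booli" is __bool-qualified int), while "u<len><name>"
    names a vendor-extended type such as __pixel or __ieee128.  */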
19961 static const char *
19962 rs6000_mangle_type (const_tree type)
19964 type = TYPE_MAIN_VARIANT (type);
19966 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19967 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
19968 && TREE_CODE (type) != OPAQUE_TYPE)
19969 return NULL;
19971 if (type == bool_char_type_node) return "U6__boolc";
19972 if (type == bool_short_type_node) return "U6__bools";
19973 if (type == pixel_type_node) return "u7__pixel";
19974 if (type == bool_int_type_node) return "U6__booli";
19975 if (type == bool_long_long_type_node) return "U6__boolx";
19977 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19978 return "g";
19979 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19980 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19982 if (type == vector_pair_type_node)
19983 return "u13__vector_pair";
19984 if (type == vector_quad_type_node)
19985 return "u13__vector_quad";
19987 /* For all other types, use the default mangling. */
19988 return NULL;
19991 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19992 struct attribute_spec.handler. */
19994 static tree
19995 rs6000_handle_longcall_attribute (tree *node, tree name,
19996 tree args ATTRIBUTE_UNUSED,
19997 int flags ATTRIBUTE_UNUSED,
19998 bool *no_add_attrs)
20000 if (TREE_CODE (*node) != FUNCTION_TYPE
20001 && TREE_CODE (*node) != FIELD_DECL
20002 && TREE_CODE (*node) != TYPE_DECL)
20004 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20005 name);
20006 *no_add_attrs = true;
20009 return NULL_TREE;
20012 /* Set longcall attributes on all functions declared when
20013 rs6000_default_long_calls is true. */
20014 static void
20015 rs6000_set_default_type_attributes (tree type)
20017 if (rs6000_default_long_calls
20018 && (TREE_CODE (type) == FUNCTION_TYPE
20019 || TREE_CODE (type) == METHOD_TYPE))
20020 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20021 NULL_TREE,
20022 TYPE_ATTRIBUTES (type));
20024 #if TARGET_MACHO
20025 darwin_set_default_type_attributes (type);
20026 #endif
20029 /* Return a reference suitable for calling a function with the
20030 longcall attribute. */
20032 static rtx
20033 rs6000_longcall_ref (rtx call_ref, rtx arg)
20035 /* System V adds '.' to the internal name, so skip them. */
20036 const char *call_name = XSTR (call_ref, 0);
20037 if (*call_name == '.')
20039 while (*call_name == '.')
20040 call_name++;
20042 tree node = get_identifier (call_name);
20043 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20046 if (TARGET_PLTSEQ)
20048 rtx base = const0_rtx;
20049 int regno = 12;
20050 if (rs6000_pcrel_p ())
20052 rtx reg = gen_rtx_REG (Pmode, regno);
20053 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20054 gen_rtvec (3, base, call_ref, arg),
20055 UNSPECV_PLT_PCREL);
20056 emit_insn (gen_rtx_SET (reg, u));
20057 return reg;
20060 if (DEFAULT_ABI == ABI_ELFv2)
20061 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20062 else
20064 if (flag_pic)
20065 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20066 regno = 11;
20068 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20069 may be used by a function global entry point. For SysV4, r11
20070 is used by __glink_PLTresolve lazy resolver entry. */
20071 rtx reg = gen_rtx_REG (Pmode, regno);
20072 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20073 UNSPEC_PLT16_HA);
20074 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20075 gen_rtvec (3, reg, call_ref, arg),
20076 UNSPECV_PLT16_LO);
20077 emit_insn (gen_rtx_SET (reg, hi));
20078 emit_insn (gen_rtx_SET (reg, lo));
20079 return reg;
20082 return force_reg (Pmode, call_ref);
20085 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20086 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20087 #endif
20089 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20090 struct attribute_spec.handler. */
20091 static tree
20092 rs6000_handle_struct_attribute (tree *node, tree name,
20093 tree args ATTRIBUTE_UNUSED,
20094 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20096 tree *type = NULL;
20097 if (DECL_P (*node))
20099 if (TREE_CODE (*node) == TYPE_DECL)
20100 type = &TREE_TYPE (*node);
20102 else
20103 type = node;
20105 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20106 || TREE_CODE (*type) == UNION_TYPE)))
20108 warning (OPT_Wattributes, "%qE attribute ignored", name);
20109 *no_add_attrs = true;
20112 else if ((is_attribute_p ("ms_struct", name)
20113 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20114 || ((is_attribute_p ("gcc_struct", name)
20115 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20117 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20118 name);
20119 *no_add_attrs = true;
20122 return NULL_TREE;
20125 static bool
20126 rs6000_ms_bitfield_layout_p (const_tree record_type)
20128 return ((TARGET_USE_MS_BITFIELD_LAYOUT
20129 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20130 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20133 #ifdef USING_ELFOS_H
20135 /* A get_unnamed_section callback, used for switching to toc_section. */
20137 static void
20138 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20140 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20141 && TARGET_MINIMAL_TOC)
20143 if (!toc_initialized)
20145 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20146 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20147 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20148 fprintf (asm_out_file, "\t.tc ");
20149 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20150 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20151 fprintf (asm_out_file, "\n");
20153 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20154 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20155 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20156 fprintf (asm_out_file, " = .+32768\n");
20157 toc_initialized = 1;
20159 else
20160 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20162 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20164 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20165 if (!toc_initialized)
20167 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20168 toc_initialized = 1;
20171 else
20173 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20174 if (!toc_initialized)
20176 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20177 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20178 fprintf (asm_out_file, " = .+32768\n");
20179 toc_initialized = 1;
20184 /* Implement TARGET_ASM_INIT_SECTIONS. */
20186 static void
20187 rs6000_elf_asm_init_sections (void)
20189 toc_section
20190 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20192 sdata2_section
20193 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20194 SDATA2_SECTION_ASM_OP);
20197 /* Implement TARGET_SELECT_RTX_SECTION. */
20199 static section *
20200 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20201 unsigned HOST_WIDE_INT align)
20203 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20204 return toc_section;
20205 else
20206 return default_elf_select_rtx_section (mode, x, align);
20209 /* For a SYMBOL_REF, set generic flags and then perform some
20210 target-specific processing.
20212 When the AIX ABI is requested on a non-AIX system, replace the
20213 function name with the real name (with a leading .) rather than the
20214 function descriptor name. This saves a lot of overriding code to
20215 read the prefixes. */
20217 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20218 static void
20219 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20221 default_encode_section_info (decl, rtl, first);
20223 if (first
20224 && TREE_CODE (decl) == FUNCTION_DECL
20225 && !TARGET_AIX
20226 && DEFAULT_ABI == ABI_AIX)
20228 rtx sym_ref = XEXP (rtl, 0);
20229 size_t len = strlen (XSTR (sym_ref, 0));
20230 char *str = XALLOCAVEC (char, len + 2);
20231 str[0] = '.';
20232 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20233 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20237 static inline bool
20238 compare_section_name (const char *section, const char *templ)
20240 int len;
20242 len = strlen (templ);
20243 return (strncmp (section, templ, len) == 0
20244 && (section[len] == 0 || section[len] == '.'));
20247 bool
20248 rs6000_elf_in_small_data_p (const_tree decl)
20250 if (rs6000_sdata == SDATA_NONE)
20251 return false;
20253 /* We want to merge strings, so we never consider them small data. */
20254 if (TREE_CODE (decl) == STRING_CST)
20255 return false;
20257 /* Functions are never in the small data area. */
20258 if (TREE_CODE (decl) == FUNCTION_DECL)
20259 return false;
20261 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
20263 const char *section = DECL_SECTION_NAME (decl);
20264 if (compare_section_name (section, ".sdata")
20265 || compare_section_name (section, ".sdata2")
20266 || compare_section_name (section, ".gnu.linkonce.s")
20267 || compare_section_name (section, ".sbss")
20268 || compare_section_name (section, ".sbss2")
20269 || compare_section_name (section, ".gnu.linkonce.sb")
20270 || strcmp (section, ".PPC.EMB.sdata0") == 0
20271 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20272 return true;
20274 else
20276 /* If we are told not to put readonly data in sdata, then don't. */
20277 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20278 && !rs6000_readonly_in_sdata)
20279 return false;
20281 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20283 if (size > 0
20284 && size <= g_switch_value
20285 /* If it's not public, and we're not going to reference it there,
20286 there's no need to put it in the small data section. */
20287 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20288 return true;
20291 return false;
20294 #endif /* USING_ELFOS_H */
20296 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20298 static bool
20299 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20301 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20304 /* Do not place thread-local symbols refs in the object blocks. */
20306 static bool
20307 rs6000_use_blocks_for_decl_p (const_tree decl)
20309 return !DECL_THREAD_LOCAL_P (decl);
20312 /* Return a REG that occurs in ADDR with coefficient 1.
20313 ADDR can be effectively incremented by incrementing REG.
20315 r0 is special and we must not select it as an address
20316 register by this routine since our caller will try to
20317 increment the returned register via an "la" instruction. */
20319 rtx
20320 find_addr_reg (rtx addr)
20322 while (GET_CODE (addr) == PLUS)
20324 if (REG_P (XEXP (addr, 0))
20325 && REGNO (XEXP (addr, 0)) != 0)
20326 addr = XEXP (addr, 0);
20327 else if (REG_P (XEXP (addr, 1))
20328 && REGNO (XEXP (addr, 1)) != 0)
20329 addr = XEXP (addr, 1);
20330 else if (CONSTANT_P (XEXP (addr, 0)))
20331 addr = XEXP (addr, 1);
20332 else if (CONSTANT_P (XEXP (addr, 1)))
20333 addr = XEXP (addr, 0);
20334 else
20335 gcc_unreachable ();
20337 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20338 return addr;
20341 void
20342 rs6000_fatal_bad_address (rtx op)
20344 fatal_insn ("bad address", op);
20347 #if TARGET_MACHO
20349 vec<branch_island, va_gc> *branch_islands;
20351 /* Remember to generate a branch island for far calls to the given
20352 function. */
20354 static void
20355 add_compiler_branch_island (tree label_name, tree function_name,
20356 int line_number)
20358 branch_island bi = {function_name, label_name, line_number};
20359 vec_safe_push (branch_islands, bi);
20362 /* NO_PREVIOUS_DEF checks whether the function name is already in the
20363 branch island list. */
20365 static int
20366 no_previous_def (tree function_name)
20368 branch_island *bi;
20369 unsigned ix;
20371 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20372 if (function_name == bi->function_name)
20373 return 0;
20374 return 1;
20377 /* GET_PREV_LABEL gets the label name from the previous definition of
20378 the function. */
20380 static tree
20381 get_prev_label (tree function_name)
20383 branch_island *bi;
20384 unsigned ix;
20386 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20387 if (function_name == bi->function_name)
20388 return bi->label_name;
20389 return NULL_TREE;
20392 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20394 void
20395 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20397 unsigned int length;
20398 char *symbol_name, *lazy_ptr_name;
20399 char *local_label_0;
20400 static unsigned label = 0;
20402 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20403 symb = (*targetm.strip_name_encoding) (symb);
20405 length = strlen (symb);
20406 symbol_name = XALLOCAVEC (char, length + 32);
20407 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20409 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20410 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20412 if (MACHOPIC_PURE)
20414 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20415 fprintf (file, "\t.align 5\n");
20417 fprintf (file, "%s:\n", stub);
20418 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20420 label++;
20421 local_label_0 = XALLOCAVEC (char, 16);
20422 sprintf (local_label_0, "L%u$spb", label);
20424 fprintf (file, "\tmflr r0\n");
20425 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20426 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20427 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20428 lazy_ptr_name, local_label_0);
20429 fprintf (file, "\tmtlr r0\n");
20430 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20431 (TARGET_64BIT ? "ldu" : "lwzu"),
20432 lazy_ptr_name, local_label_0);
20433 fprintf (file, "\tmtctr r12\n");
20434 fprintf (file, "\tbctr\n");
20436 else /* mdynamic-no-pic or mkernel. */
20438 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20439 fprintf (file, "\t.align 4\n");
20441 fprintf (file, "%s:\n", stub);
20442 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20444 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20445 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20446 (TARGET_64BIT ? "ldu" : "lwzu"),
20447 lazy_ptr_name);
20448 fprintf (file, "\tmtctr r12\n");
20449 fprintf (file, "\tbctr\n");
20452 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20453 fprintf (file, "%s:\n", lazy_ptr_name);
20454 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20455 fprintf (file, "%sdyld_stub_binding_helper\n",
20456 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20459 /* Legitimize PIC addresses. If the address is already
20460 position-independent, we return ORIG. Newly generated
20461 position-independent addresses go into a reg. This is REG if
20462 nonzero; otherwise we allocate register(s) as necessary. */
20464 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
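 /* SMALL_INT tests whether X fits in a signed 16-bit immediate: adding
    0x8000 maps [-0x8000, 0x7fff] onto [0x0, 0xffff], so one unsigned
    comparison suffices.  */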
20466 rtx
20467 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20468 rtx reg)
20470 rtx base, offset;
20472 if (reg == NULL && !reload_completed)
20473 reg = gen_reg_rtx (Pmode);
20475 if (GET_CODE (orig) == CONST)
20477 rtx reg_temp;
20479 if (GET_CODE (XEXP (orig, 0)) == PLUS
20480 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20481 return orig;
20483 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20485 /* Use a different reg for the intermediate value, as
20486 it will be marked UNCHANGING. */
20487 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20488 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20489 Pmode, reg_temp);
20490 offset =
20491 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20492 Pmode, reg);
20494 if (CONST_INT_P (offset))
20496 if (SMALL_INT (offset))
20497 return plus_constant (Pmode, base, INTVAL (offset));
20498 else if (!reload_completed)
20499 offset = force_reg (Pmode, offset);
20500 else
20502 rtx mem = force_const_mem (Pmode, orig);
20503 return machopic_legitimize_pic_address (mem, Pmode, reg);
20506 return gen_rtx_PLUS (Pmode, base, offset);
20509 /* Fall back on generic machopic code. */
20510 return machopic_legitimize_pic_address (orig, mode, reg);
20513 /* Output a .machine directive for the Darwin assembler, and call
20514 the generic start_file routine. */
20516 static void
20517 rs6000_darwin_file_start (void)
20519 static const struct
20521 const char *arg;
20522 const char *name;
20523 HOST_WIDE_INT if_set;
20524 } mapping[] = {
20525 { "ppc64", "ppc64", MASK_64BIT },
20526 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20527 { "power4", "ppc970", 0 },
20528 { "G5", "ppc970", 0 },
20529 { "7450", "ppc7450", 0 },
20530 { "7400", "ppc7400", MASK_ALTIVEC },
20531 { "G4", "ppc7400", 0 },
20532 { "750", "ppc750", 0 },
20533 { "740", "ppc750", 0 },
20534 { "G3", "ppc750", 0 },
20535 { "604e", "ppc604e", 0 },
20536 { "604", "ppc604", 0 },
20537 { "603e", "ppc603", 0 },
20538 { "603", "ppc603", 0 },
20539 { "601", "ppc601", 0 },
20540 { NULL, "ppc", 0 } };
20541 const char *cpu_id = "";
20542 size_t i;
20544 rs6000_file_start ();
20545 darwin_file_start ();
20547 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20549 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20550 cpu_id = rs6000_default_cpu;
20552 if (global_options_set.x_rs6000_cpu_index)
20553 cpu_id = processor_target_table[rs6000_cpu_index].name;
20555 /* Look through the mapping array. Pick the first name that either
20556 matches the argument, has a bit set in IF_SET that is also set
20557 in the target flags, or has a NULL name. */
20559 i = 0;
20560 while (mapping[i].arg != NULL
20561 && strcmp (mapping[i].arg, cpu_id) != 0
20562 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20563 i++;
20565 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
20568 #endif /* TARGET_MACHO */
20570 #if TARGET_ELF
20571 static int
20572 rs6000_elf_reloc_rw_mask (void)
20574 if (flag_pic)
20575 return 3;
20576 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20577 return 2;
20578 else
20579 return 0;
20582 /* Record an element in the table of global constructors. SYMBOL is
20583 a SYMBOL_REF of the function to be called; PRIORITY is a number
20584 between 0 and MAX_INIT_PRIORITY.
20586 This differs from default_named_section_asm_out_constructor in
20587 that we have special handling for -mrelocatable. */
20589 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20590 static void
20591 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20593 const char *section = ".ctors";
20594 char buf[18];
20596 if (priority != DEFAULT_INIT_PRIORITY)
20598 sprintf (buf, ".ctors.%.5u",
20599 /* Invert the numbering so the linker puts us in the proper
20600 order; constructors are run from right to left, and the
20601 linker sorts in increasing order. */
20602 MAX_INIT_PRIORITY - priority);
20603 section = buf;
20606 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20607 assemble_align (POINTER_SIZE);
20609 if (DEFAULT_ABI == ABI_V4
20610 && (TARGET_RELOCATABLE || flag_pic > 1))
20612 fputs ("\t.long (", asm_out_file);
20613 output_addr_const (asm_out_file, symbol);
20614 fputs (")@fixup\n", asm_out_file);
20616 else
20617 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20620 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20621 static void
20622 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20624 const char *section = ".dtors";
20625 char buf[18];
20627 if (priority != DEFAULT_INIT_PRIORITY)
20629 sprintf (buf, ".dtors.%.5u",
20630 /* Invert the numbering so the linker puts us in the proper
20631 order; constructors are run from right to left, and the
20632 linker sorts in increasing order. */
20633 MAX_INIT_PRIORITY - priority);
20634 section = buf;
20637 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20638 assemble_align (POINTER_SIZE);
20640 if (DEFAULT_ABI == ABI_V4
20641 && (TARGET_RELOCATABLE || flag_pic > 1))
20643 fputs ("\t.long (", asm_out_file);
20644 output_addr_const (asm_out_file, symbol);
20645 fputs (")@fixup\n", asm_out_file);
20647 else
20648 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20651 void
20652 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20654 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20656 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20657 ASM_OUTPUT_LABEL (file, name);
20658 fputs (DOUBLE_INT_ASM_OP, file);
20659 rs6000_output_function_entry (file, name);
20660 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20661 if (DOT_SYMBOLS)
20663 fputs ("\t.size\t", file);
20664 assemble_name (file, name);
20665 fputs (",24\n\t.type\t.", file);
20666 assemble_name (file, name);
20667 fputs (",@function\n", file);
20668 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20670 fputs ("\t.globl\t.", file);
20671 assemble_name (file, name);
20672 putc ('\n', file);
20675 else
20676 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20677 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20678 rs6000_output_function_entry (file, name);
20679 fputs (":\n", file);
20680 return;
20683 int uses_toc;
20684 if (DEFAULT_ABI == ABI_V4
20685 && (TARGET_RELOCATABLE || flag_pic > 1)
20686 && !TARGET_SECURE_PLT
20687 && (!constant_pool_empty_p () || crtl->profile)
20688 && (uses_toc = uses_TOC ()))
20690 char buf[256];
20692 if (uses_toc == 2)
20693 switch_to_other_text_partition ();
20694 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20696 fprintf (file, "\t.long ");
20697 assemble_name (file, toc_label_name);
20698 need_toc_init = 1;
20699 putc ('-', file);
20700 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20701 assemble_name (file, buf);
20702 putc ('\n', file);
20703 if (uses_toc == 2)
20704 switch_to_other_text_partition ();
20707 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20708 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20710 if (TARGET_CMODEL == CMODEL_LARGE
20711 && rs6000_global_entry_point_prologue_needed_p ())
20713 char buf[256];
20715 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20717 fprintf (file, "\t.quad .TOC.-");
20718 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20719 assemble_name (file, buf);
20720 putc ('\n', file);
20723 if (DEFAULT_ABI == ABI_AIX)
20725 const char *desc_name, *orig_name;
20727 orig_name = (*targetm.strip_name_encoding) (name);
20728 desc_name = orig_name;
20729 while (*desc_name == '.')
20730 desc_name++;
20732 if (TREE_PUBLIC (decl))
20733 fprintf (file, "\t.globl %s\n", desc_name);
20735 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20736 fprintf (file, "%s:\n", desc_name);
20737 fprintf (file, "\t.long %s\n", orig_name);
20738 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20739 fputs ("\t.long 0\n", file);
20740 fprintf (file, "\t.previous\n");
20742 ASM_OUTPUT_LABEL (file, name);
20745 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20746 static void
20747 rs6000_elf_file_end (void)
20749 #ifdef HAVE_AS_GNU_ATTRIBUTE
20750 /* ??? The value emitted depends on options active at file end.
20751 Assume anyone using #pragma or attributes that might change
20752 options knows what they are doing. */
20753 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20754 && rs6000_passes_float)
20756 int fp;
20758 if (TARGET_HARD_FLOAT)
20759 fp = 1;
20760 else
20761 fp = 2;
20762 if (rs6000_passes_long_double)
20764 if (!TARGET_LONG_DOUBLE_128)
20765 fp |= 2 * 4;
20766 else if (TARGET_IEEEQUAD)
20767 fp |= 3 * 4;
20768 else
20769 fp |= 1 * 4;
20771 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20773 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20775 if (rs6000_passes_vector)
20776 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20777 (TARGET_ALTIVEC_ABI ? 2 : 1));
20778 if (rs6000_returns_struct)
20779 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20780 aix_struct_return ? 2 : 1);
20782 #endif
20783 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20784 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20785 file_end_indicate_exec_stack ();
20786 #endif
20788 if (flag_split_stack)
20789 file_end_indicate_split_stack ();
20791 if (cpu_builtin_p)
20793 /* We have expanded a CPU builtin, so we need to emit a reference to
20794 the special symbol that libc uses to declare that it supports the
20795 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. */
20796 switch_to_section (data_section);
20797 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20798 fprintf (asm_out_file, "\t%s %s\n",
20799 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20802 #endif
20804 #if TARGET_XCOFF
20806 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20807 #define HAVE_XCOFF_DWARF_EXTRAS 0
20808 #endif
20810 static enum unwind_info_type
20811 rs6000_xcoff_debug_unwind_info (void)
20813 return UI_NONE;
20816 static void
20817 rs6000_xcoff_asm_output_anchor (rtx symbol)
20819 char buffer[100];
20821 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20822 SYMBOL_REF_BLOCK_OFFSET (symbol));
20823 fprintf (asm_out_file, "%s", SET_ASM_OP);
20824 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20825 fprintf (asm_out_file, ",");
20826 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20827 fprintf (asm_out_file, "\n");
20830 static void
20831 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20833 fputs (GLOBAL_ASM_OP, stream);
20834 RS6000_OUTPUT_BASENAME (stream, name);
20835 putc ('\n', stream);
20838 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
20839 points to the section name string variable. */
20841 static void
20842 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20844 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20845 *(const char *const *) directive,
20846 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20849 /* Likewise for read-write sections. */
20851 static void
20852 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20854 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20855 *(const char *const *) directive,
20856 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20859 static void
20860 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20862 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20863 *(const char *const *) directive,
20864 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20867 /* A get_unnamed_section callback, used for switching to toc_section. */
20869 static void
20870 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20872 if (TARGET_MINIMAL_TOC)
20874 /* toc_section is always selected at least once from
20875 rs6000_xcoff_file_start, so this is guaranteed to
20876 always be defined once and only once in each file. */
20877 if (!toc_initialized)
20879 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20880 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20881 toc_initialized = 1;
20883 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20884 (TARGET_32BIT ? "" : ",3"));
20886 else
20887 fputs ("\t.toc\n", asm_out_file);
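/* Under -mminimal-toc the first switch to the TOC section therefore
   emits (",3" only for 64-bit):
	.toc
   LCTOC..1:
	.tc toc_table[TC],toc_table[RW]
	.csect toc_table[RW],3
   and subsequent switches emit only the .csect line; otherwise a bare
   ".toc" is all that is needed.  */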
20890 /* Implement TARGET_ASM_INIT_SECTIONS. */
20892 static void
20893 rs6000_xcoff_asm_init_sections (void)
20895 read_only_data_section
20896 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20897 &xcoff_read_only_section_name);
20899 private_data_section
20900 = get_unnamed_section (SECTION_WRITE,
20901 rs6000_xcoff_output_readwrite_section_asm_op,
20902 &xcoff_private_data_section_name);
20904 read_only_private_data_section
20905 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20906 &xcoff_private_rodata_section_name);
20908 tls_data_section
20909 = get_unnamed_section (SECTION_TLS,
20910 rs6000_xcoff_output_tls_section_asm_op,
20911 &xcoff_tls_data_section_name);
20913 tls_private_data_section
20914 = get_unnamed_section (SECTION_TLS,
20915 rs6000_xcoff_output_tls_section_asm_op,
20916 &xcoff_private_data_section_name);
20918 toc_section
20919 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20921 readonly_data_section = read_only_data_section;
20924 static int
20925 rs6000_xcoff_reloc_rw_mask (void)
20927 return 3;
20930 static void
20931 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20932 tree decl ATTRIBUTE_UNUSED)
20934 int smclass;
20935 static const char * const suffix[7]
20936 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
20938 if (flags & SECTION_EXCLUDE)
20939 smclass = 6;
20940 else if (flags & SECTION_DEBUG)
20942 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20943 return;
20945 else if (flags & SECTION_CODE)
20946 smclass = 0;
20947 else if (flags & SECTION_TLS)
20949 if (flags & SECTION_BSS)
20950 smclass = 5;
20951 else
20952 smclass = 4;
20954 else if (flags & SECTION_WRITE)
20956 if (flags & SECTION_BSS)
20957 smclass = 3;
20958 else
20959 smclass = 2;
20961 else
20962 smclass = 1;
20964 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20965 (flags & SECTION_CODE) ? "." : "",
20966 name, suffix[smclass], flags & SECTION_ENTSIZE);
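/* For example, a code section "foo" whose alignment was recorded as 32
   bytes by rs6000_xcoff_section_type_flags below comes out as
   ".csect .foo[PR],5", and an 8-byte-aligned writable section "bar" as
   ".csect bar[RW],3"; the final operand is the log2 alignment kept in
   the SECTION_ENTSIZE bits.  */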
20969 #define IN_NAMED_SECTION(DECL) \
20970 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20971 && DECL_SECTION_NAME (DECL) != NULL)
20973 static section *
20974 rs6000_xcoff_select_section (tree decl, int reloc,
20975 unsigned HOST_WIDE_INT align)
20977 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20978 named section. */
20979 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
20981 resolve_unique_section (decl, reloc, true);
20982 if (IN_NAMED_SECTION (decl))
20983 return get_named_section (decl, NULL, reloc);
20986 if (decl_readonly_section (decl, reloc))
20988 if (TREE_PUBLIC (decl))
20989 return read_only_data_section;
20990 else
20991 return read_only_private_data_section;
20993 else
20995 #if HAVE_AS_TLS
20996 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20998 if (bss_initializer_p (decl))
20999 return tls_comm_section;
21000 else if (TREE_PUBLIC (decl))
21001 return tls_data_section;
21002 else
21003 return tls_private_data_section;
21005 else
21006 #endif
21007 if (TREE_PUBLIC (decl))
21008 return data_section;
21009 else
21010 return private_data_section;
21014 static void
21015 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21017 const char *name;
21019 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21020 name = (*targetm.strip_name_encoding) (name);
21021 set_decl_section_name (decl, name);
21024 /* Select section for constant in constant pool.
21026 On RS/6000, all constants are in the private read-only data area.
21027 However, if this is being placed in the TOC it must be output as a
21028 toc entry. */
21030 static section *
21031 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21032 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21034 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21035 return toc_section;
21036 else
21037 return read_only_private_data_section;
21040 /* Remove any trailing [DS] or the like from the symbol name. */
21042 static const char *
21043 rs6000_xcoff_strip_name_encoding (const char *name)
21045 size_t len;
21046 if (*name == '*')
21047 name++;
21048 len = strlen (name);
21049 if (name[len - 1] == ']')
21050 return ggc_alloc_string (name, len - 4);
21051 else
21052 return name;
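/* For example, rs6000_xcoff_strip_name_encoding maps "foo[DS]" to
   "foo".  Using len - 4 is safe because every mapping class this port
   appends is two characters inside brackets ([DS], [RO], [RW], [UL],
   and so on).  */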
21055 /* Section attributes. AIX is always PIC. */
21057 static unsigned int
21058 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21060 unsigned int align;
21061 unsigned int flags = default_section_type_flags (decl, name, reloc);
21063 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21064 flags |= SECTION_BSS;
21066 /* Align to at least UNIT size. */
21067 if (!decl || !DECL_P (decl))
21068 align = MIN_UNITS_PER_WORD;
21069 /* Align code CSECT to at least 32 bytes. */
21070 else if ((flags & SECTION_CODE) != 0)
21071 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21072 else
21073 /* Increase alignment of large objects if not already stricter. */
21074 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21075 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21076 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21078 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21081 /* Output at beginning of assembler file.
21083 Initialize the section names for the RS/6000 at this point.
21085 Specify filename, including full path, to assembler.
21087 We want to go into the TOC section so at least one .toc will be emitted.
21088 Also, in order to output proper .bs/.es pairs, we need at least one static
21089 [RW] section emitted.
21091 Finally, declare mcount when profiling to make the assembler happy. */
21093 static void
21094 rs6000_xcoff_file_start (void)
21096 rs6000_gen_section_name (&xcoff_bss_section_name,
21097 main_input_filename, ".bss_");
21098 rs6000_gen_section_name (&xcoff_private_data_section_name,
21099 main_input_filename, ".rw_");
21100 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21101 main_input_filename, ".rop_");
21102 rs6000_gen_section_name (&xcoff_read_only_section_name,
21103 main_input_filename, ".ro_");
21104 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21105 main_input_filename, ".tls_");
21107 fputs ("\t.file\t", asm_out_file);
21108 output_quoted_string (asm_out_file, main_input_filename);
21109 fputc ('\n', asm_out_file);
21110 if (write_symbols != NO_DEBUG)
21111 switch_to_section (private_data_section);
21112 switch_to_section (toc_section);
21113 switch_to_section (text_section);
21114 if (profile_flag)
21115 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21116 rs6000_file_start ();
21119 /* Output at end of assembler file.
21120 On the RS/6000, referencing data should automatically pull in text. */
21122 static void
21123 rs6000_xcoff_file_end (void)
21125 switch_to_section (text_section);
21126 fputs ("_section_.text:\n", asm_out_file);
21127 switch_to_section (data_section);
21128 fputs (TARGET_32BIT
21129 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21130 asm_out_file);
21132 if (xcoff_tls_exec_model_detected)
21134 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21135 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21139 struct declare_alias_data
21141 FILE *file;
21142 bool function_descriptor;
21145 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
21147 static bool
21148 rs6000_declare_alias (struct symtab_node *n, void *d)
21150 struct declare_alias_data *data = (struct declare_alias_data *)d;
21151 /* Main symbol is output specially, because varasm machinery does part of
21152 the job for us - we do not need to declare .globl/lglobs and such. */
21153 if (!n->alias || n->weakref)
21154 return false;
21156 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21157 return false;
21159 /* Prevent assemble_alias from trying to use .set pseudo operation
21160 that does not behave as expected by the middle-end. */
21161 TREE_ASM_WRITTEN (n->decl) = true;
21163 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21164 char *buffer = (char *) alloca (strlen (name) + 2);
21165 char *p;
21166 int dollar_inside = 0;
21168 strcpy (buffer, name);
21169 p = strchr (buffer, '$');
21170 while (p) {
21171 *p = '_';
21172 dollar_inside++;
21173 p = strchr (p + 1, '$');
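/* Example: an alias spelled "f$g" becomes "f_g" in BUFFER, and
   DOLLAR_INSIDE records that a replacement happened so the .rename
   directives below can map the '$'-free assembler-level name back to
   the original spelling, which the AIX assembler is not expected to
   accept literally.  */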
21175 if (TREE_PUBLIC (n->decl))
21177 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21179 if (dollar_inside) {
21180 if (data->function_descriptor)
21181 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21182 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21184 if (data->function_descriptor)
21186 fputs ("\t.globl .", data->file);
21187 RS6000_OUTPUT_BASENAME (data->file, buffer);
21188 putc ('\n', data->file);
21190 fputs ("\t.globl ", data->file);
21191 assemble_name (data->file, buffer);
21192 putc ('\n', data->file);
21194 #ifdef ASM_WEAKEN_DECL
21195 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21196 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21197 #endif
21199 else
21201 if (dollar_inside)
21203 if (data->function_descriptor)
21204 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21205 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21207 if (data->function_descriptor)
21209 fputs ("\t.lglobl .", data->file);
21210 RS6000_OUTPUT_BASENAME (data->file, buffer);
21211 putc ('\n', data->file);
21213 fputs ("\t.lglobl ", data->file);
21214 assemble_name (data->file, buffer);
21215 putc ('\n', data->file);
21217 if (data->function_descriptor)
21218 putc ('.', data->file);
21219 ASM_OUTPUT_LABEL (data->file, buffer);
21220 return false;
21224 #ifdef HAVE_GAS_HIDDEN
21225 /* Helper function to calculate visibility of a DECL
21226 and return the value as a const string. */
21228 static const char *
21229 rs6000_xcoff_visibility (tree decl)
21231 static const char * const visibility_types[] = {
21232 "", ",protected", ",hidden", ",internal"
21235 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21236 return visibility_types[vis];
21238 #endif
21241 /* This macro produces the initial definition of a function name.
21242 On the RS/6000, we need to place an extra '.' in the function name and
21243 output the function descriptor.
21244 Dollar signs are converted to underscores.
21246 The csect for the function will have already been created when
21247 text_section was selected. We do have to go back to that csect, however.
21249 The third and fourth parameters to the .function pseudo-op (2 and 0)
21250 are placeholders which no longer have any use.
21252 Because AIX assembler's .set command has unexpected semantics, we output
21253 all aliases as alternative labels in front of the definition. */
21255 void
21256 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21258 char *buffer = (char *) alloca (strlen (name) + 1);
21259 char *p;
21260 int dollar_inside = 0;
21261 struct declare_alias_data data = {file, false};
21263 strcpy (buffer, name);
21264 p = strchr (buffer, '$');
21265 while (p) {
21266 *p = '_';
21267 dollar_inside++;
21268 p = strchr (p + 1, '$');
21270 if (TREE_PUBLIC (decl))
21272 if (!RS6000_WEAK || !DECL_WEAK (decl))
21274 if (dollar_inside) {
21275 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21276 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21278 fputs ("\t.globl .", file);
21279 RS6000_OUTPUT_BASENAME (file, buffer);
21280 #ifdef HAVE_GAS_HIDDEN
21281 fputs (rs6000_xcoff_visibility (decl), file);
21282 #endif
21283 putc ('\n', file);
21286 else
21288 if (dollar_inside) {
21289 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21290 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21292 fputs ("\t.lglobl .", file);
21293 RS6000_OUTPUT_BASENAME (file, buffer);
21294 putc ('\n', file);
21297 fputs ("\t.csect ", file);
21298 assemble_name (file, buffer);
21299 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21301 ASM_OUTPUT_LABEL (file, buffer);
21303 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21304 &data, true);
21305 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21306 RS6000_OUTPUT_BASENAME (file, buffer);
21307 fputs (", TOC[tc0], 0\n", file);
21309 in_section = NULL;
21310 switch_to_section (function_section (decl));
21311 putc ('.', file);
21312 ASM_OUTPUT_LABEL (file, buffer);
21314 data.function_descriptor = true;
21315 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21316 &data, true);
21317 if (!DECL_IGNORED_P (decl))
21319 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
21320 xcoffout_declare_function (file, decl, buffer);
21321 else if (dwarf_debuginfo_p ())
21323 name = (*targetm.strip_name_encoding) (name);
21324 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21327 return;
21331 /* Output assembly language to globalize a symbol from a DECL,
21332 possibly with visibility. */
21334 void
21335 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21337 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21338 fputs (GLOBAL_ASM_OP, stream);
21339 assemble_name (stream, name);
21340 #ifdef HAVE_GAS_HIDDEN
21341 fputs (rs6000_xcoff_visibility (decl), stream);
21342 #endif
21343 putc ('\n', stream);
21346 /* Output assembly language to define a symbol as COMMON from a DECL,
21347 possibly with visibility. */
21349 void
21350 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21351 tree decl ATTRIBUTE_UNUSED,
21352 const char *name,
21353 unsigned HOST_WIDE_INT size,
21354 unsigned int align)
21356 unsigned int align2 = 2;
21358 if (align == 0)
21359 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21361 if (align > 32)
21362 align2 = floor_log2 (align / BITS_PER_UNIT);
21363 else if (size > 4)
21364 align2 = 3;
21366 if (! DECL_COMMON (decl))
21368 /* Forget section. */
21369 in_section = NULL;
21371 /* Globalize TLS BSS. */
21372 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21374 fputs (GLOBAL_ASM_OP, stream);
21375 assemble_name (stream, name);
21376 fputc ('\n', stream);
21379 /* Switch to section and skip space. */
21380 fputs ("\t.csect ", stream);
21381 assemble_name (stream, name);
21382 fprintf (stream, ",%u\n", align2);
21383 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21384 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21385 return;
21388 if (TREE_PUBLIC (decl))
21390 fprintf (stream,
21391 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21392 name, size, align2);
21394 #ifdef HAVE_GAS_HIDDEN
21395 if (decl != NULL)
21396 fputs (rs6000_xcoff_visibility (decl), stream);
21397 #endif
21398 putc ('\n', stream);
21400 else
21401 fprintf (stream,
21402 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21403 (*targetm.strip_name_encoding) (name), size, name, align2);
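/* Example output: a public 16-byte common "buf" with 8-byte alignment
   yields "\t.comm buf,16,3", while a local one yields
   "\t.lcomm buf,16,buf,3"; the extra .lcomm operand names the csect
   that holds the storage.  */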
21406 /* This macro produces the initial definition of an object (variable) name.
21407 Because AIX assembler's .set command has unexpected semantics, we output
21408 all aliases as alternative labels in front of the definition. */
21410 void
21411 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21413 struct declare_alias_data data = {file, false};
21414 ASM_OUTPUT_LABEL (file, name);
21415 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21416 &data, true);
21419 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21421 void
21422 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21424 fputs (integer_asm_op (size, FALSE), file);
21425 assemble_name (file, label);
21426 fputs ("-$", file);
21429 /* Output a symbol offset relative to the dbase for the current object.
21430 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21431 signed offsets.
21433 __gcc_unwind_dbase is embedded in all executables/libraries through
21434 libgcc/config/rs6000/crtdbase.S. */
21436 void
21437 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21439 fputs (integer_asm_op (size, FALSE), file);
21440 assemble_name (file, label);
21441 fputs("-__gcc_unwind_dbase", file);
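/* So for a SIZE of 4 and a label L this emits roughly
   "\t.vbyte\t4,L-__gcc_unwind_dbase"; the directive itself comes from
   integer_asm_op, so the exact spelling here is only illustrative.  */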
21444 #ifdef HAVE_AS_TLS
21445 static void
21446 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21448 rtx symbol;
21449 int flags;
21450 const char *symname;
21452 default_encode_section_info (decl, rtl, first);
21454 /* Careful not to prod global register variables. */
21455 if (!MEM_P (rtl))
21456 return;
21457 symbol = XEXP (rtl, 0);
21458 if (!SYMBOL_REF_P (symbol))
21459 return;
21461 flags = SYMBOL_REF_FLAGS (symbol);
21463 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21464 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21466 SYMBOL_REF_FLAGS (symbol) = flags;
21468 symname = XSTR (symbol, 0);
21470 /* Append CSECT mapping class, unless the symbol already is qualified.
21471 Aliases are implemented as labels, so the symbol name should not add
21472 a mapping class. */
21473 if (decl
21474 && DECL_P (decl)
21475 && VAR_OR_FUNCTION_DECL_P (decl)
21476 && symtab_node::get (decl)->alias == 0
21477 && symname[strlen (symname) - 1] != ']')
21479 const char *smclass = NULL;
21481 if (TREE_CODE (decl) == FUNCTION_DECL)
21482 smclass = "[DS]";
21483 else if (DECL_THREAD_LOCAL_P (decl))
21485 if (bss_initializer_p (decl))
21486 smclass = "[UL]";
21487 else if (flag_data_sections)
21488 smclass = "[TL]";
21490 else if (DECL_EXTERNAL (decl))
21491 smclass = "[UA]";
21492 else if (bss_initializer_p (decl))
21493 smclass = "[BS]";
21494 else if (flag_data_sections)
21496 /* This must exactly match the logic of select section. */
21497 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21498 smclass = "[RO]";
21499 else
21500 smclass = "[RW]";
21503 if (smclass != NULL)
21505 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21507 strcpy (newname, symname);
21508 strcat (newname, smclass);
21509 XSTR (symbol, 0) = ggc_strdup (newname);
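/* Example: with -fdata-sections a writable initialized variable "v"
   is renamed to "v[RW]" here, matching the [RW] csect that
   rs6000_xcoff_asm_named_section selects for SECTION_WRITE above.  */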
21513 #endif /* HAVE_AS_TLS */
21514 #endif /* TARGET_XCOFF */
21516 void
21517 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21518 const char *name, const char *val)
21520 fputs ("\t.weak\t", stream);
21521 assemble_name (stream, name);
21522 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21523 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21525 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21526 if (TARGET_XCOFF)
21527 fputs (rs6000_xcoff_visibility (decl), stream);
21528 #endif
21529 fputs ("\n\t.weak\t.", stream);
21530 RS6000_OUTPUT_BASENAME (stream, name);
21532 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21533 if (TARGET_XCOFF)
21534 fputs (rs6000_xcoff_visibility (decl), stream);
21535 #endif
21536 fputc ('\n', stream);
21538 if (val)
21540 #ifdef ASM_OUTPUT_DEF
21541 ASM_OUTPUT_DEF (stream, name, val);
21542 #endif
21543 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21544 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21546 fputs ("\t.set\t.", stream);
21547 RS6000_OUTPUT_BASENAME (stream, name);
21548 fputs (",.", stream);
21549 RS6000_OUTPUT_BASENAME (stream, val);
21550 fputc ('\n', stream);
21556 /* Return true if INSN should not be copied. */
21558 static bool
21559 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21561 return recog_memoized (insn) >= 0
21562 && get_attr_cannot_copy (insn);
21565 /* Compute a (partial) cost for rtx X. Return true if the complete
21566 cost has been computed, and false if subexpressions should be
21567 scanned. In either case, *TOTAL contains the cost result. */
21569 static bool
21570 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21571 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21573 int code = GET_CODE (x);
21575 switch (code)
21577 /* On the RS/6000, if it is valid in the insn, it is free. */
21578 case CONST_INT:
21579 if (((outer_code == SET
21580 || outer_code == PLUS
21581 || outer_code == MINUS)
21582 && (satisfies_constraint_I (x)
21583 || satisfies_constraint_L (x)))
21584 || (outer_code == AND
21585 && (satisfies_constraint_K (x)
21586 || (mode == SImode
21587 ? satisfies_constraint_L (x)
21588 : satisfies_constraint_J (x))))
21589 || ((outer_code == IOR || outer_code == XOR)
21590 && (satisfies_constraint_K (x)
21591 || (mode == SImode
21592 ? satisfies_constraint_L (x)
21593 : satisfies_constraint_J (x))))
21594 || outer_code == ASHIFT
21595 || outer_code == ASHIFTRT
21596 || outer_code == LSHIFTRT
21597 || outer_code == ROTATE
21598 || outer_code == ROTATERT
21599 || outer_code == ZERO_EXTRACT
21600 || (outer_code == MULT
21601 && satisfies_constraint_I (x))
21602 || ((outer_code == DIV || outer_code == UDIV
21603 || outer_code == MOD || outer_code == UMOD)
21604 && exact_log2 (INTVAL (x)) >= 0)
21605 || (outer_code == COMPARE
21606 && (satisfies_constraint_I (x)
21607 || satisfies_constraint_K (x)))
21608 || ((outer_code == EQ || outer_code == NE)
21609 && (satisfies_constraint_I (x)
21610 || satisfies_constraint_K (x)
21611 || (mode == SImode
21612 ? satisfies_constraint_L (x)
21613 : satisfies_constraint_J (x))))
21614 || (outer_code == GTU
21615 && satisfies_constraint_I (x))
21616 || (outer_code == LTU
21617 && satisfies_constraint_P (x)))
21619 *total = 0;
21620 return true;
21622 else if ((outer_code == PLUS
21623 && reg_or_add_cint_operand (x, mode))
21624 || (outer_code == MINUS
21625 && reg_or_sub_cint_operand (x, mode))
21626 || ((outer_code == SET
21627 || outer_code == IOR
21628 || outer_code == XOR)
21629 && (INTVAL (x)
21630 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21632 *total = COSTS_N_INSNS (1);
21633 return true;
21635 /* FALLTHRU */
21637 case CONST_DOUBLE:
21638 case CONST_WIDE_INT:
21639 case CONST:
21640 case HIGH:
21641 case SYMBOL_REF:
21642 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21643 return true;
21645 case MEM:
21646 /* When optimizing for size, MEM should be slightly more expensive
21647 than generating the address, e.g., (plus (reg) (const)).
21648 L1 cache latency is about two instructions. */
21649 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21650 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21651 *total += COSTS_N_INSNS (100);
21652 return true;
21654 case LABEL_REF:
21655 *total = 0;
21656 return true;
21658 case PLUS:
21659 case MINUS:
21660 if (FLOAT_MODE_P (mode))
21661 *total = rs6000_cost->fp;
21662 else
21663 *total = COSTS_N_INSNS (1);
21664 return false;
21666 case MULT:
21667 if (CONST_INT_P (XEXP (x, 1))
21668 && satisfies_constraint_I (XEXP (x, 1)))
21670 if (INTVAL (XEXP (x, 1)) >= -256
21671 && INTVAL (XEXP (x, 1)) <= 255)
21672 *total = rs6000_cost->mulsi_const9;
21673 else
21674 *total = rs6000_cost->mulsi_const;
21676 else if (mode == SFmode)
21677 *total = rs6000_cost->fp;
21678 else if (FLOAT_MODE_P (mode))
21679 *total = rs6000_cost->dmul;
21680 else if (mode == DImode)
21681 *total = rs6000_cost->muldi;
21682 else
21683 *total = rs6000_cost->mulsi;
21684 return false;
21686 case FMA:
21687 if (mode == SFmode)
21688 *total = rs6000_cost->fp;
21689 else
21690 *total = rs6000_cost->dmul;
21691 break;
21693 case DIV:
21694 case MOD:
21695 if (FLOAT_MODE_P (mode))
21697 *total = mode == DFmode ? rs6000_cost->ddiv
21698 : rs6000_cost->sdiv;
21699 return false;
21701 /* FALLTHRU */
21703 case UDIV:
21704 case UMOD:
21705 if (CONST_INT_P (XEXP (x, 1))
21706 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21708 if (code == DIV || code == MOD)
21709 /* Shift, addze */
21710 *total = COSTS_N_INSNS (2);
21711 else
21712 /* Shift */
21713 *total = COSTS_N_INSNS (1);
21715 else
21717 if (GET_MODE (XEXP (x, 1)) == DImode)
21718 *total = rs6000_cost->divdi;
21719 else
21720 *total = rs6000_cost->divsi;
21722 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21723 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21724 *total += COSTS_N_INSNS (2);
21725 return false;
21727 case CTZ:
21728 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21729 return false;
21731 case FFS:
21732 *total = COSTS_N_INSNS (4);
21733 return false;
21735 case POPCOUNT:
21736 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21737 return false;
21739 case PARITY:
21740 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21741 return false;
21743 case NOT:
21744 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21745 *total = 0;
21746 else
21747 *total = COSTS_N_INSNS (1);
21748 return false;
21750 case AND:
21751 if (CONST_INT_P (XEXP (x, 1)))
21753 rtx left = XEXP (x, 0);
21754 rtx_code left_code = GET_CODE (left);
21756 /* rotate-and-mask: 1 insn. */
21757 if ((left_code == ROTATE
21758 || left_code == ASHIFT
21759 || left_code == LSHIFTRT)
21760 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21762 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21763 if (!CONST_INT_P (XEXP (left, 1)))
21764 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21765 *total += COSTS_N_INSNS (1);
21766 return true;
21769 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21770 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21771 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21772 || (val & 0xffff) == val
21773 || (val & 0xffff0000) == val
21774 || ((val & 0xffff) == 0 && mode == SImode))
21776 *total = rtx_cost (left, mode, AND, 0, speed);
21777 *total += COSTS_N_INSNS (1);
21778 return true;
21781 /* 2 insns. */
21782 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21784 *total = rtx_cost (left, mode, AND, 0, speed);
21785 *total += COSTS_N_INSNS (2);
21786 return true;
21790 *total = COSTS_N_INSNS (1);
21791 return false;
21793 case IOR:
21794 /* FIXME */
21795 *total = COSTS_N_INSNS (1);
21796 return true;
21798 case CLZ:
21799 case XOR:
21800 case ZERO_EXTRACT:
21801 *total = COSTS_N_INSNS (1);
21802 return false;
21804 case ASHIFT:
21805 /* The EXTSWSLI instruction is a combined instruction. Don't count both
21806 the sign extend and shift separately within the insn. */
21807 if (TARGET_EXTSWSLI && mode == DImode
21808 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21809 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21811 *total = 0;
21812 return false;
21814 /* fall through */
21816 case ASHIFTRT:
21817 case LSHIFTRT:
21818 case ROTATE:
21819 case ROTATERT:
21820 /* Handle mul_highpart. */
21821 if (outer_code == TRUNCATE
21822 && GET_CODE (XEXP (x, 0)) == MULT)
21824 if (mode == DImode)
21825 *total = rs6000_cost->muldi;
21826 else
21827 *total = rs6000_cost->mulsi;
21828 return true;
21830 else if (outer_code == AND)
21831 *total = 0;
21832 else
21833 *total = COSTS_N_INSNS (1);
21834 return false;
21836 case SIGN_EXTEND:
21837 case ZERO_EXTEND:
21838 if (MEM_P (XEXP (x, 0)))
21839 *total = 0;
21840 else
21841 *total = COSTS_N_INSNS (1);
21842 return false;
21844 case COMPARE:
21845 case NEG:
21846 case ABS:
21847 if (!FLOAT_MODE_P (mode))
21849 *total = COSTS_N_INSNS (1);
21850 return false;
21852 /* FALLTHRU */
21854 case FLOAT:
21855 case UNSIGNED_FLOAT:
21856 case FIX:
21857 case UNSIGNED_FIX:
21858 case FLOAT_TRUNCATE:
21859 *total = rs6000_cost->fp;
21860 return false;
21862 case FLOAT_EXTEND:
21863 if (mode == DFmode)
21864 *total = rs6000_cost->sfdf_convert;
21865 else
21866 *total = rs6000_cost->fp;
21867 return false;
21869 case CALL:
21870 case IF_THEN_ELSE:
21871 if (!speed)
21873 *total = COSTS_N_INSNS (1);
21874 return true;
21876 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21878 *total = rs6000_cost->fp;
21879 return false;
21881 break;
21883 case NE:
21884 case EQ:
21885 case GTU:
21886 case LTU:
21887 /* Carry bit requires mode == Pmode.
21888 NEG or PLUS already counted so only add one. */
21889 if (mode == Pmode
21890 && (outer_code == NEG || outer_code == PLUS))
21892 *total = COSTS_N_INSNS (1);
21893 return true;
21895 /* FALLTHRU */
21897 case GT:
21898 case LT:
21899 case UNORDERED:
21900 if (outer_code == SET)
21902 if (XEXP (x, 1) == const0_rtx)
21904 *total = COSTS_N_INSNS (2);
21905 return true;
21907 else
21909 *total = COSTS_N_INSNS (3);
21910 return false;
21913 /* CC COMPARE. */
21914 if (outer_code == COMPARE)
21916 *total = 0;
21917 return true;
21919 break;
21921 case UNSPEC:
21922 if (XINT (x, 1) == UNSPEC_MMA_XXSETACCZ)
21924 *total = 0;
21925 return true;
21927 break;
21929 default:
21930 break;
21933 return false;
21936 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21938 static bool
21939 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21940 int opno, int *total, bool speed)
21942 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21944 fprintf (stderr,
21945 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21946 "opno = %d, total = %d, speed = %s, x:\n",
21947 ret ? "complete" : "scan inner",
21948 GET_MODE_NAME (mode),
21949 GET_RTX_NAME (outer_code),
21950 opno,
21951 *total,
21952 speed ? "true" : "false");
21954 debug_rtx (x);
21956 return ret;
21959 static int
21960 rs6000_insn_cost (rtx_insn *insn, bool speed)
21962 if (recog_memoized (insn) < 0)
21963 return 0;
21965 /* If we are optimizing for size, just use the length. */
21966 if (!speed)
21967 return get_attr_length (insn);
21969 /* Use the cost if provided. */
21970 int cost = get_attr_cost (insn);
21971 if (cost > 0)
21972 return cost;
21974 /* If the insn tells us how many insns there are, use that. Otherwise use
21975 the length/4. Adjust the insn length to remove the extra size that
21976 prefixed instructions take. */
21977 int n = get_attr_num_insns (insn);
21978 if (n == 0)
21980 int length = get_attr_length (insn);
21981 if (get_attr_prefixed (insn) == PREFIXED_YES)
21983 int adjust = 0;
21984 ADJUST_INSN_LENGTH (insn, adjust);
21985 length -= adjust;
21988 n = length / 4;
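/* A sketch of the intent (the exact amount is whatever the port's
   ADJUST_INSN_LENGTH adds for a prefixed insn, assumed here to be a
   4-byte alignment-nop allowance): a prefixed insn scheduled as 12
   bytes drops back to its true 8 and yields n == 2, while an ordinary
   insn yields n == 1, so N counts 4-byte instruction words rather
   than padded bytes.  */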
21991 enum attr_type type = get_attr_type (insn);
21993 switch (type)
21995 case TYPE_LOAD:
21996 case TYPE_FPLOAD:
21997 case TYPE_VECLOAD:
21998 cost = COSTS_N_INSNS (n + 1);
21999 break;
22001 case TYPE_MUL:
22002 switch (get_attr_size (insn))
22004 case SIZE_8:
22005 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22006 break;
22007 case SIZE_16:
22008 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22009 break;
22010 case SIZE_32:
22011 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22012 break;
22013 case SIZE_64:
22014 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22015 break;
22016 default:
22017 gcc_unreachable ();
22019 break;
22020 case TYPE_DIV:
22021 switch (get_attr_size (insn))
22023 case SIZE_32:
22024 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22025 break;
22026 case SIZE_64:
22027 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22028 break;
22029 default:
22030 gcc_unreachable ();
22032 break;
22034 case TYPE_FP:
22035 cost = n * rs6000_cost->fp;
22036 break;
22037 case TYPE_DMUL:
22038 cost = n * rs6000_cost->dmul;
22039 break;
22040 case TYPE_SDIV:
22041 cost = n * rs6000_cost->sdiv;
22042 break;
22043 case TYPE_DDIV:
22044 cost = n * rs6000_cost->ddiv;
22045 break;
22047 case TYPE_SYNC:
22048 case TYPE_LOAD_L:
22049 case TYPE_MFCR:
22050 case TYPE_MFCRF:
22051 cost = COSTS_N_INSNS (n + 2);
22052 break;
22054 default:
22055 cost = COSTS_N_INSNS (n);
22058 return cost;
22061 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22063 static int
22064 rs6000_debug_address_cost (rtx x, machine_mode mode,
22065 addr_space_t as, bool speed)
22067 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22069 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22070 ret, speed ? "true" : "false");
22071 debug_rtx (x);
22073 return ret;
22077 /* A C expression returning the cost of moving data from a register of class
22078 FROM to one of class TO. */
22080 static int
22081 rs6000_register_move_cost (machine_mode mode,
22082 reg_class_t from, reg_class_t to)
22084 int ret;
22085 reg_class_t rclass;
22087 if (TARGET_DEBUG_COST)
22088 dbg_cost_ctrl++;
22090 /* If we have VSX, we can easily move between FPR or Altivec registers,
22091 otherwise we can only easily move within classes.
22092 Do this first so we give best-case answers for union classes
22093 containing both gprs and vsx regs. */
22094 HARD_REG_SET to_vsx, from_vsx;
22095 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22096 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22097 if (!hard_reg_set_empty_p (to_vsx)
22098 && !hard_reg_set_empty_p (from_vsx)
22099 && (TARGET_VSX
22100 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22102 int reg = FIRST_FPR_REGNO;
22103 if (TARGET_VSX
22104 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22105 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22106 reg = FIRST_ALTIVEC_REGNO;
22107 ret = 2 * hard_regno_nregs (reg, mode);
22110 /* Moves from/to GENERAL_REGS. */
22111 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22112 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22114 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22116 if (TARGET_DIRECT_MOVE)
22118 /* Keep the cost for direct moves above that for within
22119 a register class even if the actual processor cost is
22120 comparable. We do this because a direct move insn
22121 can't be a nop, whereas with ideal register
22122 allocation a move within the same class might turn
22123 out to be a nop. */
22124 if (rs6000_tune == PROCESSOR_POWER9
22125 || rs6000_tune == PROCESSOR_POWER10)
22126 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22127 else
22128 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22129 /* SFmode requires a conversion when moving between gprs
22130 and vsx. */
22131 if (mode == SFmode)
22132 ret += 2;
22134 else
22135 ret = (rs6000_memory_move_cost (mode, rclass, false)
22136 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22139 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22140 shift. */
22141 else if (rclass == CR_REGS)
22142 ret = 4;
22144 /* For those processors that have slow LR/CTR moves, make them more
22145 expensive than memory in order to bias spills to memory.  */
22146 else if ((rs6000_tune == PROCESSOR_POWER6
22147 || rs6000_tune == PROCESSOR_POWER7
22148 || rs6000_tune == PROCESSOR_POWER8
22149 || rs6000_tune == PROCESSOR_POWER9)
22150 && reg_class_subset_p (rclass, SPECIAL_REGS))
22151 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22153 else
22154 /* A move will cost one instruction per GPR moved. */
22155 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22158 /* Everything else has to go through GENERAL_REGS. */
22159 else
22160 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22161 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22163 if (TARGET_DEBUG_COST)
22165 if (dbg_cost_ctrl == 1)
22166 fprintf (stderr,
22167 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22168 ret, GET_MODE_NAME (mode), reg_class_names[from],
22169 reg_class_names[to]);
22170 dbg_cost_ctrl--;
22173 return ret;
22176 /* A C expression returning the cost of moving data of MODE from a register to
22177 or from memory. */
22179 static int
22180 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22181 bool in ATTRIBUTE_UNUSED)
22183 int ret;
22185 if (TARGET_DEBUG_COST)
22186 dbg_cost_ctrl++;
22188 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22189 ret = 4 * hard_regno_nregs (0, mode);
22190 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22191 || reg_classes_intersect_p (rclass, VSX_REGS)))
22192 ret = 4 * hard_regno_nregs (32, mode);
22193 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22194 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22195 else
22196 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22198 if (TARGET_DEBUG_COST)
22200 if (dbg_cost_ctrl == 1)
22201 fprintf (stderr,
22202 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22203 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22204 dbg_cost_ctrl--;
22207 return ret;
22210 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22212 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22213 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22214 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22215 move cost between GENERAL_REGS and VSX_REGS low.
22217 It might seem reasonable to use a union class. After all, if usage
22218 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22219 rather than memory. However, in cases where register pressure of
22220 both is high, like the cactus_adm spec test, allowing
22221 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22222 the first scheduling pass. This is partly due to an allocno of
22223 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22224 class, which gives too high a pressure for GENERAL_REGS and too low
22225 for VSX_REGS. So, force a choice of the subclass here.
22227 The best class is also the union if GENERAL_REGS and VSX_REGS have
22228 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22229 allocno class, since trying to narrow down the class by regno mode
22230 is prone to error. For example, SImode is allowed in VSX regs and
22231 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22232 it would be wrong to choose an allocno of GENERAL_REGS based on
22233 SImode. */
22235 static reg_class_t
22236 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22237 reg_class_t allocno_class,
22238 reg_class_t best_class)
22240 switch (allocno_class)
22242 case GEN_OR_VSX_REGS:
22243 /* best_class must be a subset of allocno_class. */
22244 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22245 || best_class == GEN_OR_FLOAT_REGS
22246 || best_class == VSX_REGS
22247 || best_class == ALTIVEC_REGS
22248 || best_class == FLOAT_REGS
22249 || best_class == GENERAL_REGS
22250 || best_class == BASE_REGS);
22251 /* Use best_class but choose wider classes when copying from the
22252 wider class to best_class is cheap. This mimics IRA choice
22253 of allocno class. */
22254 if (best_class == BASE_REGS)
22255 return GENERAL_REGS;
22256 if (TARGET_VSX && best_class == FLOAT_REGS)
22257 return VSX_REGS;
22258 return best_class;
22260 case VSX_REGS:
22261 if (best_class == ALTIVEC_REGS)
22262 return ALTIVEC_REGS;
22264 default:
22265 break;
22268 return allocno_class;
22271 /* Returns a code for a target-specific builtin that implements
22272 reciprocal of the function, or NULL_TREE if not available. */
22274 static tree
22275 rs6000_builtin_reciprocal (tree fndecl)
22277 switch (DECL_MD_FUNCTION_CODE (fndecl))
22279 case VSX_BUILTIN_XVSQRTDP:
22280 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
22281 return NULL_TREE;
22283 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
22285 case VSX_BUILTIN_XVSQRTSP:
22286 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
22287 return NULL_TREE;
22289 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
22291 default:
22292 return NULL_TREE;
22296 /* Load up a constant. If the mode is a vector mode, splat the value across
22297 all of the vector elements. */
22299 static rtx
22300 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22302 rtx reg;
22304 if (mode == SFmode || mode == DFmode)
22306 rtx d = const_double_from_real_value (dconst, mode);
22307 reg = force_reg (mode, d);
22309 else if (mode == V4SFmode)
22311 rtx d = const_double_from_real_value (dconst, SFmode);
22312 rtvec v = gen_rtvec (4, d, d, d, d);
22313 reg = gen_reg_rtx (mode);
22314 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22316 else if (mode == V2DFmode)
22318 rtx d = const_double_from_real_value (dconst, DFmode);
22319 rtvec v = gen_rtvec (2, d, d);
22320 reg = gen_reg_rtx (mode);
22321 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22323 else
22324 gcc_unreachable ();
22326 return reg;
22329 /* Generate an FMA instruction. */
22331 static void
22332 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22334 machine_mode mode = GET_MODE (target);
22335 rtx dst;
22337 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22338 gcc_assert (dst != NULL);
22340 if (dst != target)
22341 emit_move_insn (target, dst);
22344 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22346 static void
22347 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22349 machine_mode mode = GET_MODE (dst);
22350 rtx r;
22352 /* This is a tad more complicated, since the fnma_optab is for
22353 a different expression: fma(-m1, m2, a), which is the same
22354 thing except in the case of signed zeros.
22356 Fortunately we know that if FMA is supported, then FNMSUB is
22357 also supported in the ISA. Just expand it directly. */
22359 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22361 r = gen_rtx_NEG (mode, a);
22362 r = gen_rtx_FMA (mode, m1, m2, r);
22363 r = gen_rtx_NEG (mode, r);
22364 emit_insn (gen_rtx_SET (dst, r));
22367 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22368 add a reg_note saying that this was a division. Support both scalar and
22369 vector divide. Assumes no trapping math and finite arguments. */
22371 void
22372 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22374 machine_mode mode = GET_MODE (dst);
22375 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22376 int i;
22378 /* Low precision estimates guarantee 5 bits of accuracy. High
22379 precision estimates guarantee 14 bits of accuracy. SFmode
22380 requires 23 bits of accuracy. DFmode requires 52 bits of
22381 accuracy. Each pass at least doubles the accuracy, leading
22382 to the following. */
22383 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22384 if (mode == DFmode || mode == V2DFmode)
22385 passes++;
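/* Concretely: a high-precision (14-bit) estimate doubles to 28 bits
   after one pass, covering SFmode's 23 bits; DFmode needs the extra
   pass for 56 >= 52.  A low-precision (5-bit) estimate needs three
   passes for SFmode (5 -> 10 -> 20 -> 40) and four for DFmode
   (-> 80).  */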
22387 enum insn_code code = optab_handler (smul_optab, mode);
22388 insn_gen_fn gen_mul = GEN_FCN (code);
22390 gcc_assert (code != CODE_FOR_nothing);
22392 one = rs6000_load_constant_and_splat (mode, dconst1);
22394 /* x0 = 1./d estimate */
22395 x0 = gen_reg_rtx (mode);
22396 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22397 UNSPEC_FRES)));
22399 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22400 if (passes > 1) {
22402 /* e0 = 1. - d * x0 */
22403 e0 = gen_reg_rtx (mode);
22404 rs6000_emit_nmsub (e0, d, x0, one);
22406 /* x1 = x0 + e0 * x0 */
22407 x1 = gen_reg_rtx (mode);
22408 rs6000_emit_madd (x1, e0, x0, x0);
22410 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22411 ++i, xprev = xnext, eprev = enext) {
22413 /* enext = eprev * eprev */
22414 enext = gen_reg_rtx (mode);
22415 emit_insn (gen_mul (enext, eprev, eprev));
22417 /* xnext = xprev + enext * xprev */
22418 xnext = gen_reg_rtx (mode);
22419 rs6000_emit_madd (xnext, enext, xprev, xprev);
22422 } else
22423 xprev = x0;
22425 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22427 /* u = n * xprev */
22428 u = gen_reg_rtx (mode);
22429 emit_insn (gen_mul (u, n, xprev));
22431 /* v = n - (d * u) */
22432 v = gen_reg_rtx (mode);
22433 rs6000_emit_nmsub (v, d, u, n);
22435 /* dst = (v * xprev) + u */
22436 rs6000_emit_madd (dst, v, xprev, u);
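/* Algebra for the final step: u = n*x and v = n - d*u give
   dst = v*x + u = n*x*(2 - d*x), i.e. one more Newton-Raphson update
   fused with the multiplication by the numerator.  */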
22438 if (note_p)
22439 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22442 /* Goldschmidt's Algorithm for single/double-precision floating point
22443 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22445 void
22446 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22448 machine_mode mode = GET_MODE (src);
22449 rtx e = gen_reg_rtx (mode);
22450 rtx g = gen_reg_rtx (mode);
22451 rtx h = gen_reg_rtx (mode);
22453 /* Low precision estimates guarantee 5 bits of accuracy. High
22454 precision estimates guarantee 14 bits of accuracy. SFmode
22455 requires 23 bits of accuracy. DFmode requires 52 bits of
22456 accuracy. Each pass at least doubles the accuracy, leading
22457 to the following. */
22458 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22459 if (mode == DFmode || mode == V2DFmode)
22460 passes++;
22462 int i;
22463 rtx mhalf;
22464 enum insn_code code = optab_handler (smul_optab, mode);
22465 insn_gen_fn gen_mul = GEN_FCN (code);
22467 gcc_assert (code != CODE_FOR_nothing);
22469 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22471 /* e = rsqrt estimate */
22472 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22473 UNSPEC_RSQRT)));
22475 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22476 if (!recip)
22478 rtx zero = force_reg (mode, CONST0_RTX (mode));
22480 if (mode == SFmode)
22482 rtx target = emit_conditional_move (e, GT, src, zero, mode,
22483 e, zero, mode, 0);
22484 if (target != e)
22485 emit_move_insn (e, target);
22487 else
22489 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22490 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22494 /* g = sqrt estimate. */
22495 emit_insn (gen_mul (g, e, src));
22496 /* h = 1/(2*sqrt) estimate. */
22497 emit_insn (gen_mul (h, e, mhalf));
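/* The residual used below is t = 1/2 - g*h (rs6000_emit_nmsub computes
   mhalf - g*h).  If g and h carry a relative error eps, g*h is close
   to 1/2 and t measures the error; the updates g1 = g + t*g and
   h1 = h + t*h drive g toward sqrt(src) and h toward 1/(2*sqrt(src)),
   roughly doubling the accuracy per pass.  */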
22499 if (recip)
22501 if (passes == 1)
22503 rtx t = gen_reg_rtx (mode);
22504 rs6000_emit_nmsub (t, g, h, mhalf);
22505 /* Apply correction directly to 1/rsqrt estimate. */
22506 rs6000_emit_madd (dst, e, t, e);
22508 else
22510 for (i = 0; i < passes; i++)
22512 rtx t1 = gen_reg_rtx (mode);
22513 rtx g1 = gen_reg_rtx (mode);
22514 rtx h1 = gen_reg_rtx (mode);
22516 rs6000_emit_nmsub (t1, g, h, mhalf);
22517 rs6000_emit_madd (g1, g, t1, g);
22518 rs6000_emit_madd (h1, h, t1, h);
22520 g = g1;
22521 h = h1;
22523 /* Multiply by 2 for 1/rsqrt. */
22524 emit_insn (gen_add3_insn (dst, h, h));
22527 else
22529 rtx t = gen_reg_rtx (mode);
22530 rs6000_emit_nmsub (t, g, h, mhalf);
22531 rs6000_emit_madd (dst, g, t, g);
22534 return;
22537 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22538 (Power7) targets. DST is the target, and SRC is the argument operand. */
22540 void
22541 rs6000_emit_popcount (rtx dst, rtx src)
22543 machine_mode mode = GET_MODE (dst);
22544 rtx tmp1, tmp2;
22546 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22547 if (TARGET_POPCNTD)
22549 if (mode == SImode)
22550 emit_insn (gen_popcntdsi2 (dst, src));
22551 else
22552 emit_insn (gen_popcntddi2 (dst, src));
22553 return;
22556 tmp1 = gen_reg_rtx (mode);
22558 if (mode == SImode)
22560 emit_insn (gen_popcntbsi2 (tmp1, src));
22561 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22562 NULL_RTX, 0);
22563 tmp2 = force_reg (SImode, tmp2);
22564 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22566 else
22568 emit_insn (gen_popcntbdi2 (tmp1, src));
22569 tmp2 = expand_mult (DImode, tmp1,
22570 GEN_INT ((HOST_WIDE_INT)
22571 0x01010101 << 32 | 0x01010101),
22572 NULL_RTX, 0);
22573 tmp2 = force_reg (DImode, tmp2);
22574 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
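/* Why the multiply works: popcntb leaves each byte's population count
   in that byte; multiplying by 0x01010101 (or its 64-bit counterpart)
   sums all the byte counts into the most significant byte, which the
   final shift (24 or 56) extracts.  E.g., byte counts 0x01020304 times
   0x01010101 leave 1+2+3+4 = 10 in the top byte, and no carries occur
   because each count is at most 8.  */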
22579 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22580 target, and SRC is the argument operand. */
22582 void
22583 rs6000_emit_parity (rtx dst, rtx src)
22585 machine_mode mode = GET_MODE (dst);
22586 rtx tmp;
22588 tmp = gen_reg_rtx (mode);
22590 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22591 if (TARGET_CMPB)
22593 if (mode == SImode)
22595 emit_insn (gen_popcntbsi2 (tmp, src));
22596 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22598 else
22600 emit_insn (gen_popcntbdi2 (tmp, src));
22601 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22603 return;
22606 if (mode == SImode)
22608 /* Is mult+shift >= shift+xor+shift+xor? */
22609 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22611 rtx tmp1, tmp2, tmp3, tmp4;
22613 tmp1 = gen_reg_rtx (SImode);
22614 emit_insn (gen_popcntbsi2 (tmp1, src));
22616 tmp2 = gen_reg_rtx (SImode);
22617 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22618 tmp3 = gen_reg_rtx (SImode);
22619 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22621 tmp4 = gen_reg_rtx (SImode);
22622 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22623 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22625 else
22626 rs6000_emit_popcount (tmp, src);
22627 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22629 else
22631 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22632 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22634 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22636 tmp1 = gen_reg_rtx (DImode);
22637 emit_insn (gen_popcntbdi2 (tmp1, src));
22639 tmp2 = gen_reg_rtx (DImode);
22640 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22641 tmp3 = gen_reg_rtx (DImode);
22642 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22644 tmp4 = gen_reg_rtx (DImode);
22645 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22646 tmp5 = gen_reg_rtx (DImode);
22647 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22649 tmp6 = gen_reg_rtx (DImode);
22650 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22651 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
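/* The shift/XOR ladder halves the vector of byte counts at each step;
   XOR preserves the low bit of a sum, so after folding (32, then 16,
   then 8 bits in the DImode case) bit 0 of TMP is the parity of the
   total population count, which the AND with 1 below extracts.  */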
22653 else
22654 rs6000_emit_popcount (tmp, src);
22655 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22659 /* Expand an Altivec constant permutation for little endian mode.
22660 OP0 and OP1 are the input vectors and TARGET is the output vector.
22661 SEL specifies the constant permutation vector.
22663 There are two issues: First, the two input operands must be
22664 swapped so that together they form a double-wide array in LE
22665 order. Second, the vperm instruction has surprising behavior
22666 in LE mode: it interprets the elements of the source vectors
22667 in BE mode ("left to right") and interprets the elements of
22668 the destination vector in LE mode ("right to left"). To
22669 correct for this, we must subtract each element of the permute
22670 control vector from 31.
22672 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22673 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22674 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22675 serve as the permute control vector. Then, in BE mode,
22677 vperm 9,10,11,12
22679 places the desired result in vr9. However, in LE mode the
22680 vector contents will be
22682 vr10 = 00000003 00000002 00000001 00000000
22683 vr11 = 00000007 00000006 00000005 00000004
22685 The result of the vperm using the same permute control vector is
22687 vr9 = 05000000 07000000 01000000 03000000
22689 That is, the leftmost 4 bytes of vr10 are interpreted as the
22690 source for the rightmost 4 bytes of vr9, and so on.
22692 If we change the permute control vector to
22694 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22696 and issue
22698 vperm 9,11,10,12
22700 we get the desired
22702 vr9 = 00000006 00000004 00000002 00000000. */
22704 static void
22705 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22706 const vec_perm_indices &sel)
22708 unsigned int i;
22709 rtx perm[16];
22710 rtx constv, unspec;
22712 /* Unpack and adjust the constant selector. */
22713 for (i = 0; i < 16; ++i)
22715 unsigned int elt = 31 - (sel[i] & 31);
22716 perm[i] = GEN_INT (elt);
22719 /* Expand to a permute, swapping the inputs and using the
22720 adjusted selector. */
22721 if (!REG_P (op0))
22722 op0 = force_reg (V16QImode, op0);
22723 if (!REG_P (op1))
22724 op1 = force_reg (V16QImode, op1);
22726 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22727 constv = force_reg (V16QImode, constv);
22728 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22729 UNSPEC_VPERM);
22730 if (!REG_P (target))
22732 rtx tmp = gen_reg_rtx (V16QImode);
22733 emit_move_insn (tmp, unspec);
22734 unspec = tmp;
22737 emit_move_insn (target, unspec);
22740 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22741 permute control vector. But here it's not a constant, so we must
22742 generate a vector NAND or NOR to do the adjustment. */
22744 void
22745 altivec_expand_vec_perm_le (rtx operands[4])
22747 rtx notx, iorx, unspec;
22748 rtx target = operands[0];
22749 rtx op0 = operands[1];
22750 rtx op1 = operands[2];
22751 rtx sel = operands[3];
22752 rtx tmp = target;
22753 rtx norreg = gen_reg_rtx (V16QImode);
22754 machine_mode mode = GET_MODE (target);
22756 /* Get everything in regs so the pattern matches. */
22757 if (!REG_P (op0))
22758 op0 = force_reg (mode, op0);
22759 if (!REG_P (op1))
22760 op1 = force_reg (mode, op1);
22761 if (!REG_P (sel))
22762 sel = force_reg (V16QImode, sel);
22763 if (!REG_P (target))
22764 tmp = gen_reg_rtx (mode);
22766 if (TARGET_P9_VECTOR)
22768 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22769 UNSPEC_VPERMR);
22771 else
22773 /* Invert the selector with a VNAND if available, else a VNOR.
22774 The VNAND is preferred for future fusion opportunities. */
22775 notx = gen_rtx_NOT (V16QImode, sel);
22776 iorx = (TARGET_P8_VECTOR
22777 ? gen_rtx_IOR (V16QImode, notx, notx)
22778 : gen_rtx_AND (V16QImode, notx, notx));
22779 emit_insn (gen_rtx_SET (norreg, iorx));
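/* Why complementing works: vperm reads only the low five bits of each
   selector byte, and on those bits ~x == 31 - x, so NANDing (or
   NORing) the selector with itself performs the same subtract-from-31
   adjustment that altivec_expand_vec_perm_const_le applies to constant
   selectors.  */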
22781 /* Permute with operands reversed and adjusted selector. */
22782 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22783 UNSPEC_VPERM);
22786 /* Copy into target, possibly by way of a register. */
22787 if (!REG_P (target))
22789 emit_move_insn (tmp, unspec);
22790 unspec = tmp;
22793 emit_move_insn (target, unspec);
22796 /* Expand an Altivec constant permutation. Return true if we match
22797 an efficient implementation; false to fall back to VPERM.
22799 OP0 and OP1 are the input vectors and TARGET is the output vector.
22800 SEL specifies the constant permutation vector. */
22802 static bool
22803 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22804 const vec_perm_indices &sel)
22806 struct altivec_perm_insn {
22807 HOST_WIDE_INT mask;
22808 enum insn_code impl;
22809 unsigned char perm[16];
22811 static const struct altivec_perm_insn patterns[] = {
22812 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
22813 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
22814 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
22815 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22816 { OPTION_MASK_ALTIVEC,
22817 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22818 : CODE_FOR_altivec_vmrglb_direct),
22819 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22820 { OPTION_MASK_ALTIVEC,
22821 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22822 : CODE_FOR_altivec_vmrglh_direct),
22823 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22824 { OPTION_MASK_ALTIVEC,
22825 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22826 : CODE_FOR_altivec_vmrglw_direct),
22827 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22828 { OPTION_MASK_ALTIVEC,
22829 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22830 : CODE_FOR_altivec_vmrghb_direct),
22831 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22832 { OPTION_MASK_ALTIVEC,
22833 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22834 : CODE_FOR_altivec_vmrghh_direct),
22835 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22836 { OPTION_MASK_ALTIVEC,
22837 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22838 : CODE_FOR_altivec_vmrghw_direct),
22839 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22840 { OPTION_MASK_P8_VECTOR,
22841 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22842 : CODE_FOR_p8_vmrgow_v4sf_direct),
22843 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22844 { OPTION_MASK_P8_VECTOR,
22845 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22846 : CODE_FOR_p8_vmrgew_v4sf_direct),
22847 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22850 unsigned int i, j, elt, which;
22851 unsigned char perm[16];
22852 rtx x;
22853 bool one_vec;
22855 /* Unpack the constant selector. */
22856 for (i = which = 0; i < 16; ++i)
22858 elt = sel[i] & 31;
22859 which |= (elt < 16 ? 1 : 2);
22860 perm[i] = elt;
22863 /* Simplify the constant selector based on operands. */
22864 switch (which)
22866 default:
22867 gcc_unreachable ();
22869 case 3:
22870 one_vec = false;
22871 if (!rtx_equal_p (op0, op1))
22872 break;
22873 /* FALLTHRU */
22875 case 2:
22876 for (i = 0; i < 16; ++i)
22877 perm[i] &= 15;
22878 op0 = op1;
22879 one_vec = true;
22880 break;
22882 case 1:
22883 op1 = op0;
22884 one_vec = true;
22885 break;
22888 /* Look for splat patterns. */
22889 if (one_vec)
22891 elt = perm[0];
22893 for (i = 0; i < 16; ++i)
22894 if (perm[i] != elt)
22895 break;
22896 if (i == 16)
22898 if (!BYTES_BIG_ENDIAN)
22899 elt = 15 - elt;
22900 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22901 return true;
22904 if (elt % 2 == 0)
22906 for (i = 0; i < 16; i += 2)
22907 if (perm[i] != elt || perm[i + 1] != elt + 1)
22908 break;
22909 if (i == 16)
22911 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22912 x = gen_reg_rtx (V8HImode);
22913 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22914 GEN_INT (field)));
22915 emit_move_insn (target, gen_lowpart (V16QImode, x));
22916 return true;
22920 if (elt % 4 == 0)
22922 for (i = 0; i < 16; i += 4)
22923 if (perm[i] != elt
22924 || perm[i + 1] != elt + 1
22925 || perm[i + 2] != elt + 2
22926 || perm[i + 3] != elt + 3)
22927 break;
22928 if (i == 16)
22930 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22931 x = gen_reg_rtx (V4SImode);
22932 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22933 GEN_INT (field)));
22934 emit_move_insn (target, gen_lowpart (V16QImode, x));
22935 return true;
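/* For example, a constant selector of sixteen 5s is matched above and
   emitted as a single vspltb of byte 5 (byte 10 when little-endian),
   while { 4, 5, 4, 5, ... } becomes a vsplth of halfword 2 (big-endian
   numbering).  */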
22940 /* Look for merge and pack patterns. */
22941 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22943 bool swapped;
22945 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22946 continue;
22948 elt = patterns[j].perm[0];
22949 if (perm[0] == elt)
22950 swapped = false;
22951 else if (perm[0] == elt + 16)
22952 swapped = true;
22953 else
22954 continue;
22955 for (i = 1; i < 16; ++i)
22957 elt = patterns[j].perm[i];
22958 if (swapped)
22959 elt = (elt >= 16 ? elt - 16 : elt + 16);
22960 else if (one_vec && elt >= 16)
22961 elt -= 16;
22962 if (perm[i] != elt)
22963 break;
22965 if (i == 16)
22967 enum insn_code icode = patterns[j].impl;
22968 machine_mode omode = insn_data[icode].operand[0].mode;
22969 machine_mode imode = insn_data[icode].operand[1].mode;
22971 /* For little-endian, don't use vpkuwum and vpkuhum if the
22972 underlying vector type is not V4SI and V8HI, respectively.
22973 For example, using vpkuwum with a V8HI picks up the even
22974 halfwords (BE numbering) when the even halfwords (LE
22975 numbering) are what we need. */
22976 if (!BYTES_BIG_ENDIAN
22977 && icode == CODE_FOR_altivec_vpkuwum_direct
22978 && ((REG_P (op0)
22979 && GET_MODE (op0) != V4SImode)
22980 || (SUBREG_P (op0)
22981 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22982 continue;
22983 if (!BYTES_BIG_ENDIAN
22984 && icode == CODE_FOR_altivec_vpkuhum_direct
22985 && ((REG_P (op0)
22986 && GET_MODE (op0) != V8HImode)
22987 || (SUBREG_P (op0)
22988 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22989 continue;
22991 /* For little-endian, the two input operands must be swapped
22992 (or swapped back) to ensure proper right-to-left numbering
22993 from 0 to 2N-1. */
22994 if (swapped ^ !BYTES_BIG_ENDIAN)
22995 std::swap (op0, op1);
22996 if (imode != V16QImode)
22998 op0 = gen_lowpart (imode, op0);
22999 op1 = gen_lowpart (imode, op1);
23001 if (omode == V16QImode)
23002 x = target;
23003 else
23004 x = gen_reg_rtx (omode);
23005 emit_insn (GEN_FCN (icode) (x, op0, op1));
23006 if (omode != V16QImode)
23007 emit_move_insn (target, gen_lowpart (V16QImode, x));
23008 return true;
23012 if (!BYTES_BIG_ENDIAN)
23014 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23015 return true;
23018 return false;
23021 /* Expand a VSX Permute Doubleword constant permutation.
23022 Return true if we match an efficient implementation. */
23024 static bool
23025 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23026 unsigned char perm0, unsigned char perm1)
23028 rtx x;
23030 /* If both selectors come from the same operand, fold to single op. */
23031 if ((perm0 & 2) == (perm1 & 2))
23033 if (perm0 & 2)
23034 op0 = op1;
23035 else
23036 op1 = op0;
23038 /* If both operands are equal, fold to simpler permutation. */
23039 if (rtx_equal_p (op0, op1))
23041 perm0 = perm0 & 1;
23042 perm1 = (perm1 & 1) + 2;
23044 /* If the first selector comes from the second operand, swap. */
23045 else if (perm0 & 2)
23047 if (perm1 & 2)
23048 return false;
23049 perm0 -= 2;
23050 perm1 += 2;
23051 std::swap (op0, op1);
23053 /* If the second selector does not come from the second operand, fail. */
23054 else if ((perm1 & 2) == 0)
23055 return false;
23057 /* Success! */
23058 if (target != NULL)
23060 machine_mode vmode, dmode;
23061 rtvec v;
23063 vmode = GET_MODE (target);
23064 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23065 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23066 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23067 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23068 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23069 emit_insn (gen_rtx_SET (target, x));
23071 return true;
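/* Encoding sketch for the selectors above: PERM0/PERM1 are 2-bit values
   where bit 1 picks the operand (0 = OP0, 1 = OP1) and bit 0 picks the
   doubleword within it.  E.g. perm0 = 0, perm1 = 3 selects doubleword 0
   of OP0 and doubleword 1 of OP1, i.e. the identity concatenation.  */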
23074 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23076 static bool
23077 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
23078 rtx op1, const vec_perm_indices &sel)
23080 bool testing_p = !target;
23082 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23083 if (TARGET_ALTIVEC && testing_p)
23084 return true;
23086 if (op0)
23088 rtx nop0 = force_reg (vmode, op0);
23089 if (op0 == op1)
23090 op1 = nop0;
23091 op0 = nop0;
23093 if (op1)
23094 op1 = force_reg (vmode, op1);
23096 /* Check for ps_merge* or xxpermdi insns. */
23097 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23099 if (testing_p)
23101 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23102 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23104 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23105 return true;
23108 if (TARGET_ALTIVEC)
23110 /* Force the target-independent code to lower to V16QImode. */
23111 if (vmode != V16QImode)
23112 return false;
23113 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23114 return true;
23117 return false;
23120 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23121 OP0 and OP1 are the input vectors and TARGET is the output vector.
23122 PERM specifies the constant permutation vector. */
23124 static void
23125 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23126 machine_mode vmode, const vec_perm_builder &perm)
23128 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23129 if (x != target)
23130 emit_move_insn (target, x);
23133 /* Expand an extract even operation. */
23135 void
23136 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23138 machine_mode vmode = GET_MODE (target);
23139 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23140 vec_perm_builder perm (nelt, nelt, 1);
23142 for (i = 0; i < nelt; i++)
23143 perm.quick_push (i * 2);
23145 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
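/* E.g. for a V4SI target (nelt = 4) the selector built here is
   { 0, 2, 4, 6 }: the even elements of the OP0:OP1 concatenation.  */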
23148 /* Expand a vector interleave operation. */
23150 void
23151 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23153 machine_mode vmode = GET_MODE (target);
23154 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23155 vec_perm_builder perm (nelt, nelt, 1);
23157 high = (highp ? 0 : nelt / 2);
23158 for (i = 0; i < nelt / 2; i++)
23160 perm.quick_push (i + high);
23161 perm.quick_push (i + nelt + high);
23164 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
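/* E.g. for a V4SI target (nelt = 4), HIGHP true builds the selector
   { 0, 4, 1, 5 }, interleaving the high halves of OP0 and OP1, while
   HIGHP false builds { 2, 6, 3, 7 } for the low halves.  */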
23167 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT.  */
23168 void
23169 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23171 HOST_WIDE_INT hwi_scale (scale);
23172 REAL_VALUE_TYPE r_pow;
23173 rtvec v = rtvec_alloc (2);
23174 rtx elt;
23175 rtx scale_vec = gen_reg_rtx (V2DFmode);
23176 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23177 elt = const_double_from_real_value (r_pow, DFmode);
23178 RTVEC_ELT (v, 0) = elt;
23179 RTVEC_ELT (v, 1) = elt;
23180 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23181 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
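/* E.g. rs6000_scale_v2df (tgt, src, 3) multiplies both elements of SRC
   by 2**3 = 8.0, implemented as a vector multiply against the constant
   { 8.0, 8.0 } built above.  */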
23184 /* Return an RTX representing where to find the function value of a
23185 function returning MODE. */
23186 static rtx
23187 rs6000_complex_function_value (machine_mode mode)
23189 unsigned int regno;
23190 rtx r1, r2;
23191 machine_mode inner = GET_MODE_INNER (mode);
23192 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23194 if (TARGET_FLOAT128_TYPE
23195 && (mode == KCmode
23196 || (mode == TCmode && TARGET_IEEEQUAD)))
23197 regno = ALTIVEC_ARG_RETURN;
23199 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23200 regno = FP_ARG_RETURN;
23202 else
23204 regno = GP_ARG_RETURN;
23206 /* 32-bit is OK since it'll go in r3/r4. */
23207 if (TARGET_32BIT && inner_bytes >= 4)
23208 return gen_rtx_REG (mode, regno);
23211 if (inner_bytes >= 8)
23212 return gen_rtx_REG (mode, regno);
23214 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23215 const0_rtx);
23216 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23217 GEN_INT (inner_bytes));
23218 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23221 /* Return an rtx describing a return value of MODE as a PARALLEL
23222 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23223 stride REG_STRIDE. */
23225 static rtx
23226 rs6000_parallel_return (machine_mode mode,
23227 int n_elts, machine_mode elt_mode,
23228 unsigned int regno, unsigned int reg_stride)
23230 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23232 int i;
23233 for (i = 0; i < n_elts; i++)
23235 rtx r = gen_rtx_REG (elt_mode, regno);
23236 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23237 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23238 regno += reg_stride;
23241 return par;
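/* E.g. rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1)
   describes a 64-bit value split across r3 (bytes 0-3) and r4
   (bytes 4-7), as used for -m32 -mpowerpc64 returns below.  */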
23244 /* Target hook for TARGET_FUNCTION_VALUE.
23246 An integer value is in r3 and a floating-point value is in fp1,
23247 unless -msoft-float. */
23249 static rtx
23250 rs6000_function_value (const_tree valtype,
23251 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23252 bool outgoing ATTRIBUTE_UNUSED)
23254 machine_mode mode;
23255 unsigned int regno;
23256 machine_mode elt_mode;
23257 int n_elts;
23259 /* Special handling for structs in darwin64. */
23260 if (TARGET_MACHO
23261 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23263 CUMULATIVE_ARGS valcum;
23264 rtx valret;
23266 valcum.words = 0;
23267 valcum.fregno = FP_ARG_MIN_REG;
23268 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23269 /* Do a trial code generation as if this were going to be passed as
23270 an argument; if any part goes in memory, we return NULL. */
23271 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23272 if (valret)
23273 return valret;
23274 /* Otherwise fall through to standard ABI rules. */
23277 mode = TYPE_MODE (valtype);
23279 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23280 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23282 int first_reg, n_regs;
23284 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23286 /* _Decimal128 must use even/odd register pairs. */
23287 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23288 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23290 else
23292 first_reg = ALTIVEC_ARG_RETURN;
23293 n_regs = 1;
23296 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23299 /* Some return value types need to be split under -mpowerpc64 with the 32-bit ABI. */
23300 if (TARGET_32BIT && TARGET_POWERPC64)
23301 switch (mode)
23303 default:
23304 break;
23305 case E_DImode:
23306 case E_SCmode:
23307 case E_DCmode:
23308 case E_TCmode:
23309 int count = GET_MODE_SIZE (mode) / 4;
23310 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23313 if ((INTEGRAL_TYPE_P (valtype)
23314 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23315 || POINTER_TYPE_P (valtype))
23316 mode = TARGET_32BIT ? SImode : DImode;
23318 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23319 /* _Decimal128 must use an even/odd register pair. */
23320 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23321 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23322 && !FLOAT128_VECTOR_P (mode))
23323 regno = FP_ARG_RETURN;
23324 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23325 && targetm.calls.split_complex_arg)
23326 return rs6000_complex_function_value (mode);
23327 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23328 return register is used in both cases, and we won't see V2DImode/V2DFmode
23329 for pure altivec, combine the two cases. */
23330 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
23331 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23332 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23333 regno = ALTIVEC_ARG_RETURN;
23334 else
23335 regno = GP_ARG_RETURN;
23337 return gen_rtx_REG (mode, regno);
23340 /* Define how to find the value returned by a library function
23341 assuming the value has mode MODE. */
23342 rtx
23343 rs6000_libcall_value (machine_mode mode)
23345 unsigned int regno;
23347 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
23348 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23349 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23351 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23352 /* _Decimal128 must use an even/odd register pair. */
23353 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23354 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23355 regno = FP_ARG_RETURN;
23356 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23357 return register is used in both cases, and we won't see V2DImode/V2DFmode
23358 for pure altivec, combine the two cases. */
23359 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23360 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23361 regno = ALTIVEC_ARG_RETURN;
23362 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23363 return rs6000_complex_function_value (mode);
23364 else
23365 regno = GP_ARG_RETURN;
23367 return gen_rtx_REG (mode, regno);
23370 /* Compute register pressure classes. We implement the target hook to avoid
23371 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23372 lead to incorrect estimates of the number of available registers and
23373 therefore increased register pressure and spilling. */
23374 static int
23375 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23377 int n;
23379 n = 0;
23380 pressure_classes[n++] = GENERAL_REGS;
23381 if (TARGET_ALTIVEC)
23382 pressure_classes[n++] = ALTIVEC_REGS;
23383 if (TARGET_VSX)
23384 pressure_classes[n++] = VSX_REGS;
23385 else
23387 if (TARGET_HARD_FLOAT)
23388 pressure_classes[n++] = FLOAT_REGS;
23390 pressure_classes[n++] = CR_REGS;
23391 pressure_classes[n++] = SPECIAL_REGS;
23393 return n;
23396 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23397 Frame pointer elimination is automatically handled.
23399 For the RS/6000, if frame pointer elimination is being done, we would like
23400 to convert ap into fp, not sp.
23402 We need r30 if -mminimal-toc was specified, and there are constant pool
23403 references. */
23405 static bool
23406 rs6000_can_eliminate (const int from, const int to)
23408 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23409 ? ! frame_pointer_needed
23410 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23411 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23412 || constant_pool_empty_p ()
23413 : true);
23416 /* Define the offset between two registers, FROM to be eliminated and its
23417 replacement TO, at the start of a routine. */
23418 HOST_WIDE_INT
23419 rs6000_initial_elimination_offset (int from, int to)
23421 rs6000_stack_t *info = rs6000_stack_info ();
23422 HOST_WIDE_INT offset;
23424 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23425 offset = info->push_p ? 0 : -info->total_size;
23426 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23428 offset = info->push_p ? 0 : -info->total_size;
23429 if (FRAME_GROWS_DOWNWARD)
23430 offset += info->fixed_size + info->vars_size + info->parm_size;
23432 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23433 offset = FRAME_GROWS_DOWNWARD
23434 ? info->fixed_size + info->vars_size + info->parm_size
23435 : 0;
23436 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23437 offset = info->total_size;
23438 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23439 offset = info->push_p ? info->total_size : 0;
23440 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23441 offset = 0;
23442 else
23443 gcc_unreachable ();
23445 return offset;
23448 /* Fill in sizes of registers used by unwinder. */
23450 static void
23451 rs6000_init_dwarf_reg_sizes_extra (tree address)
23453 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23455 int i;
23456 machine_mode mode = TYPE_MODE (char_type_node);
23457 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23458 rtx mem = gen_rtx_MEM (BLKmode, addr);
23459 rtx value = gen_int_mode (16, mode);
23461 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23462 The unwinder still needs to know the size of Altivec registers. */
23464 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23466 int column = DWARF_REG_TO_UNWIND_COLUMN
23467 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23468 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23470 emit_move_insn (adjust_address (mem, mode, offset), value);
23475 /* Map internal gcc register numbers to debug format register numbers.
23476 FORMAT specifies the type of debug register number to use:
23477 0 -- debug information, except for frame-related sections
23478 1 -- DWARF .debug_frame section
23479 2 -- DWARF .eh_frame section */
23481 unsigned int
23482 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
23484 /* On some platforms, we use the standard DWARF register
23485 numbering for .debug_info and .debug_frame. */
23486 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23488 #ifdef RS6000_USE_DWARF_NUMBERING
23489 if (regno <= 31)
23490 return regno;
23491 if (FP_REGNO_P (regno))
23492 return regno - FIRST_FPR_REGNO + 32;
23493 if (ALTIVEC_REGNO_P (regno))
23494 return regno - FIRST_ALTIVEC_REGNO + 1124;
23495 if (regno == LR_REGNO)
23496 return 108;
23497 if (regno == CTR_REGNO)
23498 return 109;
23499 if (regno == CA_REGNO)
23500 return 101; /* XER */
23501 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23502 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23503 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23504 to the DWARF reg for CR. */
23505 if (format == 1 && regno == CR2_REGNO)
23506 return 64;
23507 if (CR_REGNO_P (regno))
23508 return regno - CR0_REGNO + 86;
23509 if (regno == VRSAVE_REGNO)
23510 return 356;
23511 if (regno == VSCR_REGNO)
23512 return 67;
23514 /* These do not make much sense. */
23515 if (regno == FRAME_POINTER_REGNUM)
23516 return 111;
23517 if (regno == ARG_POINTER_REGNUM)
23518 return 67;
23519 if (regno == 64)
23520 return 100;
23522 gcc_unreachable ();
23523 #endif
23526 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23527 information, and also for .eh_frame. */
23528 /* Translate the regnos to their numbers in GCC 7 (and before). */
23529 if (regno <= 31)
23530 return regno;
23531 if (FP_REGNO_P (regno))
23532 return regno - FIRST_FPR_REGNO + 32;
23533 if (ALTIVEC_REGNO_P (regno))
23534 return regno - FIRST_ALTIVEC_REGNO + 77;
23535 if (regno == LR_REGNO)
23536 return 65;
23537 if (regno == CTR_REGNO)
23538 return 66;
23539 if (regno == CA_REGNO)
23540 return 76; /* XER */
23541 if (CR_REGNO_P (regno))
23542 return regno - CR0_REGNO + 68;
23543 if (regno == VRSAVE_REGNO)
23544 return 109;
23545 if (regno == VSCR_REGNO)
23546 return 110;
23548 if (regno == FRAME_POINTER_REGNUM)
23549 return 111;
23550 if (regno == ARG_POINTER_REGNUM)
23551 return 67;
23552 if (regno == 64)
23553 return 64;
23555 gcc_unreachable ();
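/* E.g. under this GCC 7 compatible numbering f0 maps to 32, v0 to 77,
   LR to 65 and CR0 to 68, whereas the strict DWARF numbering above maps
   v0 to 1124 and LR to 108.  */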
23558 /* Target hook for eh_return_filter_mode.  */
23559 static scalar_int_mode
23560 rs6000_eh_return_filter_mode (void)
23562 return TARGET_32BIT ? SImode : word_mode;
23565 /* Target hook for translate_mode_attribute. */
23566 static machine_mode
23567 rs6000_translate_mode_attribute (machine_mode mode)
23569 if ((FLOAT128_IEEE_P (mode)
23570 && ieee128_float_type_node == long_double_type_node)
23571 || (FLOAT128_IBM_P (mode)
23572 && ibm128_float_type_node == long_double_type_node))
23573 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23574 return mode;
23577 /* Target hook for scalar_mode_supported_p. */
23578 static bool
23579 rs6000_scalar_mode_supported_p (scalar_mode mode)
23581 /* -m32 does not support TImode. This is the default, from
23582 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23583 same ABI as for -m32. But default_scalar_mode_supported_p allows
23584 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23585 for -mpowerpc64. */
23586 if (TARGET_32BIT && mode == TImode)
23587 return false;
23589 if (DECIMAL_FLOAT_MODE_P (mode))
23590 return default_decimal_float_supported_p ();
23591 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23592 return true;
23593 else
23594 return default_scalar_mode_supported_p (mode);
23597 /* Target hook for libgcc_floating_mode_supported_p. */
23599 static bool
23600 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
23602 switch (mode)
23604 case E_SFmode:
23605 case E_DFmode:
23606 case E_TFmode:
23607 return true;
23609 /* We only return true for KFmode if IEEE 128-bit types are supported, and
23610 if long double does not use the IEEE 128-bit format. If long double
23611 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
23612 Because the code will not use KFmode in that case, there would be aborts
23613 because KFmode cannot be found among the _FloatN types. */
23614 case E_KFmode:
23615 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
23617 default:
23618 return false;
23622 /* Target hook for vector_mode_supported_p. */
23623 static bool
23624 rs6000_vector_mode_supported_p (machine_mode mode)
23626 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23627 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23628 double-double. */
23629 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23630 return true;
23632 else
23633 return false;
23636 /* Target hook for floatn_mode. */
23637 static opt_scalar_float_mode
23638 rs6000_floatn_mode (int n, bool extended)
23640 if (extended)
23642 switch (n)
23644 case 32:
23645 return DFmode;
23647 case 64:
23648 if (TARGET_FLOAT128_TYPE)
23649 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23650 else
23651 return opt_scalar_float_mode ();
23653 case 128:
23654 return opt_scalar_float_mode ();
23656 default:
23657 /* Those are the only valid _FloatNx types. */
23658 gcc_unreachable ();
23661 else
23663 switch (n)
23665 case 32:
23666 return SFmode;
23668 case 64:
23669 return DFmode;
23671 case 128:
23672 if (TARGET_FLOAT128_TYPE)
23673 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23674 else
23675 return opt_scalar_float_mode ();
23677 default:
23678 return opt_scalar_float_mode ();
23684 /* Target hook for c_mode_for_suffix. */
23685 static machine_mode
23686 rs6000_c_mode_for_suffix (char suffix)
23688 if (TARGET_FLOAT128_TYPE)
23690 if (suffix == 'q' || suffix == 'Q')
23691 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23693 /* At the moment, we are not defining a suffix for IBM extended double.
23694 If/when the default for -mabi=ieeelongdouble is changed, and we want
23695 to support __ibm128 constants in legacy library code, we may need to
23696 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
23697 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23698 __float80 constants. */
23701 return VOIDmode;
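/* Illustrative use: with -mfloat128, a constant such as 1.0q (or 1.0Q)
   is given KFmode, or TFmode when long double is IEEE 128-bit.  */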
23704 /* Target hook for invalid_arg_for_unprototyped_fn. */
23705 static const char *
23706 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
23708 return (!rs6000_darwin64_abi
23709 && typelist == 0
23710 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
23711 && (funcdecl == NULL_TREE
23712 || (TREE_CODE (funcdecl) == FUNCTION_DECL
23713 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
23714 ? N_("AltiVec argument passed to unprototyped function")
23715 : NULL;
23718 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
23719 setup by using __stack_chk_fail_local hidden function instead of
23720 calling __stack_chk_fail directly. Otherwise it is better to call
23721 __stack_chk_fail directly. */
23723 static tree ATTRIBUTE_UNUSED
23724 rs6000_stack_protect_fail (void)
23726 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
23727 ? default_hidden_stack_protect_fail ()
23728 : default_external_stack_protect_fail ();
23731 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
23733 #if TARGET_ELF
23734 static unsigned HOST_WIDE_INT
23735 rs6000_asan_shadow_offset (void)
23737 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
23739 #endif
23741 /* Mask options that we want to support inside of attribute((target)) and
23742 #pragma GCC target operations. Note, we do not include things like
23743 64/32-bit, endianness, hard/soft floating point, etc. that would have
23744 different calling sequences. */
23746 struct rs6000_opt_mask {
23747 const char *name; /* option name */
23748 HOST_WIDE_INT mask; /* mask to set */
23749 bool invert; /* invert sense of mask */
23750 bool valid_target; /* option is a target option */
23753 static struct rs6000_opt_mask const rs6000_opt_masks[] =
23755 { "altivec", OPTION_MASK_ALTIVEC, false, true },
23756 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
23757 false, true },
23758 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
23759 false, true },
23760 { "cmpb", OPTION_MASK_CMPB, false, true },
23761 { "crypto", OPTION_MASK_CRYPTO, false, true },
23762 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
23763 { "dlmzb", OPTION_MASK_DLMZB, false, true },
23764 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
23765 false, true },
23766 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
23767 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
23768 { "fprnd", OPTION_MASK_FPRND, false, true },
23769 { "power10", OPTION_MASK_POWER10, false, true },
23770 { "hard-dfp", OPTION_MASK_DFP, false, true },
23771 { "htm", OPTION_MASK_HTM, false, true },
23772 { "isel", OPTION_MASK_ISEL, false, true },
23773 { "mfcrf", OPTION_MASK_MFCRF, false, true },
23774 { "mfpgpr", 0, false, true },
23775 { "mma", OPTION_MASK_MMA, false, true },
23776 { "modulo", OPTION_MASK_MODULO, false, true },
23777 { "mulhw", OPTION_MASK_MULHW, false, true },
23778 { "multiple", OPTION_MASK_MULTIPLE, false, true },
23779 { "pcrel", OPTION_MASK_PCREL, false, true },
23780 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
23781 { "popcntb", OPTION_MASK_POPCNTB, false, true },
23782 { "popcntd", OPTION_MASK_POPCNTD, false, true },
23783 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
23784 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
23785 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
23786 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
23787 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
23788 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
23789 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
23790 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
23791 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
23792 { "prefixed", OPTION_MASK_PREFIXED, false, true },
23793 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
23794 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
23795 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
23796 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
23797 { "string", 0, false, true },
23798 { "update", OPTION_MASK_NO_UPDATE, true , true },
23799 { "vsx", OPTION_MASK_VSX, false, true },
23800 #ifdef OPTION_MASK_64BIT
23801 #if TARGET_AIX_OS
23802 { "aix64", OPTION_MASK_64BIT, false, false },
23803 { "aix32", OPTION_MASK_64BIT, true, false },
23804 #else
23805 { "64", OPTION_MASK_64BIT, false, false },
23806 { "32", OPTION_MASK_64BIT, true, false },
23807 #endif
23808 #endif
23809 #ifdef OPTION_MASK_EABI
23810 { "eabi", OPTION_MASK_EABI, false, false },
23811 #endif
23812 #ifdef OPTION_MASK_LITTLE_ENDIAN
23813 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
23814 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
23815 #endif
23816 #ifdef OPTION_MASK_RELOCATABLE
23817 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
23818 #endif
23819 #ifdef OPTION_MASK_STRICT_ALIGN
23820 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
23821 #endif
23822 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
23823 { "string", 0, false, false },
23826 /* Builtin mask mapping for printing the flags. */
23827 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
23829 { "altivec", RS6000_BTM_ALTIVEC, false, false },
23830 { "vsx", RS6000_BTM_VSX, false, false },
23831 { "fre", RS6000_BTM_FRE, false, false },
23832 { "fres", RS6000_BTM_FRES, false, false },
23833 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
23834 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
23835 { "popcntd", RS6000_BTM_POPCNTD, false, false },
23836 { "cell", RS6000_BTM_CELL, false, false },
23837 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
23838 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
23839 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
23840 { "crypto", RS6000_BTM_CRYPTO, false, false },
23841 { "htm", RS6000_BTM_HTM, false, false },
23842 { "hard-dfp", RS6000_BTM_DFP, false, false },
23843 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
23844 { "long-double-128", RS6000_BTM_LDBL128, false, false },
23845 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
23846 { "float128", RS6000_BTM_FLOAT128, false, false },
23847 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
23848 { "mma", RS6000_BTM_MMA, false, false },
23849 { "power10", RS6000_BTM_P10, false, false },
23852 /* Option variables that we want to support inside attribute((target)) and
23853 #pragma GCC target operations. */
23855 struct rs6000_opt_var {
23856 const char *name; /* option name */
23857 size_t global_offset; /* offset of the option in global_options. */
23858 size_t target_offset; /* offset of the option in target options. */
23861 static struct rs6000_opt_var const rs6000_opt_vars[] =
23863 { "friz",
23864 offsetof (struct gcc_options, x_TARGET_FRIZ),
23865 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23866 { "avoid-indexed-addresses",
23867 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23868 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23869 { "longcall",
23870 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23871 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23872 { "optimize-swaps",
23873 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23874 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23875 { "allow-movmisalign",
23876 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23877 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23878 { "sched-groups",
23879 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23880 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23881 { "always-hint",
23882 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23883 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23884 { "align-branch-targets",
23885 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23886 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23887 { "sched-prolog",
23888 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23889 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23890 { "sched-epilog",
23891 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23892 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23893 { "speculate-indirect-jumps",
23894 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23895 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
23898 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23899 parsing. Return true if there were no errors. */
23901 static bool
23902 rs6000_inner_target_options (tree args, bool attr_p)
23904 bool ret = true;
23906 if (args == NULL_TREE)
23909 else if (TREE_CODE (args) == STRING_CST)
23911 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23912 char *q;
23914 while ((q = strtok (p, ",")) != NULL)
23916 bool error_p = false;
23917 bool not_valid_p = false;
23918 const char *cpu_opt = NULL;
23920 p = NULL;
23921 if (startswith (q, "cpu="))
23923 int cpu_index = rs6000_cpu_name_lookup (q+4);
23924 if (cpu_index >= 0)
23925 rs6000_cpu_index = cpu_index;
23926 else
23928 error_p = true;
23929 cpu_opt = q+4;
23932 else if (startswith (q, "tune="))
23934 int tune_index = rs6000_cpu_name_lookup (q+5);
23935 if (tune_index >= 0)
23936 rs6000_tune_index = tune_index;
23937 else
23939 error_p = true;
23940 cpu_opt = q+5;
23943 else
23945 size_t i;
23946 bool invert = false;
23947 char *r = q;
23949 error_p = true;
23950 if (startswith (r, "no-"))
23952 invert = true;
23953 r += 3;
23956 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23957 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23959 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23961 if (!rs6000_opt_masks[i].valid_target)
23962 not_valid_p = true;
23963 else
23965 error_p = false;
23966 rs6000_isa_flags_explicit |= mask;
23968 /* VSX needs altivec, so -mvsx automagically sets
23969 altivec and disables -mavoid-indexed-addresses. */
23970 if (!invert)
23972 if (mask == OPTION_MASK_VSX)
23974 mask |= OPTION_MASK_ALTIVEC;
23975 TARGET_AVOID_XFORM = 0;
23979 if (rs6000_opt_masks[i].invert)
23980 invert = !invert;
23982 if (invert)
23983 rs6000_isa_flags &= ~mask;
23984 else
23985 rs6000_isa_flags |= mask;
23987 break;
23990 if (error_p && !not_valid_p)
23992 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23993 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23995 size_t j = rs6000_opt_vars[i].global_offset;
23996 *((int *) ((char *)&global_options + j)) = !invert;
23997 error_p = false;
23998 not_valid_p = false;
23999 break;
24004 if (error_p)
24006 const char *eprefix, *esuffix;
24008 ret = false;
24009 if (attr_p)
24011 eprefix = "__attribute__((__target__(";
24012 esuffix = ")))";
24014 else
24016 eprefix = "#pragma GCC target ";
24017 esuffix = "";
24020 if (cpu_opt)
24021 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24022 q, esuffix);
24023 else if (not_valid_p)
24024 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24025 else
24026 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24031 else if (TREE_CODE (args) == TREE_LIST)
24035 tree value = TREE_VALUE (args);
24036 if (value)
24038 bool ret2 = rs6000_inner_target_options (value, attr_p);
24039 if (!ret2)
24040 ret = false;
24042 args = TREE_CHAIN (args);
24044 while (args != NULL_TREE);
24047 else
24049 error ("attribute %<target%> argument not a string");
24050 return false;
24053 return ret;
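/* Sketch of strings accepted above (not exhaustive):
     #pragma GCC target ("cpu=power9,htm")
     __attribute__((__target__("no-vsx,avoid-indexed-addresses")))
   "cpu=" and "tune=" select a processor by name; a "no-" prefix inverts
   an entry from the rs6000_opt_masks or rs6000_opt_vars tables.  */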
24056 /* Print out the target options as a list for -mdebug=target. */
24058 static void
24059 rs6000_debug_target_options (tree args, const char *prefix)
24061 if (args == NULL_TREE)
24062 fprintf (stderr, "%s<NULL>", prefix);
24064 else if (TREE_CODE (args) == STRING_CST)
24066 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24067 char *q;
24069 while ((q = strtok (p, ",")) != NULL)
24071 p = NULL;
24072 fprintf (stderr, "%s\"%s\"", prefix, q);
24073 prefix = ", ";
24077 else if (TREE_CODE (args) == TREE_LIST)
24081 tree value = TREE_VALUE (args);
24082 if (value)
24084 rs6000_debug_target_options (value, prefix);
24085 prefix = ", ";
24087 args = TREE_CHAIN (args);
24089 while (args != NULL_TREE);
24092 else
24093 gcc_unreachable ();
24095 return;
24099 /* Hook to validate attribute((target("..."))). */
24101 static bool
24102 rs6000_valid_attribute_p (tree fndecl,
24103 tree ARG_UNUSED (name),
24104 tree args,
24105 int flags)
24107 struct cl_target_option cur_target;
24108 bool ret;
24109 tree old_optimize;
24110 tree new_target, new_optimize;
24111 tree func_optimize;
24113 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24115 if (TARGET_DEBUG_TARGET)
24117 tree tname = DECL_NAME (fndecl);
24118 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24119 if (tname)
24120 fprintf (stderr, "function: %.*s\n",
24121 (int) IDENTIFIER_LENGTH (tname),
24122 IDENTIFIER_POINTER (tname));
24123 else
24124 fprintf (stderr, "function: unknown\n");
24126 fprintf (stderr, "args:");
24127 rs6000_debug_target_options (args, " ");
24128 fprintf (stderr, "\n");
24130 if (flags)
24131 fprintf (stderr, "flags: 0x%x\n", flags);
24133 fprintf (stderr, "--------------------\n");
24136 /* attribute((target("default"))) does nothing, beyond
24137 affecting multi-versioning. */
24138 if (TREE_VALUE (args)
24139 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24140 && TREE_CHAIN (args) == NULL_TREE
24141 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24142 return true;
24144 old_optimize = build_optimization_node (&global_options,
24145 &global_options_set);
24146 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24148 /* If the function changed the optimization levels as well as setting target
24149 options, start with the optimizations specified. */
24150 if (func_optimize && func_optimize != old_optimize)
24151 cl_optimization_restore (&global_options, &global_options_set,
24152 TREE_OPTIMIZATION (func_optimize));
24154 /* The target attributes may also change some optimization flags, so update
24155 the optimization options if necessary. */
24156 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24157 rs6000_cpu_index = rs6000_tune_index = -1;
24158 ret = rs6000_inner_target_options (args, true);
24160 /* Set up any additional state. */
24161 if (ret)
24163 ret = rs6000_option_override_internal (false);
24164 new_target = build_target_option_node (&global_options,
24165 &global_options_set);
24167 else
24168 new_target = NULL;
24170 new_optimize = build_optimization_node (&global_options,
24171 &global_options_set);
24173 if (!new_target)
24174 ret = false;
24176 else if (fndecl)
24178 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24180 if (old_optimize != new_optimize)
24181 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24184 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24186 if (old_optimize != new_optimize)
24187 cl_optimization_restore (&global_options, &global_options_set,
24188 TREE_OPTIMIZATION (old_optimize));
24190 return ret;
24194 /* Hook to validate the current #pragma GCC target and set the state, and
24195 update the macros based on what was changed. If ARGS is NULL, then
24196 POP_TARGET is used to reset the options. */
24198 bool
24199 rs6000_pragma_target_parse (tree args, tree pop_target)
24201 tree prev_tree = build_target_option_node (&global_options,
24202 &global_options_set);
24203 tree cur_tree;
24204 struct cl_target_option *prev_opt, *cur_opt;
24205 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24206 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
24208 if (TARGET_DEBUG_TARGET)
24210 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24211 fprintf (stderr, "args:");
24212 rs6000_debug_target_options (args, " ");
24213 fprintf (stderr, "\n");
24215 if (pop_target)
24217 fprintf (stderr, "pop_target:\n");
24218 debug_tree (pop_target);
24220 else
24221 fprintf (stderr, "pop_target: <NULL>\n");
24223 fprintf (stderr, "--------------------\n");
24226 if (! args)
24228 cur_tree = ((pop_target)
24229 ? pop_target
24230 : target_option_default_node);
24231 cl_target_option_restore (&global_options, &global_options_set,
24232 TREE_TARGET_OPTION (cur_tree));
24234 else
24236 rs6000_cpu_index = rs6000_tune_index = -1;
24237 if (!rs6000_inner_target_options (args, false)
24238 || !rs6000_option_override_internal (false)
24239 || (cur_tree = build_target_option_node (&global_options,
24240 &global_options_set))
24241 == NULL_TREE)
24243 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24244 fprintf (stderr, "invalid pragma\n");
24246 return false;
24250 target_option_current_node = cur_tree;
24251 rs6000_activate_target_options (target_option_current_node);
24253 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24254 change the macros that are defined. */
24255 if (rs6000_target_modify_macros_ptr)
24257 prev_opt = TREE_TARGET_OPTION (prev_tree);
24258 prev_bumask = prev_opt->x_rs6000_builtin_mask;
24259 prev_flags = prev_opt->x_rs6000_isa_flags;
24261 cur_opt = TREE_TARGET_OPTION (cur_tree);
24262 cur_flags = cur_opt->x_rs6000_isa_flags;
24263 cur_bumask = cur_opt->x_rs6000_builtin_mask;
24265 diff_bumask = (prev_bumask ^ cur_bumask);
24266 diff_flags = (prev_flags ^ cur_flags);
24268 if ((diff_flags != 0) || (diff_bumask != 0))
24270 /* Delete old macros. */
24271 rs6000_target_modify_macros_ptr (false,
24272 prev_flags & diff_flags,
24273 prev_bumask & diff_bumask);
24275 /* Define new macros. */
24276 rs6000_target_modify_macros_ptr (true,
24277 cur_flags & diff_flags,
24278 cur_bumask & diff_bumask);
24282 return true;
24286 /* Remember the last target of rs6000_set_current_function. */
24287 static GTY(()) tree rs6000_previous_fndecl;
24289 /* Restore target's globals from NEW_TREE and invalidate the
24290 rs6000_previous_fndecl cache. */
24292 void
24293 rs6000_activate_target_options (tree new_tree)
24295 cl_target_option_restore (&global_options, &global_options_set,
24296 TREE_TARGET_OPTION (new_tree));
24297 if (TREE_TARGET_GLOBALS (new_tree))
24298 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24299 else if (new_tree == target_option_default_node)
24300 restore_target_globals (&default_target_globals);
24301 else
24302 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24303 rs6000_previous_fndecl = NULL_TREE;
24306 /* Establish appropriate back-end context for processing the function
24307 FNDECL. The argument might be NULL to indicate processing at top
24308 level, outside of any function scope. */
24309 static void
24310 rs6000_set_current_function (tree fndecl)
24312 if (TARGET_DEBUG_TARGET)
24314 fprintf (stderr, "\n==================== rs6000_set_current_function");
24316 if (fndecl)
24317 fprintf (stderr, ", fndecl %s (%p)",
24318 (DECL_NAME (fndecl)
24319 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24320 : "<unknown>"), (void *)fndecl);
24322 if (rs6000_previous_fndecl)
24323 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24325 fprintf (stderr, "\n");
24328 /* Only change the context if the function changes. This hook is called
24329 several times in the course of compiling a function, and we don't want to
24330 slow things down too much or call target_reinit when it isn't safe. */
24331 if (fndecl == rs6000_previous_fndecl)
24332 return;
24334 tree old_tree;
24335 if (rs6000_previous_fndecl == NULL_TREE)
24336 old_tree = target_option_current_node;
24337 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24338 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24339 else
24340 old_tree = target_option_default_node;
24342 tree new_tree;
24343 if (fndecl == NULL_TREE)
24345 if (old_tree != target_option_current_node)
24346 new_tree = target_option_current_node;
24347 else
24348 new_tree = NULL_TREE;
24350 else
24352 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24353 if (new_tree == NULL_TREE)
24354 new_tree = target_option_default_node;
24357 if (TARGET_DEBUG_TARGET)
24359 if (new_tree)
24361 fprintf (stderr, "\nnew fndecl target specific options:\n");
24362 debug_tree (new_tree);
24365 if (old_tree)
24367 fprintf (stderr, "\nold fndecl target specific options:\n");
24368 debug_tree (old_tree);
24371 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24372 fprintf (stderr, "--------------------\n");
24375 if (new_tree && old_tree != new_tree)
24376 rs6000_activate_target_options (new_tree);
24378 if (fndecl)
24379 rs6000_previous_fndecl = fndecl;
24383 /* Save the current options */
24385 static void
24386 rs6000_function_specific_save (struct cl_target_option *ptr,
24387 struct gcc_options *opts,
24388 struct gcc_options */* opts_set */)
24390 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24391 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24394 /* Restore the current options */
24396 static void
24397 rs6000_function_specific_restore (struct gcc_options *opts,
24398 struct gcc_options */* opts_set */,
24399 struct cl_target_option *ptr)
24402 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24403 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24404 (void) rs6000_option_override_internal (false);
24407 /* Print the current options */
24409 static void
24410 rs6000_function_specific_print (FILE *file, int indent,
24411 struct cl_target_option *ptr)
24413 rs6000_print_isa_options (file, indent, "Isa options set",
24414 ptr->x_rs6000_isa_flags);
24416 rs6000_print_isa_options (file, indent, "Isa options explicit",
24417 ptr->x_rs6000_isa_flags_explicit);
24420 /* Helper function to print the current isa or misc options on a line. */
24422 static void
24423 rs6000_print_options_internal (FILE *file,
24424 int indent,
24425 const char *string,
24426 HOST_WIDE_INT flags,
24427 const char *prefix,
24428 const struct rs6000_opt_mask *opts,
24429 size_t num_elements)
24431 size_t i;
24432 size_t start_column = 0;
24433 size_t cur_column;
24434 size_t max_column = 120;
24435 size_t prefix_len = strlen (prefix);
24436 size_t comma_len = 0;
24437 const char *comma = "";
24439 if (indent)
24440 start_column += fprintf (file, "%*s", indent, "");
24442 if (!flags)
24444 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24445 return;
24448 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24450 /* Print the various mask options. */
24451 cur_column = start_column;
24452 for (i = 0; i < num_elements; i++)
24454 bool invert = opts[i].invert;
24455 const char *name = opts[i].name;
24456 const char *no_str = "";
24457 HOST_WIDE_INT mask = opts[i].mask;
24458 size_t len = comma_len + prefix_len + strlen (name);
24460 if (!invert)
24462 if ((flags & mask) == 0)
24464 no_str = "no-";
24465 len += strlen ("no-");
24468 flags &= ~mask;
24471 else
24473 if ((flags & mask) != 0)
24475 no_str = "no-";
24476 len += strlen ("no-");
24479 flags |= mask;
24482 cur_column += len;
24483 if (cur_column > max_column)
24485 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24486 cur_column = start_column + len;
24487 comma = "";
24490 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24491 comma = ", ";
24492 comma_len = strlen (", ");
24495 fputs ("\n", file);
24498 /* Helper function to print the current isa options on a line. */
24500 static void
24501 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24502 HOST_WIDE_INT flags)
24504 rs6000_print_options_internal (file, indent, string, flags, "-m",
24505 &rs6000_opt_masks[0],
24506 ARRAY_SIZE (rs6000_opt_masks));
24509 static void
24510 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
24511 HOST_WIDE_INT flags)
24513 rs6000_print_options_internal (file, indent, string, flags, "",
24514 &rs6000_builtin_mask_names[0],
24515 ARRAY_SIZE (rs6000_builtin_mask_names));
24518 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24519 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24520 -mupper-regs-df, etc.).
24522 If the user used -mno-power8-vector, we need to turn off all of the implicit
24523 ISA 2.07 and 3.0 options that relate to the vector unit.
24525 If the user used -mno-power9-vector, we need to turn off all of the implicit
24526 ISA 3.0 options that relate to the vector unit.
24528 This function does not handle explicit options such as the user specifying
24529 -mdirect-move. These are handled in rs6000_option_override_internal, and
24530 the appropriate error is given if needed.
24532 We return a mask of all of the implicit options that should not be enabled
24533 by default. */
24535 static HOST_WIDE_INT
24536 rs6000_disable_incompatible_switches (void)
24538 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24539 size_t i, j;
24541 static const struct {
24542 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24543 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24544 const char *const name; /* name of the switch. */
24545 } flags[] = {
24546 { OPTION_MASK_POWER10, OTHER_POWER10_MASKS, "power10" },
24547 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
24548 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
24549 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
24550 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
24553 for (i = 0; i < ARRAY_SIZE (flags); i++)
24555 HOST_WIDE_INT no_flag = flags[i].no_flag;
24557 if ((rs6000_isa_flags & no_flag) == 0
24558 && (rs6000_isa_flags_explicit & no_flag) != 0)
24560 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
24561 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
24562 & rs6000_isa_flags
24563 & dep_flags);
24565 if (set_flags)
24567 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
24568 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
24570 set_flags &= ~rs6000_opt_masks[j].mask;
24571 error ("%<-mno-%s%> turns off %<-m%s%>",
24572 flags[i].name,
24573 rs6000_opt_masks[j].name);
24576 gcc_assert (!set_flags);
24579 rs6000_isa_flags &= ~dep_flags;
24580 ignore_masks |= no_flag | dep_flags;
24584 return ignore_masks;
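/* For example, with -mvsx -mno-altivec, ALTIVEC is explicitly off while
   VSX remains explicitly on; assuming OPTION_MASK_VSX is among
   OTHER_ALTIVEC_MASKS, the loop above emits
   "-mno-altivec turns off -mvsx" and clears the dependent flags.  */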
24588 /* Helper function for printing the function name when debugging. */
24590 static const char *
24591 get_decl_name (tree fn)
24593 tree name;
24595 if (!fn)
24596 return "<null>";
24598 name = DECL_NAME (fn);
24599 if (!name)
24600 return "<no-name>";
24602 return IDENTIFIER_POINTER (name);
24605 /* Return the clone id of the target we are compiling code for in a target
24606 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24607 the priority list for the target clones (ordered from lowest to
24608 highest). */
24610 static int
24611 rs6000_clone_priority (tree fndecl)
24613 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24614 HOST_WIDE_INT isa_masks;
24615 int ret = CLONE_DEFAULT;
24616 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
24617 const char *attrs_str = NULL;
24619 attrs = TREE_VALUE (TREE_VALUE (attrs));
24620 attrs_str = TREE_STRING_POINTER (attrs);
24622 /* Return priority zero for the default function. Return the ISA needed
24623 for the function if it is not the default. */
24624 if (strcmp (attrs_str, "default") != 0)
24626 if (fn_opts == NULL_TREE)
24627 fn_opts = target_option_default_node;
24629 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
24630 isa_masks = rs6000_isa_flags;
24631 else
24632 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
24634 for (ret = CLONE_MAX - 1; ret != 0; ret--)
24635 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
24636 break;
24639 if (TARGET_DEBUG_TARGET)
24640 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
24641 get_decl_name (fndecl), ret);
24643 return ret;
24646 /* This compares the priority of target features in function DECL1 and DECL2.
24647 It returns a positive value if DECL1 has higher priority, a negative value
24648 if DECL2 has higher priority, and 0 if they are the same. Note, priorities are
24649 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
24651 static int
24652 rs6000_compare_version_priority (tree decl1, tree decl2)
24654 int priority1 = rs6000_clone_priority (decl1);
24655 int priority2 = rs6000_clone_priority (decl2);
24656 int ret = priority1 - priority2;
24658 if (TARGET_DEBUG_TARGET)
24659 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
24660 get_decl_name (decl1), get_decl_name (decl2), ret);
24662 return ret;
24665 /* Make a dispatcher declaration for the multi-versioned function DECL.
24666 Calls to DECL function will be replaced with calls to the dispatcher
24667 by the front-end. Returns the decl of the dispatcher function. */
24669 static tree
24670 rs6000_get_function_versions_dispatcher (void *decl)
24672 tree fn = (tree) decl;
24673 struct cgraph_node *node = NULL;
24674 struct cgraph_node *default_node = NULL;
24675 struct cgraph_function_version_info *node_v = NULL;
24676 struct cgraph_function_version_info *first_v = NULL;
24678 tree dispatch_decl = NULL;
24680 struct cgraph_function_version_info *default_version_info = NULL;
24681 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
24683 if (TARGET_DEBUG_TARGET)
24684 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
24685 get_decl_name (fn));
24687 node = cgraph_node::get (fn);
24688 gcc_assert (node != NULL);
24690 node_v = node->function_version ();
24691 gcc_assert (node_v != NULL);
24693 if (node_v->dispatcher_resolver != NULL)
24694 return node_v->dispatcher_resolver;
24696 /* Find the default version and make it the first node. */
24697 first_v = node_v;
24698 /* Go to the beginning of the chain. */
24699 while (first_v->prev != NULL)
24700 first_v = first_v->prev;
24702 default_version_info = first_v;
24703 while (default_version_info != NULL)
24705 const tree decl2 = default_version_info->this_node->decl;
24706 if (is_function_default_version (decl2))
24707 break;
24708 default_version_info = default_version_info->next;
24711 /* If there is no default node, just return NULL. */
24712 if (default_version_info == NULL)
24713 return NULL;
24715 /* Make default info the first node. */
24716 if (first_v != default_version_info)
24718 default_version_info->prev->next = default_version_info->next;
24719 if (default_version_info->next)
24720 default_version_info->next->prev = default_version_info->prev;
24721 first_v->prev = default_version_info;
24722 default_version_info->next = first_v;
24723 default_version_info->prev = NULL;
24726 default_node = default_version_info->this_node;
24728 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
24729 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24730 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
24731 "exports hardware capability bits");
24732 #else
24734 if (targetm.has_ifunc_p ())
24736 struct cgraph_function_version_info *it_v = NULL;
24737 struct cgraph_node *dispatcher_node = NULL;
24738 struct cgraph_function_version_info *dispatcher_version_info = NULL;
24740 /* Right now, the dispatching is done via ifunc. */
24741 dispatch_decl = make_dispatcher_decl (default_node->decl);
24743 dispatcher_node = cgraph_node::get_create (dispatch_decl);
24744 gcc_assert (dispatcher_node != NULL);
24745 dispatcher_node->dispatcher_function = 1;
24746 dispatcher_version_info
24747 = dispatcher_node->insert_new_function_version ();
24748 dispatcher_version_info->next = default_version_info;
24749 dispatcher_node->definition = 1;
24751 /* Set the dispatcher for all the versions. */
24752 it_v = default_version_info;
24753 while (it_v != NULL)
24755 it_v->dispatcher_resolver = dispatch_decl;
24756 it_v = it_v->next;
24759 else
24761 error_at (DECL_SOURCE_LOCATION (default_node->decl),
24762 "multiversioning needs ifunc which is not supported "
24763 "on this target");
24765 #endif
24767 return dispatch_decl;
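/* Illustrative source-level trigger for the dispatcher machinery above:
     __attribute__((target_clones ("cpu=power9", "default")))
     long foo (long);
   Calls to foo go through an ifunc whose resolver selects a clone via
   the __builtin_cpu_supports machinery, relying on the GLIBC hwcap
   support noted in the error message above.  */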
24770 /* Make the resolver function decl to dispatch the versions of a multi-
24771 versioned function, DEFAULT_DECL. Create an empty basic block in the
24772 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
24773 function. */
24775 static tree
24776 make_resolver_func (const tree default_decl,
24777 const tree dispatch_decl,
24778 basic_block *empty_bb)
24780 /* Make the resolver function static. The resolver function returns
24781 void *. */
24782 tree decl_name = clone_function_name (default_decl, "resolver");
24783 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
24784 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
24785 tree decl = build_fn_decl (resolver_name, type);
24786 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
24788 DECL_NAME (decl) = decl_name;
24789 TREE_USED (decl) = 1;
24790 DECL_ARTIFICIAL (decl) = 1;
24791 DECL_IGNORED_P (decl) = 0;
24792 TREE_PUBLIC (decl) = 0;
24793 DECL_UNINLINABLE (decl) = 1;
24795 /* Resolver is not external, body is generated. */
24796 DECL_EXTERNAL (decl) = 0;
24797 DECL_EXTERNAL (dispatch_decl) = 0;
24799 DECL_CONTEXT (decl) = NULL_TREE;
24800 DECL_INITIAL (decl) = make_node (BLOCK);
24801 DECL_STATIC_CONSTRUCTOR (decl) = 0;
24803 if (DECL_COMDAT_GROUP (default_decl)
24804 || TREE_PUBLIC (default_decl))
24806 /* In this case, each translation unit with a call to this
24807 versioned function will put out a resolver. Ensure it
24808 is comdat to keep just one copy. */
24809 DECL_COMDAT (decl) = 1;
24810 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
24812 else
24813 TREE_PUBLIC (dispatch_decl) = 0;
24815 /* Build result decl and add to function_decl. */
24816 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
24817 DECL_CONTEXT (t) = decl;
24818 DECL_ARTIFICIAL (t) = 1;
24819 DECL_IGNORED_P (t) = 1;
24820 DECL_RESULT (decl) = t;
24822 gimplify_function_tree (decl);
24823 push_cfun (DECL_STRUCT_FUNCTION (decl));
24824 *empty_bb = init_lowered_empty_function (decl, false,
24825 profile_count::uninitialized ());
24827 cgraph_node::add_new_function (decl, true);
24828 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
24830 pop_cfun ();
24832 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
24833 DECL_ATTRIBUTES (dispatch_decl)
24834 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
24836 cgraph_node::create_same_body_alias (dispatch_decl, decl);
24838 return decl;
24841 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
24842 return a pointer to VERSION_DECL if we are running on a machine that
24843 supports the hardware architecture bits indexed by CLONE_ISA. This function will
24844 be called during version dispatch to decide which function version to
24845 execute. It returns the basic block at the end, to which more conditions
24846 can be added. */
24848 static basic_block
24849 add_condition_to_bb (tree function_decl, tree version_decl,
24850 int clone_isa, basic_block new_bb)
24852 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
24854 gcc_assert (new_bb != NULL);
24855 gimple_seq gseq = bb_seq (new_bb);
24858 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
24859 build_fold_addr_expr (version_decl));
24860 tree result_var = create_tmp_var (ptr_type_node);
24861 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
24862 gimple *return_stmt = gimple_build_return (result_var);
24864 if (clone_isa == CLONE_DEFAULT)
24866 gimple_seq_add_stmt (&gseq, convert_stmt);
24867 gimple_seq_add_stmt (&gseq, return_stmt);
24868 set_bb_seq (new_bb, gseq);
24869 gimple_set_bb (convert_stmt, new_bb);
24870 gimple_set_bb (return_stmt, new_bb);
24871 pop_cfun ();
24872 return new_bb;
24875 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24876 tree cond_var = create_tmp_var (bool_int_type_node);
24877 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24878 const char *arg_str = rs6000_clone_map[clone_isa].name;
24879 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24880 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24881 gimple_call_set_lhs (call_cond_stmt, cond_var);
24883 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24884 gimple_set_bb (call_cond_stmt, new_bb);
24885 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24887 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24888 NULL_TREE, NULL_TREE);
24889 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24890 gimple_set_bb (if_else_stmt, new_bb);
24891 gimple_seq_add_stmt (&gseq, if_else_stmt);
24893 gimple_seq_add_stmt (&gseq, convert_stmt);
24894 gimple_seq_add_stmt (&gseq, return_stmt);
24895 set_bb_seq (new_bb, gseq);
24897 basic_block bb1 = new_bb;
24898 edge e12 = split_block (bb1, if_else_stmt);
24899 basic_block bb2 = e12->dest;
24900 e12->flags &= ~EDGE_FALLTHRU;
24901 e12->flags |= EDGE_TRUE_VALUE;
24903 edge e23 = split_block (bb2, return_stmt);
24904 gimple_set_bb (convert_stmt, bb2);
24905 gimple_set_bb (return_stmt, bb2);
24907 basic_block bb3 = e23->dest;
24908 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24910 remove_edge (e23);
24911 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24913 pop_cfun ();
24914 return bb3;
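/* A sketch of what each call of add_condition_to_bb appends to the
   resolver, in GIMPLE terms (names are illustrative):

     cond = __builtin_cpu_supports ("arch_3_00");
     if (cond != 0)
       return (void *) foo_power9;

   Testing the clones from CLONE_MAX-1 down to CLONE_DEFAULT thus returns
   the highest-priority version the machine supports.  */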
24917 /* This function generates the dispatch function for multi-versioned functions.
24918 DISPATCH_DECL is the function which will contain the dispatch logic.
24919 FNDECLS are the function choices for dispatch, and is a tree chain.
24920 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24921 code is generated. */
24923 static int
24924 dispatch_function_versions (tree dispatch_decl,
24925 void *fndecls_p,
24926 basic_block *empty_bb)
24928 int ix;
24929 tree ele;
24930 vec<tree> *fndecls;
24931 tree clones[CLONE_MAX];
24933 if (TARGET_DEBUG_TARGET)
24934 fputs ("dispatch_function_versions, top\n", stderr);
24936 gcc_assert (dispatch_decl != NULL
24937 && fndecls_p != NULL
24938 && empty_bb != NULL);
24940 /* fndecls_p is actually a vector. */
24941 fndecls = static_cast<vec<tree> *> (fndecls_p);
24943 /* At least one more version other than the default. */
24944 gcc_assert (fndecls->length () >= 2);
24946 /* The first version in the vector is the default decl. */
24947 memset ((void *) clones, '\0', sizeof (clones));
24948 clones[CLONE_DEFAULT] = (*fndecls)[0];
24950 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24951 here (on x86_64, it is not a NOP). The builtin function
24952 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24953 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24954 to insert the code here to do the call. */
24956 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24958 int priority = rs6000_clone_priority (ele);
24959 if (!clones[priority])
24960 clones[priority] = ele;
24963 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24964 if (clones[ix])
24966 if (TARGET_DEBUG_TARGET)
24967 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24968 ix, get_decl_name (clones[ix]));
24970 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24971 *empty_bb);
24974 return 0;
24977 /* Generate the dispatching code body to dispatch multi-versioned function
24978 DECL. The target hook is called to process the "target" attributes and
24979 provide the code to dispatch the right function at run-time. NODE points
24980 to the dispatcher decl whose body will be created. */
24982 static tree
24983 rs6000_generate_version_dispatcher_body (void *node_p)
24985 tree resolver;
24986 basic_block empty_bb;
24987 struct cgraph_node *node = (cgraph_node *) node_p;
24988 struct cgraph_function_version_info *ninfo = node->function_version ();
24990 if (ninfo->dispatcher_resolver)
24991 return ninfo->dispatcher_resolver;
24993 /* node is going to be an alias, so remove the finalized bit. */
24994 node->definition = false;
24996 /* The first version in the chain corresponds to the default version. */
24997 ninfo->dispatcher_resolver = resolver
24998 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25000 if (TARGET_DEBUG_TARGET)
25001 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25002 get_decl_name (resolver));
25004 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25005 auto_vec<tree, 2> fn_ver_vec;
25007 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25008 vinfo;
25009 vinfo = vinfo->next)
25011 struct cgraph_node *version = vinfo->this_node;
25012 /* Check for virtual functions here again, as by this time it should
25013 have been determined if this function needs a vtable index or
25014 not. This happens for methods in derived classes that override
25015 virtual methods in base classes but are not explicitly marked as
25016 virtual. */
25017 if (DECL_VINDEX (version->decl))
25018 sorry ("Virtual function multiversioning not supported");
25020 fn_ver_vec.safe_push (version->decl);
25023 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25024 cgraph_edge::rebuild_edges ();
25025 pop_cfun ();
25026 return resolver;
25030 /* Hook to determine if one function can safely inline another. */
25032 static bool
25033 rs6000_can_inline_p (tree caller, tree callee)
25035 bool ret = false;
25036 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25037 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25039 /* If the callee has no option attributes, then it is ok to inline. */
25040 if (!callee_tree)
25041 ret = true;
25043 else
25045 HOST_WIDE_INT caller_isa;
25046 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25047 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25048 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25050 /* If the caller has option attributes, then use them.
25051 Otherwise, use the command line options. */
25052 if (caller_tree)
25053 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
25054 else
25055 caller_isa = rs6000_isa_flags;
25057 /* The callee's options must be a subset of the caller's options, i.e.
25058 a vsx function may inline an altivec function, but a no-vsx function
25059 must not inline a vsx function. However, for those options that the
25060 callee has explicitly enabled or disabled, we must enforce that
25061 the callee's and caller's options match exactly; see PR70010. */
25062 if (((caller_isa & callee_isa) == callee_isa)
25063 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25064 ret = true;
25067 if (TARGET_DEBUG_TARGET)
25068 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
25069 get_decl_name (caller), get_decl_name (callee),
25070 (ret ? "can" : "cannot"));
25072 return ret;
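/* Example of the subset rule above, with hypothetical declarations: a
   caller built with -mcpu=power9 (which enables both VSX and Altivec)
   may inline

     __attribute__ ((target ("altivec"))) static int f (void);

   since Altivec is a subset of the caller's ISA.  But a callee that
   explicitly says "no-vsx" cannot be inlined into a VSX caller, because
   explicitly set bits must match exactly (see PR70010).  */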
25075 /* Allocate a stack temp and fixup the address so it meets the particular
25076 memory requirements (either offsettable or REG+REG addressing). */
25078 rtx
25079 rs6000_allocate_stack_temp (machine_mode mode,
25080 bool offsettable_p,
25081 bool reg_reg_p)
25083 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25084 rtx addr = XEXP (stack, 0);
25085 int strict_p = reload_completed;
25087 if (!legitimate_indirect_address_p (addr, strict_p))
25089 if (offsettable_p
25090 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25091 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25093 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25094 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25097 return stack;
25100 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25101 convert to such a form to deal with memory reference instructions
25102 like STFIWX and LDBRX that only take reg+reg addressing. */
25104 rtx
25105 rs6000_force_indexed_or_indirect_mem (rtx x)
25107 machine_mode mode = GET_MODE (x);
25109 gcc_assert (MEM_P (x));
25110 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25112 rtx addr = XEXP (x, 0);
25113 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25115 rtx reg = XEXP (addr, 0);
25116 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25117 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25118 gcc_assert (REG_P (reg));
25119 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25120 addr = reg;
25122 else if (GET_CODE (addr) == PRE_MODIFY)
25124 rtx reg = XEXP (addr, 0);
25125 rtx expr = XEXP (addr, 1);
25126 gcc_assert (REG_P (reg));
25127 gcc_assert (GET_CODE (expr) == PLUS);
25128 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25129 addr = reg;
25132 if (GET_CODE (addr) == PLUS)
25134 rtx op0 = XEXP (addr, 0);
25135 rtx op1 = XEXP (addr, 1);
25136 op0 = force_reg (Pmode, op0);
25137 op1 = force_reg (Pmode, op1);
25138 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25140 else
25141 x = replace_equiv_address (x, force_reg (Pmode, addr));
25144 return x;
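/* Worked example for the code above (illustrative RTL): a PRE_DEC
   reference such as

     (mem:DF (pre_dec:DI (reg:DI 9)))

   is rewritten into an explicit "addi 9,9,-8" followed by
   (mem:DF (reg:DI 9)), an indirect form that reg+reg-only instructions
   like STFIWX and LDBRX can accept.  */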
25147 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25149 On the RS/6000, all integer constants are acceptable, most won't be valid
25150 for particular insns, though. Only easy FP constants are acceptable. */
25152 static bool
25153 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25155 if (TARGET_ELF && tls_referenced_p (x))
25156 return false;
25158 if (CONST_DOUBLE_P (x))
25159 return easy_fp_constant (x, mode);
25161 if (GET_CODE (x) == CONST_VECTOR)
25162 return easy_vector_constant (x, mode);
25164 return true;
25167 #if TARGET_AIX_OS
25168 /* Implement TARGET_PRECOMPUTE_TLS_P.
25170 On AIX, TLS symbols are in the TOC, which is maintained in the
25171 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25172 must be considered legitimate constants. */
25174 static bool
25175 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25177 return tls_referenced_p (x);
25179 #endif
25182 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25184 static bool
25185 chain_already_loaded (rtx_insn *last)
25187 for (; last != NULL; last = PREV_INSN (last))
25189 if (NONJUMP_INSN_P (last))
25191 rtx patt = PATTERN (last);
25193 if (GET_CODE (patt) == SET)
25195 rtx lhs = XEXP (patt, 0);
25197 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25198 return true;
25202 return false;
25205 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25207 void
25208 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25210 rtx func = func_desc;
25211 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25212 rtx toc_load = NULL_RTX;
25213 rtx toc_restore = NULL_RTX;
25214 rtx func_addr;
25215 rtx abi_reg = NULL_RTX;
25216 rtx call[5];
25217 int n_call;
25218 rtx insn;
25219 bool is_pltseq_longcall;
25221 if (global_tlsarg)
25222 tlsarg = global_tlsarg;
25224 /* Handle longcall attributes. */
25225 is_pltseq_longcall = false;
25226 if ((INTVAL (cookie) & CALL_LONG) != 0
25227 && GET_CODE (func_desc) == SYMBOL_REF)
25229 func = rs6000_longcall_ref (func_desc, tlsarg);
25230 if (TARGET_PLTSEQ)
25231 is_pltseq_longcall = true;
25234 /* Handle indirect calls. */
25235 if (!SYMBOL_REF_P (func)
25236 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25238 if (!rs6000_pcrel_p ())
25240 /* Save the TOC into its reserved slot before the call,
25241 and prepare to restore it after the call. */
25242 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25243 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25244 gen_rtvec (1, stack_toc_offset),
25245 UNSPEC_TOCSLOT);
25246 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25248 /* Can we optimize saving the TOC in the prologue or
25249 do we need to do it at every call? */
25250 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25251 cfun->machine->save_toc_in_prologue = true;
25252 else
25254 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25255 rtx stack_toc_mem = gen_frame_mem (Pmode,
25256 gen_rtx_PLUS (Pmode, stack_ptr,
25257 stack_toc_offset));
25258 MEM_VOLATILE_P (stack_toc_mem) = 1;
25259 if (is_pltseq_longcall)
25261 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25262 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25263 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25265 else
25266 emit_move_insn (stack_toc_mem, toc_reg);
25270 if (DEFAULT_ABI == ABI_ELFv2)
25272 /* A function pointer in the ELFv2 ABI is just a plain address, but
25273 the ABI requires it to be loaded into r12 before the call. */
25274 func_addr = gen_rtx_REG (Pmode, 12);
25275 emit_move_insn (func_addr, func);
25276 abi_reg = func_addr;
25277 /* Indirect calls via CTR are strongly preferred over indirect
25278 calls via LR, so move the address there. Needed to mark
25279 this insn for linker plt sequence editing too. */
25280 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25281 if (is_pltseq_longcall)
25283 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25284 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25285 emit_insn (gen_rtx_SET (func_addr, mark_func));
25286 v = gen_rtvec (2, func_addr, func_desc);
25287 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25289 else
25290 emit_move_insn (func_addr, abi_reg);
25292 else
25294 /* A function pointer under AIX is a pointer to a data area whose
25295 first word contains the actual address of the function, whose
25296 second word contains a pointer to its TOC, and whose third word
25297 contains a value to place in the static chain register (r11).
25298 Note that if we load the static chain, our "trampoline" need
25299 not have any executable code. */
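/* Layout sketch of such a descriptor (illustrative only; no such type
   is declared here, and each field is one Pmode-sized word):

     struct aix_func_desc
     {
       void *entry;          -- word 0: code address of the function
       void *toc;            -- word 1: the function's TOC pointer
       void *static_chain;   -- word 2: value to load into r11
     };  */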
25301 /* Load up address of the actual function. */
25302 func = force_reg (Pmode, func);
25303 func_addr = gen_reg_rtx (Pmode);
25304 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25306 /* Indirect calls via CTR are strongly preferred over indirect
25307 calls via LR, so move the address there. */
25308 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25309 emit_move_insn (ctr_reg, func_addr);
25310 func_addr = ctr_reg;
25312 /* Prepare to load the TOC of the called function. Note that the
25313 TOC load must happen immediately before the actual call so
25314 that unwinding the TOC registers works correctly. See the
25315 comment in frob_update_context. */
25316 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25317 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25318 gen_rtx_PLUS (Pmode, func,
25319 func_toc_offset));
25320 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25322 /* If we have a static chain, load it up. But, if the call was
25323 originally direct, the 3rd word has not been written since no
25324 trampoline has been built, so we ought not to load it, lest we
25325 overwrite a static chain value. */
25326 if (!(GET_CODE (func_desc) == SYMBOL_REF
25327 && SYMBOL_REF_FUNCTION_P (func_desc))
25328 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25329 && !chain_already_loaded (get_current_sequence ()->next->last))
25331 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25332 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25333 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25334 gen_rtx_PLUS (Pmode, func,
25335 func_sc_offset));
25336 emit_move_insn (sc_reg, func_sc_mem);
25337 abi_reg = sc_reg;
25341 else
25343 /* No TOC register needed for calls from PC-relative callers. */
25344 if (!rs6000_pcrel_p ())
25345 /* Direct calls use the TOC: for local calls, the callee will
25346 assume the TOC register is set; for non-local calls, the
25347 PLT stub needs the TOC register. */
25348 abi_reg = toc_reg;
25349 func_addr = func;
25352 /* Create the call. */
25353 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25354 if (value != NULL_RTX)
25355 call[0] = gen_rtx_SET (value, call[0]);
25356 call[1] = gen_rtx_USE (VOIDmode, cookie);
25357 n_call = 2;
25359 if (toc_load)
25360 call[n_call++] = toc_load;
25361 if (toc_restore)
25362 call[n_call++] = toc_restore;
25364 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25366 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25367 insn = emit_call_insn (insn);
25369 /* Mention all registers defined by the ABI to hold information
25370 as uses in CALL_INSN_FUNCTION_USAGE. */
25371 if (abi_reg)
25372 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25375 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25377 void
25378 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25380 rtx call[2];
25381 rtx insn;
25382 rtx r12 = NULL_RTX;
25383 rtx func_addr = func_desc;
25385 gcc_assert (INTVAL (cookie) == 0);
25387 if (global_tlsarg)
25388 tlsarg = global_tlsarg;
25390 /* For ELFv2, r12 and CTR need to hold the function address
25391 for an indirect call. */
25392 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25394 r12 = gen_rtx_REG (Pmode, 12);
25395 emit_move_insn (r12, func_desc);
25396 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25397 emit_move_insn (func_addr, r12);
25400 /* Create the call. */
25401 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25402 if (value != NULL_RTX)
25403 call[0] = gen_rtx_SET (value, call[0]);
25405 call[1] = simple_return_rtx;
25407 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25408 insn = emit_call_insn (insn);
25410 /* Note use of the TOC register. */
25411 if (!rs6000_pcrel_p ())
25412 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25413 gen_rtx_REG (Pmode, TOC_REGNUM));
25415 /* Note use of r12. */
25416 if (r12)
25417 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25420 /* Expand code to perform a call under the SYSV4 ABI. */
25422 void
25423 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25425 rtx func = func_desc;
25426 rtx func_addr;
25427 rtx call[4];
25428 rtx insn;
25429 rtx abi_reg = NULL_RTX;
25430 int n;
25432 if (global_tlsarg)
25433 tlsarg = global_tlsarg;
25435 /* Handle longcall attributes. */
25436 if ((INTVAL (cookie) & CALL_LONG) != 0
25437 && GET_CODE (func_desc) == SYMBOL_REF)
25439 func = rs6000_longcall_ref (func_desc, tlsarg);
25440 /* If the longcall was implemented as an inline PLT call using
25441 PLT unspecs then func will be REG:r11. If not, func will be
25442 a pseudo reg. The inline PLT call sequence supports lazy
25443 linking (and longcalls to functions in dlopen'd libraries).
25444 The other style of longcall doesn't. The lazy linking entry
25445 to the dynamic symbol resolver requires r11 be the function
25446 address (as it is for linker generated PLT stubs). Ensure
25447 r11 stays valid to the bctrl by marking r11 used by the call. */
25448 if (TARGET_PLTSEQ)
25449 abi_reg = func;
25452 /* Handle indirect calls. */
25453 if (GET_CODE (func) != SYMBOL_REF)
25455 func = force_reg (Pmode, func);
25457 /* Indirect calls via CTR are strongly preferred over indirect
25458 calls via LR, so move the address there. That can't be left
25459 to reload because we want to mark every instruction in an
25460 inline PLT call sequence with a reloc, enabling the linker to
25461 edit the sequence back to a direct call when that makes sense. */
25462 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25463 if (abi_reg)
25465 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25466 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25467 emit_insn (gen_rtx_SET (func_addr, mark_func));
25468 v = gen_rtvec (2, func_addr, func_desc);
25469 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25471 else
25472 emit_move_insn (func_addr, func);
25474 else
25475 func_addr = func;
25477 /* Create the call. */
25478 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25479 if (value != NULL_RTX)
25480 call[0] = gen_rtx_SET (value, call[0]);
25482 call[1] = gen_rtx_USE (VOIDmode, cookie);
25483 n = 2;
25484 if (TARGET_SECURE_PLT
25485 && flag_pic
25486 && GET_CODE (func_addr) == SYMBOL_REF
25487 && !SYMBOL_REF_LOCAL_P (func_addr))
25488 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
25490 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25492 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
25493 insn = emit_call_insn (insn);
25494 if (abi_reg)
25495 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25498 /* Expand code to perform a sibling call under the SysV4 ABI. */
25500 void
25501 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25503 rtx func = func_desc;
25504 rtx func_addr;
25505 rtx call[3];
25506 rtx insn;
25507 rtx abi_reg = NULL_RTX;
25509 if (global_tlsarg)
25510 tlsarg = global_tlsarg;
25512 /* Handle longcall attributes. */
25513 if ((INTVAL (cookie) & CALL_LONG) != 0
25514 && GET_CODE (func_desc) == SYMBOL_REF)
25516 func = rs6000_longcall_ref (func_desc, tlsarg);
25517 /* If the longcall was implemented as an inline PLT call using
25518 PLT unspecs then func will be REG:r11. If not, func will be
25519 a pseudo reg. The inline PLT call sequence supports lazy
25520 linking (and longcalls to functions in dlopen'd libraries).
25521 The other style of longcall doesn't. The lazy linking entry
25522 to the dynamic symbol resolver requires r11 be the function
25523 address (as it is for linker generated PLT stubs). Ensure
25524 r11 stays valid to the bctr by marking r11 used by the call. */
25525 if (TARGET_PLTSEQ)
25526 abi_reg = func;
25529 /* Handle indirect calls. */
25530 if (GET_CODE (func) != SYMBOL_REF)
25532 func = force_reg (Pmode, func);
25534 /* Indirect sibcalls must go via CTR. That can't be left to
25535 reload because we want to mark every instruction in an inline
25536 PLT call sequence with a reloc, enabling the linker to edit
25537 the sequence back to a direct call when that makes sense. */
25538 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25539 if (abi_reg)
25541 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25542 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25543 emit_insn (gen_rtx_SET (func_addr, mark_func));
25544 v = gen_rtvec (2, func_addr, func_desc);
25545 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25547 else
25548 emit_move_insn (func_addr, func);
25550 else
25551 func_addr = func;
25553 /* Create the call. */
25554 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25555 if (value != NULL_RTX)
25556 call[0] = gen_rtx_SET (value, call[0]);
25558 call[1] = gen_rtx_USE (VOIDmode, cookie);
25559 call[2] = simple_return_rtx;
25561 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25562 insn = emit_call_insn (insn);
25563 if (abi_reg)
25564 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25567 #if TARGET_MACHO
25569 /* Expand code to perform a call under the Darwin ABI.
25570 Modulo handling of mlongcall, this is much the same as sysv.
25571 If/when the longcall optimisation is removed, we could drop this
25572 code and use the sysv case (taking care to avoid the tls stuff).
25574 We can use this for sibcalls too, if needed. */
25576 void
25577 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25578 rtx cookie, bool sibcall)
25580 rtx func = func_desc;
25581 rtx func_addr;
25582 rtx call[3];
25583 rtx insn;
25584 int cookie_val = INTVAL (cookie);
25585 bool make_island = false;
25587 /* Handle longcall attributes; there are two cases for Darwin:
25588 1) Newer linkers are capable of synthesising any branch islands needed.
25589 2) We need a helper branch island synthesised by the compiler.
25590 The second case has mostly been retired and we don't use it for m64.
25591 In fact, it's just an optimisation; we could simply indirect as sysv
25592 does, but we keep this for backwards compatibility for now.
25593 If we're going to use this, then we need to keep the CALL_LONG bit set,
25594 so that we can pick up the special insn form later. */
25595 if ((cookie_val & CALL_LONG) != 0
25596 && GET_CODE (func_desc) == SYMBOL_REF)
25598 /* FIXME: the longcall opt should not hang off this flag; it is most
25599 likely incorrect for kernel-mode code generation. */
25600 if (darwin_symbol_stubs && TARGET_32BIT)
25601 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25602 else
25604 /* The linker is capable of doing this, but the user explicitly
25605 asked for -mlongcall, so we'll do the 'normal' version. */
25606 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25607 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25611 /* Handle indirect calls. */
25612 if (GET_CODE (func) != SYMBOL_REF)
25614 func = force_reg (Pmode, func);
25616 /* Indirect calls via CTR are strongly preferred over indirect
25617 calls via LR, and are required for indirect sibcalls, so move
25618 the address there. */
25619 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25620 emit_move_insn (func_addr, func);
25622 else
25623 func_addr = func;
25625 /* Create the call. */
25626 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25627 if (value != NULL_RTX)
25628 call[0] = gen_rtx_SET (value, call[0]);
25630 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25632 if (sibcall)
25633 call[2] = simple_return_rtx;
25634 else
25635 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25637 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25638 insn = emit_call_insn (insn);
25639 /* Now we have the debug info in the insn, we can set up the branch island
25640 if we're using one. */
25641 if (make_island)
25643 tree funname = get_identifier (XSTR (func_desc, 0));
25645 if (no_previous_def (funname))
25647 rtx label_rtx = gen_label_rtx ();
25648 char *label_buf, temp_buf[256];
25649 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25650 CODE_LABEL_NUMBER (label_rtx));
25651 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25652 tree labelname = get_identifier (label_buf);
25653 add_compiler_branch_island (labelname, funname,
25654 insn_line ((const rtx_insn*)insn));
25658 #endif
25660 void
25661 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25662 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25664 #if TARGET_MACHO
25665 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25666 #else
25667 gcc_unreachable();
25668 #endif
25672 void
25673 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25674 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25676 #if TARGET_MACHO
25677 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25678 #else
25679 gcc_unreachable();
25680 #endif
25683 /* Return whether we should generate PC-relative code for FNDECL. */
25684 bool
25685 rs6000_fndecl_pcrel_p (const_tree fndecl)
25687 if (DEFAULT_ABI != ABI_ELFv2)
25688 return false;
25690 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25692 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25693 && TARGET_CMODEL == CMODEL_MEDIUM);
25696 /* Return whether we should generate PC-relative code for *FN. */
25697 bool
25698 rs6000_function_pcrel_p (struct function *fn)
25700 if (DEFAULT_ABI != ABI_ELFv2)
25701 return false;
25703 /* Optimize usual case. */
25704 if (fn == cfun)
25705 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25706 && TARGET_CMODEL == CMODEL_MEDIUM);
25708 return rs6000_fndecl_pcrel_p (fn->decl);
25711 /* Return whether we should generate PC-relative code for the current
25712 function. */
25713 bool
25714 rs6000_pcrel_p ()
25716 return (DEFAULT_ABI == ABI_ELFv2
25717 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25718 && TARGET_CMODEL == CMODEL_MEDIUM);
25722 /* Given an address (ADDR), a mode (MODE), and what the format of the
25723 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25724 for the address. */
25726 enum insn_form
25727 address_to_insn_form (rtx addr,
25728 machine_mode mode,
25729 enum non_prefixed_form non_prefixed_format)
25731 /* Single register is easy. */
25732 if (REG_P (addr) || SUBREG_P (addr))
25733 return INSN_FORM_BASE_REG;
25735 /* If the non-prefixed instruction format doesn't support offset addressing,
25736 make sure only indexed addressing is allowed.
25738 We special case SDmode so that the register allocator does not try to move
25739 SDmode through GPR registers, but instead uses the 32-bit integer load and
25740 store instructions for the floating point registers. */
25741 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25743 if (GET_CODE (addr) != PLUS)
25744 return INSN_FORM_BAD;
25746 rtx op0 = XEXP (addr, 0);
25747 rtx op1 = XEXP (addr, 1);
25748 if (!REG_P (op0) && !SUBREG_P (op0))
25749 return INSN_FORM_BAD;
25751 if (!REG_P (op1) && !SUBREG_P (op1))
25752 return INSN_FORM_BAD;
25754 return INSN_FORM_X;
25757 /* Deal with update forms. */
25758 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25759 return INSN_FORM_UPDATE;
25761 /* Handle PC-relative symbols and labels. Check for both local and
25762 external symbols. Assume labels are always local. TLS symbols
25763 are not PC-relative for rs6000. */
25764 if (TARGET_PCREL)
25766 if (LABEL_REF_P (addr))
25767 return INSN_FORM_PCREL_LOCAL;
25769 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25771 if (!SYMBOL_REF_LOCAL_P (addr))
25772 return INSN_FORM_PCREL_EXTERNAL;
25773 else
25774 return INSN_FORM_PCREL_LOCAL;
25778 if (GET_CODE (addr) == CONST)
25779 addr = XEXP (addr, 0);
25781 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25782 if (GET_CODE (addr) == LO_SUM)
25783 return INSN_FORM_LO_SUM;
25785 /* Everything below must be an offset address of some form. */
25786 if (GET_CODE (addr) != PLUS)
25787 return INSN_FORM_BAD;
25789 rtx op0 = XEXP (addr, 0);
25790 rtx op1 = XEXP (addr, 1);
25792 /* Check for indexed addresses. */
25793 if (REG_P (op1) || SUBREG_P (op1))
25795 if (REG_P (op0) || SUBREG_P (op0))
25796 return INSN_FORM_X;
25798 return INSN_FORM_BAD;
25801 if (!CONST_INT_P (op1))
25802 return INSN_FORM_BAD;
25804 HOST_WIDE_INT offset = INTVAL (op1);
25805 if (!SIGNED_INTEGER_34BIT_P (offset))
25806 return INSN_FORM_BAD;
25808 /* Check for local and external PC-relative addresses. Labels are always
25809 local. TLS symbols are not PC-relative for rs6000. */
25810 if (TARGET_PCREL)
25812 if (LABEL_REF_P (op0))
25813 return INSN_FORM_PCREL_LOCAL;
25815 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
25817 if (!SYMBOL_REF_LOCAL_P (op0))
25818 return INSN_FORM_PCREL_EXTERNAL;
25819 else
25820 return INSN_FORM_PCREL_LOCAL;
25824 /* If it isn't PC-relative, the address must use a base register. */
25825 if (!REG_P (op0) && !SUBREG_P (op0))
25826 return INSN_FORM_BAD;
25828 /* Large offsets must be prefixed. */
25829 if (!SIGNED_INTEGER_16BIT_P (offset))
25831 if (TARGET_PREFIXED)
25832 return INSN_FORM_PREFIXED_NUMERIC;
25834 return INSN_FORM_BAD;
25837 /* We have a 16-bit offset; see what default instruction format to use. */
25838 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
25840 unsigned size = GET_MODE_SIZE (mode);
25842 /* On 64-bit systems, assume 64-bit integers need to use DS form
25843 addresses (for LD/STD). VSX vectors need to use DQ form addresses
25844 (for LXV and STXV). TImode is problematical in that its normal usage
25845 is expected to be GPRs where it wants a DS instruction format, but if
25846 it goes into the vector registers, it wants a DQ instruction
25847 format. */
25848 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
25849 non_prefixed_format = NON_PREFIXED_DS;
25851 else if (TARGET_VSX && size >= 16
25852 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
25853 non_prefixed_format = NON_PREFIXED_DQ;
25855 else
25856 non_prefixed_format = NON_PREFIXED_D;
25859 /* Classify the D/DS/DQ-form addresses. */
25860 switch (non_prefixed_format)
25862 /* Instruction format D, all 16 bits are valid. */
25863 case NON_PREFIXED_D:
25864 return INSN_FORM_D;
25866 /* Instruction format DS, bottom 2 bits must be 0. */
25867 case NON_PREFIXED_DS:
25868 if ((offset & 3) == 0)
25869 return INSN_FORM_DS;
25871 else if (TARGET_PREFIXED)
25872 return INSN_FORM_PREFIXED_NUMERIC;
25874 else
25875 return INSN_FORM_BAD;
25877 /* Instruction format DQ, bottom 4 bits must be 0. */
25878 case NON_PREFIXED_DQ:
25879 if ((offset & 15) == 0)
25880 return INSN_FORM_DQ;
25882 else if (TARGET_PREFIXED)
25883 return INSN_FORM_PREFIXED_NUMERIC;
25885 else
25886 return INSN_FORM_BAD;
25888 default:
25889 break;
25892 return INSN_FORM_BAD;
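/* Some example classifications from the function above (registers and
   offsets are illustrative):

     (reg r3)                            -> INSN_FORM_BASE_REG
     (plus (reg r3) (reg r4))            -> INSN_FORM_X
     (plus (reg r3) (const_int 8))       -> INSN_FORM_D, _DS, or _DQ,
                                            depending on the mode
     (plus (reg r3) (const_int 100000))  -> INSN_FORM_PREFIXED_NUMERIC
                                            if TARGET_PREFIXED, else
                                            INSN_FORM_BAD.  */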
25895 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
25896 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
25897 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
25898 a D-form or DS-form instruction. X-form and base_reg are always
25899 allowed. */
25900 bool
25901 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
25902 enum non_prefixed_form non_prefixed_format)
25904 enum insn_form result_form;
25906 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
25908 switch (non_prefixed_format)
25910 case NON_PREFIXED_D:
25911 switch (result_form)
25913 case INSN_FORM_X:
25914 case INSN_FORM_D:
25915 case INSN_FORM_DS:
25916 case INSN_FORM_BASE_REG:
25917 return true;
25918 default:
25919 return false;
25921 break;
25922 case NON_PREFIXED_DS:
25923 switch (result_form)
25925 case INSN_FORM_X:
25926 case INSN_FORM_DS:
25927 case INSN_FORM_BASE_REG:
25928 return true;
25929 default:
25930 return false;
25932 break;
25933 default:
25934 break;
25936 return false;
25939 /* Return true if a REG with a given MODE is loaded from or stored into a MEM
25940 location that uses a non-prefixed D/DS/DQ-form address. This is used to validate
25941 the load or store with the PCREL_OPT optimization to make sure it is an
25942 instruction that can be optimized.
25944 We need to specify the MODE separately from the REG to allow for loads that
25945 include zero/sign/float extension. */
25947 bool
25948 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
25950 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
25951 PCREL_OPT optimization. */
25952 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
25953 if (non_prefixed == NON_PREFIXED_X)
25954 return false;
25956 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
25957 rtx addr = XEXP (mem, 0);
25958 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
25959 return (iform == INSN_FORM_BASE_REG
25960 || iform == INSN_FORM_D
25961 || iform == INSN_FORM_DS
25962 || iform == INSN_FORM_DQ);
25965 /* Helper function to see if we're potentially looking at lfs/stfs.
25966 - PARALLEL containing a SET and a CLOBBER
25967 - stfs:
25968 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
25969 - CLOBBER is a V4SF
25970 - lfs:
25971 - SET is from UNSPEC_SF_FROM_SI to REG:SF
25972 - CLOBBER is a DI
25975 static bool
25976 is_lfs_stfs_insn (rtx_insn *insn)
25978 rtx pattern = PATTERN (insn);
25979 if (GET_CODE (pattern) != PARALLEL)
25980 return false;
25982 /* This should be a parallel with exactly one set and one clobber. */
25983 if (XVECLEN (pattern, 0) != 2)
25984 return false;
25986 rtx set = XVECEXP (pattern, 0, 0);
25987 if (GET_CODE (set) != SET)
25988 return false;
25990 rtx clobber = XVECEXP (pattern, 0, 1);
25991 if (GET_CODE (clobber) != CLOBBER)
25992 return false;
25994 All we care about is that the destination of the SET is a mem:SI,
25995 the source should be an UNSPEC_SI_FROM_SF, and the clobber
25996 should be a scratch:V4SF. */
25998 rtx dest = SET_DEST (set);
25999 rtx src = SET_SRC (set);
26000 rtx scratch = SET_DEST (clobber);
26002 if (GET_CODE (src) != UNSPEC)
26003 return false;
26005 /* stfs case. */
26006 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26007 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26008 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26009 return true;
26011 /* lfs case. */
26012 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26013 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26014 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26015 return true;
26017 return false;
26020 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26021 instruction format (D/DS/DQ) used for offset memory. */
26023 enum non_prefixed_form
26024 reg_to_non_prefixed (rtx reg, machine_mode mode)
26026 /* If it isn't a register, use the defaults. */
26027 if (!REG_P (reg) && !SUBREG_P (reg))
26028 return NON_PREFIXED_DEFAULT;
26030 unsigned int r = reg_or_subregno (reg);
26032 /* If we have a pseudo, use the default instruction format. */
26033 if (!HARD_REGISTER_NUM_P (r))
26034 return NON_PREFIXED_DEFAULT;
26036 unsigned size = GET_MODE_SIZE (mode);
26038 /* FPR registers use D-form for scalars, and DQ-form for vectors, IEEE
26039 128-bit floating point, and 128-bit integers. Before power9, only indexed
26040 addressing was available for vectors. */
26041 if (FP_REGNO_P (r))
26043 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26044 return NON_PREFIXED_D;
26046 else if (size < 8)
26047 return NON_PREFIXED_X;
26049 else if (TARGET_VSX && size >= 16
26050 && (VECTOR_MODE_P (mode)
26051 || VECTOR_ALIGNMENT_P (mode)
26052 || mode == TImode || mode == CTImode))
26053 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26055 else
26056 return NON_PREFIXED_DEFAULT;
26059 /* Altivec registers use DS-form for scalars, and DQ-form for vectors, IEEE
26060 128-bit floating point, and 128-bit integers. Before power9, only indexed
26061 addressing was available. */
26062 else if (ALTIVEC_REGNO_P (r))
26064 if (!TARGET_P9_VECTOR)
26065 return NON_PREFIXED_X;
26067 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26068 return NON_PREFIXED_DS;
26070 else if (size < 8)
26071 return NON_PREFIXED_X;
26073 else if (TARGET_VSX && size >= 16
26074 && (VECTOR_MODE_P (mode)
26075 || VECTOR_ALIGNMENT_P (mode)
26076 || mode == TImode || mode == CTImode))
26077 return NON_PREFIXED_DQ;
26079 else
26080 return NON_PREFIXED_DEFAULT;
26083 /* GPR registers use DS-form for 64-bit items on 64-bit systems, and D-form
26084 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26085 through the GPR registers for memory operations. */
26086 else if (TARGET_POWERPC64 && size >= 8)
26087 return NON_PREFIXED_DS;
26089 return NON_PREFIXED_D;
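/* For instance (illustrative, assuming the usual ISA flags): DFmode in
   an FPR maps to NON_PREFIXED_D (D-form LFD/STFD); DFmode in an Altivec
   register maps to NON_PREFIXED_DS (DS-form LXSD/STXSD, power9 and up);
   and a 128-bit vector in a VSX register maps to NON_PREFIXED_DQ
   (DQ-form LXV/STXV) with power9 vectors, or X-form addressing before
   that.  */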
26093 /* Whether a load instruction is a prefixed instruction. This is called from
26094 the prefixed attribute processing. */
26096 bool
26097 prefixed_load_p (rtx_insn *insn)
26099 /* Validate the insn to make sure it is a normal load insn. */
26100 extract_insn_cached (insn);
26101 if (recog_data.n_operands < 2)
26102 return false;
26104 rtx reg = recog_data.operand[0];
26105 rtx mem = recog_data.operand[1];
26107 if (!REG_P (reg) && !SUBREG_P (reg))
26108 return false;
26110 if (!MEM_P (mem))
26111 return false;
26113 /* Prefixed load instructions do not support update or indexed forms. */
26114 if (get_attr_indexed (insn) == INDEXED_YES
26115 || get_attr_update (insn) == UPDATE_YES)
26116 return false;
26118 /* LWA uses the DS format instead of the D format that LWZ uses. */
26119 enum non_prefixed_form non_prefixed;
26120 machine_mode reg_mode = GET_MODE (reg);
26121 machine_mode mem_mode = GET_MODE (mem);
26123 if (mem_mode == SImode && reg_mode == DImode
26124 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26125 non_prefixed = NON_PREFIXED_DS;
26127 else
26128 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26130 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26131 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26132 else
26133 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26136 /* Whether a store instruction is a prefixed instruction. This is called from
26137 the prefixed attribute processing. */
26139 bool
26140 prefixed_store_p (rtx_insn *insn)
26142 /* Validate the insn to make sure it is a normal store insn. */
26143 extract_insn_cached (insn);
26144 if (recog_data.n_operands < 2)
26145 return false;
26147 rtx mem = recog_data.operand[0];
26148 rtx reg = recog_data.operand[1];
26150 if (!REG_P (reg) && !SUBREG_P (reg))
26151 return false;
26153 if (!MEM_P (mem))
26154 return false;
26156 /* Prefixed store instructions do not support update or indexed forms. */
26157 if (get_attr_indexed (insn) == INDEXED_YES
26158 || get_attr_update (insn) == UPDATE_YES)
26159 return false;
26161 machine_mode mem_mode = GET_MODE (mem);
26162 rtx addr = XEXP (mem, 0);
26163 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26165 /* Need to make sure we aren't looking at a stfs which doesn't look
26166 like the other things reg_to_non_prefixed/address_is_prefixed
26167 looks for. */
26168 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26169 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26170 else
26171 return address_is_prefixed (addr, mem_mode, non_prefixed);
26174 /* Whether a load immediate or add instruction is a prefixed instruction. This
26175 is called from the prefixed attribute processing. */
26177 bool
26178 prefixed_paddi_p (rtx_insn *insn)
26180 rtx set = single_set (insn);
26181 if (!set)
26182 return false;
26184 rtx dest = SET_DEST (set);
26185 rtx src = SET_SRC (set);
26187 if (!REG_P (dest) && !SUBREG_P (dest))
26188 return false;
26190 /* Is this a load immediate that can't be done with a simple ADDI or
26191 ADDIS? */
26192 if (CONST_INT_P (src))
26193 return (satisfies_constraint_eI (src)
26194 && !satisfies_constraint_I (src)
26195 && !satisfies_constraint_L (src));
26197 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26198 ADDIS? */
26199 if (GET_CODE (src) == PLUS)
26201 rtx op1 = XEXP (src, 1);
26203 return (CONST_INT_P (op1)
26204 && satisfies_constraint_eI (op1)
26205 && !satisfies_constraint_I (op1)
26206 && !satisfies_constraint_L (op1));
26209 /* If not, is it a load of a PC-relative address? */
26210 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26211 return false;
26213 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26214 return false;
26216 enum insn_form iform = address_to_insn_form (src, Pmode,
26217 NON_PREFIXED_DEFAULT);
26219 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
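/* Example for the checks above (the value is illustrative):

     (set (reg:DI 10) (const_int 0x123456789))

   satisfies the 34-bit eI constraint but neither the 16-bit I (ADDI)
   nor the shifted 16-bit L (ADDIS) constraint, so it requires PADDI and
   this function returns true.  */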
26222 /* Whether the next instruction needs a 'p' prefix issued before the
26223 instruction is printed out. */
26224 static bool prepend_p_to_next_insn;
26226 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26227 outputting the assembler code. On the PowerPC, we remember if the current
26228 insn is a prefixed insn where we need to emit a 'p' before the insn.
26230 In addition, if the insn is part of a PC-relative reference to an external
26231 label optimization, this is recorded also. */
26232 void
26233 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26235 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26236 == MAYBE_PREFIXED_YES
26237 && get_attr_prefixed (insn) == PREFIXED_YES);
26238 return;
26241 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26242 We use it to emit a 'p' for prefixed insns that is set in
26243 FINAL_PRESCAN_INSN. */
26244 void
26245 rs6000_asm_output_opcode (FILE *stream)
26247 if (prepend_p_to_next_insn)
26249 fprintf (stream, "p");
26251 /* Reset the flag in the case where there are separate insn lines in the
26252 sequence, so the 'p' is only emitted for the first line. This shows up
26253 when we are doing the PCREL_OPT optimization, in that the label created
26254 with %r<n> would have a leading 'p' printed. */
26255 prepend_p_to_next_insn = false;
26258 return;
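/* For example (with illustrative operands): when an insn that would
   normally print as "lwz 3,8(4)" instead needs a prefixed encoding for
   its address, the two hooks above cooperate so that the opcode comes
   out as "plwz".  */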
26261 /* Emit the relocation to tie the next instruction to a previous instruction
26262 that loads up an external address. This is used to do the PCREL_OPT
26263 optimization. Note, the label is generated after the PLD of the GOT
26264 PC-relative address to allow the assembler to insert NOPs before the PLD
26265 instruction. The operand is a constant integer that is the label
26266 number. */
26268 void
26269 output_pcrel_opt_reloc (rtx label_num)
26271 rtx operands[1] = { label_num };
26272 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26273 operands);
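/* With label number 42 (an illustrative value), the call above prints

     .reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)

   recording the distance from the PLD that loaded the address to the
   instruction that uses it, so the linker may optimize the pair.  */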
26276 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26277 should be adjusted to reflect any required changes. This macro is used when
26278 there is some systematic length adjustment required that would be difficult
26279 to express in the length attribute.
26281 On the PowerPC, we use this to adjust the length of an instruction if one or
26282 more prefixed instructions are generated, using the attribute
26283 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26284 hardware requires that a prefixed instruction does not cross a 64-byte
26285 boundary. This means the compiler has to assume the length of the first
26286 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26287 already set for the non-prefixed instruction, we just need to update for the
26288 difference. */
26290 int
26291 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26293 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26295 rtx pattern = PATTERN (insn);
26296 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26297 && get_attr_prefixed (insn) == PREFIXED_YES)
26299 int num_prefixed = get_attr_max_prefixed_insns (insn);
26300 length += 4 * (num_prefixed + 1);
26304 return length;
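/* Worked example: an insn whose base LENGTH is 4 and whose
   max_prefixed_insns attribute is 1 gets length 4 + 4 * (1 + 1) = 12
   bytes: 8 bytes for the prefixed instruction itself plus 4 bytes for a
   possible NOP inserted to keep it from crossing a 64-byte boundary.  */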
26308 #ifdef HAVE_GAS_HIDDEN
26309 # define USE_HIDDEN_LINKONCE 1
26310 #else
26311 # define USE_HIDDEN_LINKONCE 0
26312 #endif
26314 /* Fills in the label name that should be used for a 476 link stack thunk. */
26316 void
26317 get_ppc476_thunk_name (char name[32])
26319 gcc_assert (TARGET_LINK_STACK);
26321 if (USE_HIDDEN_LINKONCE)
26322 sprintf (name, "__ppc476.get_thunk");
26323 else
26324 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26327 /* This function emits the simple thunk routine that is used to preserve
26328 the link stack on the 476 cpu. */
26330 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26331 static void
26332 rs6000_code_end (void)
26334 char name[32];
26335 tree decl;
26337 if (!TARGET_LINK_STACK)
26338 return;
26340 get_ppc476_thunk_name (name);
26342 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26343 build_function_type_list (void_type_node, NULL_TREE));
26344 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26345 NULL_TREE, void_type_node);
26346 TREE_PUBLIC (decl) = 1;
26347 TREE_STATIC (decl) = 1;
26349 #if RS6000_WEAK
26350 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26352 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26353 targetm.asm_out.unique_section (decl, 0);
26354 switch_to_section (get_named_section (decl, NULL, 0));
26355 DECL_WEAK (decl) = 1;
26356 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26357 targetm.asm_out.globalize_label (asm_out_file, name);
26358 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26359 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26361 else
26362 #endif
26364 switch_to_section (text_section);
26365 ASM_OUTPUT_LABEL (asm_out_file, name);
26368 DECL_INITIAL (decl) = make_node (BLOCK);
26369 current_function_decl = decl;
26370 allocate_struct_function (decl, false);
26371 init_function_start (decl);
26372 first_function_block_is_cold = false;
26373 /* Make sure unwind info is emitted for the thunk if needed. */
26374 final_start_function (emit_barrier (), asm_out_file, 1);
26376 fputs ("\tblr\n", asm_out_file);
26378 final_end_function ();
26379 init_insn_lengths ();
26380 free_after_compilation (cfun);
26381 set_cfun (NULL);
26382 current_function_decl = NULL;
26385 /* Add r30 to hard reg set if the prologue sets it up and it is not
26386 pic_offset_table_rtx. */
26388 static void
26389 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26391 if (!TARGET_SINGLE_PIC_BASE
26392 && TARGET_TOC
26393 && TARGET_MINIMAL_TOC
26394 && !constant_pool_empty_p ())
26395 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26396 if (cfun->machine->split_stack_argp_used)
26397 add_to_hard_reg_set (&set->set, Pmode, 12);
26399 /* Make sure the hard reg set doesn't include r2, which was possibly added
26400 via PIC_OFFSET_TABLE_REGNUM. */
26401 if (TARGET_TOC)
26402 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26406 /* Helper function for rs6000_split_logical to emit a logical instruction after
26407 splitting the operation into single GPR registers.
26409 DEST is the destination register.
26410 OP1 and OP2 are the input source registers.
26411 CODE is the base operation (AND, IOR, XOR, NOT).
26412 MODE is the machine mode.
26413 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26414 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26415 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26417 static void
26418 rs6000_split_logical_inner (rtx dest,
26419 rtx op1,
26420 rtx op2,
26421 enum rtx_code code,
26422 machine_mode mode,
26423 bool complement_final_p,
26424 bool complement_op1_p,
26425 bool complement_op2_p)
26427 rtx bool_rtx;
26429 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26430 if (op2 && CONST_INT_P (op2)
26431 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26432 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26434 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26435 HOST_WIDE_INT value = INTVAL (op2) & mask;
26437 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26438 if (code == AND)
26440 if (value == 0)
26442 emit_insn (gen_rtx_SET (dest, const0_rtx));
26443 return;
26446 else if (value == mask)
26448 if (!rtx_equal_p (dest, op1))
26449 emit_insn (gen_rtx_SET (dest, op1));
26450 return;
26454 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26455 into separate ORI/ORIS or XORI/XORIS instructions. */
26456 else if (code == IOR || code == XOR)
26458 if (value == 0)
26460 if (!rtx_equal_p (dest, op1))
26461 emit_insn (gen_rtx_SET (dest, op1));
26462 return;
26467 if (code == AND && mode == SImode
26468 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26470 emit_insn (gen_andsi3 (dest, op1, op2));
26471 return;
26474 if (complement_op1_p)
26475 op1 = gen_rtx_NOT (mode, op1);
26477 if (complement_op2_p)
26478 op2 = gen_rtx_NOT (mode, op2);
26480 /* For canonical RTL, if only one arm is inverted it is the first. */
26481 if (!complement_op1_p && complement_op2_p)
26482 std::swap (op1, op2);
26484 bool_rtx = ((code == NOT)
26485 ? gen_rtx_NOT (mode, op1)
26486 : gen_rtx_fmt_ee (code, mode, op1, op2));
26488 if (complement_final_p)
26489 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
26491 emit_insn (gen_rtx_SET (dest, bool_rtx));
26494 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26495 operations are split immediately during RTL generation to allow for more
26496 optimizations of the AND/IOR/XOR.
26498 OPERANDS is an array containing the destination and two input operands.
26499 CODE is the base operation (AND, IOR, XOR, NOT).
26501 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26502 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26503 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26507 static void
26508 rs6000_split_logical_di (rtx operands[3],
26509 enum rtx_code code,
26510 bool complement_final_p,
26511 bool complement_op1_p,
26512 bool complement_op2_p)
26514 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
26515 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
26516 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
26517 enum hi_lo { hi = 0, lo = 1 };
26518 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
26519 size_t i;
26521 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
26522 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
26523 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
26524 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
26526 if (code == NOT)
26527 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
26528 else
26530 if (!CONST_INT_P (operands[2]))
26532 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
26533 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
26535 else
26537 HOST_WIDE_INT value = INTVAL (operands[2]);
26538 HOST_WIDE_INT value_hi_lo[2];
26540 gcc_assert (!complement_final_p);
26541 gcc_assert (!complement_op1_p);
26542 gcc_assert (!complement_op2_p);
26544 value_hi_lo[hi] = value >> 32;
26545 value_hi_lo[lo] = value & lower_32bits;
26547 for (i = 0; i < 2; i++)
26549 HOST_WIDE_INT sub_value = value_hi_lo[i];
26551 if (sub_value & sign_bit)
26552 sub_value |= upper_32bits;
26554 op2_hi_lo[i] = GEN_INT (sub_value);
26556 /* If this is an AND instruction, check to see if we need to load
26557 the value in a register. */
26558 if (code == AND && sub_value != -1 && sub_value != 0
26559 && !and_operand (op2_hi_lo[i], SImode))
26560 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
26565 for (i = 0; i < 2; i++)
26567 /* Split large IOR/XOR operations. */
26568 if ((code == IOR || code == XOR)
26569 && CONST_INT_P (op2_hi_lo[i])
26570 && !complement_final_p
26571 && !complement_op1_p
26572 && !complement_op2_p
26573 && !logical_const_operand (op2_hi_lo[i], SImode))
26575 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
26576 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
26577 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
26578 rtx tmp = gen_reg_rtx (SImode);
26580 /* Make sure the constant is sign extended. */
26581 if ((hi_16bits & sign_bit) != 0)
26582 hi_16bits |= upper_32bits;
26584 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
26585 code, SImode, false, false, false);
26587 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
26588 code, SImode, false, false, false);
26590 else
26591 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
26592 code, SImode, complement_final_p,
26593 complement_op1_p, complement_op2_p);
26596 return;
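/* A worked example (a sketch; names are illustrative): on a 32-bit target

       long long x, y;
       y = x | 0x12345678LL;

   is split into SImode halves.  The high word is an IOR with 0, which the
   inner routine reduces to a plain copy, and the low-word constant is not a
   single logical_const_operand, so it is further split into two 16-bit
   steps, conceptually:

       y_hi = x_hi;
       tmp  = x_lo | 0x12340000;    (an "oris"-style operation)
       y_lo = tmp  | 0x5678;        (an "ori"-style operation)  */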
26599 /* Split the insns that make up boolean operations operating on multiple GPR
26600 registers. The boolean MD patterns ensure that the inputs either are
26601 exactly the same as the output registers, or there is no overlap.
26603 OPERANDS is an array containing the destination and two input operands.
26604 CODE is the base operation (AND, IOR, XOR, NOT).
26605 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26606 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26607 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26609 void
26610 rs6000_split_logical (rtx operands[3],
26611 enum rtx_code code,
26612 bool complement_final_p,
26613 bool complement_op1_p,
26614 bool complement_op2_p)
26616 machine_mode mode = GET_MODE (operands[0]);
26617 machine_mode sub_mode;
26618 rtx op0, op1, op2;
26619 int sub_size, regno0, regno1, nregs, i;
26621 /* If this is DImode, use the specialized version that can run before
26622 register allocation. */
26623 if (mode == DImode && !TARGET_POWERPC64)
26625 rs6000_split_logical_di (operands, code, complement_final_p,
26626 complement_op1_p, complement_op2_p);
26627 return;
26630 op0 = operands[0];
26631 op1 = operands[1];
26632 op2 = (code == NOT) ? NULL_RTX : operands[2];
26633 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26634 sub_size = GET_MODE_SIZE (sub_mode);
26635 regno0 = REGNO (op0);
26636 regno1 = REGNO (op1);
26638 gcc_assert (reload_completed);
26639 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26640 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26642 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26643 gcc_assert (nregs > 1);
26645 if (op2 && REG_P (op2))
26646 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26648 for (i = 0; i < nregs; i++)
26650 int offset = i * sub_size;
26651 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26652 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26653 rtx sub_op2 = ((code == NOT)
26654 ? NULL_RTX
26655 : simplify_subreg (sub_mode, op2, mode, offset));
26657 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26658 complement_final_p, complement_op1_p,
26659 complement_op2_p);
26662 return;
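/* For example (a sketch with illustrative hard registers): a TImode AND
   held in GPR pairs on a 64-bit target is decomposed into two DImode ANDs:

       (set (reg:DI 3) (and:DI (reg:DI 3) (reg:DI 5)))
       (set (reg:DI 4) (and:DI (reg:DI 4) (reg:DI 6)))

   Because the MD patterns guarantee the inputs either match the outputs or
   do not overlap them, the pieces can be emitted in any order.  */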
26665 /* Emit instructions to move SRC to DST. Called by splitters for
26666 multi-register moves. It will emit at most one instruction for
26667 each register that is accessed; that is, it won't emit li/lis pairs
26668 (or equivalent for 64-bit code). One of SRC or DST must be a hard
26669 register. */
26671 void
26672 rs6000_split_multireg_move (rtx dst, rtx src)
26674 /* The register number of the first register being moved. */
26675 int reg;
26676 /* The mode that is to be moved. */
26677 machine_mode mode;
26678 /* The mode that the move is being done in, and its size. */
26679 machine_mode reg_mode;
26680 int reg_mode_size;
26681 /* The number of registers that will be moved. */
26682 int nregs;
26684 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
26685 mode = GET_MODE (dst);
26686 nregs = hard_regno_nregs (reg, mode);
26688 /* If we have a vector quad register for MMA, and this is a load or store,
26689 see if we can use vector paired load/stores. */
26690 if (mode == XOmode && TARGET_MMA
26691 && (MEM_P (dst) || MEM_P (src)))
26693 reg_mode = OOmode;
26694 nregs /= 2;
26696 /* If we have a vector pair/quad mode, split it into two/four separate
26697 vectors. */
26698 else if (mode == OOmode || mode == XOmode)
26699 reg_mode = V1TImode;
26700 else if (FP_REGNO_P (reg))
26701 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
26702 (TARGET_HARD_FLOAT ? DFmode : SFmode);
26703 else if (ALTIVEC_REGNO_P (reg))
26704 reg_mode = V16QImode;
26705 else
26706 reg_mode = word_mode;
26707 reg_mode_size = GET_MODE_SIZE (reg_mode);
26709 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
26711 /* TDmode residing in FP registers is special, since the ISA requires that
26712 the lower-numbered word of a register pair is always the most significant
26713 word, even in little-endian mode. This does not match the usual subreg
26714 semantics, so we cannot use simplify_gen_subreg in those cases. Access
26715 the appropriate constituent registers "by hand" in little-endian mode.
26717 Note we do not need to check for destructive overlap here since TDmode
26718 can only reside in even/odd register pairs. */
26719 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
26721 rtx p_src, p_dst;
26722 int i;
26724 for (i = 0; i < nregs; i++)
26726 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
26727 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
26728 else
26729 p_src = simplify_gen_subreg (reg_mode, src, mode,
26730 i * reg_mode_size);
26732 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
26733 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
26734 else
26735 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
26736 i * reg_mode_size);
26738 emit_insn (gen_rtx_SET (p_dst, p_src));
26741 return;
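/* As a concrete illustration (a sketch): moving a TDmode value from the
   pair {fr0,fr1} to {fr2,fr3} on a little-endian target emits

       fr3 = fr1
       fr2 = fr0

   so constituent register K still maps to register K; ordinary subreg
   semantics would instead swap the two halves.  */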
26744 /* The __vector_pair and __vector_quad modes are multi-register
26745 modes, so if we have to load or store the registers, we have to be
26746 careful to properly swap them if we're in little endian mode
26747 below. This means the last register gets the first memory
26748 location. We also need to be careful of using the right register
26749 numbers if we are splitting XO to OO. */
26750 if (mode == OOmode || mode == XOmode)
26752 nregs = hard_regno_nregs (reg, mode);
26753 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
26754 if (MEM_P (dst))
26756 unsigned offset = 0;
26757 unsigned size = GET_MODE_SIZE (reg_mode);
26759 /* If we are reading an accumulator register, we have to
26760 deprime it before we can access it. */
26761 if (TARGET_MMA
26762 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
26763 emit_insn (gen_mma_xxmfacc (src, src));
26765 for (int i = 0; i < nregs; i += reg_mode_nregs)
26767 unsigned subreg
26768 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
26769 rtx dst2 = adjust_address (dst, reg_mode, offset);
26770 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
26771 offset += size;
26772 emit_insn (gen_rtx_SET (dst2, src2));
26775 return;
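/* For example (a sketch of the store loop above): a __vector_pair living
   in {vs0,vs1} stored to memory on a little-endian target stores vs1 at
   offset 0 and vs0 at offset 16, so the last register gets the first
   memory location as described above.  */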
26778 if (MEM_P (src))
26780 unsigned offset = 0;
26781 unsigned size = GET_MODE_SIZE (reg_mode);
26783 for (int i = 0; i < nregs; i += reg_mode_nregs)
26785 unsigned subreg
26786 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
26787 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
26788 rtx src2 = adjust_address (src, reg_mode, offset);
26789 offset += size;
26790 emit_insn (gen_rtx_SET (dst2, src2));
26793 /* If we are writing an accumulator register, we have to
26794 prime it after we've written it. */
26795 if (TARGET_MMA
26796 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
26797 emit_insn (gen_mma_xxmtacc (dst, dst));
26799 return;
26802 if (GET_CODE (src) == UNSPEC)
26804 gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE);
26805 gcc_assert (REG_P (dst));
26806 if (GET_MODE (src) == XOmode)
26807 gcc_assert (FP_REGNO_P (REGNO (dst)));
26808 if (GET_MODE (src) == OOmode)
26809 gcc_assert (VSX_REGNO_P (REGNO (dst)));
26811 int nvecs = XVECLEN (src, 0);
26812 for (int i = 0; i < nvecs; i++)
26814 rtx op;
26815 int regno = reg + i;
26817 if (WORDS_BIG_ENDIAN)
26819 op = XVECEXP (src, 0, i);
26821 /* If we are loading an even VSX register and the memory location
26822 is adjacent to the next register's memory location (if any),
26823 then we can load them both with one LXVP instruction. */
26824 if ((regno & 1) == 0)
26826 rtx op2 = XVECEXP (src, 0, i + 1);
26827 if (adjacent_mem_locations (op, op2) == op)
26829 op = adjust_address (op, OOmode, 0);
26830 /* Skip the next register, since we're going to
26831 load it together with this register. */
26832 i++;
26836 else
26838 op = XVECEXP (src, 0, nvecs - i - 1);
26840 /* If we are loading an even VSX register and the memory location
26841 is adjacent to the next register's memory location (if any),
26842 then we can load them both with one LXVP instruction. */
26843 if ((regno & 1) == 0)
26845 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
26846 if (adjacent_mem_locations (op2, op) == op2)
26848 op = adjust_address (op2, OOmode, 0);
26849 /* Skip the next register, since we're going to
26850 load it together with this register. */
26851 i++;
26856 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
26857 emit_insn (gen_rtx_SET (dst_i, op));
26860 /* We are writing an accumulator register, so we have to
26861 prime it after we've written it. */
26862 if (GET_MODE (src) == XOmode)
26863 emit_insn (gen_mma_xxmtacc (dst, dst));
26865 return;
26868 /* Register -> register moves can use common code. */
26871 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
26873 /* If we are reading an accumulator register, we have to
26874 deprime it before we can access it. */
26875 if (TARGET_MMA
26876 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
26877 emit_insn (gen_mma_xxmfacc (src, src));
26879 /* Move register range backwards, if we might have destructive
26880 overlap. */
26881 int i;
26882 /* XO/OO are opaque so cannot use subregs. */
26883 if (mode == OOmode || mode == XOmode)
26885 for (i = nregs - 1; i >= 0; i--)
26887 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
26888 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
26889 emit_insn (gen_rtx_SET (dst_i, src_i));
26892 else
26894 for (i = nregs - 1; i >= 0; i--)
26895 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
26896 i * reg_mode_size),
26897 simplify_gen_subreg (reg_mode, src, mode,
26898 i * reg_mode_size)));
26901 /* If we are writing an accumulator register, we have to
26902 prime it after we've written it. */
26903 if (TARGET_MMA
26904 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
26905 emit_insn (gen_mma_xxmtacc (dst, dst));
26907 else
26909 int i;
26910 int j = -1;
26911 bool used_update = false;
26912 rtx restore_basereg = NULL_RTX;
26914 if (MEM_P (src) && INT_REGNO_P (reg))
26916 rtx breg;
26918 if (GET_CODE (XEXP (src, 0)) == PRE_INC
26919 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
26921 rtx delta_rtx;
26922 breg = XEXP (XEXP (src, 0), 0);
26923 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
26924 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
26925 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
26926 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26927 src = replace_equiv_address (src, breg);
26929 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
26931 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
26933 rtx basereg = XEXP (XEXP (src, 0), 0);
26934 if (TARGET_UPDATE)
26936 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
26937 emit_insn (gen_rtx_SET (ndst,
26938 gen_rtx_MEM (reg_mode,
26939 XEXP (src, 0))));
26940 used_update = true;
26942 else
26943 emit_insn (gen_rtx_SET (basereg,
26944 XEXP (XEXP (src, 0), 1)));
26945 src = replace_equiv_address (src, basereg);
26947 else
26949 rtx basereg = gen_rtx_REG (Pmode, reg);
26950 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
26951 src = replace_equiv_address (src, basereg);
26955 breg = XEXP (src, 0);
26956 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
26957 breg = XEXP (breg, 0);
26959 /* If the base register we are using to address memory is
26960 also a destination reg, then change that register last. */
26961 if (REG_P (breg)
26962 && REGNO (breg) >= REGNO (dst)
26963 && REGNO (breg) < REGNO (dst) + nregs)
26964 j = REGNO (breg) - REGNO (dst);
26966 else if (MEM_P (dst) && INT_REGNO_P (reg))
26968 rtx breg;
26970 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
26971 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
26973 rtx delta_rtx;
26974 breg = XEXP (XEXP (dst, 0), 0);
26975 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
26976 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
26977 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
26979 /* We have to update the breg before doing the store.
26980 Use store with update, if available. */
26982 if (TARGET_UPDATE)
26984 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
26985 emit_insn (TARGET_32BIT
26986 ? (TARGET_POWERPC64
26987 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
26988 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
26989 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
26990 used_update = true;
26992 else
26993 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
26994 dst = replace_equiv_address (dst, breg);
26996 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
26997 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
26999 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27001 rtx basereg = XEXP (XEXP (dst, 0), 0);
27002 if (TARGET_UPDATE)
27004 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27005 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27006 XEXP (dst, 0)),
27007 nsrc));
27008 used_update = true;
27010 else
27011 emit_insn (gen_rtx_SET (basereg,
27012 XEXP (XEXP (dst, 0), 1)));
27013 dst = replace_equiv_address (dst, basereg);
27015 else
27017 rtx basereg = XEXP (XEXP (dst, 0), 0);
27018 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27019 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27020 && REG_P (basereg)
27021 && REG_P (offsetreg)
27022 && REGNO (basereg) != REGNO (offsetreg));
27023 if (REGNO (basereg) == 0)
27025 rtx tmp = offsetreg;
27026 offsetreg = basereg;
27027 basereg = tmp;
27029 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27030 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27031 dst = replace_equiv_address (dst, basereg);
27034 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27035 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27038 /* If we are reading an accumulator register, we have to
27039 deprime it before we can access it. */
27040 if (TARGET_MMA && REG_P (src)
27041 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27042 emit_insn (gen_mma_xxmfacc (src, src));
27044 for (i = 0; i < nregs; i++)
27046 /* Calculate index to next subword. */
27047 ++j;
27048 if (j == nregs)
27049 j = 0;
27051 /* If the compiler already emitted the move of the first word by
27052 store with update, there is no need to do anything. */
27053 if (j == 0 && used_update)
27054 continue;
27056 /* XO/OO are opaque so cannot use subregs. */
27057 if (mode == OOmode || mode == XOmode)
27059 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27060 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27061 emit_insn (gen_rtx_SET (dst_i, src_i));
27063 else
27064 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27065 j * reg_mode_size),
27066 simplify_gen_subreg (reg_mode, src, mode,
27067 j * reg_mode_size)));
27070 /* If we are writing an accumulator register, we have to
27071 prime it after we've written it. */
27072 if (TARGET_MMA && REG_P (dst)
27073 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27074 emit_insn (gen_mma_xxmtacc (dst, dst));
27076 if (restore_basereg != NULL_RTX)
27077 emit_insn (restore_basereg);
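/* A worked example of the base-register logic above (a sketch): loading
   the pair {r3,r4} from memory addressed by r3 sets J so that r3, the base
   register, is overwritten last:

       lwz 4,4(3)
       lwz 3,0(3)  */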
27081 /* Return true if the peephole2 pass can combine an addis instruction with
27082 a load whose offset allows the two insns to be fused together on a
27083 power8. */
27085 bool
27086 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27087 rtx addis_value, /* addis value. */
27088 rtx target, /* target register that is loaded. */
27089 rtx mem) /* bottom part of the memory addr. */
27091 rtx addr;
27092 rtx base_reg;
27094 /* Validate arguments. */
27095 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27096 return false;
27098 if (!base_reg_operand (target, GET_MODE (target)))
27099 return false;
27101 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27102 return false;
27104 /* Allow sign/zero extension. */
27105 if (GET_CODE (mem) == ZERO_EXTEND
27106 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27107 mem = XEXP (mem, 0);
27109 if (!MEM_P (mem))
27110 return false;
27112 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27113 return false;
27115 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27116 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27117 return false;
27119 /* Validate that the register used to load the high value is either the
27120 register being loaded, or we can safely replace its use.
27122 This function is only called from the peephole2 pass and we assume that
27123 there are 2 instructions in the peephole (addis and load), so we want to
27124 check if the target register was not used in the memory address and the
27125 register to hold the addis result is dead after the peephole. */
27126 if (REGNO (addis_reg) != REGNO (target))
27128 if (reg_mentioned_p (target, mem))
27129 return false;
27131 if (!peep2_reg_dead_p (2, addis_reg))
27132 return false;
27134 /* If the target register being loaded is the stack pointer, we must
27135 avoid loading any other value into it, even temporarily. */
27136 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27137 return false;
27140 base_reg = XEXP (addr, 0);
27141 return REGNO (addis_reg) == REGNO (base_reg);
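/* A typical candidate (a sketch; a medium code model TOC reference):

       addis 9,2,sym@toc@ha
       ld 10,sym@toc@l(9)

   Here ADDIS_REG is r9 and TARGET is r10; since they differ, fusion is
   allowed only if r10 is not mentioned in the memory address and r9 is
   dead after the load.  */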
27144 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27145 sequence. We adjust the addis register to use the target register. If the
27146 load sign extends, we adjust the code to do the zero extending load, and an
27147 explicit sign extension later since the fusion only covers zero extending
27148 loads.
27150 The operands are:
27151 operands[0] register set with addis (to be replaced with target)
27152 operands[1] value set via addis
27153 operands[2] target register being loaded
27154 operands[3] D-form memory reference using operands[0]. */
27156 void
27157 expand_fusion_gpr_load (rtx *operands)
27159 rtx addis_value = operands[1];
27160 rtx target = operands[2];
27161 rtx orig_mem = operands[3];
27162 rtx new_addr, new_mem, orig_addr, offset;
27163 enum rtx_code plus_or_lo_sum;
27164 machine_mode target_mode = GET_MODE (target);
27165 machine_mode extend_mode = target_mode;
27166 machine_mode ptr_mode = Pmode;
27167 enum rtx_code extend = UNKNOWN;
27169 if (GET_CODE (orig_mem) == ZERO_EXTEND
27170 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27172 extend = GET_CODE (orig_mem);
27173 orig_mem = XEXP (orig_mem, 0);
27174 target_mode = GET_MODE (orig_mem);
27177 gcc_assert (MEM_P (orig_mem));
27179 orig_addr = XEXP (orig_mem, 0);
27180 plus_or_lo_sum = GET_CODE (orig_addr);
27181 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27183 offset = XEXP (orig_addr, 1);
27184 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27185 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27187 if (extend != UNKNOWN)
27188 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27190 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27191 UNSPEC_FUSION_GPR);
27192 emit_insn (gen_rtx_SET (target, new_mem));
27194 if (extend == SIGN_EXTEND)
27196 int sub_off = ((BYTES_BIG_ENDIAN)
27197 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27198 : 0);
27199 rtx sign_reg
27200 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27202 emit_insn (gen_rtx_SET (target,
27203 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27206 return;
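/* For instance (a sketch): a fused sign-extending HImode load is rewritten
   as the fused zero-extending form followed by an explicit sign extension,
   conceptually:

       addis 9,2,sym@toc@ha
       lhz 9,sym@toc@l(9)
       extsh 9,9

   because the power8 fusion only covers zero-extending loads.  */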
27209 /* Emit the addis instruction that will be part of a fused instruction
27210 sequence. */
27212 void
27213 emit_fusion_addis (rtx target, rtx addis_value)
27215 rtx fuse_ops[10];
27216 const char *addis_str = NULL;
27218 /* Emit the addis instruction. */
27219 fuse_ops[0] = target;
27220 if (satisfies_constraint_L (addis_value))
27222 fuse_ops[1] = addis_value;
27223 addis_str = "lis %0,%v1";
27226 else if (GET_CODE (addis_value) == PLUS)
27228 rtx op0 = XEXP (addis_value, 0);
27229 rtx op1 = XEXP (addis_value, 1);
27231 if (REG_P (op0) && CONST_INT_P (op1)
27232 && satisfies_constraint_L (op1))
27234 fuse_ops[1] = op0;
27235 fuse_ops[2] = op1;
27236 addis_str = "addis %0,%1,%v2";
27240 else if (GET_CODE (addis_value) == HIGH)
27242 rtx value = XEXP (addis_value, 0);
27243 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27245 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27246 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27247 if (TARGET_ELF)
27248 addis_str = "addis %0,%2,%1@toc@ha";
27250 else if (TARGET_XCOFF)
27251 addis_str = "addis %0,%1@u(%2)";
27253 else
27254 gcc_unreachable ();
27257 else if (GET_CODE (value) == PLUS)
27259 rtx op0 = XEXP (value, 0);
27260 rtx op1 = XEXP (value, 1);
27262 if (GET_CODE (op0) == UNSPEC
27263 && XINT (op0, 1) == UNSPEC_TOCREL
27264 && CONST_INT_P (op1))
27266 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27267 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27268 fuse_ops[3] = op1;
27269 if (TARGET_ELF)
27270 addis_str = "addis %0,%2,%1+%3@toc@ha";
27272 else if (TARGET_XCOFF)
27273 addis_str = "addis %0,%1+%3@u(%2)";
27275 else
27276 gcc_unreachable ();
27280 else if (satisfies_constraint_L (value))
27282 fuse_ops[1] = value;
27283 addis_str = "lis %0,%v1";
27286 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27288 fuse_ops[1] = value;
27289 addis_str = "lis %0,%1@ha";
27293 if (!addis_str)
27294 fatal_insn ("Could not generate addis value for fusion", addis_value);
27296 output_asm_insn (addis_str, fuse_ops);
27299 /* Emit a D-form load or store instruction that is the second instruction
27300 of a fusion sequence. */
27302 static void
27303 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27305 rtx fuse_ops[10];
27306 char insn_template[80];
27308 fuse_ops[0] = load_reg;
27309 fuse_ops[1] = addis_reg;
27311 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27313 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27314 fuse_ops[2] = offset;
27315 output_asm_insn (insn_template, fuse_ops);
27318 else if (GET_CODE (offset) == UNSPEC
27319 && XINT (offset, 1) == UNSPEC_TOCREL)
27321 if (TARGET_ELF)
27322 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27324 else if (TARGET_XCOFF)
27325 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27327 else
27328 gcc_unreachable ();
27330 fuse_ops[2] = XVECEXP (offset, 0, 0);
27331 output_asm_insn (insn_template, fuse_ops);
27334 else if (GET_CODE (offset) == PLUS
27335 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27336 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27337 && CONST_INT_P (XEXP (offset, 1)))
27339 rtx tocrel_unspec = XEXP (offset, 0);
27340 if (TARGET_ELF)
27341 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27343 else if (TARGET_XCOFF)
27344 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27346 else
27347 gcc_unreachable ();
27349 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27350 fuse_ops[3] = XEXP (offset, 1);
27351 output_asm_insn (insn_template, fuse_ops);
27354 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27356 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27358 fuse_ops[2] = offset;
27359 output_asm_insn (insn_template, fuse_ops);
27362 else
27363 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27365 return;
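/* For example (a sketch): called with INSN_STR "lwz" and a CONST_INT
   offset of 8, with both LOAD_REG and ADDIS_REG in r9, this prints

       lwz 9,8(9)  */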
27368 /* Given an address, convert it into the addis and load offset parts. Addresses
27369 created during the peephole2 process look like:
27370 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27371 (unspec [(...)] UNSPEC_TOCREL)) */
27373 static void
27374 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27376 rtx hi, lo;
27378 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27380 hi = XEXP (addr, 0);
27381 lo = XEXP (addr, 1);
27383 else
27384 gcc_unreachable ();
27386 *p_hi = hi;
27387 *p_lo = lo;
27390 /* Return a string to fuse an addis instruction with a GPR load into the
27391 same register that the addis instruction set. The address used is the
27392 logical address that was formed during peephole2:
27393 (lo_sum (high) (low-part))
27395 The code is complicated, so we call output_asm_insn directly, and just
27396 return "". */
27398 const char *
27399 emit_fusion_gpr_load (rtx target, rtx mem)
27401 rtx addis_value;
27402 rtx addr;
27403 rtx load_offset;
27404 const char *load_str = NULL;
27405 machine_mode mode;
27407 if (GET_CODE (mem) == ZERO_EXTEND)
27408 mem = XEXP (mem, 0);
27410 gcc_assert (REG_P (target) && MEM_P (mem));
27412 addr = XEXP (mem, 0);
27413 fusion_split_address (addr, &addis_value, &load_offset);
27415 /* Now emit the load instruction to the same register. */
27416 mode = GET_MODE (mem);
27417 switch (mode)
27419 case E_QImode:
27420 load_str = "lbz";
27421 break;
27423 case E_HImode:
27424 load_str = "lhz";
27425 break;
27427 case E_SImode:
27428 case E_SFmode:
27429 load_str = "lwz";
27430 break;
27432 case E_DImode:
27433 case E_DFmode:
27434 gcc_assert (TARGET_POWERPC64);
27435 load_str = "ld";
27436 break;
27438 default:
27439 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27442 /* Emit the addis instruction. */
27443 emit_fusion_addis (target, addis_value);
27445 /* Emit the D-form load instruction. */
27446 emit_fusion_load (target, target, load_offset, load_str);
27448 return "";
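/* Putting the pieces together (a sketch): for a SImode load of a TOC-based
   variable into r9, the two calls above print

       addis 9,2,var@toc@ha
       lwz 9,var@toc@l(9)

   which a power8 can execute as one fused operation.  */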
27452 #ifdef RS6000_GLIBC_ATOMIC_FENV
27453 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
27454 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
27455 #endif
27457 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27459 static void
27460 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27462 if (!TARGET_HARD_FLOAT)
27464 #ifdef RS6000_GLIBC_ATOMIC_FENV
27465 if (atomic_hold_decl == NULL_TREE)
27467 atomic_hold_decl
27468 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27469 get_identifier ("__atomic_feholdexcept"),
27470 build_function_type_list (void_type_node,
27471 double_ptr_type_node,
27472 NULL_TREE));
27473 TREE_PUBLIC (atomic_hold_decl) = 1;
27474 DECL_EXTERNAL (atomic_hold_decl) = 1;
27477 if (atomic_clear_decl == NULL_TREE)
27479 atomic_clear_decl
27480 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27481 get_identifier ("__atomic_feclearexcept"),
27482 build_function_type_list (void_type_node,
27483 NULL_TREE));
27484 TREE_PUBLIC (atomic_clear_decl) = 1;
27485 DECL_EXTERNAL (atomic_clear_decl) = 1;
27488 tree const_double = build_qualified_type (double_type_node,
27489 TYPE_QUAL_CONST);
27490 tree const_double_ptr = build_pointer_type (const_double);
27491 if (atomic_update_decl == NULL_TREE)
27493 atomic_update_decl
27494 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
27495 get_identifier ("__atomic_feupdateenv"),
27496 build_function_type_list (void_type_node,
27497 const_double_ptr,
27498 NULL_TREE));
27499 TREE_PUBLIC (atomic_update_decl) = 1;
27500 DECL_EXTERNAL (atomic_update_decl) = 1;
27503 tree fenv_var = create_tmp_var_raw (double_type_node);
27504 TREE_ADDRESSABLE (fenv_var) = 1;
27505 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
27506 build4 (TARGET_EXPR, double_type_node, fenv_var,
27507 void_node, NULL_TREE, NULL_TREE));
27509 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
27510 *clear = build_call_expr (atomic_clear_decl, 0);
27511 *update = build_call_expr (atomic_update_decl, 1,
27512 fold_convert (const_double_ptr, fenv_addr));
27513 #endif
27514 return;
27517 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
27518 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
27519 tree call_mffs = build_call_expr (mffs, 0);
27521 /* Generates the equivalent of feholdexcept (&fenv_var)
27523 *fenv_var = __builtin_mffs ();
27524 double fenv_hold;
27525 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27526 __builtin_mtfsf (0xff, fenv_hold); */
27528 /* Mask to clear everything except for the rounding modes and non-IEEE
27529 arithmetic flag. */
27530 const unsigned HOST_WIDE_INT hold_exception_mask
27531 = HOST_WIDE_INT_C (0xffffffff00000007);
27533 tree fenv_var = create_tmp_var_raw (double_type_node);
27535 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27536 NULL_TREE, NULL_TREE);
27538 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27539 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27540 build_int_cst (uint64_type_node,
27541 hold_exception_mask));
27543 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27544 fenv_llu_and);
27546 tree hold_mtfsf = build_call_expr (mtfsf, 2,
27547 build_int_cst (unsigned_type_node, 0xff),
27548 fenv_hold_mtfsf);
27550 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27552 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
27554 double fenv_clear = __builtin_mffs ();
27555 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
27556 __builtin_mtfsf (0xff, fenv_clear); */
27558 /* Mask to clear everything in the lower 32 bits of the FPSCR, which
27559 hold the exception bits, the enable bits, and the rounding modes. */
27560 const unsigned HOST_WIDE_INT clear_exception_mask
27561 = HOST_WIDE_INT_C (0xffffffff00000000);
27563 tree fenv_clear = create_tmp_var_raw (double_type_node);
27565 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
27566 call_mffs, NULL_TREE, NULL_TREE);
27568 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
27569 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
27570 fenv_clean_llu,
27571 build_int_cst (uint64_type_node,
27572 clear_exception_mask));
27574 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27575 fenv_clear_llu_and);
27577 tree clear_mtfsf = build_call_expr (mtfsf, 2,
27578 build_int_cst (unsigned_type_node, 0xff),
27579 fenv_clear_mtfsf);
27581 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
27583 /* Generates the equivalent of feupdateenv (&fenv_var)
27585 double old_fenv = __builtin_mffs ();
27586 double fenv_update;
27587 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
27588 (*(uint64_t*)fenv_var & 0x1ff80fff);
27589 __builtin_mtfsf (0xff, fenv_update); */
27591 const unsigned HOST_WIDE_INT update_exception_mask
27592 = HOST_WIDE_INT_C (0xffffffff1fffff00);
27593 const unsigned HOST_WIDE_INT new_exception_mask
27594 = HOST_WIDE_INT_C (0x1ff80fff);
27596 tree old_fenv = create_tmp_var_raw (double_type_node);
27597 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
27598 call_mffs, NULL_TREE, NULL_TREE);
27600 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
27601 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
27602 build_int_cst (uint64_type_node,
27603 update_exception_mask));
27605 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27606 build_int_cst (uint64_type_node,
27607 new_exception_mask));
27609 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
27610 old_llu_and, new_llu_and);
27612 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27613 new_llu_mask);
27615 tree update_mtfsf = build_call_expr (mtfsf, 2,
27616 build_int_cst (unsigned_type_node, 0xff),
27617 fenv_update_mtfsf);
27619 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
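/* The HOLD/CLEAR/UPDATE trees built here implement C11 atomic compound
   assignment on floating-point types.  Roughly (a sketch of the generated
   shape, not literal output), for

       _Atomic double a;  double b;
       a += b;

   the front end produces

       HOLD;
       loop:  old = a;  new = old + b;
              if (atomic_compare_exchange (&a, &old, new))  goto done;
              CLEAR;  goto loop;
       done:  UPDATE;

   so that only the exceptions raised by the successfully stored iteration
   are reported.  */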
27622 void
27623 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
27625 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27627 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27628 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27630 /* The layout of the destination of the vmrgew instruction is:
27631 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
27632 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27633 vmrgew instruction will be correct. */
27634 if (BYTES_BIG_ENDIAN)
27636 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
27637 GEN_INT (0)));
27638 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
27639 GEN_INT (3)));
27641 else
27643 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
27644 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
27647 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27648 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27650 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
27651 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
27653 if (BYTES_BIG_ENDIAN)
27654 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27655 else
27656 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
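/* Net effect (a sketch, ignoring rounding detail): the four double
   elements are narrowed and packed as

       dst = { (float) src1[0], (float) src1[1],
               (float) src2[0], (float) src2[1] }

   on both endiannesses; the xxpermdi/vmrgew choreography above exists only
   to land the converted values in the right lanes.  */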
27659 void
27660 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
27662 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27664 rtx_tmp0 = gen_reg_rtx (V2DImode);
27665 rtx_tmp1 = gen_reg_rtx (V2DImode);
27667 /* The layout of the destination of the vmrgew instruction is:
27668 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
27669 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
27670 vmrgew instruction will be correct. */
27671 if (BYTES_BIG_ENDIAN)
27673 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
27674 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
27676 else
27678 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
27679 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
27682 rtx_tmp2 = gen_reg_rtx (V4SFmode);
27683 rtx_tmp3 = gen_reg_rtx (V4SFmode);
27685 if (signed_convert)
27687 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
27688 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
27690 else
27692 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
27693 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
27696 if (BYTES_BIG_ENDIAN)
27697 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
27698 else
27699 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
27702 void
27703 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
27704 rtx src2)
27706 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
27708 rtx_tmp0 = gen_reg_rtx (V2DFmode);
27709 rtx_tmp1 = gen_reg_rtx (V2DFmode);
27711 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
27712 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
27714 rtx_tmp2 = gen_reg_rtx (V4SImode);
27715 rtx_tmp3 = gen_reg_rtx (V4SImode);
27717 if (signed_convert)
27719 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
27720 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
27722 else
27724 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
27725 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
27728 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
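/* Likewise (a sketch): the result is the element-wise conversion

       dst = { (int) src1[0], (int) src1[1], (int) src2[0], (int) src2[1] }

   with the conversions signed (xvcvdpsxws) or unsigned (xvcvdpuxws)
   according to SIGNED_CONVERT.  */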
27731 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
27733 static bool
27734 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
27735 optimization_type opt_type)
27737 switch (op)
27739 case rsqrt_optab:
27740 return (opt_type == OPTIMIZE_FOR_SPEED
27741 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
27743 default:
27744 return true;
27748 /* Implement TARGET_CONSTANT_ALIGNMENT. */
27750 static HOST_WIDE_INT
27751 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
27753 if (TREE_CODE (exp) == STRING_CST
27754 && (STRICT_ALIGNMENT || !optimize_size))
27755 return MAX (align, BITS_PER_WORD);
27756 return align;
27759 /* Implement TARGET_STARTING_FRAME_OFFSET. */
27761 static HOST_WIDE_INT
27762 rs6000_starting_frame_offset (void)
27764 if (FRAME_GROWS_DOWNWARD)
27765 return 0;
27766 return RS6000_STARTING_FRAME_OFFSET;
27770 /* Create an alias for a mangled name where we have changed the mangling (in
27771 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
27772 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
27774 #if TARGET_ELF && RS6000_WEAK
27775 static void
27776 rs6000_globalize_decl_name (FILE * stream, tree decl)
27778 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
27780 targetm.asm_out.globalize_label (stream, name);
27782 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
27784 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
27785 const char *old_name;
27787 ieee128_mangling_gcc_8_1 = true;
27788 lang_hooks.set_decl_assembler_name (decl);
27789 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
27790 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
27791 ieee128_mangling_gcc_8_1 = false;
27793 if (strcmp (name, old_name) != 0)
27795 fprintf (stream, "\t.weak %s\n", old_name);
27796 fprintf (stream, "\t.set %s,%s\n", old_name, name);
27800 #endif
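/* A hypothetical example: for a C++ function "void foo (__float128)", now
   mangled as _Z3foou9__ieee128, this emits

       .weak _Z3fooU10__float128
       .set _Z3fooU10__float128,_Z3foou9__ieee128

   so objects built against the GCC 8.1 mangling continue to link.  */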
27803 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
27804 function names from <foo>l to <foo>f128 if the default long double type is
27805 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
27806 include file switches the names on systems that support long double as IEEE
27807 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
27808 In the future, glibc will export names like __ieee128_sinf128 and we can
27809 switch to using those instead of using sinf128, which pollutes the user's
27810 namespace.
27812 This will switch the names for Fortran math functions as well (which doesn't
27813 use math.h). However, Fortran needs other changes to the compiler and
27814 library before you can switch the real*16 type at compile time.
27816 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
27817 only do this transformation if the __float128 type is enabled. This
27818 prevents us from doing the transformation on older 32-bit ports that might
27819 have enabled using IEEE 128-bit floating point as the default long double
27820 type. */
27822 static tree
27823 rs6000_mangle_decl_assembler_name (tree decl, tree id)
27825 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
27826 && TREE_CODE (decl) == FUNCTION_DECL
27827 && DECL_IS_UNDECLARED_BUILTIN (decl)
27828 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
27830 size_t len = IDENTIFIER_LENGTH (id);
27831 const char *name = IDENTIFIER_POINTER (id);
27832 char *newname = NULL;
27834 /* See if it is one of the built-in functions with an unusual name. */
27835 switch (DECL_FUNCTION_CODE (decl))
27837 case BUILT_IN_DREML:
27838 newname = xstrdup ("__remainderieee128");
27839 break;
27841 case BUILT_IN_GAMMAL:
27842 newname = xstrdup ("__lgammaieee128");
27843 break;
27845 case BUILT_IN_GAMMAL_R:
27846 case BUILT_IN_LGAMMAL_R:
27847 newname = xstrdup ("__lgammaieee128_r");
27848 break;
27850 case BUILT_IN_NEXTTOWARD:
27851 newname = xstrdup ("__nexttoward_to_ieee128");
27852 break;
27854 case BUILT_IN_NEXTTOWARDF:
27855 newname = xstrdup ("__nexttowardf_to_ieee128");
27856 break;
27858 case BUILT_IN_NEXTTOWARDL:
27859 newname = xstrdup ("__nexttowardieee128");
27860 break;
27862 case BUILT_IN_POW10L:
27863 newname = xstrdup ("__exp10ieee128");
27864 break;
27866 case BUILT_IN_SCALBL:
27867 newname = xstrdup ("__scalbieee128");
27868 break;
27870 case BUILT_IN_SIGNIFICANDL:
27871 newname = xstrdup ("__significandieee128");
27872 break;
27874 case BUILT_IN_SINCOSL:
27875 newname = xstrdup ("__sincosieee128");
27876 break;
27878 default:
27879 break;
27882 /* Update the __builtin_*printf and __builtin_*scanf functions. */
27883 if (!newname)
27885 size_t printf_len = strlen ("printf");
27886 size_t scanf_len = strlen ("scanf");
27888 if (len >= printf_len
27889 && strcmp (name + len - printf_len, "printf") == 0)
27890 newname = xasprintf ("__%sieee128", name);
27892 else if (len >= scanf_len
27893 && strcmp (name + len - scanf_len, "scanf") == 0)
27894 newname = xasprintf ("__isoc99_%sieee128", name);
27896 else if (name[len - 1] == 'l')
27898 bool uses_ieee128_p = false;
27899 tree type = TREE_TYPE (decl);
27900 machine_mode ret_mode = TYPE_MODE (type);
27902 /* See if the function returns an IEEE 128-bit floating point type or
27903 complex type. */
27904 if (ret_mode == TFmode || ret_mode == TCmode)
27905 uses_ieee128_p = true;
27906 else
27908 function_args_iterator args_iter;
27909 tree arg;
27911 /* See if the function passes an IEEE 128-bit floating point type
27912 or complex type. */
27913 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
27915 machine_mode arg_mode = TYPE_MODE (arg);
27916 if (arg_mode == TFmode || arg_mode == TCmode)
27918 uses_ieee128_p = true;
27919 break;
27924 /* If we passed or returned an IEEE 128-bit floating point type,
27925 change the name. Use __<name>ieee128 instead of <name>l. */
27926 if (uses_ieee128_p)
27927 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
27931 if (newname)
27933 if (TARGET_DEBUG_BUILTIN)
27934 fprintf (stderr, "Map %s => %s\n", name, newname);
27936 id = get_identifier (newname);
27937 free (newname);
27941 return id;
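/* Some resulting mappings (derived from the cases above):

       sinl    -> __sinieee128
       printf  -> __printfieee128
       scanf   -> __isoc99_scanfieee128
       dreml   -> __remainderieee128

   The <foo>l renaming only fires when the prototype actually passes or
   returns a TFmode or TCmode value.  */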
27944 /* Predict whether the given loop in gimple will be transformed in the RTL
27945 doloop_optimize pass. */
27947 static bool
27948 rs6000_predict_doloop_p (struct loop *loop)
27950 gcc_assert (loop);
27952 /* On rs6000, targetm.can_use_doloop_p is actually
27953 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
27954 if (loop->inner != NULL)
27956 if (dump_file && (dump_flags & TDF_DETAILS))
27957 fprintf (dump_file, "Predict doloop failure due to"
27958 " loop nesting.\n");
27959 return false;
27962 return true;
27965 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
27967 static machine_mode
27968 rs6000_preferred_doloop_mode (machine_mode)
27970 return word_mode;
27973 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
27975 static bool
27976 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
27978 gcc_assert (MEM_P (mem));
27980 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
27981 type addresses, so don't allow MEMs with those address types to be
27982 substituted as an equivalent expression. See PR93974 for details. */
27983 if (GET_CODE (XEXP (mem, 0)) == AND)
27984 return true;
27986 return false;
27989 /* Implement TARGET_INVALID_CONVERSION. */
27991 static const char *
27992 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
27994 /* Make sure we're working with the canonical types. */
27995 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
27996 fromtype = TYPE_CANONICAL (fromtype);
27997 if (TYPE_CANONICAL (totype) != NULL_TREE)
27998 totype = TYPE_CANONICAL (totype);
28000 machine_mode frommode = TYPE_MODE (fromtype);
28001 machine_mode tomode = TYPE_MODE (totype);
28003 if (frommode != tomode)
28005 /* Do not allow conversions to/from XOmode and OOmode types. */
28006 if (frommode == XOmode)
28007 return N_("invalid conversion from type %<__vector_quad%>");
28008 if (tomode == XOmode)
28009 return N_("invalid conversion to type %<__vector_quad%>");
28010 if (frommode == OOmode)
28011 return N_("invalid conversion from type %<__vector_pair%>");
28012 if (tomode == OOmode)
28013 return N_("invalid conversion to type %<__vector_pair%>");
28015 else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
28017 /* We really care about the modes of the base types. */
28018 frommode = TYPE_MODE (TREE_TYPE (fromtype));
28019 tomode = TYPE_MODE (TREE_TYPE (totype));
28021 /* Do not allow conversions to/from XOmode and OOmode pointer
28022 types, except to/from void pointers. */
28023 if (frommode != tomode
28024 && frommode != VOIDmode
28025 && tomode != VOIDmode)
28027 if (frommode == XOmode)
28028 return N_("invalid conversion from type %<* __vector_quad%>");
28029 if (tomode == XOmode)
28030 return N_("invalid conversion to type %<* __vector_quad%>");
28031 if (frommode == OOmode)
28032 return N_("invalid conversion from type %<* __vector_pair%>");
28033 if (tomode == OOmode)
28034 return N_("invalid conversion to type %<* __vector_pair%>");
28038 /* Conversion allowed. */
28039 return NULL;
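/* For example (a sketch): given the MMA types,

       __vector_quad q;
       __vector_pair p = (__vector_pair) q;

   the cast is rejected with "invalid conversion from type
   '__vector_quad'"; pointer conversions between the two types are likewise
   rejected unless one side is a void pointer.  */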
28042 /* Convert a SFmode constant to the integer bit pattern. */
28044 long
28045 rs6000_const_f32_to_i32 (rtx operand)
28047 long value;
28048 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28050 gcc_assert (GET_MODE (operand) == SFmode);
28051 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28052 return value;
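/* For example: the SFmode constant 1.0f yields 0x3f800000 and -2.5f yields
   0xc0200000, the usual IEEE single-precision bit patterns.  */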
28055 void
28056 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28058 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28059 inform (input_location,
28060 "the result for the xxspltidp instruction "
28061 "is undefined for subnormal input values");
28062 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28065 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28067 static bool
28068 rs6000_gen_pic_addr_diff_vec (void)
28070 return rs6000_relative_jumptables;
28073 void
28074 rs6000_output_addr_vec_elt (FILE *file, int value)
28076 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28077 char buf[100];
28079 fprintf (file, "%s", directive);
28080 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28081 assemble_name (file, buf);
28082 fprintf (file, "\n");
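/* For example (a sketch): on a 32-bit ELF target, VALUE == 42 prints

       .long .L42

   while 64-bit targets use DOUBLE_INT_ASM_OP (".quad" on ELF) instead.  */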
28085 struct gcc_target targetm = TARGET_INITIALIZER;
28087 #include "gt-rs6000.h"