Patch ieee128-lib-patch001b
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;
#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif
/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;
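/* Hedged illustration, not part of this file's logic: under the current
   scheme a C++ declaration such as

     void f (__float128);   // hypothetical example function

   mangles f as _Z1fu9__ieee128, while GCC 8.1 produced _Z1fU10__float128;
   the weak-alias support mentioned above is what keeps objects built with
   either mangling linking together.  */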
/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV     = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV     = 0x002,
  RECIP_V4SF_DIV   = 0x004,
  RECIP_V2DF_DIV   = 0x008,

  RECIP_SF_RSQRT   = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT   = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE = 0,
  RECIP_ALL  = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		| RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
		| RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;   /* option name */
  unsigned int mask;    /* mask bits to set */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
	       | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
	       | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,	/* default clone.  */
  CLONE_ISA_2_05,	/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,	/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,	/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,	/* ISA 3.00 (power9).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },					/* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },		/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },		/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },	/* ISA 3.00 (power9).  */
};
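/* Hedged illustration (user-level code, not part of this file): a
   target_clones resolver ultimately tests the HWCAP names above from
   newest ISA to oldest, along the lines of

     if (__builtin_cpu_supports ("arch_3_00"))        // ISA 3.00 (power9)
       use_power9_clone ();
     else if (__builtin_cpu_supports ("arch_2_07"))   // ISA 2.07 (power8)
       use_power8_clone ();
     else
       use_default_clone ();

   where the use_*_clone functions are hypothetical placeholders.  */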
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,	/* General purpose registers.  */
  RELOAD_REG_FPR,	/* Traditional floating point regs.  */
  RELOAD_REG_VMX,	/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,	/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },		/* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },		/* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any", -1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Register type masks based on the type, of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	 /* INSN to reload for loading.  */
  enum insn_code reload_store;	 /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		 /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
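/* A hedged sketch of the same accessor pattern for indexed (reg+reg)
   addressing; this particular helper is hypothetical and not used elsewhere
   in the file, but it follows the accessors above and uses only the mask
   bits defined earlier.  */
static inline bool
mode_supports_indexed_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_INDEXED)
	  != 0);
}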
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
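/* Hedged example (illustrative RTL, not taken from a real dump): a PARALLEL
   store such as

     (parallel [(set (mem:DI (reg:DI 5)) (reg:DI 3))
		(clobber (reg:DI 0))])

   is accepted by the scans above, since every element is a SET, CLOBBER, or
   USE; any other element code makes this wrapper return false instead of
   letting the generic store_data_bypass_p assert.  */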
/* Processor costs (relative to an add) */
const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t, reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode, reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
1714 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1716 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1717 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1718 rs6000_hard_regno_call_part_clobbered
1720 #undef TARGET_SLOW_UNALIGNED_ACCESS
1721 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1723 #undef TARGET_CAN_CHANGE_MODE_CLASS
1724 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1726 #undef TARGET_CONSTANT_ALIGNMENT
1727 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1729 #undef TARGET_STARTING_FRAME_OFFSET
1730 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1732 #if TARGET_ELF && RS6000_WEAK
1733 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1734 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1735 #endif
1737 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1738 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1740 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1741 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1743 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1744 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1745 rs6000_cannot_substitute_mem_equiv_p
1748 /* Processor table. */
1749 struct rs6000_ptt
1751 const char *const name; /* Canonical processor name. */
1752 const enum processor_type processor; /* Processor type enum value. */
1753 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1756 static struct rs6000_ptt const processor_target_table[] =
1758 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1759 #include "rs6000-cpus.def"
1760 #undef RS6000_CPU
1763 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1764 name is invalid. */
1766 static int
1767 rs6000_cpu_name_lookup (const char *name)
1769 size_t i;
1771 if (name != NULL)
1773 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1774 if (! strcmp (name, processor_target_table[i].name))
1775 return (int)i;
1778 return -1;
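/* As an illustrative sketch (not itself part of the tables), the lookup is a
   simple linear scan over processor_target_table:

     rs6000_cpu_name_lookup ("power9")    => table index of "power9"
     rs6000_cpu_name_lookup ("not-a-cpu") => -1

   The returned index is later used to fetch the processor enum value and the
   target_enable flags when handling -mcpu= and -mtune=.  */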
1782 /* Return number of consecutive hard regs needed starting at reg REGNO
1783 to hold something of mode MODE.
1784 This is ordinarily the length in words of a value of mode MODE
1785 but can be less for certain modes in special long registers.
1787 POWER and PowerPC GPRs hold 32 bits worth;
1788      PowerPC64 GPRs and FPRs hold 64 bits worth.  */
1790 static int
1791 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1793 unsigned HOST_WIDE_INT reg_size;
1795 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1796 128-bit floating point that can go in vector registers, which has VSX
1797 memory addressing. */
1798 if (FP_REGNO_P (regno))
1799 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1800 ? UNITS_PER_VSX_WORD
1801 : UNITS_PER_FP_WORD);
1803 else if (ALTIVEC_REGNO_P (regno))
1804 reg_size = UNITS_PER_ALTIVEC_WORD;
1806 else
1807 reg_size = UNITS_PER_WORD;
1809 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
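/* Illustrative examples of the ceiling division above (assuming 8-byte FP
   registers and 16-byte VSX registers): IBM 128-bit long double in FPRs
   needs (16 + 8 - 1) / 8 = 2 registers, while IEEE 128-bit floating point,
   which is FLOAT128_VECTOR_P, needs (16 + 16 - 1) / 16 = 1 VSX register.  */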
1812 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1813 MODE. */
1814 static int
1815 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1817 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1819 if (COMPLEX_MODE_P (mode))
1820 mode = GET_MODE_INNER (mode);
1822 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1823 register combinations, and use PTImode where we need to deal with quad
1824 word memory operations. Don't allow quad words in the argument or frame
1825 pointer registers, just registers 0..31. */
1826 if (mode == PTImode)
1827 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1828 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1829 && ((regno & 1) == 0));
1831 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1832 implementations. Don't allow an item to be split between a FP register
1833 and an Altivec register. Allow TImode in all VSX registers if the user
1834 asked for it. */
1835 if (TARGET_VSX && VSX_REGNO_P (regno)
1836 && (VECTOR_MEM_VSX_P (mode)
1837 || FLOAT128_VECTOR_P (mode)
1838 || reg_addr[mode].scalar_in_vmx_p
1839 || mode == TImode
1840 || (TARGET_VADDUQM && mode == V1TImode)))
1842 if (FP_REGNO_P (regno))
1843 return FP_REGNO_P (last_regno);
1845 if (ALTIVEC_REGNO_P (regno))
1847 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1848 return 0;
1850 return ALTIVEC_REGNO_P (last_regno);
1854 /* The GPRs can hold any mode, but values bigger than one register
1855 cannot go past R31. */
1856 if (INT_REGNO_P (regno))
1857 return INT_REGNO_P (last_regno);
1859 /* The float registers (except for VSX vector modes) can only hold floating
1860 modes and DImode. */
1861 if (FP_REGNO_P (regno))
1863 if (FLOAT128_VECTOR_P (mode))
1864 return false;
1866 if (SCALAR_FLOAT_MODE_P (mode)
1867 && (mode != TDmode || (regno % 2) == 0)
1868 && FP_REGNO_P (last_regno))
1869 return 1;
1871 if (GET_MODE_CLASS (mode) == MODE_INT)
1873       if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1874 return 1;
1876 if (TARGET_P8_VECTOR && (mode == SImode))
1877 return 1;
1879 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1880 return 1;
1883 return 0;
1886 /* The CR register can only hold CC modes. */
1887 if (CR_REGNO_P (regno))
1888 return GET_MODE_CLASS (mode) == MODE_CC;
1890 if (CA_REGNO_P (regno))
1891 return mode == Pmode || mode == SImode;
1893   /* AltiVec modes can go only in AltiVec registers.  */
1894 if (ALTIVEC_REGNO_P (regno))
1895 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1896 || mode == V1TImode);
1898   /* We cannot put non-VSX TImode or PTImode anywhere except the general
1899      registers, and it must be able to fit within the register set.  */
1901 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
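/* Illustrative: PTImode is accepted starting in an even GPR such as r10 but
   rejected starting in an odd GPR such as r11, because the quad-word memory
   instructions require an even/odd register pair.  */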
1904 /* Implement TARGET_HARD_REGNO_NREGS. */
1906 static unsigned int
1907 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1909 return rs6000_hard_regno_nregs[mode][regno];
1912 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1914 static bool
1915 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1917 return rs6000_hard_regno_mode_ok_p[mode][regno];
1920 /* Implement TARGET_MODES_TIEABLE_P.
1922 PTImode cannot tie with other modes because PTImode is restricted to even
1923 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1924 57744).
1926 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1927 128-bit floating point on VSX systems ties with other vectors. */
1929 static bool
1930 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1932 if (mode1 == PTImode)
1933 return mode2 == PTImode;
1934 if (mode2 == PTImode)
1935 return false;
1937 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1938 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1939 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1940 return false;
1942 if (SCALAR_FLOAT_MODE_P (mode1))
1943 return SCALAR_FLOAT_MODE_P (mode2);
1944 if (SCALAR_FLOAT_MODE_P (mode2))
1945 return false;
1947 if (GET_MODE_CLASS (mode1) == MODE_CC)
1948 return GET_MODE_CLASS (mode2) == MODE_CC;
1949 if (GET_MODE_CLASS (mode2) == MODE_CC)
1950 return false;
1952 return true;
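/* Illustrative examples of the ordering above: V4SImode and V2DFmode tie
   (both are Altivec/VSX vector modes, which on VSX systems also covers IEEE
   128-bit floating point); SFmode and DFmode tie (both scalar float); and
   PTImode and the CC modes tie only with modes of their own kind.  */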
1955 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1957 static bool
1958 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1959 machine_mode mode)
1961 if (TARGET_32BIT
1962 && TARGET_POWERPC64
1963 && GET_MODE_SIZE (mode) > 4
1964 && INT_REGNO_P (regno))
1965 return true;
1967 if (TARGET_VSX
1968 && FP_REGNO_P (regno)
1969 && GET_MODE_SIZE (mode) > 8
1970 && !FLOAT128_2REG_P (mode))
1971 return true;
1973 return false;
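/* Illustrative: with -m32 -mpowerpc64, a DImode value occupies a single
   64-bit GPR, but only the low 32 bits survive a call, so the register is
   part-clobbered.  Likewise, a 16-byte mode held in a traditional FPR under
   VSX only has its low 8 bytes preserved across calls.  */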
1976 /* Print interesting facts about registers. */
1977 static void
1978 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1980 int r, m;
1982 for (r = first_regno; r <= last_regno; ++r)
1984 const char *comma = "";
1985 int len;
1987 if (first_regno == last_regno)
1988 fprintf (stderr, "%s:\t", reg_name);
1989 else
1990 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1992 len = 8;
1993 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1994 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1996 if (len > 70)
1998 fprintf (stderr, ",\n\t");
1999 len = 8;
2000 comma = "";
2003 if (rs6000_hard_regno_nregs[m][r] > 1)
2004 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2005 rs6000_hard_regno_nregs[m][r]);
2006 else
2007 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2009 comma = ", ";
2012 if (call_used_or_fixed_reg_p (r))
2014 if (len > 70)
2016 fprintf (stderr, ",\n\t");
2017 len = 8;
2018 comma = "";
2021 len += fprintf (stderr, "%s%s", comma, "call-used");
2022 comma = ", ";
2025 if (fixed_regs[r])
2027 if (len > 70)
2029 fprintf (stderr, ",\n\t");
2030 len = 8;
2031 comma = "";
2034 len += fprintf (stderr, "%s%s", comma, "fixed");
2035 comma = ", ";
2038 if (len > 70)
2040 fprintf (stderr, ",\n\t");
2041 comma = "";
2044 len += fprintf (stderr, "%sreg-class = %s", comma,
2045 reg_class_names[(int)rs6000_regno_regclass[r]]);
2046 comma = ", ";
2048 if (len > 70)
2050 fprintf (stderr, ",\n\t");
2051 comma = "";
2054 fprintf (stderr, "%sregno = %d\n", comma, r);
2058 static const char *
2059 rs6000_debug_vector_unit (enum rs6000_vector v)
2061 const char *ret;
2063 switch (v)
2065 case VECTOR_NONE: ret = "none"; break;
2066 case VECTOR_ALTIVEC: ret = "altivec"; break;
2067 case VECTOR_VSX: ret = "vsx"; break;
2068 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2069 default: ret = "unknown"; break;
2072 return ret;
2075 /* Inner function printing just the address mask for a particular reload
2076 register class. */
2077 DEBUG_FUNCTION char *
2078 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2080 static char ret[8];
2081 char *p = ret;
2083 if ((mask & RELOAD_REG_VALID) != 0)
2084 *p++ = 'v';
2085 else if (keep_spaces)
2086 *p++ = ' ';
2088 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2089 *p++ = 'm';
2090 else if (keep_spaces)
2091 *p++ = ' ';
2093 if ((mask & RELOAD_REG_INDEXED) != 0)
2094 *p++ = 'i';
2095 else if (keep_spaces)
2096 *p++ = ' ';
2098 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2099 *p++ = 'O';
2100 else if ((mask & RELOAD_REG_OFFSET) != 0)
2101 *p++ = 'o';
2102 else if (keep_spaces)
2103 *p++ = ' ';
2105 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2106 *p++ = '+';
2107 else if (keep_spaces)
2108 *p++ = ' ';
2110 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2111 *p++ = '+';
2112 else if (keep_spaces)
2113 *p++ = ' ';
2115 if ((mask & RELOAD_REG_AND_M16) != 0)
2116 *p++ = '&';
2117 else if (keep_spaces)
2118 *p++ = ' ';
2120 *p = '\0';
2122 return ret;
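/* Illustrative output of the encoding above: a mask with VALID, INDEXED,
   OFFSET, and PRE_INCDEC set prints as "v io+  " when keep_spaces is true
   (fixed-width columns), and as "vio+" when it is false.  */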
2125 /* Print the address masks in a human readable fashion.  */
2126 DEBUG_FUNCTION void
2127 rs6000_debug_print_mode (ssize_t m)
2129 ssize_t rc;
2130 int spaces = 0;
2132 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2133 for (rc = 0; rc < N_RELOAD_REG; rc++)
2134 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2135 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2137 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2138 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2140 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2141 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2142 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2143 spaces = 0;
2145 else
2146 spaces += strlen (" Reload=sl");
2148 if (reg_addr[m].scalar_in_vmx_p)
2150 fprintf (stderr, "%*s Upper=y", spaces, "");
2151 spaces = 0;
2153 else
2154 spaces += strlen (" Upper=y");
2156 if (rs6000_vector_unit[m] != VECTOR_NONE
2157 || rs6000_vector_mem[m] != VECTOR_NONE)
2159 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2160 spaces, "",
2161 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2162 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2165 fputs ("\n", stderr);
2168 #define DEBUG_FMT_ID "%-32s= "
2169 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2170 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2171 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
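/* For example, fprintf (stderr, DEBUG_FMT_S, "abi", "ELFv2") prints the
   label left-justified in a 32-character field followed by "= ELFv2", so
   all -mdebug=reg values line up in a single column.  */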
2173 /* Print various interesting information with -mdebug=reg. */
2174 static void
2175 rs6000_debug_reg_global (void)
2177 static const char *const tf[2] = { "false", "true" };
2178 const char *nl = (const char *)0;
2179 int m;
2180 size_t m1, m2, v;
2181 char costly_num[20];
2182 char nop_num[20];
2183 char flags_buffer[40];
2184 const char *costly_str;
2185 const char *nop_str;
2186 const char *trace_str;
2187 const char *abi_str;
2188 const char *cmodel_str;
2189 struct cl_target_option cl_opts;
2191 /* Modes we want tieable information on. */
2192 static const machine_mode print_tieable_modes[] = {
2193 QImode,
2194 HImode,
2195 SImode,
2196 DImode,
2197 TImode,
2198 PTImode,
2199 SFmode,
2200 DFmode,
2201 TFmode,
2202 IFmode,
2203 KFmode,
2204 SDmode,
2205 DDmode,
2206 TDmode,
2207 V16QImode,
2208 V8HImode,
2209 V4SImode,
2210 V2DImode,
2211 V1TImode,
2212 V32QImode,
2213 V16HImode,
2214 V8SImode,
2215 V4DImode,
2216 V2TImode,
2217 V4SFmode,
2218 V2DFmode,
2219 V8SFmode,
2220 V4DFmode,
2221 CCmode,
2222 CCUNSmode,
2223 CCEQmode,
2226 /* Virtual regs we are interested in. */
2227 const static struct {
2228 int regno; /* register number. */
2229 const char *name; /* register name. */
2230 } virtual_regs[] = {
2231 { STACK_POINTER_REGNUM, "stack pointer:" },
2232 { TOC_REGNUM, "toc: " },
2233 { STATIC_CHAIN_REGNUM, "static chain: " },
2234 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2235 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2236 { ARG_POINTER_REGNUM, "arg pointer: " },
2237 { FRAME_POINTER_REGNUM, "frame pointer:" },
2238 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2239 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2240 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2241 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2242 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2243 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2244 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2245     { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2246 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2249 fputs ("\nHard register information:\n", stderr);
2250 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2251 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2252 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2253 LAST_ALTIVEC_REGNO,
2254 "vs");
2255 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2256 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2257 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2258 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2259 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2260 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2262 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2263 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2264 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2266 fprintf (stderr,
2267 "\n"
2268 "d reg_class = %s\n"
2269 "f reg_class = %s\n"
2270 "v reg_class = %s\n"
2271 "wa reg_class = %s\n"
2272 "we reg_class = %s\n"
2273 "wr reg_class = %s\n"
2274 "wx reg_class = %s\n"
2275 "wA reg_class = %s\n"
2276 "\n",
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2279 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2280 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2281 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2282 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2283 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2284 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2286 nl = "\n";
2287 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2288 rs6000_debug_print_mode (m);
2290 fputs ("\n", stderr);
2292 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2294 machine_mode mode1 = print_tieable_modes[m1];
2295 bool first_time = true;
2297 nl = (const char *)0;
2298 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2300 machine_mode mode2 = print_tieable_modes[m2];
2301 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2303 if (first_time)
2305 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2306 nl = "\n";
2307 first_time = false;
2310 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2314 if (!first_time)
2315 fputs ("\n", stderr);
2318 if (nl)
2319 fputs (nl, stderr);
2321 if (rs6000_recip_control)
2323 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2325 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2326 if (rs6000_recip_bits[m])
2328 fprintf (stderr,
2329 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2330 GET_MODE_NAME (m),
2331 (RS6000_RECIP_AUTO_RE_P (m)
2332 ? "auto"
2333 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2334 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2335 ? "auto"
2336 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2339 fputs ("\n", stderr);
2342 if (rs6000_cpu_index >= 0)
2344 const char *name = processor_target_table[rs6000_cpu_index].name;
2345 HOST_WIDE_INT flags
2346 = processor_target_table[rs6000_cpu_index].target_enable;
2348 sprintf (flags_buffer, "-mcpu=%s flags", name);
2349 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2351 else
2352 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2354 if (rs6000_tune_index >= 0)
2356 const char *name = processor_target_table[rs6000_tune_index].name;
2357 HOST_WIDE_INT flags
2358 = processor_target_table[rs6000_tune_index].target_enable;
2360 sprintf (flags_buffer, "-mtune=%s flags", name);
2361 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2363 else
2364 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2366 cl_target_option_save (&cl_opts, &global_options);
2367 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2368 rs6000_isa_flags);
2370 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2371 rs6000_isa_flags_explicit);
2373 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2374 rs6000_builtin_mask);
2376 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2378 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2379 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2381 switch (rs6000_sched_costly_dep)
2383 case max_dep_latency:
2384 costly_str = "max_dep_latency";
2385 break;
2387 case no_dep_costly:
2388 costly_str = "no_dep_costly";
2389 break;
2391 case all_deps_costly:
2392 costly_str = "all_deps_costly";
2393 break;
2395 case true_store_to_load_dep_costly:
2396 costly_str = "true_store_to_load_dep_costly";
2397 break;
2399 case store_to_load_dep_costly:
2400 costly_str = "store_to_load_dep_costly";
2401 break;
2403 default:
2404 costly_str = costly_num;
2405 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2406 break;
2409 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2411 switch (rs6000_sched_insert_nops)
2413 case sched_finish_regroup_exact:
2414 nop_str = "sched_finish_regroup_exact";
2415 break;
2417 case sched_finish_pad_groups:
2418 nop_str = "sched_finish_pad_groups";
2419 break;
2421 case sched_finish_none:
2422 nop_str = "sched_finish_none";
2423 break;
2425 default:
2426 nop_str = nop_num;
2427 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2428 break;
2431 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2433 switch (rs6000_sdata)
2435 default:
2436 case SDATA_NONE:
2437 break;
2439 case SDATA_DATA:
2440 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2441 break;
2443 case SDATA_SYSV:
2444 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2445 break;
2447 case SDATA_EABI:
2448 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2449 break;
2453 switch (rs6000_traceback)
2455 case traceback_default: trace_str = "default"; break;
2456 case traceback_none: trace_str = "none"; break;
2457 case traceback_part: trace_str = "part"; break;
2458 case traceback_full: trace_str = "full"; break;
2459 default: trace_str = "unknown"; break;
2462 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2464 switch (rs6000_current_cmodel)
2466 case CMODEL_SMALL: cmodel_str = "small"; break;
2467 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2468 case CMODEL_LARGE: cmodel_str = "large"; break;
2469 default: cmodel_str = "unknown"; break;
2472 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2474 switch (rs6000_current_abi)
2476 case ABI_NONE: abi_str = "none"; break;
2477 case ABI_AIX: abi_str = "aix"; break;
2478 case ABI_ELFv2: abi_str = "ELFv2"; break;
2479 case ABI_V4: abi_str = "V4"; break;
2480 case ABI_DARWIN: abi_str = "darwin"; break;
2481 default: abi_str = "unknown"; break;
2484 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2486 if (rs6000_altivec_abi)
2487 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2489 if (rs6000_darwin64_abi)
2490 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2492 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2493 (TARGET_SOFT_FLOAT ? "true" : "false"));
2495 if (TARGET_LINK_STACK)
2496 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2498 if (TARGET_P8_FUSION)
2500 char options[80];
2502 strcpy (options, "power8");
2503 if (TARGET_P8_FUSION_SIGN)
2504 strcat (options, ", sign");
2506 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2509 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2510 TARGET_SECURE_PLT ? "secure" : "bss");
2511 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2512 aix_struct_return ? "aix" : "sysv");
2513 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2514 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2515 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2516 tf[!!rs6000_align_branch_targets]);
2517 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2518 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2519 rs6000_long_double_type_size);
2520 if (rs6000_long_double_type_size > 64)
2522 fprintf (stderr, DEBUG_FMT_S, "long double type",
2523 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2524 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2525 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2527 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2528 (int)rs6000_sched_restricted_insns_priority);
2529 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2530 (int)END_BUILTINS);
2531 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2532 (int)RS6000_BUILTIN_COUNT);
2534 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2535 (int)TARGET_FLOAT128_ENABLE_TYPE);
2537 if (TARGET_VSX)
2538 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2539 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2541 if (TARGET_DIRECT_MOVE_128)
2542 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2543 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2547 /* Update the addr mask bits in reg_addr to help the secondary reload and
2548    legitimate address support figure out the appropriate addressing to
2549    use.  */
2551 static void
2552 rs6000_setup_reg_addr_masks (void)
2554 ssize_t rc, reg, m, nregs;
2555 addr_mask_type any_addr_mask, addr_mask;
2557 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2559 machine_mode m2 = (machine_mode) m;
2560 bool complex_p = false;
2561 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2562 size_t msize;
2564 if (COMPLEX_MODE_P (m2))
2566 complex_p = true;
2567 m2 = GET_MODE_INNER (m2);
2570 msize = GET_MODE_SIZE (m2);
2572 /* SDmode is special in that we want to access it only via REG+REG
2573 addressing on power7 and above, since we want to use the LFIWZX and
2574 	 STFIWX instructions to load it.  */
2575 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2577 any_addr_mask = 0;
2578 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2580 addr_mask = 0;
2581 reg = reload_reg_map[rc].reg;
2583 /* Can mode values go in the GPR/FPR/Altivec registers? */
2584 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2586 bool small_int_vsx_p = (small_int_p
2587 && (rc == RELOAD_REG_FPR
2588 || rc == RELOAD_REG_VMX));
2590 nregs = rs6000_hard_regno_nregs[m][reg];
2591 addr_mask |= RELOAD_REG_VALID;
2593 /* Indicate if the mode takes more than 1 physical register. If
2594 it takes a single register, indicate it can do REG+REG
2595 addressing. Small integers in VSX registers can only do
2596 REG+REG addressing. */
2597 if (small_int_vsx_p)
2598 addr_mask |= RELOAD_REG_INDEXED;
2599 else if (nregs > 1 || m == BLKmode || complex_p)
2600 addr_mask |= RELOAD_REG_MULTIPLE;
2601 else
2602 addr_mask |= RELOAD_REG_INDEXED;
2604 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2605 addressing. If we allow scalars into Altivec registers,
2606 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2608 For VSX systems, we don't allow update addressing for
2609 DFmode/SFmode if those registers can go in both the
2610 traditional floating point registers and Altivec registers.
2611 The load/store instructions for the Altivec registers do not
2612 have update forms. If we allowed update addressing, it seems
2613 to break IV-OPT code using floating point if the index type is
2614 int instead of long (PR target/81550 and target/84042). */
2616 if (TARGET_UPDATE
2617 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2618 && msize <= 8
2619 && !VECTOR_MODE_P (m2)
2620 && !FLOAT128_VECTOR_P (m2)
2621 && !complex_p
2622 && (m != E_DFmode || !TARGET_VSX)
2623 && (m != E_SFmode || !TARGET_P8_VECTOR)
2624 && !small_int_vsx_p)
2626 addr_mask |= RELOAD_REG_PRE_INCDEC;
2628 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2629 we don't allow PRE_MODIFY for some multi-register
2630 operations. */
2631 switch (m)
2633 default:
2634 addr_mask |= RELOAD_REG_PRE_MODIFY;
2635 break;
2637 case E_DImode:
2638 if (TARGET_POWERPC64)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2642 case E_DFmode:
2643 case E_DDmode:
2644 if (TARGET_HARD_FLOAT)
2645 addr_mask |= RELOAD_REG_PRE_MODIFY;
2646 break;
2651 /* GPR and FPR registers can do REG+OFFSET addressing, except
2652 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2653 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2654 if ((addr_mask != 0) && !indexed_only_p
2655 && msize <= 8
2656 && (rc == RELOAD_REG_GPR
2657 || ((msize == 8 || m2 == SFmode)
2658 && (rc == RELOAD_REG_FPR
2659 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2660 addr_mask |= RELOAD_REG_OFFSET;
2662 	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2663 instructions are enabled. The offset for 128-bit VSX registers is
2664 only 12-bits. While GPRs can handle the full offset range, VSX
2665 registers can only handle the restricted range. */
2666 else if ((addr_mask != 0) && !indexed_only_p
2667 && msize == 16 && TARGET_P9_VECTOR
2668 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2669 || (m2 == TImode && TARGET_VSX)))
2671 addr_mask |= RELOAD_REG_OFFSET;
2672 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2673 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2676 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2677 addressing on 128-bit types. */
2678 if (rc == RELOAD_REG_VMX && msize == 16
2679 && (addr_mask & RELOAD_REG_VALID) != 0)
2680 addr_mask |= RELOAD_REG_AND_M16;
2682 reg_addr[m].addr_mask[rc] = addr_mask;
2683 any_addr_mask |= addr_mask;
2686 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
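/* As an illustrative summary of the loop above (the exact bits depend on
   the configured ISA): on a power9-style target, DFmode would typically get
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET in the GPR and
   FPR classes, but no RELOAD_REG_PRE_INCDEC, since update addressing is
   disabled for DFmode when TARGET_VSX is on.  */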
2691 /* Initialize the various global tables that are based on register size. */
2692 static void
2693 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2695 ssize_t r, m, c;
2696 int align64;
2697 int align32;
2699 /* Precalculate REGNO_REG_CLASS. */
2700 rs6000_regno_regclass[0] = GENERAL_REGS;
2701 for (r = 1; r < 32; ++r)
2702 rs6000_regno_regclass[r] = BASE_REGS;
2704 for (r = 32; r < 64; ++r)
2705 rs6000_regno_regclass[r] = FLOAT_REGS;
2707 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2708 rs6000_regno_regclass[r] = NO_REGS;
2710 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2711 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2713 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2714 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2715 rs6000_regno_regclass[r] = CR_REGS;
2717 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2718 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2719 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2720 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2721 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2722 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2723 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2725 /* Precalculate register class to simpler reload register class. We don't
2726 need all of the register classes that are combinations of different
2727 classes, just the simple ones that have constraint letters. */
2728 for (c = 0; c < N_REG_CLASSES; c++)
2729 reg_class_to_reg_type[c] = NO_REG_TYPE;
2731 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2734 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2735 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2736 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2737 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2738 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2739 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2740 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2742 if (TARGET_VSX)
2744 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2745 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2747 else
2749 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2750 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2753   /* Precalculate the valid memory formats as well as the vector information;
2754 this must be set up before the rs6000_hard_regno_nregs_internal calls
2755 below. */
2756 gcc_assert ((int)VECTOR_NONE == 0);
2757 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2758 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2760 gcc_assert ((int)CODE_FOR_nothing == 0);
2761 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2763 gcc_assert ((int)NO_REGS == 0);
2764 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2766   /* The VSX hardware allows native alignment for vectors, but we control whether
2767      the compiler believes it can use native alignment or still uses 128-bit alignment.  */
2768 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2770 align64 = 64;
2771 align32 = 32;
2773 else
2775 align64 = 128;
2776 align32 = 128;
2779 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2780 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2781 if (TARGET_FLOAT128_TYPE)
2783 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2784 rs6000_vector_align[KFmode] = 128;
2786 if (FLOAT128_IEEE_P (TFmode))
2788 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2789 rs6000_vector_align[TFmode] = 128;
2793 /* V2DF mode, VSX only. */
2794 if (TARGET_VSX)
2796 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2797 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2798 rs6000_vector_align[V2DFmode] = align64;
2801 /* V4SF mode, either VSX or Altivec. */
2802 if (TARGET_VSX)
2804 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2806 rs6000_vector_align[V4SFmode] = align32;
2808 else if (TARGET_ALTIVEC)
2810 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2811 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2812 rs6000_vector_align[V4SFmode] = align32;
2815 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2816 and stores. */
2817 if (TARGET_ALTIVEC)
2819 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2820 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2821 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2822 rs6000_vector_align[V4SImode] = align32;
2823 rs6000_vector_align[V8HImode] = align32;
2824 rs6000_vector_align[V16QImode] = align32;
2826 if (TARGET_VSX)
2828 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2829 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2830 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2832 else
2834 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2835 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2836 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2840 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2841 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2842 if (TARGET_VSX)
2844 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2845 rs6000_vector_unit[V2DImode]
2846 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2847 rs6000_vector_align[V2DImode] = align64;
2849 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2850 rs6000_vector_unit[V1TImode]
2851 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2852 rs6000_vector_align[V1TImode] = 128;
2855 /* DFmode, see if we want to use the VSX unit. Memory is handled
2856 differently, so don't set rs6000_vector_mem. */
2857 if (TARGET_VSX)
2859 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2860 rs6000_vector_align[DFmode] = 64;
2863 /* SFmode, see if we want to use the VSX unit. */
2864 if (TARGET_P8_VECTOR)
2866 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2867 rs6000_vector_align[SFmode] = 32;
2870 /* Allow TImode in VSX register and set the VSX memory macros. */
2871 if (TARGET_VSX)
2873 rs6000_vector_mem[TImode] = VECTOR_VSX;
2874 rs6000_vector_align[TImode] = align64;
2877 /* Register class constraints for the constraints that depend on compile
2878 switches. When the VSX code was added, different constraints were added
2879 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2880 of the VSX registers are used. The register classes for scalar floating
2881      point types are set, based on whether we allow that type into the upper
2882 (Altivec) registers. GCC has register classes to target the Altivec
2883 registers for load/store operations, to select using a VSX memory
2884 operation instead of the traditional floating point operation. The
2885 constraints are:
2887 d - Register class to use with traditional DFmode instructions.
2888 f - Register class to use with traditional SFmode instructions.
2889 v - Altivec register.
2890 wa - Any VSX register.
2891 wc - Reserved to represent individual CR bits (used in LLVM).
2892 wn - always NO_REGS.
2893 wr - GPR if 64-bit mode is permitted.
2894 wx - Float register if we can do 32-bit int stores. */
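/* Illustrative use of these constraints in a machine-description
   alternative: an operand written as "=d" only accepts a traditional FPR,
   while "=wa" accepts any of the 64 VSX registers, letting a single pattern
   cover both register files.  */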
2896 if (TARGET_HARD_FLOAT)
2898 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2899 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2902 if (TARGET_VSX)
2903 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2905 /* Add conditional constraints based on various options, to allow us to
2906 collapse multiple insn patterns. */
2907 if (TARGET_ALTIVEC)
2908 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2910 if (TARGET_POWERPC64)
2912 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2913 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2916 if (TARGET_STFIWX)
2917 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2919   /* Support for new direct moves (ISA 3.0 + 64-bit).  */
2920 if (TARGET_DIRECT_MOVE_128)
2921 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2923 /* Set up the reload helper and direct move functions. */
2924 if (TARGET_VSX || TARGET_ALTIVEC)
2926 if (TARGET_64BIT)
2928 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2929 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2930 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2931 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2932 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2933 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2934 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2935 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2936 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2937 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2938 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2939 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2940 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2941 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2942 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2943 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2944 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2945 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2946 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2947 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2949 if (FLOAT128_VECTOR_P (KFmode))
2951 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2952 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2955 if (FLOAT128_VECTOR_P (TFmode))
2957 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2958 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2961 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2962 available. */
2963 if (TARGET_NO_SDMODE_STACK)
2965 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2966 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2969 if (TARGET_VSX)
2971 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2972 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2975 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2977 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2978 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2979 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2980 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2981 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2982 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2983 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2984 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2985 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2987 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2988 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2989 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2990 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2991 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2992 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2993 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2994 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2995 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2997 if (FLOAT128_VECTOR_P (KFmode))
2999 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3000 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3003 if (FLOAT128_VECTOR_P (TFmode))
3005 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3006 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3010 else
3012 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3013 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3014 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3015 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3016 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3017 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3018 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3019 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3020 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3021 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3022 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3023 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3024 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3025 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3026 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3027 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3028 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3029 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3030 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3031 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3033 if (FLOAT128_VECTOR_P (KFmode))
3035 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3036 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3039 if (FLOAT128_IEEE_P (TFmode))
3041 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3042 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3045 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3046 available. */
3047 if (TARGET_NO_SDMODE_STACK)
3049 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3050 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3053 if (TARGET_VSX)
3055 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3056 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3059 if (TARGET_DIRECT_MOVE)
3061 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3062 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3063 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3067 reg_addr[DFmode].scalar_in_vmx_p = true;
3068 reg_addr[DImode].scalar_in_vmx_p = true;
3070 if (TARGET_P8_VECTOR)
3072 reg_addr[SFmode].scalar_in_vmx_p = true;
3073 reg_addr[SImode].scalar_in_vmx_p = true;
3075 if (TARGET_P9_VECTOR)
3077 reg_addr[HImode].scalar_in_vmx_p = true;
3078 reg_addr[QImode].scalar_in_vmx_p = true;
3083 /* Precalculate HARD_REGNO_NREGS. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_nregs[m][r]
3087 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3089 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3090 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3091 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3092 rs6000_hard_regno_mode_ok_p[m][r]
3093 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3095 /* Precalculate CLASS_MAX_NREGS sizes. */
3096 for (c = 0; c < LIM_REG_CLASSES; ++c)
3098 int reg_size;
3100 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3101 reg_size = UNITS_PER_VSX_WORD;
3103 else if (c == ALTIVEC_REGS)
3104 reg_size = UNITS_PER_ALTIVEC_WORD;
3106 else if (c == FLOAT_REGS)
3107 reg_size = UNITS_PER_FP_WORD;
3109 else
3110 reg_size = UNITS_PER_WORD;
3112 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3114 machine_mode m2 = (machine_mode)m;
3115 int reg_size2 = reg_size;
3117 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3118 in VSX. */
3119 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3120 reg_size2 = UNITS_PER_FP_WORD;
3122 rs6000_class_max_nregs[m][c]
3123 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3127   /* Calculate the modes for which to automatically generate code using the
3128      reciprocal divide and square root instructions.  In the future, possibly
3129      generate the instructions automatically even if the user did not specify
3130      -mrecip.  The older machines' double precision reciprocal sqrt estimate is
3131      not accurate enough.  */
3132 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3133 if (TARGET_FRES)
3134 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3135 if (TARGET_FRE)
3136 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3137 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3138 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3139 if (VECTOR_UNIT_VSX_P (V2DFmode))
3140 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3142 if (TARGET_FRSQRTES)
3143 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3144 if (TARGET_FRSQRTE)
3145 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3146 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3147 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3148 if (VECTOR_UNIT_VSX_P (V2DFmode))
3149 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3151 if (rs6000_recip_control)
3153 if (!flag_finite_math_only)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3155 "-ffast-math");
3156 if (flag_trapping_math)
3157 warning (0, "%qs requires %qs or %qs", "-mrecip",
3158 "-fno-trapping-math", "-ffast-math");
3159 if (!flag_reciprocal_math)
3160 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3161 "-ffast-math");
3162 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3164 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3165 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3166 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3168 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3169 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3170 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3172 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3173 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3174 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3176 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3177 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3178 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3180 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3181 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3182 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3184 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3185 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3186 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3188 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3189 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3190 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3192 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3193 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3194 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3198   /* Update the addr mask bits in reg_addr to help the secondary reload and
3199      legitimate address support figure out the appropriate addressing to
3200      use.  */
3201 rs6000_setup_reg_addr_masks ();
3203 if (global_init_p || TARGET_DEBUG_TARGET)
3205 if (TARGET_DEBUG_REG)
3206 rs6000_debug_reg_global ();
3208 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3209 fprintf (stderr,
3210 "SImode variable mult cost = %d\n"
3211 "SImode constant mult cost = %d\n"
3212 "SImode short constant mult cost = %d\n"
3213 "DImode multipliciation cost = %d\n"
3214 "SImode division cost = %d\n"
3215 "DImode division cost = %d\n"
3216 "Simple fp operation cost = %d\n"
3217 "DFmode multiplication cost = %d\n"
3218 "SFmode division cost = %d\n"
3219 "DFmode division cost = %d\n"
3220 "cache line size = %d\n"
3221 "l1 cache size = %d\n"
3222 "l2 cache size = %d\n"
3223 "simultaneous prefetches = %d\n"
3224 "\n",
3225 rs6000_cost->mulsi,
3226 rs6000_cost->mulsi_const,
3227 rs6000_cost->mulsi_const9,
3228 rs6000_cost->muldi,
3229 rs6000_cost->divsi,
3230 rs6000_cost->divdi,
3231 rs6000_cost->fp,
3232 rs6000_cost->dmul,
3233 rs6000_cost->sdiv,
3234 rs6000_cost->ddiv,
3235 rs6000_cost->cache_line_size,
3236 rs6000_cost->l1_cache_size,
3237 rs6000_cost->l2_cache_size,
3238 rs6000_cost->simultaneous_prefetches);
3242 #if TARGET_MACHO
3243 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3245 static void
3246 darwin_rs6000_override_options (void)
3248 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3249 off. */
3250 rs6000_altivec_abi = 1;
3251 TARGET_ALTIVEC_VRSAVE = 1;
3252 rs6000_current_abi = ABI_DARWIN;
3254 if (DEFAULT_ABI == ABI_DARWIN
3255 && TARGET_64BIT)
3256 darwin_one_byte_bool = 1;
3258 if (TARGET_64BIT && ! TARGET_POWERPC64)
3260 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3261 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3264   /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3265      optimisation, and will not work with the most generic case (where the
3266      symbol is an undefined external, but there is no symbol stub).  */
3267 if (TARGET_64BIT)
3268 rs6000_default_long_calls = 0;
3270 /* ld_classic is (so far) still used for kernel (static) code, and supports
3271 the JBSR longcall / branch islands. */
3272 if (flag_mkernel)
3274 rs6000_default_long_calls = 1;
3276 /* Allow a kext author to do -mkernel -mhard-float. */
3277 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3278 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3281 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3282 Altivec. */
3283 if (!flag_mkernel && !flag_apple_kext
3284 && TARGET_64BIT
3285 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3286 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3288 /* Unless the user (not the configurer) has explicitly overridden
3289      it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3290 G4 unless targeting the kernel. */
3291 if (!flag_mkernel
3292 && !flag_apple_kext
3293 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3294 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3295 && ! global_options_set.x_rs6000_cpu_index)
3297 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3300 #endif
3302 /* If not otherwise specified by a target, make 'long double' equivalent to
3303 'double'. */
3305 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3306 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3307 #endif
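/* Illustrative: with the 64-bit default, 'long double' maps to DFmode and
   has the same size and representation as 'double'.  */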
3309 /* Return the builtin mask of the various options used that could affect which
3310 builtins were used. In the past we used target_flags, but we've run out of
3311 bits, and some options are no longer in target_flags. */
3313 HOST_WIDE_INT
3314 rs6000_builtin_mask_calculate (void)
3316 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3317 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3318 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3319 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3320 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3321 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3322 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3323 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3324 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3325 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3326 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3327 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3328 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3329 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3330 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3331 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3332 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3333 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3334 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3335 | ((TARGET_LONG_DOUBLE_128
3336 && TARGET_HARD_FLOAT
3337 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3338 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3339 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
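/* Illustrative: a power8 compilation with VSX enabled would set at least
   RS6000_BTM_ALTIVEC | RS6000_BTM_VSX | RS6000_BTM_P8_VECTOR in the mask,
   so only the built-in functions guarded by those bits become available.  */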
3342 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3343 to clobber the XER[CA] bit because clobbering that bit without telling
3344 the compiler worked just fine with versions of GCC before GCC 5, and
3345 breaking a lot of older code in ways that are hard to track down is
3346 not such a great idea. */
3348 static rtx_insn *
3349 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3350 vec<const char *> &/*constraints*/,
3351 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3353 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3354 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3355 return NULL;
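/* Illustrative: an older asm such as

     asm ("addic %0,%1,1" : "=r" (x) : "r" (y));

   modifies XER[CA] without declaring it; the implicit clobber added above
   keeps such code working.  */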
3358 /* Override command line options.
3360 Combine build-specific configuration information with options
3361 specified on the command line to set various state variables which
3362 influence code generation, optimization, and expansion of built-in
3363 functions. Assure that command-line configuration preferences are
3364 compatible with each other and with the build configuration; issue
3365 warnings while adjusting configuration or error messages while
3366 rejecting configuration.
3368 Upon entry to this function:
3370 This function is called once at the beginning of
3371 compilation, and then again at the start and end of compiling
3372 each section of code that has a different configuration, as
3373 indicated, for example, by adding the
3375 __attribute__((__target__("cpu=power9")))
3377 qualifier to a function definition or, for example, by bracketing
3378 code between
3380 #pragma GCC target("altivec")
3384 #pragma GCC reset_options
3386 directives. Parameter global_init_p is true for the initial
3387 invocation, which initializes global variables, and false for all
3388 subsequent invocations.
3391 Various global state information is assumed to be valid. This
3392 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3393 default CPU specified at build configure time, TARGET_DEFAULT,
3394 representing the default set of option flags for the default
3395 target, and global_options_set.x_rs6000_isa_flags, representing
3396 which options were requested on the command line.
3398 Upon return from this function:
3400 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3401 was set by name on the command line. Additionally, if certain
3402 attributes are automatically enabled or disabled by this function
3403 in order to assure compatibility between options and
3404 configuration, the flags associated with those attributes are
3405 also set. By setting these "explicit bits", we avoid the risk
3406 that other code might accidentally overwrite these particular
3407 attributes with "default values".
3409 The various bits of rs6000_isa_flags are set to indicate the
3410 target options that have been selected for the most current
3411 compilation efforts. This has the effect of also turning on the
3412 associated TARGET_XXX values since these are macros which are
3413 generally defined to test the corresponding bit of the
3414 rs6000_isa_flags variable.
3416 The variable rs6000_builtin_mask is set to represent the target
3417 options for the most current compilation efforts, consistent with
3418 the current contents of rs6000_isa_flags. This variable controls
3419 expansion of built-in functions.
3421 Various other global variables and fields of global structures
3422 (over 50 in all) are initialized to reflect the desired options
3423 for the most current compilation efforts. */
3425 static bool
3426 rs6000_option_override_internal (bool global_init_p)
3428 bool ret = true;
3430 HOST_WIDE_INT set_masks;
3431 HOST_WIDE_INT ignore_masks;
3432 int cpu_index = -1;
3433 int tune_index;
3434 struct cl_target_option *main_target_opt
3435 = ((global_init_p || target_option_default_node == NULL)
3436 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3438 /* Print defaults. */
3439 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3440 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3442 /* Remember the explicit arguments. */
3443 if (global_init_p)
3444 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3446 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3447 library functions, so warn about it. The flag may be useful for
3448 performance studies from time to time though, so don't disable it
3449 entirely. */
3450 if (global_options_set.x_rs6000_alignment_flags
3451 && rs6000_alignment_flags == MASK_ALIGN_POWER
3452 && DEFAULT_ABI == ABI_DARWIN
3453 && TARGET_64BIT)
3454 warning (0, "%qs is not supported for 64-bit Darwin;"
3455 " it is incompatible with the installed C and C++ libraries",
3456 "-malign-power");
3458 /* Numerous experiments show that IRA-based loop pressure
3459 calculation works better for RTL loop invariant motion on targets
3460 with enough (>= 32) registers. It is an expensive optimization,
3461 so it is enabled only when optimizing for peak performance. */
3462 if (optimize >= 3 && global_init_p
3463 && !global_options_set.x_flag_ira_loop_pressure)
3464 flag_ira_loop_pressure = 1;
3466 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3467 for tracebacks to be complete, but not if an -fasynchronous-unwind-tables
3468 option was already specified explicitly. */
3469 if (flag_sanitize & SANITIZE_USER_ADDRESS
3470 && !global_options_set.x_flag_asynchronous_unwind_tables)
3471 flag_asynchronous_unwind_tables = 1;
3473 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3474 loop unroller is active. It is only checked during unrolling, so
3475 we can just set it on by default. */
3476 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3477 flag_variable_expansion_in_unroller = 1;
3479 /* Set the pointer size. */
3480 if (TARGET_64BIT)
3482 rs6000_pmode = DImode;
3483 rs6000_pointer_size = 64;
3485 else
3487 rs6000_pmode = SImode;
3488 rs6000_pointer_size = 32;
3491 /* Some OSs don't support saving the high part of 64-bit registers on context
3492 switch. Other OSs don't support saving Altivec registers. On those OSs,
3493 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3494 if the user wants either, the user must explicitly specify them and we
3495 won't interfere with the user's specification. */
3497 set_masks = POWERPC_MASKS;
3498 #ifdef OS_MISSING_POWERPC64
3499 if (OS_MISSING_POWERPC64)
3500 set_masks &= ~OPTION_MASK_POWERPC64;
3501 #endif
3502 #ifdef OS_MISSING_ALTIVEC
3503 if (OS_MISSING_ALTIVEC)
3504 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3505 | OTHER_VSX_VECTOR_MASKS);
3506 #endif
3508 /* Don't override options the user gave explicitly with the processor default. */
3509 set_masks &= ~rs6000_isa_flags_explicit;
3511 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3512 the cpu in a target attribute or pragma, but did not specify a tuning
3513 option, use the cpu for the tuning option rather than the option specified
3514 with -mtune on the command line. Process a '--with-cpu' configuration
3515 request as an implicit --cpu. */
3516 if (rs6000_cpu_index >= 0)
3517 cpu_index = rs6000_cpu_index;
3518 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3519 cpu_index = main_target_opt->x_rs6000_cpu_index;
3520 else if (OPTION_TARGET_CPU_DEFAULT)
3521 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3523 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3524 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3525 with those from the cpu, except for options that were explicitly set. If
3526 we don't have a cpu, do not override the target bits set in
3527 TARGET_DEFAULT. */
3528 if (cpu_index >= 0)
3530 rs6000_cpu_index = cpu_index;
3531 rs6000_isa_flags &= ~set_masks;
3532 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3533 & set_masks);
3535 else
3537 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3538 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3539 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3540 to using rs6000_isa_flags, we need to do the initialization here.
3542 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3543 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3544 HOST_WIDE_INT flags;
3545 if (TARGET_DEFAULT)
3546 flags = TARGET_DEFAULT;
3547 else
3549 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3550 const char *default_cpu = (!TARGET_POWERPC64
3551 ? "powerpc"
3552 : (BYTES_BIG_ENDIAN
3553 ? "powerpc64"
3554 : "powerpc64le"));
3555 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3556 flags = processor_target_table[default_cpu_index].target_enable;
3558 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3561 if (rs6000_tune_index >= 0)
3562 tune_index = rs6000_tune_index;
3563 else if (cpu_index >= 0)
3564 rs6000_tune_index = tune_index = cpu_index;
3565 else
3567 size_t i;
3568 enum processor_type tune_proc
3569 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3571 tune_index = -1;
3572 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3573 if (processor_target_table[i].processor == tune_proc)
3575 tune_index = i;
3576 break;
3580 if (cpu_index >= 0)
3581 rs6000_cpu = processor_target_table[cpu_index].processor;
3582 else
3583 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3585 gcc_assert (tune_index >= 0);
3586 rs6000_tune = processor_target_table[tune_index].processor;
3588 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3589 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3590 || rs6000_cpu == PROCESSOR_PPCE5500)
3592 if (TARGET_ALTIVEC)
3593 error ("AltiVec not supported in this target");
3596 /* If we are optimizing big endian systems for space, use the load/store
3597 multiple instructions. */
3598 if (BYTES_BIG_ENDIAN && optimize_size)
3599 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3601 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3602 because the hardware doesn't support the instructions used in little
3603 endian mode, and using them causes an alignment trap. The 750 does not
3604 cause an alignment trap (except when the target address is unaligned). */
3606 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3608 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3609 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3610 warning (0, "%qs is not supported on little endian systems",
3611 "-mmultiple");
3614 /* If little-endian, default to -mstrict-align on older processors.
3615 Testing for HTM matches power8 and later. */
3616 if (!BYTES_BIG_ENDIAN
3617 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3618 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3620 if (!rs6000_fold_gimple)
3621 fprintf (stderr,
3622 "gimple folding of rs6000 builtins has been disabled.\n");
3624 /* Add some warnings for VSX. */
3625 if (TARGET_VSX)
3627 const char *msg = NULL;
3628 if (!TARGET_HARD_FLOAT)
3630 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3631 msg = N_("%<-mvsx%> requires hardware floating point");
3632 else
3634 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3635 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3638 else if (TARGET_AVOID_XFORM > 0)
3639 msg = N_("%<-mvsx%> needs indexed addressing");
3640 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3641 & OPTION_MASK_ALTIVEC))
3643 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3644 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3645 else
3646 msg = N_("%<-mno-altivec%> disables vsx");
3649 if (msg)
3651 warning (0, msg);
3652 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3653 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3657 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3658 the -mcpu setting to enable options that conflict. */
3659 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3660 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3661 | OPTION_MASK_ALTIVEC
3662 | OPTION_MASK_VSX)) != 0)
3663 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3664 | OPTION_MASK_DIRECT_MOVE)
3665 & ~rs6000_isa_flags_explicit);
3667 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3668 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3670 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3671 off all of the options that depend on those flags. */
3672 ignore_masks = rs6000_disable_incompatible_switches ();
3674 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3675 unless the user explicitly used the -mno-<option> to disable the code. */
3676 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3677 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3678 else if (TARGET_P9_MINMAX)
3680 if (cpu_index >= 0)
3682 if (cpu_index == PROCESSOR_POWER9)
3684 /* legacy behavior: allow -mcpu=power9 with certain
3685 capabilities explicitly disabled. */
3686 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3688 else
3689 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3690 "for <xxx> less than power9", "-mcpu");
3692 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3693 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3694 & rs6000_isa_flags_explicit))
3695 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3696 were explicitly cleared. */
3697 error ("%qs incompatible with explicitly disabled options",
3698 "-mpower9-minmax");
3699 else
3700 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3702 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3703 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_VSX)
3705 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3706 else if (TARGET_POPCNTD)
3707 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3708 else if (TARGET_DFP)
3709 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3710 else if (TARGET_CMPB)
3711 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3712 else if (TARGET_FPRND)
3713 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3714 else if (TARGET_POPCNTB)
3715 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3716 else if (TARGET_ALTIVEC)
3717 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3719 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3721 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3722 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3723 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3726 if (!TARGET_FPRND && TARGET_VSX)
3728 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3729 /* TARGET_VSX = 1 implies Power7 and newer. */
3730 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3731 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3734 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3736 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3737 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3738 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3741 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3743 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3744 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3745 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3748 if (TARGET_P8_VECTOR && !TARGET_VSX)
3750 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3751 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3752 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3753 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3755 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3757 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3759 else
3761 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3762 not explicit. */
3763 rs6000_isa_flags |= OPTION_MASK_VSX;
3764 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3768 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3770 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3771 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3772 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3775 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3776 silently turn off quad memory mode. */
3777 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3779 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3780 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3782 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3783 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3785 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3786 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3789 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3790 the words are reversed, but atomic operations can still be done by
3791 swapping the words. */
3792 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3794 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3795 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3796 "mode"));
3798 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3801 /* Assume that if the user asked for normal quad memory instructions, they
3802 want the atomic versions as well, unless they explicitly told us not to use
3803 quad word atomic instructions. */
3804 if (TARGET_QUAD_MEMORY
3805 && !TARGET_QUAD_MEMORY_ATOMIC
3806 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3807 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
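   /* Editor note (illustrative; mnemonics from ISA 2.07): -mquad-memory
      covers the plain lq/stq quad-word load/store, while
      -mquad-memory-atomic covers the lqarx/stqcx. reservation pair used
      for 128-bit atomic sequences, roughly:

	loop:	lqarx  10,0,3		# load quad word, set reservation
		...			# update the r10:r11 pair
		stqcx. 10,0,3		# store only if still reserved
		bne-   loop
   */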
3809 /* If we can shrink-wrap the TOC register save separately, then use
3810 -msave-toc-indirect unless explicitly disabled. */
3811 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3812 && flag_shrink_wrap_separate
3813 && optimize_function_for_speed_p (cfun))
3814 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3816 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3817 generating power8 instructions. Power9 does not optimize power8 fusion
3818 cases. */
3819 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3821 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3822 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3823 else
3824 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3827 /* Setting additional fusion flags turns on base fusion. */
3828 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3830 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3832 if (TARGET_P8_FUSION_SIGN)
3833 error ("%qs requires %qs", "-mpower8-fusion-sign",
3834 "-mpower8-fusion");
3836 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3838 else
3839 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3842 /* Power8 does not fuse sign-extended loads with the addis. If we are
3843 optimizing at high levels for speed, convert a sign-extended load into a
3844 zero-extending load and an explicit sign extension. */
3845 if (TARGET_P8_FUSION
3846 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3847 && optimize_function_for_speed_p (cfun)
3848 && optimize >= 3)
3849 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
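   /* Editor sketch of the transformation described above (the assembly
      is illustrative): power8 fuses addis with an adjacent
      zero-extending D-form load but not with a sign-extending load,
      so at -O3 a sequence like

	addis 9,2,var@toc@ha
	lha   10,var@toc@l(9)		# not a fusable pair

      is emitted instead as

	addis 9,2,var@toc@ha
	lhz   10,var@toc@l(9)		# fusable addis/load pair
	extsh 10,10			# explicit sign extension.  */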
3851 /* ISA 3.0 vector instructions include ISA 2.07. */
3852 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3854 /* We prefer not to mention undocumented options in
3855 error messages. However, if users have managed to select
3856 power9-vector without selecting power8-vector, they
3857 already know about undocumented flags. */
3858 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
3859 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3860 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3861 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3863 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3864 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3865 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3867 else
3869 /* OPTION_MASK_P9_VECTOR is explicit and
3870 OPTION_MASK_P8_VECTOR is not explicit. */
3871 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3872 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3876 /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
3877 support. If we only have ISA 2.06 support, and the user did not specify
3878 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3879 but we don't enable the full vectorization support. */
3880 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3881 TARGET_ALLOW_MOVMISALIGN = 1;
3883 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3885 if (TARGET_ALLOW_MOVMISALIGN > 0
3886 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3887 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3889 TARGET_ALLOW_MOVMISALIGN = 0;
3892 /* Determine when unaligned vector accesses are permitted, and when
3893 they are preferred over masked Altivec loads. Note that if
3894 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3895 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3896 not true. */
3897 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3899 if (!TARGET_VSX)
3901 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3902 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3904 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3907 else if (!TARGET_ALLOW_MOVMISALIGN)
3909 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3910 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
3911 "-mallow-movmisalign");
3913 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3917 /* Use long double size to select the appropriate long double. We use
3918 TYPE_PRECISION to differentiate the 3 different long double types. We map
3919 128 into the precision used for TFmode. */
3920 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3921 ? 64
3922 : FLOAT_PRECISION_TFmode);
3924 /* Set long double size before the IEEE 128-bit tests. */
3925 if (!global_options_set.x_rs6000_long_double_type_size)
3927 if (main_target_opt != NULL
3928 && (main_target_opt->x_rs6000_long_double_type_size
3929 != default_long_double_size))
3930 error ("target attribute or pragma changes %<long double%> size");
3931 else
3932 rs6000_long_double_type_size = default_long_double_size;
3934 else if (rs6000_long_double_type_size == 128)
3935 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3936 else if (global_options_set.x_rs6000_ieeequad)
3938 if (global_options.x_rs6000_ieeequad)
3939 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3940 else
3941 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
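   /* Editor summary of the size/ABI interplay above (illustrative):
	-mlong-double-128 -mabi=ieeelongdouble   -> IEEE 128-bit long double
	-mlong-double-128 -mabi=ibmlongdouble    -> IBM double-double
	-mlong-double-64 with either -mabi= form -> rejected just above.  */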
3944 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3945 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3946 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3947 those systems will not pick up this default. Warn if the user changes the
3948 default unless -Wno-psabi. */
3949 if (!global_options_set.x_rs6000_ieeequad)
3950 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3952 else
3954 if (global_options.x_rs6000_ieeequad
3955 && (!TARGET_POPCNTD || !TARGET_VSX))
3956 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3958 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3960 static bool warned_change_long_double;
3961 if (!warned_change_long_double)
3963 warned_change_long_double = true;
3964 if (TARGET_IEEEQUAD)
3965 warning (OPT_Wpsabi, "Using IEEE extended precision "
3966 "%<long double%>");
3967 else
3968 warning (OPT_Wpsabi, "Using IBM extended precision "
3969 "%<long double%>");
3974 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3975 systems. In GCC 7, we would enable the IEEE 128-bit floating point
3976 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3977 unless the user explicitly used -mfloat128. In GCC 8, we enable both the
3978 keyword and the type. */
3979 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3981 /* IEEE 128-bit floating point requires VSX support. */
3982 if (TARGET_FLOAT128_KEYWORD)
3984 if (!TARGET_VSX)
3986 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3987 error ("%qs requires VSX support", "-mfloat128");
3989 TARGET_FLOAT128_TYPE = 0;
3990 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3991 | OPTION_MASK_FLOAT128_HW);
3993 else if (!TARGET_FLOAT128_TYPE)
3995 TARGET_FLOAT128_TYPE = 1;
3996 warning (0, "The %<-mfloat128%> option may not be fully supported");
4000 /* Enable the __float128 keyword under Linux by default. */
4001 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4002 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4003 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
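   /* Editor sketch (user-level, hypothetical): once the type and the
      keyword are enabled, source such as

	__float128 q = 1.0Q;
	q = q * q;

      is accepted, while -mno-float128 rejects the __float128 keyword
      even where the underlying type infrastructure exists.  */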
4005 /* If we are supporting the float128 type and have full ISA 3.0 support,
4006 enable -mfloat128-hardware by default. However, don't enable the
4007 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4008 because sometimes the compiler wants to put things in an integer
4009 container, and if we don't have __int128 support, it is impossible. */
4010 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4011 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4012 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4013 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4015 if (TARGET_FLOAT128_HW
4016 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4018 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4019 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4021 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4024 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4026 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4027 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4029 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4032 /* Enable -mprefixed by default on 'future' systems. */
4033 if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4034 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4036 /* -mprefixed requires -mcpu=future. */
4037 else if (TARGET_PREFIXED && !TARGET_FUTURE)
4039 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4040 error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
4042 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4045 /* -mpcrel requires prefixed load/store addressing. */
4046 if (TARGET_PCREL && !TARGET_PREFIXED)
4048 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4049 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4051 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4054 /* Print the options after updating the defaults. */
4055 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4056 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4058 /* E500mc does "better" if we inline more aggressively. Respect the
4059 user's opinion, though. */
4060 if (rs6000_block_move_inline_limit == 0
4061 && (rs6000_tune == PROCESSOR_PPCE500MC
4062 || rs6000_tune == PROCESSOR_PPCE500MC64
4063 || rs6000_tune == PROCESSOR_PPCE5500
4064 || rs6000_tune == PROCESSOR_PPCE6500))
4065 rs6000_block_move_inline_limit = 128;
4067 /* store_one_arg depends on expand_block_move to handle at least the
4068 size of reg_parm_stack_space. */
4069 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4070 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4072 if (global_init_p)
4074 /* If the appropriate debug option is enabled, replace the target hooks
4075 with debug versions that call the real version and then prints
4076 debugging information. */
4077 if (TARGET_DEBUG_COST)
4079 targetm.rtx_costs = rs6000_debug_rtx_costs;
4080 targetm.address_cost = rs6000_debug_address_cost;
4081 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4084 if (TARGET_DEBUG_ADDR)
4086 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4087 targetm.legitimize_address = rs6000_debug_legitimize_address;
4088 rs6000_secondary_reload_class_ptr
4089 = rs6000_debug_secondary_reload_class;
4090 targetm.secondary_memory_needed
4091 = rs6000_debug_secondary_memory_needed;
4092 targetm.can_change_mode_class
4093 = rs6000_debug_can_change_mode_class;
4094 rs6000_preferred_reload_class_ptr
4095 = rs6000_debug_preferred_reload_class;
4096 rs6000_mode_dependent_address_ptr
4097 = rs6000_debug_mode_dependent_address;
4100 if (rs6000_veclibabi_name)
4102 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4103 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4104 else
4106 error ("unknown vectorization library ABI type (%qs) for "
4107 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4108 ret = false;
4113 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4114 target attribute or pragma which automatically enables both options,
4115 unless the altivec ABI was set. This is set by default for 64-bit, but
4116 not for 32-bit. */
4117 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4119 TARGET_FLOAT128_TYPE = 0;
4120 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4121 | OPTION_MASK_FLOAT128_KEYWORD)
4122 & ~rs6000_isa_flags_explicit);
4125 /* Enable Altivec ABI for AIX -maltivec. */
4126 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4128 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4129 error ("target attribute or pragma changes AltiVec ABI");
4130 else
4131 rs6000_altivec_abi = 1;
4134 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4135 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4136 be explicitly overridden in either case. */
4137 if (TARGET_ELF)
4139 if (!global_options_set.x_rs6000_altivec_abi
4140 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4142 if (main_target_opt != NULL &&
4143 !main_target_opt->x_rs6000_altivec_abi)
4144 error ("target attribute or pragma changes AltiVec ABI");
4145 else
4146 rs6000_altivec_abi = 1;
4150 /* Set the Darwin64 ABI as the default for 64-bit Darwin.
4151 So far, the only darwin64 targets are also Mach-O. */
4152 if (TARGET_MACHO
4153 && DEFAULT_ABI == ABI_DARWIN
4154 && TARGET_64BIT)
4156 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4157 error ("target attribute or pragma changes darwin64 ABI");
4158 else
4160 rs6000_darwin64_abi = 1;
4161 /* Default to natural alignment, for better performance. */
4162 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4166 /* Place FP constants in the constant pool instead of the TOC
4167 if section anchors are enabled. */
4168 if (flag_section_anchors
4169 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4170 TARGET_NO_FP_IN_TOC = 1;
4172 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4173 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4175 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4176 SUBTARGET_OVERRIDE_OPTIONS;
4177 #endif
4178 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4179 SUBSUBTARGET_OVERRIDE_OPTIONS;
4180 #endif
4181 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4182 SUB3TARGET_OVERRIDE_OPTIONS;
4183 #endif
4185 /* If the ABI has support for PC-relative relocations, enable it by default.
4186 This test depends on the sub-target tests above setting the code model to
4187 medium for ELF v2 systems. */
4188 if (PCREL_SUPPORTED_BY_OS
4189 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4190 rs6000_isa_flags |= OPTION_MASK_PCREL;
4192 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4193 after the subtarget override options are done. */
4194 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4196 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4197 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4199 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4202 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4203 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4205 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4206 && rs6000_tune != PROCESSOR_POWER5
4207 && rs6000_tune != PROCESSOR_POWER6
4208 && rs6000_tune != PROCESSOR_POWER7
4209 && rs6000_tune != PROCESSOR_POWER8
4210 && rs6000_tune != PROCESSOR_POWER9
4211 && rs6000_tune != PROCESSOR_FUTURE
4212 && rs6000_tune != PROCESSOR_PPCA2
4213 && rs6000_tune != PROCESSOR_CELL
4214 && rs6000_tune != PROCESSOR_PPC476);
4215 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4216 || rs6000_tune == PROCESSOR_POWER5
4217 || rs6000_tune == PROCESSOR_POWER7
4218 || rs6000_tune == PROCESSOR_POWER8);
4219 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4220 || rs6000_tune == PROCESSOR_POWER5
4221 || rs6000_tune == PROCESSOR_POWER6
4222 || rs6000_tune == PROCESSOR_POWER7
4223 || rs6000_tune == PROCESSOR_POWER8
4224 || rs6000_tune == PROCESSOR_POWER9
4225 || rs6000_tune == PROCESSOR_FUTURE
4226 || rs6000_tune == PROCESSOR_PPCE500MC
4227 || rs6000_tune == PROCESSOR_PPCE500MC64
4228 || rs6000_tune == PROCESSOR_PPCE5500
4229 || rs6000_tune == PROCESSOR_PPCE6500);
4231 /* Allow debug switches to override the above settings. These are set to -1
4232 in rs6000.opt to indicate the user hasn't directly set the switch. */
4233 if (TARGET_ALWAYS_HINT >= 0)
4234 rs6000_always_hint = TARGET_ALWAYS_HINT;
4236 if (TARGET_SCHED_GROUPS >= 0)
4237 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4239 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4240 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4242 rs6000_sched_restricted_insns_priority
4243 = (rs6000_sched_groups ? 1 : 0);
4245 /* Handle -msched-costly-dep option. */
4246 rs6000_sched_costly_dep
4247 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4249 if (rs6000_sched_costly_dep_str)
4251 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4252 rs6000_sched_costly_dep = no_dep_costly;
4253 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4254 rs6000_sched_costly_dep = all_deps_costly;
4255 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4256 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4257 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4258 rs6000_sched_costly_dep = store_to_load_dep_costly;
4259 else
4260 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4261 atoi (rs6000_sched_costly_dep_str));
4264 /* Handle -minsert-sched-nops option. */
4265 rs6000_sched_insert_nops
4266 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4268 if (rs6000_sched_insert_nops_str)
4270 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4271 rs6000_sched_insert_nops = sched_finish_none;
4272 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4273 rs6000_sched_insert_nops = sched_finish_pad_groups;
4274 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4275 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4276 else
4277 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4278 atoi (rs6000_sched_insert_nops_str));
4281 /* Handle stack protector */
4282 if (!global_options_set.x_rs6000_stack_protector_guard)
4283 #ifdef TARGET_THREAD_SSP_OFFSET
4284 rs6000_stack_protector_guard = SSP_TLS;
4285 #else
4286 rs6000_stack_protector_guard = SSP_GLOBAL;
4287 #endif
4289 #ifdef TARGET_THREAD_SSP_OFFSET
4290 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4291 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4292 #endif
4294 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4296 char *endp;
4297 const char *str = rs6000_stack_protector_guard_offset_str;
4299 errno = 0;
4300 long offset = strtol (str, &endp, 0);
4301 if (!*str || *endp || errno)
4302 error ("%qs is not a valid number in %qs", str,
4303 "-mstack-protector-guard-offset=");
4305 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4306 || (TARGET_64BIT && (offset & 3)))
4307 error ("%qs is not a valid offset in %qs", str,
4308 "-mstack-protector-guard-offset=");
4310 rs6000_stack_protector_guard_offset = offset;
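   /* Editor sketch of the accepted forms (values hypothetical):

	-mstack-protector-guard=tls
	-mstack-protector-guard-reg=r13
	-mstack-protector-guard-offset=0x28

      Per the checks above, the offset must fit in a signed 16-bit
      displacement and be word-aligned for -m64.  */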
4313 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4315 const char *str = rs6000_stack_protector_guard_reg_str;
4316 int reg = decode_reg_name (str);
4318 if (!IN_RANGE (reg, 1, 31))
4319 error ("%qs is not a valid base register in %qs", str,
4320 "-mstack-protector-guard-reg=");
4322 rs6000_stack_protector_guard_reg = reg;
4325 if (rs6000_stack_protector_guard == SSP_TLS
4326 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4327 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4329 if (global_init_p)
4331 #ifdef TARGET_REGNAMES
4332 /* If the user desires alternate register names, copy in the
4333 alternate names now. */
4334 if (TARGET_REGNAMES)
4335 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4336 #endif
4338 /* Set aix_struct_return last, after the ABI is determined.
4339 If -maix-struct-return or -msvr4-struct-return was explicitly
4340 used, don't override with the ABI default. */
4341 if (!global_options_set.x_aix_struct_return)
4342 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4344 #if 0
4345 /* IBM XL compiler defaults to unsigned bitfields. */
4346 if (TARGET_XL_COMPAT)
4347 flag_signed_bitfields = 0;
4348 #endif
4350 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4351 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4353 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4355 /* We can only guarantee the availability of DI pseudo-ops when
4356 assembling for 64-bit targets. */
4357 if (!TARGET_64BIT)
4359 targetm.asm_out.aligned_op.di = NULL;
4360 targetm.asm_out.unaligned_op.di = NULL;
4364 /* Set branch target alignment, if not optimizing for size. */
4365 if (!optimize_size)
4367 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4368 8-byte aligned to avoid misprediction by the branch predictor. */
4369 if (rs6000_tune == PROCESSOR_TITAN
4370 || rs6000_tune == PROCESSOR_CELL)
4372 if (flag_align_functions && !str_align_functions)
4373 str_align_functions = "8";
4374 if (flag_align_jumps && !str_align_jumps)
4375 str_align_jumps = "8";
4376 if (flag_align_loops && !str_align_loops)
4377 str_align_loops = "8";
4379 if (rs6000_align_branch_targets)
4381 if (flag_align_functions && !str_align_functions)
4382 str_align_functions = "16";
4383 if (flag_align_jumps && !str_align_jumps)
4384 str_align_jumps = "16";
4385 if (flag_align_loops && !str_align_loops)
4387 can_override_loop_align = 1;
4388 str_align_loops = "16";
4393 /* Arrange to save and restore machine status around nested functions. */
4394 init_machine_status = rs6000_init_machine_status;
4396 /* We should always be splitting complex arguments, but we can't break
4397 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4398 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4399 targetm.calls.split_complex_arg = NULL;
4401 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4402 if (DEFAULT_ABI == ABI_AIX)
4403 targetm.calls.custom_function_descriptors = 0;
4406 /* Initialize rs6000_cost with the appropriate target costs. */
4407 if (optimize_size)
4408 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4409 else
4410 switch (rs6000_tune)
4412 case PROCESSOR_RS64A:
4413 rs6000_cost = &rs64a_cost;
4414 break;
4416 case PROCESSOR_MPCCORE:
4417 rs6000_cost = &mpccore_cost;
4418 break;
4420 case PROCESSOR_PPC403:
4421 rs6000_cost = &ppc403_cost;
4422 break;
4424 case PROCESSOR_PPC405:
4425 rs6000_cost = &ppc405_cost;
4426 break;
4428 case PROCESSOR_PPC440:
4429 rs6000_cost = &ppc440_cost;
4430 break;
4432 case PROCESSOR_PPC476:
4433 rs6000_cost = &ppc476_cost;
4434 break;
4436 case PROCESSOR_PPC601:
4437 rs6000_cost = &ppc601_cost;
4438 break;
4440 case PROCESSOR_PPC603:
4441 rs6000_cost = &ppc603_cost;
4442 break;
4444 case PROCESSOR_PPC604:
4445 rs6000_cost = &ppc604_cost;
4446 break;
4448 case PROCESSOR_PPC604e:
4449 rs6000_cost = &ppc604e_cost;
4450 break;
4452 case PROCESSOR_PPC620:
4453 rs6000_cost = &ppc620_cost;
4454 break;
4456 case PROCESSOR_PPC630:
4457 rs6000_cost = &ppc630_cost;
4458 break;
4460 case PROCESSOR_CELL:
4461 rs6000_cost = &ppccell_cost;
4462 break;
4464 case PROCESSOR_PPC750:
4465 case PROCESSOR_PPC7400:
4466 rs6000_cost = &ppc750_cost;
4467 break;
4469 case PROCESSOR_PPC7450:
4470 rs6000_cost = &ppc7450_cost;
4471 break;
4473 case PROCESSOR_PPC8540:
4474 case PROCESSOR_PPC8548:
4475 rs6000_cost = &ppc8540_cost;
4476 break;
4478 case PROCESSOR_PPCE300C2:
4479 case PROCESSOR_PPCE300C3:
4480 rs6000_cost = &ppce300c2c3_cost;
4481 break;
4483 case PROCESSOR_PPCE500MC:
4484 rs6000_cost = &ppce500mc_cost;
4485 break;
4487 case PROCESSOR_PPCE500MC64:
4488 rs6000_cost = &ppce500mc64_cost;
4489 break;
4491 case PROCESSOR_PPCE5500:
4492 rs6000_cost = &ppce5500_cost;
4493 break;
4495 case PROCESSOR_PPCE6500:
4496 rs6000_cost = &ppce6500_cost;
4497 break;
4499 case PROCESSOR_TITAN:
4500 rs6000_cost = &titan_cost;
4501 break;
4503 case PROCESSOR_POWER4:
4504 case PROCESSOR_POWER5:
4505 rs6000_cost = &power4_cost;
4506 break;
4508 case PROCESSOR_POWER6:
4509 rs6000_cost = &power6_cost;
4510 break;
4512 case PROCESSOR_POWER7:
4513 rs6000_cost = &power7_cost;
4514 break;
4516 case PROCESSOR_POWER8:
4517 rs6000_cost = &power8_cost;
4518 break;
4520 case PROCESSOR_POWER9:
4521 case PROCESSOR_FUTURE:
4522 rs6000_cost = &power9_cost;
4523 break;
4525 case PROCESSOR_PPCA2:
4526 rs6000_cost = &ppca2_cost;
4527 break;
4529 default:
4530 gcc_unreachable ();
4533 if (global_init_p)
4535 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4536 param_simultaneous_prefetches,
4537 rs6000_cost->simultaneous_prefetches);
4538 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4539 param_l1_cache_size,
4540 rs6000_cost->l1_cache_size);
4541 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4542 param_l1_cache_line_size,
4543 rs6000_cost->cache_line_size);
4544 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4545 param_l2_cache_size,
4546 rs6000_cost->l2_cache_size);
4548 /* Increase loop peeling limits based on performance analysis. */
4549 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4550 param_max_peeled_insns, 400);
4551 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4552 param_max_completely_peeled_insns, 400);
4554 /* Use the 'model' -fsched-pressure algorithm by default. */
4555 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4556 param_sched_pressure_algorithm,
4557 SCHED_PRESSURE_MODEL);
4559 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4560 turns -frename-registers on. */
4561 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4562 || (global_options_set.x_flag_unroll_all_loops
4563 && flag_unroll_all_loops))
4565 if (!global_options_set.x_unroll_only_small_loops)
4566 unroll_only_small_loops = 0;
4567 if (!global_options_set.x_flag_rename_registers)
4568 flag_rename_registers = 1;
4571 /* If using typedef char *va_list, signal that
4572 __builtin_va_start (&ap, 0) can be optimized to
4573 ap = __builtin_next_arg (0). */
4574 if (DEFAULT_ABI != ABI_V4)
4575 targetm.expand_builtin_va_start = NULL;
4578 /* If not explicitly specified via option, decide whether to generate indexed
4579 load/store instructions. A value of -1 indicates that the
4580 initial value of this variable has not been overwritten. During
4581 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4582 if (TARGET_AVOID_XFORM == -1)
4583 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4584 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4585 need indexed accesses and the type used is the scalar type of the element
4586 being loaded or stored. */
4587 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4588 && !TARGET_ALTIVEC);
4590 /* Set the -mrecip options. */
4591 if (rs6000_recip_name)
4593 char *p = ASTRDUP (rs6000_recip_name);
4594 char *q;
4595 unsigned int mask, i;
4596 bool invert;
4598 while ((q = strtok (p, ",")) != NULL)
4600 p = NULL;
4601 if (*q == '!')
4603 invert = true;
4604 q++;
4606 else
4607 invert = false;
4609 if (!strcmp (q, "default"))
4610 mask = ((TARGET_RECIP_PRECISION)
4611 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4612 else
4614 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4615 if (!strcmp (q, recip_options[i].string))
4617 mask = recip_options[i].mask;
4618 break;
4621 if (i == ARRAY_SIZE (recip_options))
4623 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4624 invert = false;
4625 mask = 0;
4626 ret = false;
4630 if (invert)
4631 rs6000_recip_control &= ~mask;
4632 else
4633 rs6000_recip_control |= mask;
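   /* Editor sketch of the syntax parsed above ('default' is handled
      explicitly in the loop; 'all' and 'rsqrtd' are assumed to be
      entries of recip_options[], which is defined elsewhere):

	-mrecip=default		; precision-dependent default mask
	-mrecip=all,!rsqrtd	; a leading '!' clears the named bits,
				; mirroring the 'invert' flag.  */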
4637 /* Set the builtin mask of the various options used that could affect which
4638 builtins were used. In the past we used target_flags, but we've run out
4639 of bits, and some options are no longer in target_flags. */
4640 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4641 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4642 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4643 rs6000_builtin_mask);
4645 /* Initialize all of the registers. */
4646 rs6000_init_hard_regno_mode_ok (global_init_p);
4648 /* Save the initial options in case the user uses function-specific options. */
4649 if (global_init_p)
4650 target_option_default_node = target_option_current_node
4651 = build_target_option_node (&global_options);
4653 /* If not explicitly specified via option, decide whether to generate the
4654 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4655 if (TARGET_LINK_STACK == -1)
4656 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4658 /* Deprecate use of -mno-speculate-indirect-jumps. */
4659 if (!rs6000_speculate_indirect_jumps)
4660 warning (0, "%qs is deprecated and not recommended in any circumstances",
4661 "-mno-speculate-indirect-jumps");
4663 return ret;
4666 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4667 define the target cpu type. */
4669 static void
4670 rs6000_option_override (void)
4672 (void) rs6000_option_override_internal (true);
4676 /* Implement targetm.vectorize.builtin_mask_for_load. */
4677 static tree
4678 rs6000_builtin_mask_for_load (void)
4680 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4681 if ((TARGET_ALTIVEC && !TARGET_VSX)
4682 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4683 return altivec_builtin_mask_for_load;
4684 else
4685 return 0;
4688 /* Implement LOOP_ALIGN. */
4689 align_flags
4690 rs6000_loop_align (rtx label)
4692 basic_block bb;
4693 int ninsns;
4695 /* Don't override loop alignment if -falign-loops was specified. */
4696 if (!can_override_loop_align)
4697 return align_loops;
4699 bb = BLOCK_FOR_INSN (label);
4700 ninsns = num_loop_insns (bb->loop_father);
4702 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4703 if (ninsns > 4 && ninsns <= 8
4704 && (rs6000_tune == PROCESSOR_POWER4
4705 || rs6000_tune == PROCESSOR_POWER5
4706 || rs6000_tune == PROCESSOR_POWER6
4707 || rs6000_tune == PROCESSOR_POWER7
4708 || rs6000_tune == PROCESSOR_POWER8))
4709 return align_flags (5);
4710 else
4711 return align_loops;
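   /* Editor note: align_flags (5) requests 2**5 == 32-byte alignment,
      the icache-sector size mentioned above; e.g. a 6-insn loop tuned
      for power8 would be emitted behind a ".p2align 5" directive.  */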
4714 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4715 after applying N iterations. This routine does not determine
4716 how many iterations are required to reach the desired alignment. */
4718 static bool
4719 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4721 if (is_packed)
4722 return false;
4724 if (TARGET_32BIT)
4726 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4727 return true;
4729 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4730 return true;
4732 return false;
4734 else
4736 if (TARGET_MACHO)
4737 return false;
4739 /* Assume that all other types are naturally aligned. CHECKME! */
4740 return true;
4744 /* Return true if the vector misalignment factor is supported by the
4745 target. */
4746 static bool
4747 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4748 const_tree type,
4749 int misalignment,
4750 bool is_packed)
4752 if (TARGET_VSX)
4754 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4755 return true;
4757 /* Return false if the movmisalign pattern is not supported for this mode. */
4758 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4759 return false;
4761 if (misalignment == -1)
4763 /* Misalignment factor is unknown at compile time but we know
4764 it's word aligned. */
4765 if (rs6000_vector_alignment_reachable (type, is_packed))
4767 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4769 if (element_size == 64 || element_size == 32)
4770 return true;
4773 return false;
4776 /* VSX supports word-aligned vectors. */
4777 if (misalignment % 4 == 0)
4778 return true;
4780 return false;
4783 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4784 static int
4785 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4786 tree vectype, int misalign)
4788 unsigned elements;
4789 tree elem_type;
4791 switch (type_of_cost)
4793 case scalar_stmt:
4794 case scalar_store:
4795 case vector_stmt:
4796 case vector_store:
4797 case vec_to_scalar:
4798 case scalar_to_vec:
4799 case cond_branch_not_taken:
4800 return 1;
4801 case scalar_load:
4802 case vector_load:
4803 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4804 return 2;
4806 case vec_perm:
4807 /* Power7 has only one permute unit, make it a bit expensive. */
4808 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4809 return 3;
4810 else
4811 return 1;
4813 case vec_promote_demote:
4814 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4815 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4816 return 4;
4817 else
4818 return 1;
4820 case cond_branch_taken:
4821 return 3;
4823 case unaligned_load:
4824 case vector_gather_load:
4825 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4826 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4827 return 2;
4829 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4831 elements = TYPE_VECTOR_SUBPARTS (vectype);
4832 if (elements == 2)
4833 /* Double word aligned. */
4834 return 4;
4836 if (elements == 4)
4838 switch (misalign)
4840 case 8:
4841 /* Double word aligned. */
4842 return 4;
4844 case -1:
4845 /* Unknown misalignment. */
4846 case 4:
4847 case 12:
4848 /* Word aligned. */
4849 return 33;
4851 default:
4852 gcc_unreachable ();
4857 if (TARGET_ALTIVEC)
4858 /* Misaligned loads are not supported. */
4859 gcc_unreachable ();
4861 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4862 return 4;
4864 case unaligned_store:
4865 case vector_scatter_store:
4866 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4867 return 1;
4869 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4871 elements = TYPE_VECTOR_SUBPARTS (vectype);
4872 if (elements == 2)
4873 /* Double word aligned. */
4874 return 2;
4876 if (elements == 4)
4878 switch (misalign)
4880 case 8:
4881 /* Double word aligned. */
4882 return 2;
4884 case -1:
4885 /* Unknown misalignment. */
4886 case 4:
4887 case 12:
4888 /* Word aligned. */
4889 return 23;
4891 default:
4892 gcc_unreachable ();
4897 if (TARGET_ALTIVEC)
4898 /* Misaligned stores are not supported. */
4899 gcc_unreachable ();
4901 return 2;
4903 case vec_construct:
4904 /* This is a rough approximation assuming non-constant elements
4905 constructed into a vector via element insertion. FIXME:
4906 vec_construct is not granular enough for uniformly good
4907 decisions. If the initialization is a splat, this is
4908 cheaper than we estimate. Improve this someday. */
4909 elem_type = TREE_TYPE (vectype);
4910 /* 32-bit vectors loaded into registers are stored as double
4911 precision, so we need 2 permutes, 2 converts, and 1 merge
4912 to construct a vector of short floats from them. */
4913 if (SCALAR_FLOAT_TYPE_P (elem_type)
4914 && TYPE_PRECISION (elem_type) == 32)
4915 return 5;
4916 /* On POWER9, integer vector types are built up in GPRs and then
4917 use a direct move (2 cycles). For POWER8 this is even worse,
4918 as we need two direct moves and a merge, and the direct moves
4919 are five cycles. */
4920 else if (INTEGRAL_TYPE_P (elem_type))
4922 if (TARGET_P9_VECTOR)
4923 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4924 else
4925 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4927 else
4928 /* V2DFmode doesn't need a direct move. */
4929 return 2;
4931 default:
4932 gcc_unreachable ();
4936 /* Implement targetm.vectorize.preferred_simd_mode. */
4938 static machine_mode
4939 rs6000_preferred_simd_mode (scalar_mode mode)
4941 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4943 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4944 return vmode.require ();
4946 return word_mode;
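   /* Editor sketch: with 16-byte vector registers this maps, e.g.,
	SFmode -> V4SFmode  (16 / 4 bytes == 4 lanes)
	DImode -> V2DImode  (16 / 8 bytes == 2 lanes)
      falling back to word_mode when no usable vector mode exists.  */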
4949 typedef struct _rs6000_cost_data
4951 struct loop *loop_info;
4952 unsigned cost[3];
4953 } rs6000_cost_data;
4955 /* Test for likely overcommitment of vector hardware resources. If a
4956 loop iteration is relatively large, and too large a percentage of
4957 instructions in the loop are vectorized, the cost model may not
4958 adequately reflect delays from unavailable vector resources.
4959 Penalize the loop body cost for this case. */
4961 static void
4962 rs6000_density_test (rs6000_cost_data *data)
4964 const int DENSITY_PCT_THRESHOLD = 85;
4965 const int DENSITY_SIZE_THRESHOLD = 70;
4966 const int DENSITY_PENALTY = 10;
4967 struct loop *loop = data->loop_info;
4968 basic_block *bbs = get_loop_body (loop);
4969 int nbbs = loop->num_nodes;
4970 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4971 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4972 int i, density_pct;
4974 for (i = 0; i < nbbs; i++)
4976 basic_block bb = bbs[i];
4977 gimple_stmt_iterator gsi;
4979 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4981 gimple *stmt = gsi_stmt (gsi);
4982 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4984 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4985 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4986 not_vec_cost++;
4990 free (bbs);
4991 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4993 if (density_pct > DENSITY_PCT_THRESHOLD
4994 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4996 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4997 if (dump_enabled_p ())
4998 dump_printf_loc (MSG_NOTE, vect_location,
4999 "density %d%%, cost %d exceeds threshold, penalizing "
5000 "loop body cost by %d%%", density_pct,
5001 vec_cost + not_vec_cost, DENSITY_PENALTY);
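   /* Editor worked example (numbers hypothetical): with vec_cost == 90
      and not_vec_cost == 10, density_pct == 90 > 85 and the body size
      100 > 70, so vect_body is scaled by (100 + 10) / 100, i.e. from
      90 to 99.  */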
5005 /* Implement targetm.vectorize.init_cost. */
5007 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5008 instruction is needed by the vectorization. */
5009 static bool rs6000_vect_nonmem;
5011 static void *
5012 rs6000_init_cost (struct loop *loop_info)
5014 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5015 data->loop_info = loop_info;
5016 data->cost[vect_prologue] = 0;
5017 data->cost[vect_body] = 0;
5018 data->cost[vect_epilogue] = 0;
5019 rs6000_vect_nonmem = false;
5020 return data;
5023 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5024 For some statements, we would like to further fine-tune the cost on top of
5025 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5026 information on statement operation codes etc. One typical case here is
5027 COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
5028 for scalar cost, but it should be priced higher because it is transformed
5029 into either compare + branch or compare + isel instructions. */
5031 static unsigned
5032 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5033 struct _stmt_vec_info *stmt_info)
5035 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5036 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5038 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5039 if (subcode == COND_EXPR)
5040 return 2;
5043 return 0;
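   /* Editor sketch: for a GIMPLE assignment such as

	x_1 = a_2 < b_3 ? c_4 : d_5;

      the scalar_stmt cost seen by the caller becomes
      1 (rs6000_builtin_vectorization_cost) + 2 (the COND_EXPR
      adjustment above) == 3, reflecting the compare + branch or
      compare + isel expansion.  */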
5046 /* Implement targetm.vectorize.add_stmt_cost. */
5048 static unsigned
5049 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5050 enum vect_cost_for_stmt kind,
5051 struct _stmt_vec_info *stmt_info, int misalign,
5052 enum vect_cost_model_location where)
5054 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5055 unsigned retval = 0;
5057 if (flag_vect_cost_model)
5059 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5060 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5061 misalign);
5062 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5063 /* Statements in an inner loop relative to the loop being
5064 vectorized are weighted more heavily. The value here is
5065 arbitrary and could potentially be improved with analysis. */
5066 if (where == vect_body && stmt_info
5067 && stmt_in_inner_loop_p (vinfo, stmt_info))
5068 count *= 50; /* FIXME. */
5070 retval = (unsigned) (count * stmt_cost);
5071 cost_data->cost[where] += retval;
5073 /* Check whether we're doing something other than just a copy loop.
5074 Not all such loops may be profitably vectorized; see
5075 rs6000_finish_cost. */
5076 if ((kind == vec_to_scalar || kind == vec_perm
5077 || kind == vec_promote_demote || kind == vec_construct
5078 || kind == scalar_to_vec)
5079 || (where == vect_body && kind == vector_stmt))
5080 rs6000_vect_nonmem = true;
5083 return retval;
5086 /* Implement targetm.vectorize.finish_cost. */
5088 static void
5089 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5090 unsigned *body_cost, unsigned *epilogue_cost)
5092 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5094 if (cost_data->loop_info)
5095 rs6000_density_test (cost_data);
5097 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5098 that require versioning for any reason. The vectorization is at
5099 best a wash inside the loop, and the versioning checks make
5100 profitability highly unlikely and potentially quite harmful. */
5101 if (cost_data->loop_info)
5103 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5104 if (!rs6000_vect_nonmem
5105 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5106 && LOOP_REQUIRES_VERSIONING (vec_info))
5107 cost_data->cost[vect_body] += 10000;
5110 *prologue_cost = cost_data->cost[vect_prologue];
5111 *body_cost = cost_data->cost[vect_body];
5112 *epilogue_cost = cost_data->cost[vect_epilogue];
5115 /* Implement targetm.vectorize.destroy_cost_data. */
5117 static void
5118 rs6000_destroy_cost_data (void *data)
5120 free (data);
5123 /* Implement targetm.loop_unroll_adjust. */
5125 static unsigned
5126 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5128 if (unroll_only_small_loops)
5130 /* TODO: This is hardcoded to 10 right now. It can be refined; for
5131 example, we may want to unroll very small loops more times (4 perhaps).
5132 We should also use a PARAM for this. */
5133 if (loop->ninsns <= 10)
5134 return MIN (2, nunroll);
5135 else
5136 return 0;
5139 return nunroll;
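/* Illustrative example (editorial addition): when unroll_only_small_loops
   is in effect, a loop with 8 insns and a requested unroll factor of 4 is
   capped at MIN (2, 4) == 2, a loop with 12 insns is not unrolled at all,
   and without the flag the requested NUNROLL passes through unchanged.  */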
5142 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5143 library with vectorized intrinsics. */
5145 static tree
5146 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5147 tree type_in)
5149 char name[32];
5150 const char *suffix = NULL;
5151 tree fntype, new_fndecl, bdecl = NULL_TREE;
5152 int n_args = 1;
5153 const char *bname;
5154 machine_mode el_mode, in_mode;
5155 int n, in_n;
5157 /* Libmass is suitable only for unsafe math, as it does not correctly support
5158 parts of IEEE (such as denormals) with the required precision. Only support
5159 it if we have VSX to use the simd d2 or f4 functions.
5160 XXX: Add variable length support. */
5161 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5162 return NULL_TREE;
5164 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5165 n = TYPE_VECTOR_SUBPARTS (type_out);
5166 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5167 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5168 if (el_mode != in_mode
5169 || n != in_n)
5170 return NULL_TREE;
5172 switch (fn)
5174 CASE_CFN_ATAN2:
5175 CASE_CFN_HYPOT:
5176 CASE_CFN_POW:
5177 n_args = 2;
5178 gcc_fallthrough ();
5180 CASE_CFN_ACOS:
5181 CASE_CFN_ACOSH:
5182 CASE_CFN_ASIN:
5183 CASE_CFN_ASINH:
5184 CASE_CFN_ATAN:
5185 CASE_CFN_ATANH:
5186 CASE_CFN_CBRT:
5187 CASE_CFN_COS:
5188 CASE_CFN_COSH:
5189 CASE_CFN_ERF:
5190 CASE_CFN_ERFC:
5191 CASE_CFN_EXP2:
5192 CASE_CFN_EXP:
5193 CASE_CFN_EXPM1:
5194 CASE_CFN_LGAMMA:
5195 CASE_CFN_LOG10:
5196 CASE_CFN_LOG1P:
5197 CASE_CFN_LOG2:
5198 CASE_CFN_LOG:
5199 CASE_CFN_SIN:
5200 CASE_CFN_SINH:
5201 CASE_CFN_SQRT:
5202 CASE_CFN_TAN:
5203 CASE_CFN_TANH:
5204 if (el_mode == DFmode && n == 2)
5206 bdecl = mathfn_built_in (double_type_node, fn);
5207 suffix = "d2"; /* pow -> powd2 */
5209 else if (el_mode == SFmode && n == 4)
5211 bdecl = mathfn_built_in (float_type_node, fn);
5212 suffix = "4"; /* powf -> powf4 */
5214 else
5215 return NULL_TREE;
5216 if (!bdecl)
5217 return NULL_TREE;
5218 break;
5220 default:
5221 return NULL_TREE;
5224 gcc_assert (suffix != NULL);
5225 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5226 if (!bname)
5227 return NULL_TREE;
5229 strcpy (name, bname + strlen ("__builtin_"));
5230 strcat (name, suffix);
5232 if (n_args == 1)
5233 fntype = build_function_type_list (type_out, type_in, NULL);
5234 else if (n_args == 2)
5235 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5236 else
5237 gcc_unreachable ();
5239 /* Build a function declaration for the vectorized function. */
5240 new_fndecl = build_decl (BUILTINS_LOCATION,
5241 FUNCTION_DECL, get_identifier (name), fntype);
5242 TREE_PUBLIC (new_fndecl) = 1;
5243 DECL_EXTERNAL (new_fndecl) = 1;
5244 DECL_IS_NOVOPS (new_fndecl) = 1;
5245 TREE_READONLY (new_fndecl) = 1;
5247 return new_fndecl;
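/* Illustrative example (editorial addition): for a V2DF pow, the lookup
   above yields __builtin_pow, so NAME becomes "pow" + "d2" == "powd2" and
   the decl built here has type V2DF (V2DF, V2DF) since n_args == 2; a
   V4SF sin yields __builtin_sinf and "sinf4" with type V4SF (V4SF).  */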
5250 /* Returns a function decl for a vectorized version of the builtin function
5251 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5252 if it is not available. */
5254 static tree
5255 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5256 tree type_in)
5258 machine_mode in_mode, out_mode;
5259 int in_n, out_n;
5261 if (TARGET_DEBUG_BUILTIN)
5262 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5263 combined_fn_name (combined_fn (fn)),
5264 GET_MODE_NAME (TYPE_MODE (type_out)),
5265 GET_MODE_NAME (TYPE_MODE (type_in)));
5267 if (TREE_CODE (type_out) != VECTOR_TYPE
5268 || TREE_CODE (type_in) != VECTOR_TYPE)
5269 return NULL_TREE;
5271 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5272 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5273 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5274 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5276 switch (fn)
5278 CASE_CFN_COPYSIGN:
5279 if (VECTOR_UNIT_VSX_P (V2DFmode)
5280 && out_mode == DFmode && out_n == 2
5281 && in_mode == DFmode && in_n == 2)
5282 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5283 if (VECTOR_UNIT_VSX_P (V4SFmode)
5284 && out_mode == SFmode && out_n == 4
5285 && in_mode == SFmode && in_n == 4)
5286 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5287 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5288 && out_mode == SFmode && out_n == 4
5289 && in_mode == SFmode && in_n == 4)
5290 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5291 break;
5292 CASE_CFN_CEIL:
5293 if (VECTOR_UNIT_VSX_P (V2DFmode)
5294 && out_mode == DFmode && out_n == 2
5295 && in_mode == DFmode && in_n == 2)
5296 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5297 if (VECTOR_UNIT_VSX_P (V4SFmode)
5298 && out_mode == SFmode && out_n == 4
5299 && in_mode == SFmode && in_n == 4)
5300 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5301 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5302 && out_mode == SFmode && out_n == 4
5303 && in_mode == SFmode && in_n == 4)
5304 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5305 break;
5306 CASE_CFN_FLOOR:
5307 if (VECTOR_UNIT_VSX_P (V2DFmode)
5308 && out_mode == DFmode && out_n == 2
5309 && in_mode == DFmode && in_n == 2)
5310 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5311 if (VECTOR_UNIT_VSX_P (V4SFmode)
5312 && out_mode == SFmode && out_n == 4
5313 && in_mode == SFmode && in_n == 4)
5314 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5315 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5316 && out_mode == SFmode && out_n == 4
5317 && in_mode == SFmode && in_n == 4)
5318 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5319 break;
5320 CASE_CFN_FMA:
5321 if (VECTOR_UNIT_VSX_P (V2DFmode)
5322 && out_mode == DFmode && out_n == 2
5323 && in_mode == DFmode && in_n == 2)
5324 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5325 if (VECTOR_UNIT_VSX_P (V4SFmode)
5326 && out_mode == SFmode && out_n == 4
5327 && in_mode == SFmode && in_n == 4)
5328 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5329 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5330 && out_mode == SFmode && out_n == 4
5331 && in_mode == SFmode && in_n == 4)
5332 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5333 break;
5334 CASE_CFN_TRUNC:
5335 if (VECTOR_UNIT_VSX_P (V2DFmode)
5336 && out_mode == DFmode && out_n == 2
5337 && in_mode == DFmode && in_n == 2)
5338 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5339 if (VECTOR_UNIT_VSX_P (V4SFmode)
5340 && out_mode == SFmode && out_n == 4
5341 && in_mode == SFmode && in_n == 4)
5342 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5343 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5344 && out_mode == SFmode && out_n == 4
5345 && in_mode == SFmode && in_n == 4)
5346 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5347 break;
5348 CASE_CFN_NEARBYINT:
5349 if (VECTOR_UNIT_VSX_P (V2DFmode)
5350 && flag_unsafe_math_optimizations
5351 && out_mode == DFmode && out_n == 2
5352 && in_mode == DFmode && in_n == 2)
5353 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5354 if (VECTOR_UNIT_VSX_P (V4SFmode)
5355 && flag_unsafe_math_optimizations
5356 && out_mode == SFmode && out_n == 4
5357 && in_mode == SFmode && in_n == 4)
5358 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5359 break;
5360 CASE_CFN_RINT:
5361 if (VECTOR_UNIT_VSX_P (V2DFmode)
5362 && !flag_trapping_math
5363 && out_mode == DFmode && out_n == 2
5364 && in_mode == DFmode && in_n == 2)
5365 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5366 if (VECTOR_UNIT_VSX_P (V4SFmode)
5367 && !flag_trapping_math
5368 && out_mode == SFmode && out_n == 4
5369 && in_mode == SFmode && in_n == 4)
5370 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5371 break;
5372 default:
5373 break;
5376 /* Generate calls to libmass if appropriate. */
5377 if (rs6000_veclib_handler)
5378 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5380 return NULL_TREE;
5383 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5385 static tree
5386 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5387 tree type_in)
5389 machine_mode in_mode, out_mode;
5390 int in_n, out_n;
5392 if (TARGET_DEBUG_BUILTIN)
5393 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5394 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5395 GET_MODE_NAME (TYPE_MODE (type_out)),
5396 GET_MODE_NAME (TYPE_MODE (type_in)));
5398 if (TREE_CODE (type_out) != VECTOR_TYPE
5399 || TREE_CODE (type_in) != VECTOR_TYPE)
5400 return NULL_TREE;
5402 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5403 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5404 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5405 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5407 enum rs6000_builtins fn
5408 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5409 switch (fn)
5411 case RS6000_BUILTIN_RSQRTF:
5412 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5413 && out_mode == SFmode && out_n == 4
5414 && in_mode == SFmode && in_n == 4)
5415 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5416 break;
5417 case RS6000_BUILTIN_RSQRT:
5418 if (VECTOR_UNIT_VSX_P (V2DFmode)
5419 && out_mode == DFmode && out_n == 2
5420 && in_mode == DFmode && in_n == 2)
5421 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5422 break;
5423 case RS6000_BUILTIN_RECIPF:
5424 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5425 && out_mode == SFmode && out_n == 4
5426 && in_mode == SFmode && in_n == 4)
5427 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5428 break;
5429 case RS6000_BUILTIN_RECIP:
5430 if (VECTOR_UNIT_VSX_P (V2DFmode)
5431 && out_mode == DFmode && out_n == 2
5432 && in_mode == DFmode && in_n == 2)
5433 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5434 break;
5435 default:
5436 break;
5438 return NULL_TREE;
5441 /* Default CPU string for rs6000*_file_start functions. */
5442 static const char *rs6000_default_cpu;
5444 #ifdef USING_ELFOS_H
5445 const char *rs6000_machine;
5447 const char *
5448 rs6000_machine_from_flags (void)
5450 HOST_WIDE_INT flags = rs6000_isa_flags;
5452 /* Disable the flags that should never influence the .machine selection. */
5453 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5455 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5456 return "future";
5457 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5458 return "power9";
5459 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5460 return "power8";
5461 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5462 return "power7";
5463 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5464 return "power6";
5465 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5466 return "power5";
5467 if ((flags & ISA_2_1_MASKS) != 0)
5468 return "power4";
5469 if ((flags & OPTION_MASK_POWERPC64) != 0)
5470 return "ppc64";
5471 return "ppc";
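/* Illustrative example (editorial addition): each test above asks whether
   FLAGS enables something that is new at a given ISA level.  E.g.
   -mcpu=power8 turns on bits of ISA_2_7_MASKS_SERVER that are not in
   ISA_2_6_MASKS_SERVER, so the function returns "power8" and
   rs6000_file_start later emits

	.machine power8

   at the top of the assembly file.  */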
5474 void
5475 emit_asm_machine (void)
5477 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5479 #endif
5481 /* Do anything needed at the start of the asm file. */
5483 static void
5484 rs6000_file_start (void)
5486 char buffer[80];
5487 const char *start = buffer;
5488 FILE *file = asm_out_file;
5490 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5492 default_file_start ();
5494 if (flag_verbose_asm)
5496 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5498 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5500 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5501 start = "";
5504 if (global_options_set.x_rs6000_cpu_index)
5506 fprintf (file, "%s -mcpu=%s", start,
5507 processor_target_table[rs6000_cpu_index].name);
5508 start = "";
5511 if (global_options_set.x_rs6000_tune_index)
5513 fprintf (file, "%s -mtune=%s", start,
5514 processor_target_table[rs6000_tune_index].name);
5515 start = "";
5518 if (PPC405_ERRATUM77)
5520 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5521 start = "";
5524 #ifdef USING_ELFOS_H
5525 switch (rs6000_sdata)
5527 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5528 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5529 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5530 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5533 if (rs6000_sdata && g_switch_value)
5535 fprintf (file, "%s -G %d", start,
5536 g_switch_value);
5537 start = "";
5539 #endif
5541 if (*start == '\0')
5542 putc ('\n', file);
5545 #ifdef USING_ELFOS_H
5546 rs6000_machine = rs6000_machine_from_flags ();
5547 emit_asm_machine ();
5548 #endif
5550 if (DEFAULT_ABI == ABI_ELFv2)
5551 fprintf (file, "\t.abiversion 2\n");
5555 /* Return nonzero if this function is known to have a null epilogue. */
5557 int
5558 direct_return (void)
5560 if (reload_completed)
5562 rs6000_stack_t *info = rs6000_stack_info ();
5564 if (info->first_gp_reg_save == 32
5565 && info->first_fp_reg_save == 64
5566 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5567 && ! info->lr_save_p
5568 && ! info->cr_save_p
5569 && info->vrsave_size == 0
5570 && ! info->push_p)
5571 return 1;
5574 return 0;
5577 /* Helper for num_insns_constant. Calculate number of instructions to
5578 load VALUE to a single gpr using combinations of addi, addis, ori,
5579 oris and sldi instructions. */
5581 static int
5582 num_insns_constant_gpr (HOST_WIDE_INT value)
5584 /* signed constant loadable with addi */
5585 if (SIGNED_INTEGER_16BIT_P (value))
5586 return 1;
5588 /* constant loadable with addis */
5589 else if ((value & 0xffff) == 0
5590 && (value >> 31 == -1 || value >> 31 == 0))
5591 return 1;
5593 /* PADDI can support up to 34 bit signed integers. */
5594 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5595 return 1;
5597 else if (TARGET_POWERPC64)
5599 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5600 HOST_WIDE_INT high = value >> 31;
5602 if (high == 0 || high == -1)
5603 return 2;
5605 high >>= 1;
5607 if (low == 0)
5608 return num_insns_constant_gpr (high) + 1;
5609 else if (high == 0)
5610 return num_insns_constant_gpr (low) + 1;
5611 else
5612 return (num_insns_constant_gpr (high)
5613 + num_insns_constant_gpr (low) + 1);
5616 else
5617 return 2;
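/* Illustrative example (editorial addition): for VALUE == 0x100000000
   (2**32) on a 64-bit target, neither the addi nor the addis case
   matches; with TARGET_PREFIXED the value fits in 34 signed bits, so a
   single paddi/pli suffices.  Without prefixed insns the recursive case
   applies: low == 0 and high == 1 after the shifts, giving
   num_insns_constant_gpr (1) + 1 == 2, i.e.

	li   rX,1
	sldi rX,rX,32  */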
5620 /* Helper for num_insns_constant. Allow constants formed by the
5621 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5622 and handle modes that require multiple gprs. */
5624 static int
5625 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5627 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5628 int total = 0;
5629 while (nregs-- > 0)
5631 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5632 int insns = num_insns_constant_gpr (low);
5633 if (insns > 2
5634 /* We won't get more than 2 from num_insns_constant_gpr
5635 except when TARGET_POWERPC64 and mode is DImode or
5636 wider, so the register mode must be DImode. */
5637 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5638 insns = 2;
5639 total += insns;
5640 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5641 it all at once would be UB. */
5642 value >>= (BITS_PER_WORD - 1);
5643 value >>= 1;
5645 return total;
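/* Illustrative example (editorial addition): the constant
   0x000000ffffffffff (40 low-order ones) costs 3 instructions via
   num_insns_constant_gpr, but it is a valid DImode and-mask, so the loop
   above caps it at 2 ("li rX,-1" then "rldicl rX,rX,0,24").  The
   two-step shift at the bottom avoids shifting a HOST_WIDE_INT by its
   full width, which would be undefined behaviour when BITS_PER_WORD
   equals HOST_BITS_PER_WIDE_INT.  */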
5648 /* Return the number of instructions it takes to form a constant in as
5649 many gprs as are needed for MODE. */
5651 int
5652 num_insns_constant (rtx op, machine_mode mode)
5654 HOST_WIDE_INT val;
5656 switch (GET_CODE (op))
5658 case CONST_INT:
5659 val = INTVAL (op);
5660 break;
5662 case CONST_WIDE_INT:
5664 int insns = 0;
5665 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5666 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5667 DImode);
5668 return insns;
5671 case CONST_DOUBLE:
5673 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5675 if (mode == SFmode || mode == SDmode)
5677 long l;
5679 if (mode == SDmode)
5680 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5681 else
5682 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5683 /* See the first define_split in rs6000.md handling a
5684 const_double_operand. */
5685 val = l;
5686 mode = SImode;
5688 else if (mode == DFmode || mode == DDmode)
5690 long l[2];
5692 if (mode == DDmode)
5693 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5694 else
5695 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5697 /* See the second (32-bit) and third (64-bit) define_split
5698 in rs6000.md handling a const_double_operand. */
5699 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5700 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5701 mode = DImode;
5703 else if (mode == TFmode || mode == TDmode
5704 || mode == KFmode || mode == IFmode)
5706 long l[4];
5707 int insns;
5709 if (mode == TDmode)
5710 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5711 else
5712 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5714 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5715 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5716 insns = num_insns_constant_multi (val, DImode);
5717 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5718 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5719 insns += num_insns_constant_multi (val, DImode);
5720 return insns;
5722 else
5723 gcc_unreachable ();
5725 break;
5727 default:
5728 gcc_unreachable ();
5731 return num_insns_constant_multi (val, mode);
5734 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5735 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5736 corresponding element of the vector, but for V4SFmode, the
5737 corresponding "float" is interpreted as an SImode integer. */
5739 HOST_WIDE_INT
5740 const_vector_elt_as_int (rtx op, unsigned int elt)
5742 rtx tmp;
5744 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5745 gcc_assert (GET_MODE (op) != V2DImode
5746 && GET_MODE (op) != V2DFmode);
5748 tmp = CONST_VECTOR_ELT (op, elt);
5749 if (GET_MODE (op) == V4SFmode)
5750 tmp = gen_lowpart (SImode, tmp);
5751 return INTVAL (tmp);
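/* Illustrative example (editorial addition): for a V4SFmode constant
   vector, an element holding 1.0f is reinterpreted via gen_lowpart as
   the SImode bit pattern 0x3f800000, which is what the vspltis*
   recognizers below compare against.  */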
5754 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5755 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5756 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5757 all items are set to the same value and contain COPIES replicas of the
5758 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
5759 operand and the others are set to the value of the operand's msb. */
5761 static bool
5762 vspltis_constant (rtx op, unsigned step, unsigned copies)
5764 machine_mode mode = GET_MODE (op);
5765 machine_mode inner = GET_MODE_INNER (mode);
5767 unsigned i;
5768 unsigned nunits;
5769 unsigned bitsize;
5770 unsigned mask;
5772 HOST_WIDE_INT val;
5773 HOST_WIDE_INT splat_val;
5774 HOST_WIDE_INT msb_val;
5776 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5777 return false;
5779 nunits = GET_MODE_NUNITS (mode);
5780 bitsize = GET_MODE_BITSIZE (inner);
5781 mask = GET_MODE_MASK (inner);
5783 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5784 splat_val = val;
5785 msb_val = val >= 0 ? 0 : -1;
5787 /* Construct the value to be splatted, if possible. If not, return 0. */
5788 for (i = 2; i <= copies; i *= 2)
5790 HOST_WIDE_INT small_val;
5791 bitsize /= 2;
5792 small_val = splat_val >> bitsize;
5793 mask >>= bitsize;
5794 if (splat_val != ((HOST_WIDE_INT)
5795 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5796 | (small_val & mask)))
5797 return false;
5798 splat_val = small_val;
5801 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5802 if (EASY_VECTOR_15 (splat_val))
5805 /* Also check if we can splat, and then add the result to itself. Do so if
5806 the value is positive, or if the splat instruction is using OP's mode;
5807 for splat_val < 0, the splat and the add should use the same mode. */
5808 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5809 && (splat_val >= 0 || (step == 1 && copies == 1)))
5812 /* Also check if we are loading up the most significant bit which can be done by
5813 loading up -1 and shifting the value left by -1. */
5814 else if (EASY_VECTOR_MSB (splat_val, inner))
5817 else
5818 return false;
5820 /* Check if VAL is present in every STEP-th element, and the
5821 other elements are filled with its most significant bit. */
5822 for (i = 1; i < nunits; ++i)
5824 HOST_WIDE_INT desired_val;
5825 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5826 if ((i & (step - 1)) == 0)
5827 desired_val = val;
5828 else
5829 desired_val = msb_val;
5831 if (desired_val != const_vector_elt_as_int (op, elt))
5832 return false;
5835 return true;
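/* Illustrative example (editorial addition, assuming big-endian element
   order): a V4SImode constant with every element equal to 0x05050505 is
   retried by easy_altivec_constant with COPIES == 4; the peeling loop
   above reduces the value to the byte 0x05, so the constant is emitted
   as "vspltisb vX,5".  A V8HImode constant {0,5,0,5,0,5,0,5} instead
   matches with STEP == 2: every second halfword holds 5 and the rest
   hold its msb (0), which is exactly "vspltisw vX,5" viewed as
   halfwords.  */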
5838 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5839 instruction, filling in the bottom elements with 0 or -1.
5841 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5842 for the number of zeroes to shift in, or negative for the number of 0xff
5843 bytes to shift in.
5845 OP is a CONST_VECTOR. */
5847 int
5848 vspltis_shifted (rtx op)
5850 machine_mode mode = GET_MODE (op);
5851 machine_mode inner = GET_MODE_INNER (mode);
5853 unsigned i, j;
5854 unsigned nunits;
5855 unsigned mask;
5857 HOST_WIDE_INT val;
5859 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5860 return false;
5862 /* We need to create pseudo registers to do the shift, so don't recognize
5863 shift vector constants after reload. */
5864 if (!can_create_pseudo_p ())
5865 return false;
5867 nunits = GET_MODE_NUNITS (mode);
5868 mask = GET_MODE_MASK (inner);
5870 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5872 /* Check if the value can really be the operand of a vspltis[bhw]. */
5873 if (EASY_VECTOR_15 (val))
5876 /* Also check if we are loading up the most significant bit which can be done
5877 by loading up -1 and shifting the value left by -1. */
5878 else if (EASY_VECTOR_MSB (val, inner))
5881 else
5882 return 0;
5884 /* Check if VAL is present in every STEP-th element until we find elements
5885 that are 0 or all 1 bits. */
5886 for (i = 1; i < nunits; ++i)
5888 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5889 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5891 /* If the value isn't the splat value, check for the remaining elements
5892 being 0/-1. */
5893 if (val != elt_val)
5895 if (elt_val == 0)
5897 for (j = i+1; j < nunits; ++j)
5899 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5900 if (const_vector_elt_as_int (op, elt2) != 0)
5901 return 0;
5904 return (nunits - i) * GET_MODE_SIZE (inner);
5907 else if ((elt_val & mask) == mask)
5909 for (j = i+1; j < nunits; ++j)
5911 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5912 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5913 return 0;
5916 return -((nunits - i) * GET_MODE_SIZE (inner));
5919 else
5920 return 0;
5924 /* If all elements are equal, we don't need to do VSLDOI. */
5925 return 0;
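/* Illustrative example (editorial addition, assuming big-endian element
   order): the V16QImode constant {5, 0, 0, ..., 0} is recognized here:
   VAL == 5 satisfies EASY_VECTOR_15 and the remaining 15 bytes are all
   zero, so the function returns 15, meaning "splat 5 with vspltisb and
   then shift in 15 zero bytes with VSLDOI".  */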
5929 /* Return true if OP is of the given MODE and can be synthesized
5930 with a vspltisb, vspltish or vspltisw. */
5932 bool
5933 easy_altivec_constant (rtx op, machine_mode mode)
5935 unsigned step, copies;
5937 if (mode == VOIDmode)
5938 mode = GET_MODE (op);
5939 else if (mode != GET_MODE (op))
5940 return false;
5942 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5943 constants. */
5944 if (mode == V2DFmode)
5945 return zero_constant (op, mode);
5947 else if (mode == V2DImode)
5949 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5950 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5951 return false;
5953 if (zero_constant (op, mode))
5954 return true;
5956 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5957 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5958 return true;
5960 return false;
5963 /* V1TImode is a special container for TImode. Ignore for now. */
5964 else if (mode == V1TImode)
5965 return false;
5967 /* Start with a vspltisw. */
5968 step = GET_MODE_NUNITS (mode) / 4;
5969 copies = 1;
5971 if (vspltis_constant (op, step, copies))
5972 return true;
5974 /* Then try with a vspltish. */
5975 if (step == 1)
5976 copies <<= 1;
5977 else
5978 step >>= 1;
5980 if (vspltis_constant (op, step, copies))
5981 return true;
5983 /* And finally a vspltisb. */
5984 if (step == 1)
5985 copies <<= 1;
5986 else
5987 step >>= 1;
5989 if (vspltis_constant (op, step, copies))
5990 return true;
5992 if (vspltis_shifted (op) != 0)
5993 return true;
5995 return false;
5998 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5999 result is OP. Abort if it is not possible. */
6001 rtx
6002 gen_easy_altivec_constant (rtx op)
6004 machine_mode mode = GET_MODE (op);
6005 int nunits = GET_MODE_NUNITS (mode);
6006 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6007 unsigned step = nunits / 4;
6008 unsigned copies = 1;
6010 /* Start with a vspltisw. */
6011 if (vspltis_constant (op, step, copies))
6012 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6014 /* Then try with a vspltish. */
6015 if (step == 1)
6016 copies <<= 1;
6017 else
6018 step >>= 1;
6020 if (vspltis_constant (op, step, copies))
6021 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6023 /* And finally a vspltisb. */
6024 if (step == 1)
6025 copies <<= 1;
6026 else
6027 step >>= 1;
6029 if (vspltis_constant (op, step, copies))
6030 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6032 gcc_unreachable ();
6035 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6036 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6038 Return the number of instructions needed (1 or 2) in the location pointed
6039 to by NUM_INSNS_PTR.
6041 Return the constant that is being split via CONSTANT_PTR. */
6043 bool
6044 xxspltib_constant_p (rtx op,
6045 machine_mode mode,
6046 int *num_insns_ptr,
6047 int *constant_ptr)
6049 size_t nunits = GET_MODE_NUNITS (mode);
6050 size_t i;
6051 HOST_WIDE_INT value;
6052 rtx element;
6054 /* Set the returned values to out-of-bounds values. */
6055 *num_insns_ptr = -1;
6056 *constant_ptr = 256;
6058 if (!TARGET_P9_VECTOR)
6059 return false;
6061 if (mode == VOIDmode)
6062 mode = GET_MODE (op);
6064 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6065 return false;
6067 /* Handle (vec_duplicate <constant>). */
6068 if (GET_CODE (op) == VEC_DUPLICATE)
6070 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6071 && mode != V2DImode)
6072 return false;
6074 element = XEXP (op, 0);
6075 if (!CONST_INT_P (element))
6076 return false;
6078 value = INTVAL (element);
6079 if (!IN_RANGE (value, -128, 127))
6080 return false;
6083 /* Handle (const_vector [...]). */
6084 else if (GET_CODE (op) == CONST_VECTOR)
6086 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6087 && mode != V2DImode)
6088 return false;
6090 element = CONST_VECTOR_ELT (op, 0);
6091 if (!CONST_INT_P (element))
6092 return false;
6094 value = INTVAL (element);
6095 if (!IN_RANGE (value, -128, 127))
6096 return false;
6098 for (i = 1; i < nunits; i++)
6100 element = CONST_VECTOR_ELT (op, i);
6101 if (!CONST_INT_P (element))
6102 return false;
6104 if (value != INTVAL (element))
6105 return false;
6109 /* Handle integer constants being loaded into the upper part of the VSX
6110 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6111 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6112 else if (CONST_INT_P (op))
6114 if (!SCALAR_INT_MODE_P (mode))
6115 return false;
6117 value = INTVAL (op);
6118 if (!IN_RANGE (value, -128, 127))
6119 return false;
6121 if (!IN_RANGE (value, -1, 0))
6123 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6124 return false;
6126 if (EASY_VECTOR_15 (value))
6127 return false;
6131 else
6132 return false;
6134 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6135 sign extend. Special case 0/-1 to allow getting any VSX register instead
6136 of an Altivec register. */
6137 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6138 && EASY_VECTOR_15 (value))
6139 return false;
6141 /* Return # of instructions and the constant byte for XXSPLTIB. */
6142 if (mode == V16QImode)
6143 *num_insns_ptr = 1;
6145 else if (IN_RANGE (value, -1, 0))
6146 *num_insns_ptr = 1;
6148 else
6149 *num_insns_ptr = 2;
6151 *constant_ptr = (int) value;
6152 return true;
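/* Illustrative example (editorial addition): a V16QImode splat of 100
   needs a single "xxspltib vX,100" (*NUM_INSNS_PTR == 1); a V4SImode
   splat of 100 is costed at 2 (xxspltib followed by a vextsb2w sign
   extension) because 100 is outside the -16..15 vspltisw range; and a
   V4SImode splat of 5 returns false, since a single vspltisw is
   preferred.  */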
6155 const char *
6156 output_vec_const_move (rtx *operands)
6158 int shift;
6159 machine_mode mode;
6160 rtx dest, vec;
6162 dest = operands[0];
6163 vec = operands[1];
6164 mode = GET_MODE (dest);
6166 if (TARGET_VSX)
6168 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6169 int xxspltib_value = 256;
6170 int num_insns = -1;
6172 if (zero_constant (vec, mode))
6174 if (TARGET_P9_VECTOR)
6175 return "xxspltib %x0,0";
6177 else if (dest_vmx_p)
6178 return "vspltisw %0,0";
6180 else
6181 return "xxlxor %x0,%x0,%x0";
6184 if (all_ones_constant (vec, mode))
6186 if (TARGET_P9_VECTOR)
6187 return "xxspltib %x0,255";
6189 else if (dest_vmx_p)
6190 return "vspltisw %0,-1";
6192 else if (TARGET_P8_VECTOR)
6193 return "xxlorc %x0,%x0,%x0";
6195 else
6196 gcc_unreachable ();
6199 if (TARGET_P9_VECTOR
6200 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6202 if (num_insns == 1)
6204 operands[2] = GEN_INT (xxspltib_value & 0xff);
6205 return "xxspltib %x0,%2";
6208 return "#";
6212 if (TARGET_ALTIVEC)
6214 rtx splat_vec;
6216 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6217 if (zero_constant (vec, mode))
6218 return "vspltisw %0,0";
6220 if (all_ones_constant (vec, mode))
6221 return "vspltisw %0,-1";
6223 /* Do we need to construct a value using VSLDOI? */
6224 shift = vspltis_shifted (vec);
6225 if (shift != 0)
6226 return "#";
6228 splat_vec = gen_easy_altivec_constant (vec);
6229 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6230 operands[1] = XEXP (splat_vec, 0);
6231 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6232 return "#";
6234 switch (GET_MODE (splat_vec))
6236 case E_V4SImode:
6237 return "vspltisw %0,%1";
6239 case E_V8HImode:
6240 return "vspltish %0,%1";
6242 case E_V16QImode:
6243 return "vspltisb %0,%1";
6245 default:
6246 gcc_unreachable ();
6250 gcc_unreachable ();
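/* Illustrative example (editorial addition): returning "#" above tells
   the output machinery that no single instruction can load the constant
   and the insn must be split; e.g. a constant recognized by
   vspltis_shifted is later split by the machine description into a
   vspltis[bhw] followed by a vsldoi.  */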
6253 /* Initialize vector TARGET to VALS. */
6255 void
6256 rs6000_expand_vector_init (rtx target, rtx vals)
6258 machine_mode mode = GET_MODE (target);
6259 machine_mode inner_mode = GET_MODE_INNER (mode);
6260 int n_elts = GET_MODE_NUNITS (mode);
6261 int n_var = 0, one_var = -1;
6262 bool all_same = true, all_const_zero = true;
6263 rtx x, mem;
6264 int i;
6266 for (i = 0; i < n_elts; ++i)
6268 x = XVECEXP (vals, 0, i);
6269 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6270 ++n_var, one_var = i;
6271 else if (x != CONST0_RTX (inner_mode))
6272 all_const_zero = false;
6274 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6275 all_same = false;
6278 if (n_var == 0)
6280 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6281 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6282 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6284 /* Zero register. */
6285 emit_move_insn (target, CONST0_RTX (mode));
6286 return;
6288 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6290 /* Splat immediate. */
6291 emit_insn (gen_rtx_SET (target, const_vec));
6292 return;
6294 else
6296 /* Load from constant pool. */
6297 emit_move_insn (target, const_vec);
6298 return;
6302 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6303 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6305 rtx op[2];
6306 size_t i;
6307 size_t num_elements = all_same ? 1 : 2;
6308 for (i = 0; i < num_elements; i++)
6310 op[i] = XVECEXP (vals, 0, i);
6311 /* Just in case there is a SUBREG with a smaller mode, do a
6312 conversion. */
6313 if (GET_MODE (op[i]) != inner_mode)
6315 rtx tmp = gen_reg_rtx (inner_mode);
6316 convert_move (tmp, op[i], 0);
6317 op[i] = tmp;
6319 /* Allow load with splat double word. */
6320 else if (MEM_P (op[i]))
6322 if (!all_same)
6323 op[i] = force_reg (inner_mode, op[i]);
6325 else if (!REG_P (op[i]))
6326 op[i] = force_reg (inner_mode, op[i]);
6329 if (all_same)
6331 if (mode == V2DFmode)
6332 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6333 else
6334 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6336 else
6338 if (mode == V2DFmode)
6339 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6340 else
6341 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6343 return;
6346 /* Special case initializing vector int if we are on 64-bit systems with
6347 direct move or we have the ISA 3.0 instructions. */
6348 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6349 && TARGET_DIRECT_MOVE_64BIT)
6351 if (all_same)
6353 rtx element0 = XVECEXP (vals, 0, 0);
6354 if (MEM_P (element0))
6355 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6356 else
6357 element0 = force_reg (SImode, element0);
6359 if (TARGET_P9_VECTOR)
6360 emit_insn (gen_vsx_splat_v4si (target, element0));
6361 else
6363 rtx tmp = gen_reg_rtx (DImode);
6364 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6365 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6367 return;
6369 else
6371 rtx elements[4];
6372 size_t i;
6374 for (i = 0; i < 4; i++)
6375 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6377 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6378 elements[2], elements[3]));
6379 return;
6383 /* With single precision floating point on VSX, note that internally single
6384 precision is actually represented as a double.  Either make 2 V2DF
6385 vectors and convert those vectors to single precision, or do one
6386 conversion and splat the result to the other elements. */
6387 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6389 if (all_same)
6391 rtx element0 = XVECEXP (vals, 0, 0);
6393 if (TARGET_P9_VECTOR)
6395 if (MEM_P (element0))
6396 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6398 emit_insn (gen_vsx_splat_v4sf (target, element0));
6401 else
6403 rtx freg = gen_reg_rtx (V4SFmode);
6404 rtx sreg = force_reg (SFmode, element0);
6405 rtx cvt = (TARGET_XSCVDPSPN
6406 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6407 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6409 emit_insn (cvt);
6410 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6411 const0_rtx));
6414 else
6416 rtx dbl_even = gen_reg_rtx (V2DFmode);
6417 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6418 rtx flt_even = gen_reg_rtx (V4SFmode);
6419 rtx flt_odd = gen_reg_rtx (V4SFmode);
6420 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6421 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6422 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6423 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6425 /* Use VMRGEW if we can instead of doing a permute. */
6426 if (TARGET_P8_VECTOR)
6428 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6429 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6430 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6431 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6432 if (BYTES_BIG_ENDIAN)
6433 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6434 else
6435 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6437 else
6439 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6440 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6441 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6442 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6443 rs6000_expand_extract_even (target, flt_even, flt_odd);
6446 return;
6449 /* Special case initializing vector short/char that are splats if we are on
6450 64-bit systems with direct move. */
6451 if (all_same && TARGET_DIRECT_MOVE_64BIT
6452 && (mode == V16QImode || mode == V8HImode))
6454 rtx op0 = XVECEXP (vals, 0, 0);
6455 rtx di_tmp = gen_reg_rtx (DImode);
6457 if (!REG_P (op0))
6458 op0 = force_reg (GET_MODE_INNER (mode), op0);
6460 if (mode == V16QImode)
6462 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6463 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6464 return;
6467 if (mode == V8HImode)
6469 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6470 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6471 return;
6475 /* Store value to stack temp. Load vector element. Splat. However, splat
6476 of 64-bit items is not supported on Altivec. */
6477 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6479 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6480 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6481 XVECEXP (vals, 0, 0));
6482 x = gen_rtx_UNSPEC (VOIDmode,
6483 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6484 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6485 gen_rtvec (2,
6486 gen_rtx_SET (target, mem),
6487 x)));
6488 x = gen_rtx_VEC_SELECT (inner_mode, target,
6489 gen_rtx_PARALLEL (VOIDmode,
6490 gen_rtvec (1, const0_rtx)));
6491 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6492 return;
6495 /* One field is non-constant. Load constant then overwrite
6496 varying field. */
6497 if (n_var == 1)
6499 rtx copy = copy_rtx (vals);
6501 /* Load constant part of vector, substitute neighboring value for
6502 varying element. */
6503 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6504 rs6000_expand_vector_init (target, copy);
6506 /* Insert variable. */
6507 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6508 return;
6511 /* Construct the vector in memory one field at a time
6512 and load the whole vector. */
6513 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6514 for (i = 0; i < n_elts; i++)
6515 emit_move_insn (adjust_address_nv (mem, inner_mode,
6516 i * GET_MODE_SIZE (inner_mode)),
6517 XVECEXP (vals, 0, i));
6518 emit_move_insn (target, mem);
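/* Illustrative example (editorial addition): initializing V4SImode from
   { x, 1, 2, 3 } with variable x hits the n_var == 1 case above: the
   constant vector { 1, 1, 2, 3 } is loaded first (element 0 borrows its
   neighbour's value) and rs6000_expand_vector_set then overwrites
   element 0 with x.  */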
6521 /* Set field ELT of TARGET to VAL. */
6523 void
6524 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6526 machine_mode mode = GET_MODE (target);
6527 machine_mode inner_mode = GET_MODE_INNER (mode);
6528 rtx reg = gen_reg_rtx (mode);
6529 rtx mask, mem, x;
6530 int width = GET_MODE_SIZE (inner_mode);
6531 int i;
6533 val = force_reg (GET_MODE (val), val);
6535 if (VECTOR_MEM_VSX_P (mode))
6537 rtx insn = NULL_RTX;
6538 rtx elt_rtx = GEN_INT (elt);
6540 if (mode == V2DFmode)
6541 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6543 else if (mode == V2DImode)
6544 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6546 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6548 if (mode == V4SImode)
6549 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6550 else if (mode == V8HImode)
6551 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6552 else if (mode == V16QImode)
6553 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6554 else if (mode == V4SFmode)
6555 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6558 if (insn)
6560 emit_insn (insn);
6561 return;
6565 /* Simplify setting single element vectors like V1TImode. */
6566 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6568 emit_move_insn (target, gen_lowpart (mode, val));
6569 return;
6572 /* Load single variable value. */
6573 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6574 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6575 x = gen_rtx_UNSPEC (VOIDmode,
6576 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6577 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6578 gen_rtvec (2,
6579 gen_rtx_SET (reg, mem),
6580 x)));
6582 /* Linear sequence. */
6583 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6584 for (i = 0; i < 16; ++i)
6585 XVECEXP (mask, 0, i) = GEN_INT (i);
6587 /* Set permute mask to insert element into target. */
6588 for (i = 0; i < width; ++i)
6589 XVECEXP (mask, 0, elt*width + i)
6590 = GEN_INT (i + 0x10);
6591 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6593 if (BYTES_BIG_ENDIAN)
6594 x = gen_rtx_UNSPEC (mode,
6595 gen_rtvec (3, target, reg,
6596 force_reg (V16QImode, x)),
6597 UNSPEC_VPERM);
6598 else
6600 if (TARGET_P9_VECTOR)
6601 x = gen_rtx_UNSPEC (mode,
6602 gen_rtvec (3, reg, target,
6603 force_reg (V16QImode, x)),
6604 UNSPEC_VPERMR);
6605 else
6607 /* Invert selector. We prefer to generate VNAND on P8 so
6608 that future fusion opportunities can kick in, but must
6609 generate VNOR elsewhere. */
6610 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6611 rtx iorx = (TARGET_P8_VECTOR
6612 ? gen_rtx_IOR (V16QImode, notx, notx)
6613 : gen_rtx_AND (V16QImode, notx, notx));
6614 rtx tmp = gen_reg_rtx (V16QImode);
6615 emit_insn (gen_rtx_SET (tmp, iorx));
6617 /* Permute with operands reversed and adjusted selector. */
6618 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6619 UNSPEC_VPERM);
6623 emit_insn (gen_rtx_SET (target, x));
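/* Illustrative example (editorial addition): on a big-endian target with
   MODE == V4SImode and ELT == 1, the selector built above is

	{ 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }

   i.e. an identity permute except that bytes 4-7 of the result come from
   the first four bytes of REG (selector values 16 and up index the
   second vperm input), dropping the new value into element 1.  The
   little-endian paths swap the operands and use VPERMR or an inverted
   selector to the same effect.  */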
6626 /* Extract field ELT from VEC into TARGET. */
6628 void
6629 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6631 machine_mode mode = GET_MODE (vec);
6632 machine_mode inner_mode = GET_MODE_INNER (mode);
6633 rtx mem;
6635 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6637 switch (mode)
6639 default:
6640 break;
6641 case E_V1TImode:
6642 emit_move_insn (target, gen_lowpart (TImode, vec));
6643 break;
6644 case E_V2DFmode:
6645 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6646 return;
6647 case E_V2DImode:
6648 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6649 return;
6650 case E_V4SFmode:
6651 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6652 return;
6653 case E_V16QImode:
6654 if (TARGET_DIRECT_MOVE_64BIT)
6656 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6657 return;
6659 else
6660 break;
6661 case E_V8HImode:
6662 if (TARGET_DIRECT_MOVE_64BIT)
6664 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6665 return;
6667 else
6668 break;
6669 case E_V4SImode:
6670 if (TARGET_DIRECT_MOVE_64BIT)
6672 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6673 return;
6675 break;
6678 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6679 && TARGET_DIRECT_MOVE_64BIT)
6681 if (GET_MODE (elt) != DImode)
6683 rtx tmp = gen_reg_rtx (DImode);
6684 convert_move (tmp, elt, 0);
6685 elt = tmp;
6687 else if (!REG_P (elt))
6688 elt = force_reg (DImode, elt);
6690 switch (mode)
6692 case E_V1TImode:
6693 emit_move_insn (target, gen_lowpart (TImode, vec));
6694 return;
6696 case E_V2DFmode:
6697 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6698 return;
6700 case E_V2DImode:
6701 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6702 return;
6704 case E_V4SFmode:
6705 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6706 return;
6708 case E_V4SImode:
6709 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6710 return;
6712 case E_V8HImode:
6713 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6714 return;
6716 case E_V16QImode:
6717 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6718 return;
6720 default:
6721 gcc_unreachable ();
6725 /* Allocate mode-sized buffer. */
6726 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6728 emit_move_insn (mem, vec);
6729 if (CONST_INT_P (elt))
6731 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6733 /* Add offset to field within buffer matching vector element. */
6734 mem = adjust_address_nv (mem, inner_mode,
6735 modulo_elt * GET_MODE_SIZE (inner_mode));
6736 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6738 else
6740 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6741 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6742 rtx new_addr = gen_reg_rtx (Pmode);
6744 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6745 if (ele_size > 1)
6746 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6747 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6748 new_addr = change_address (mem, inner_mode, new_addr);
6749 emit_move_insn (target, new_addr);
6753 /* Return the offset within a memory object (MEM) of a vector type to a given
6754 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6755 the element is constant, we return a constant integer.
6757 Otherwise, we use a base register temporary to calculate the offset after
6758 masking it to fit within the bounds of the vector and scaling it. The
6759 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6760 built-in function. */
6762 static rtx
6763 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6765 if (CONST_INT_P (element))
6766 return GEN_INT (INTVAL (element) * scalar_size);
6768 /* All insns should use the 'Q' constraint (address is a single register) if
6769 the element number is not a constant. */
6770 gcc_assert (satisfies_constraint_Q (mem));
6772 /* Mask the element to make sure the element number is between 0 and the
6773 maximum number of elements - 1 so that we don't generate an address
6774 outside the vector. */
6775 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6776 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6777 emit_insn (gen_rtx_SET (base_tmp, and_op));
6779 /* Shift the element to get the byte offset from the element number. */
6780 int shift = exact_log2 (scalar_size);
6781 gcc_assert (shift >= 0);
6783 if (shift > 0)
6785 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6786 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6789 return base_tmp;
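/* Illustrative example (editorial addition): for a V4SImode vector
   (SCALAR_SIZE == 4), a constant ELEMENT of 3 simply yields
   GEN_INT (12); a variable ELEMENT is instead clamped and scaled into
   BASE_TMP, roughly

	base_tmp = element & 3;		(stay inside the vector)
	base_tmp = base_tmp << 2;	(scale by the element size)

   as the ELFv2 ABI requires for vec_extract with an out-of-range
   index.  */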
6792 /* Helper function to update PC-relative addresses when we are adjusting a
6793 memory address (ADDR) of a vector to point to a scalar field within it with
6794 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
6795 use the base register temporary (BASE_TMP) to form the address. */
6797 static rtx
6798 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6800 rtx new_addr = NULL;
6802 gcc_assert (CONST_INT_P (element_offset));
6804 if (GET_CODE (addr) == CONST)
6805 addr = XEXP (addr, 0);
6807 if (GET_CODE (addr) == PLUS)
6809 rtx op0 = XEXP (addr, 0);
6810 rtx op1 = XEXP (addr, 1);
6812 if (CONST_INT_P (op1))
6814 HOST_WIDE_INT offset
6815 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6817 if (offset == 0)
6818 new_addr = op0;
6820 else
6822 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6823 new_addr = gen_rtx_CONST (Pmode, plus);
6827 else
6829 emit_move_insn (base_tmp, addr);
6830 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6834 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
6836 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
6837 new_addr = gen_rtx_CONST (Pmode, plus);
6840 else
6841 gcc_unreachable ();
6843 return new_addr;
6846 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6847 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6848 temporary (BASE_TMP) to fixup the address. Return the new memory address
6849 that is valid for reads or writes to a given register (SCALAR_REG).
6851 This function is expected to be called after reload is completed when we are
6852 splitting insns. The temporary BASE_TMP might be set multiple times with
6853 this code. */
6855 rtx
6856 rs6000_adjust_vec_address (rtx scalar_reg,
6857 rtx mem,
6858 rtx element,
6859 rtx base_tmp,
6860 machine_mode scalar_mode)
6862 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6863 rtx addr = XEXP (mem, 0);
6864 rtx new_addr;
6866 gcc_assert (!reg_mentioned_p (base_tmp, addr));
6867 gcc_assert (!reg_mentioned_p (base_tmp, element));
6869 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6870 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6872 /* Calculate what we need to add to the address to get the element
6873 address. */
6874 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
6876 /* Create the new address pointing to the element within the vector. If we
6877 are adding 0, we don't have to change the address. */
6878 if (element_offset == const0_rtx)
6879 new_addr = addr;
6881 /* A simple indirect address can be converted into a reg + offset
6882 address. */
6883 else if (REG_P (addr) || SUBREG_P (addr))
6884 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6886 /* For references to local static variables, fold a constant offset into the
6887 address. */
6888 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
6889 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
6891 /* Optimize D-FORM addresses with constant offset with a constant element, to
6892 include the element offset in the address directly. */
6893 else if (GET_CODE (addr) == PLUS)
6895 rtx op0 = XEXP (addr, 0);
6896 rtx op1 = XEXP (addr, 1);
6898 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6899 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6901 /* op0 should never be r0, because r0+offset is not valid. But it
6902 doesn't hurt to make sure it is not r0. */
6903 gcc_assert (reg_or_subregno (op0) != 0);
6905 /* D-FORM address with constant element number. */
6906 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6907 rtx offset_rtx = GEN_INT (offset);
6908 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6910 else
6912 /* If we don't have a D-FORM address with a constant element number,
6913 add the two elements in the current address. Then add the offset.
6915 Previously, we tried to add the offset to OP1 and change the
6916 address to an X-FORM format adding OP0 and BASE_TMP, but it became
6917 complicated because we had to verify that op1 was not GPR0 and we
6918 had a constant element offset (due to the way ADDI is defined).
6919 By doing the add of OP0 and OP1 first, and then adding in the
6920 offset, it has the benefit that if D-FORM instructions are
6921 allowed, the offset is part of the memory access to the vector
6922 element. */
6923 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
6924 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6928 else
6930 emit_move_insn (base_tmp, addr);
6931 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6934 /* If the address isn't valid, move the address into the temporary base
6935 register. Some reasons it could not be valid include:
6937 The address offset overflowed the 16 or 34 bit offset size;
6938 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
6939 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
6940 Only X_FORM loads can be done, and the address is D_FORM. */
6942 enum insn_form iform
6943 = address_to_insn_form (new_addr, scalar_mode,
6944 reg_to_non_prefixed (scalar_reg, scalar_mode));
6946 if (iform == INSN_FORM_BAD)
6948 emit_move_insn (base_tmp, new_addr);
6949 new_addr = base_tmp;
6952 return change_address (mem, scalar_mode, new_addr);
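/* Illustrative example (editorial addition): if MEM is a V4SImode access
   at "16(r9)" and ELEMENT is the constant 2, the element offset is 8 and
   the D-FORM case above folds it straight into the address, giving
   "24(r9)" for the SImode access; only if the combined address fails
   address_to_insn_form is it rebuilt in BASE_TMP.  */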
6955 /* Split a variable vec_extract operation into the component instructions. */
6957 void
6958 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6959 rtx tmp_altivec)
6961 machine_mode mode = GET_MODE (src);
6962 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6963 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6964 int byte_shift = exact_log2 (scalar_size);
6966 gcc_assert (byte_shift >= 0);
6968 /* If we are given a memory address, optimize to load just the element. We
6969 don't have to adjust the vector element number on little endian
6970 systems. */
6971 if (MEM_P (src))
6973 emit_move_insn (dest,
6974 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
6975 scalar_mode));
6976 return;
6979 else if (REG_P (src) || SUBREG_P (src))
6981 int num_elements = GET_MODE_NUNITS (mode);
6982 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6983 int bit_shift = 7 - exact_log2 (num_elements);
6984 rtx element2;
6985 unsigned int dest_regno = reg_or_subregno (dest);
6986 unsigned int src_regno = reg_or_subregno (src);
6987 unsigned int element_regno = reg_or_subregno (element);
6989 gcc_assert (REG_P (tmp_gpr));
6991 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6992 a general purpose register. */
6993 if (TARGET_P9_VECTOR
6994 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6995 && INT_REGNO_P (dest_regno)
6996 && ALTIVEC_REGNO_P (src_regno)
6997 && INT_REGNO_P (element_regno))
6999 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7000 rtx element_si = gen_rtx_REG (SImode, element_regno);
7002 if (mode == V16QImode)
7003 emit_insn (BYTES_BIG_ENDIAN
7004 ? gen_vextublx (dest_si, element_si, src)
7005 : gen_vextubrx (dest_si, element_si, src));
7007 else if (mode == V8HImode)
7009 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7010 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7011 emit_insn (BYTES_BIG_ENDIAN
7012 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7013 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7017 else
7019 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7020 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7021 emit_insn (BYTES_BIG_ENDIAN
7022 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7023 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7026 return;
7030 gcc_assert (REG_P (tmp_altivec));
7032 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7033 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7034 will shift the element into the upper position (adding 3 to convert a
7035 byte shift into a bit shift). */
7036 if (scalar_size == 8)
7038 if (!BYTES_BIG_ENDIAN)
7040 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7041 element2 = tmp_gpr;
7043 else
7044 element2 = element;
7046 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7047 bit. */
7048 emit_insn (gen_rtx_SET (tmp_gpr,
7049 gen_rtx_AND (DImode,
7050 gen_rtx_ASHIFT (DImode,
7051 element2,
7052 GEN_INT (6)),
7053 GEN_INT (64))));
7055 else
7057 if (!BYTES_BIG_ENDIAN)
7059 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7061 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7062 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7063 element2 = tmp_gpr;
7065 else
7066 element2 = element;
7068 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7071 /* Get the value into the lower byte of the Altivec register where VSLO
7072 expects it. */
7073 if (TARGET_P9_VECTOR)
7074 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7075 else if (can_create_pseudo_p ())
7076 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7077 else
7079 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7080 emit_move_insn (tmp_di, tmp_gpr);
7081 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7084 /* Do the VSLO to get the value into the final location. */
7085 switch (mode)
7087 case E_V2DFmode:
7088 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7089 return;
7091 case E_V2DImode:
7092 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7093 return;
7095 case E_V4SFmode:
7097 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7098 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7099 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7100 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7101 tmp_altivec));
7103 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7104 return;
7107 case E_V4SImode:
7108 case E_V8HImode:
7109 case E_V16QImode:
7111 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7112 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7113 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7114 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7115 tmp_altivec));
7116 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7117 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7118 GEN_INT (64 - bits_in_element)));
7119 return;
7122 default:
7123 gcc_unreachable ();
7126 return;
7128 else
7129 gcc_unreachable ();
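/* Illustrative example (editorial addition): for a variable V4SImode
   extract from a register, BIT_SHIFT is 7 - log2 (4) == 5, so the
   ashldi3 above turns the element number into element * 32, i.e. the
   byte offset (element * 4) times 8, the form VSLO consumes (the "+ 3"
   byte-to-bit conversion mentioned above).  The selected word ends up in
   the upper doubleword of the VSX register and is then moved to a GPR
   and shifted right by 64 - 32 bits.  */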
7132 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7133 selects whether the alignment is ABI-mandated, optional, or
7134 both ABI-mandated and optional alignment. */
7136 unsigned int
7137 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7139 if (how != align_opt)
7141 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7142 align = 128;
7145 if (how != align_abi)
7147 if (TREE_CODE (type) == ARRAY_TYPE
7148 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7150 if (align < BITS_PER_WORD)
7151 align = BITS_PER_WORD;
7155 return align;
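/* Illustrative example (editorial addition): the 128-bit raise for
   vector types above is skipped only when purely optional alignment is
   requested, since it is ABI-mandated; the raise of QImode-element
   arrays such as "char buf[64]" to BITS_PER_WORD is skipped when only
   ABI alignment is requested, since it is purely an optimization.  */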
7158 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7159 instructions simply ignore the low bits; VSX memory instructions
7160 are aligned to 4 or 8 bytes. */
7162 static bool
7163 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7165 return (STRICT_ALIGNMENT
7166 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7167 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7168 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7169 && (int) align < VECTOR_ALIGN (mode)))));
7172 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7174 bool
7175 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7177 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7179 if (computed != 128)
7181 static bool warned;
7182 if (!warned && warn_psabi)
7184 warned = true;
7185 inform (input_location,
7186 "the layout of aggregates containing vectors with"
7187 " %d-byte alignment has changed in GCC 5",
7188 computed / BITS_PER_UNIT);
7191 /* In current GCC there is no special case. */
7192 return false;
7195 return false;
7198 /* AIX increases natural record alignment to doubleword if the first
7199 field is an FP double while the FP fields remain word aligned. */
7201 unsigned int
7202 rs6000_special_round_type_align (tree type, unsigned int computed,
7203 unsigned int specified)
7205 unsigned int align = MAX (computed, specified);
7206 tree field = TYPE_FIELDS (type);
7208 /* Skip all non-field decls.  */
7209 while (field != NULL
7210 && (TREE_CODE (field) != FIELD_DECL
7211 || DECL_FIELD_ABI_IGNORED (field)))
7212 field = DECL_CHAIN (field);
7214 if (field != NULL && field != type)
7216 type = TREE_TYPE (field);
7217 while (TREE_CODE (type) == ARRAY_TYPE)
7218 type = TREE_TYPE (type);
7220 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7221 align = MAX (align, 64);
7224 return align;
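/* Editorial example (not in the original source): on AIX,

     struct s { double d; int i; };

   is rounded up to doubleword (64-bit) alignment here because the
   first field is a DFmode double, even though the double itself
   remains only word aligned within the record.  */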
7227 /* Darwin increases record alignment to the natural alignment of
7228 the first field. */
7230 unsigned int
7231 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7232 unsigned int specified)
7234 unsigned int align = MAX (computed, specified);
7236 if (TYPE_PACKED (type))
7237 return align;
7239 /* Find the first field, looking down into aggregates. */
7240 do {
7241 tree field = TYPE_FIELDS (type);
7242 /* Skip all non-field decls.  */
7243 while (field != NULL
7244 && (TREE_CODE (field) != FIELD_DECL
7245 || DECL_FIELD_ABI_IGNORED (field)))
7246 field = DECL_CHAIN (field);
7247 if (! field)
7248 break;
7249 /* A packed field does not contribute any extra alignment. */
7250 if (DECL_PACKED (field))
7251 return align;
7252 type = TREE_TYPE (field);
7253 while (TREE_CODE (type) == ARRAY_TYPE)
7254 type = TREE_TYPE (type);
7255 } while (AGGREGATE_TYPE_P (type));
7257 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7258 align = MAX (align, TYPE_ALIGN (type));
7260 return align;
7263 /* Return 1 for an operand in small memory on V.4/eabi. */
7265 int
7266 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7267 machine_mode mode ATTRIBUTE_UNUSED)
7269 #if TARGET_ELF
7270 rtx sym_ref;
7272 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7273 return 0;
7275 if (DEFAULT_ABI != ABI_V4)
7276 return 0;
7278 if (SYMBOL_REF_P (op))
7279 sym_ref = op;
7281 else if (GET_CODE (op) != CONST
7282 || GET_CODE (XEXP (op, 0)) != PLUS
7283 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7284 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7285 return 0;
7287 else
7289 rtx sum = XEXP (op, 0);
7290 HOST_WIDE_INT summand;
7292 /* We have to be careful here, because it is the referenced address
7293 that must be 32k from _SDA_BASE_, not just the symbol. */
7294 summand = INTVAL (XEXP (sum, 1));
7295 if (summand < 0 || summand > g_switch_value)
7296 return 0;
7298 sym_ref = XEXP (sum, 0);
7301 return SYMBOL_REF_SMALL_P (sym_ref);
7302 #else
7303 return 0;
7304 #endif
7307 /* Return true if either operand is a general purpose register. */
7309 bool
7310 gpr_or_gpr_p (rtx op0, rtx op1)
7312 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7313 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7316 /* Return true if this is a move direct operation between GPR registers and
7317 floating point/VSX registers. */
7319 bool
7320 direct_move_p (rtx op0, rtx op1)
7322 if (!REG_P (op0) || !REG_P (op1))
7323 return false;
7325 if (!TARGET_DIRECT_MOVE)
7326 return false;
7328 int regno0 = REGNO (op0);
7329 int regno1 = REGNO (op1);
7330 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7331 return false;
7333 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7334 return true;
7336 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7337 return true;
7339 return false;
7342 /* Return true if ADDR is an acceptable address for a quad memory
7343 operation of mode MODE (either LQ/STQ for general purpose registers,
7344 or LXV/STXV for vector registers under ISA 3.0).  STRICT requests
7345 strict address checking, in which only hard registers are acceptable
7346 as the base register. */
7348 bool
7349 quad_address_p (rtx addr, machine_mode mode, bool strict)
7351 rtx op0, op1;
7353 if (GET_MODE_SIZE (mode) != 16)
7354 return false;
7356 if (legitimate_indirect_address_p (addr, strict))
7357 return true;
7359 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7360 return false;
7362 /* Is this a valid prefixed address? If the bottom four bits of the offset
7363 are non-zero, we could use a prefixed instruction (which does not have the
7364 DQ-form constraint that the traditional instruction had) instead of
7365 forcing the unaligned offset to a GPR. */
7366 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7367 return true;
7369 if (GET_CODE (addr) != PLUS)
7370 return false;
7372 op0 = XEXP (addr, 0);
7373 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7374 return false;
7376 op1 = XEXP (addr, 1);
7377 if (!CONST_INT_P (op1))
7378 return false;
7380 return quad_address_offset_p (INTVAL (op1));
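/* Editorial note (not in the original source): quad_address_offset_p,
   defined elsewhere in this file, accepts a signed 16-bit offset whose
   low four bits are zero -- the DQ instruction form.  For example,
   with a GPR base r9 (registers illustrative):

     lxv vs0,32(r9)   -- offset 32 is a multiple of 16: valid
     lxv vs0,8(r9)    -- offset 8 is not; it needs a prefixed
			 instruction or the address forced to a GPR  */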
7383 /* Return true if this is a load or store quad operation. This function does
7384 not handle the atomic quad memory instructions. */
7386 bool
7387 quad_load_store_p (rtx op0, rtx op1)
7389 bool ret;
7391 if (!TARGET_QUAD_MEMORY)
7392 ret = false;
7394 else if (REG_P (op0) && MEM_P (op1))
7395 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7396 && quad_memory_operand (op1, GET_MODE (op1))
7397 && !reg_overlap_mentioned_p (op0, op1));
7399 else if (MEM_P (op0) && REG_P (op1))
7400 ret = (quad_memory_operand (op0, GET_MODE (op0))
7401 && quad_int_reg_operand (op1, GET_MODE (op1)));
7403 else
7404 ret = false;
7406 if (TARGET_DEBUG_ADDR)
7408 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7409 ret ? "true" : "false");
7410 debug_rtx (gen_rtx_SET (op0, op1));
7413 return ret;
7416 /* Given an address, return a constant offset term if one exists. */
7418 static rtx
7419 address_offset (rtx op)
7421 if (GET_CODE (op) == PRE_INC
7422 || GET_CODE (op) == PRE_DEC)
7423 op = XEXP (op, 0);
7424 else if (GET_CODE (op) == PRE_MODIFY
7425 || GET_CODE (op) == LO_SUM)
7426 op = XEXP (op, 1);
7428 if (GET_CODE (op) == CONST)
7429 op = XEXP (op, 0);
7431 if (GET_CODE (op) == PLUS)
7432 op = XEXP (op, 1);
7434 if (CONST_INT_P (op))
7435 return op;
7437 return NULL_RTX;
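/* Editorial examples (not in the original source) of what
   address_offset returns:

     (plus (reg) (const_int 8))                      -> (const_int 8)
     (lo_sum (reg) (const (plus (symbol_ref) (const_int 4))))
						     -> (const_int 4)
     (reg)                                           -> NULL_RTX  */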
7440 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7441 the mode. If we can't find (or don't know) the alignment of the symbol
7442 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7443 should be pessimistic]. Offsets are validated in the same way as for
7444 reg + offset. */
7445 static bool
7446 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7448 /* We should not get here with this. */
7449 gcc_checking_assert (! mode_supports_dq_form (mode));
7451 if (GET_CODE (x) == CONST)
7452 x = XEXP (x, 0);
7454 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7455 x = XVECEXP (x, 0, 0);
7457 rtx sym = NULL_RTX;
7458 unsigned HOST_WIDE_INT offset = 0;
7460 if (GET_CODE (x) == PLUS)
7462 sym = XEXP (x, 0);
7463 if (! SYMBOL_REF_P (sym))
7464 return false;
7465 if (!CONST_INT_P (XEXP (x, 1)))
7466 return false;
7467 offset = INTVAL (XEXP (x, 1));
7469 else if (SYMBOL_REF_P (x))
7470 sym = x;
7471 else if (CONST_INT_P (x))
7472 offset = INTVAL (x);
7473 else if (GET_CODE (x) == LABEL_REF)
7474 offset = 0; // We assume code labels are Pmode aligned
7475 else
7476 return false; // not sure what we have here.
7478 /* If we don't know the alignment of the thing to which the symbol refers,
7479 we assume optimistically it is "enough".
7480 ??? maybe we should be pessimistic instead. */
7481 unsigned align = 0;
7483 if (sym)
7485 tree decl = SYMBOL_REF_DECL (sym);
7486 #if TARGET_MACHO
7487 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7488 /* The decl in an indirection symbol is the original one, which might
7489 be less aligned than the indirection. Our indirections are always
7490 pointer-aligned. */
7492 else
7493 #endif
7494 if (decl && DECL_ALIGN (decl))
7495 align = DECL_ALIGN_UNIT (decl);
7498 unsigned int extra = 0;
7499 switch (mode)
7501 case E_DFmode:
7502 case E_DDmode:
7503 case E_DImode:
7504 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7505 addressing. */
7506 if (VECTOR_MEM_VSX_P (mode))
7507 return false;
7509 if (!TARGET_POWERPC64)
7510 extra = 4;
7511 else if ((offset & 3) || (align & 3))
7512 return false;
7513 break;
7515 case E_TFmode:
7516 case E_IFmode:
7517 case E_KFmode:
7518 case E_TDmode:
7519 case E_TImode:
7520 case E_PTImode:
7521 extra = 8;
7522 if (!TARGET_POWERPC64)
7523 extra = 12;
7524 else if ((offset & 3) || (align & 3))
7525 return false;
7526 break;
7528 default:
7529 break;
7532 /* We only care if the access(es) would cause a change to the high part. */
7533 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7534 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
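/* Editorial note (not in the original source): the idiom
   ((offset & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits
   of OFFSET.  Worked example for offset = 0x1fffc:

     low 16 bits = 0xfffc
     (0xfffc ^ 0x8000) - 0x8000 = 0x7ffc - 0x8000 = -4

   so only the 16-bit low part is range-checked; the high part was
   already folded into the companion HIGH of the lo_sum.  */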
7537 /* Return true if the MEM operand is a memory operand suitable for use
7538 with a (full width, possibly multiple) gpr load/store. On
7539 powerpc64 this means the offset must be divisible by 4.
7540 Implements 'Y' constraint.
7542 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7543 a constraint function we know the operand has satisfied a suitable
7544 memory predicate.
7546 Offsetting a lo_sum should not be allowed, except where we know by
7547 alignment that a 32k boundary is not crossed. Note that by
7548 "offsetting" here we mean a further offset to access parts of the
7549 MEM. It's fine to have a lo_sum where the inner address is offset
7550 from a sym, since the same sym+offset will appear in the high part
7551 of the address calculation. */
7553 bool
7554 mem_operand_gpr (rtx op, machine_mode mode)
7556 unsigned HOST_WIDE_INT offset;
7557 int extra;
7558 rtx addr = XEXP (op, 0);
7560 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7561 if (TARGET_UPDATE
7562 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7563 && mode_supports_pre_incdec_p (mode)
7564 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7565 return true;
7567 /* Allow prefixed instructions if supported. If the bottom two bits of the
7568 offset are non-zero, we could use a prefixed instruction (which does not
7569 have the DS-form constraint that the traditional instruction had) instead
7570 of forcing the unaligned offset to a GPR. */
7571 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7572 return true;
7574 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7575 really OK. Doing this early avoids teaching all the other machinery
7576 about them. */
7577 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7578 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7580 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7581 if (!rs6000_offsettable_memref_p (op, mode, false))
7582 return false;
7584 op = address_offset (addr);
7585 if (op == NULL_RTX)
7586 return true;
7588 offset = INTVAL (op);
7589 if (TARGET_POWERPC64 && (offset & 3) != 0)
7590 return false;
7592 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7593 if (extra < 0)
7594 extra = 0;
7596 if (GET_CODE (addr) == LO_SUM)
7597 /* For lo_sum addresses, we must allow any offset except one that
7598 causes a wrap, so test only the low 16 bits. */
7599 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7601 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
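/* Editorial example (not in the original source): on powerpc64 the
   doubleword loads/stores are DS-form, so the offset must be a
   multiple of 4 (registers illustrative):

     ld r3,4(r9)   -- valid
     ld r3,6(r9)   -- rejected above; such an access needs a prefixed
		      load (pld) or an indexed form (ldx)  */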
7604 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7605 enforce an offset divisible by 4 even for 32-bit. */
7607 bool
7608 mem_operand_ds_form (rtx op, machine_mode mode)
7610 unsigned HOST_WIDE_INT offset;
7611 int extra;
7612 rtx addr = XEXP (op, 0);
7614 /* Allow prefixed instructions if supported. If the bottom two bits of the
7615 offset are non-zero, we could use a prefixed instruction (which does not
7616 have the DS-form constraint that the traditional instruction had) instead
7617 of forcing the unaligned offset to a GPR. */
7618 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7619 return true;
7621 if (!offsettable_address_p (false, mode, addr))
7622 return false;
7624 op = address_offset (addr);
7625 if (op == NULL_RTX)
7626 return true;
7628 offset = INTVAL (op);
7629 if ((offset & 3) != 0)
7630 return false;
7632 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7633 if (extra < 0)
7634 extra = 0;
7636 if (GET_CODE (addr) == LO_SUM)
7637 /* For lo_sum addresses, we must allow any offset except one that
7638 causes a wrap, so test only the low 16 bits. */
7639 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7641 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7644 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7646 static bool
7647 reg_offset_addressing_ok_p (machine_mode mode)
7649 switch (mode)
7651 case E_V16QImode:
7652 case E_V8HImode:
7653 case E_V4SFmode:
7654 case E_V4SImode:
7655 case E_V2DFmode:
7656 case E_V2DImode:
7657 case E_V1TImode:
7658 case E_TImode:
7659 case E_TFmode:
7660 case E_KFmode:
7661 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7662 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7663 a vector mode, if we want to use the VSX registers to move it around,
7664 we need to restrict ourselves to reg+reg addressing. Similarly for
7665 IEEE 128-bit floating point that is passed in a single vector
7666 register. */
7667 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7668 return mode_supports_dq_form (mode);
7669 break;
7671 case E_SDmode:
7672 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7673 addressing for the LFIWZX and STFIWX instructions. */
7674 if (TARGET_NO_SDMODE_STACK)
7675 return false;
7676 break;
7678 default:
7679 break;
7682 return true;
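/* Editorial note (not in the original source): e.g. for V4SImode
   without ISA 3.0 d-form support, the function above returns false, so
   the legitimizers below fall back to reg+reg (indexed) addressing and
   force any constant offset into a register first.  */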
7685 static bool
7686 virtual_stack_registers_memory_p (rtx op)
7688 int regnum;
7690 if (REG_P (op))
7691 regnum = REGNO (op);
7693 else if (GET_CODE (op) == PLUS
7694 && REG_P (XEXP (op, 0))
7695 && CONST_INT_P (XEXP (op, 1)))
7696 regnum = REGNO (XEXP (op, 0));
7698 else
7699 return false;
7701 return (regnum >= FIRST_VIRTUAL_REGISTER
7702 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7705 /* Return true if a MODE sized memory access to OP plus OFFSET
7706 is known to not straddle a 32k boundary. This function is used
7707 to determine whether -mcmodel=medium code can use TOC pointer
7708 relative addressing for OP. This means the alignment of the TOC
7709 pointer must also be taken into account, and unfortunately that is
7710 only 8 bytes. */
7712 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7713 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7714 #endif
7716 static bool
7717 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7718 machine_mode mode)
7720 tree decl;
7721 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7723 if (!SYMBOL_REF_P (op))
7724 return false;
7726 /* ISA 3.0 vector d-form addressing is restricted; don't allow
7727 SYMBOL_REF. */
7728 if (mode_supports_dq_form (mode))
7729 return false;
7731 dsize = GET_MODE_SIZE (mode);
7732 decl = SYMBOL_REF_DECL (op);
7733 if (!decl)
7735 if (dsize == 0)
7736 return false;
7738 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7739 replacing memory addresses with an anchor plus offset. We
7740 could find the decl by rummaging around in the block->objects
7741 VEC for the given offset but that seems like too much work. */
7742 dalign = BITS_PER_UNIT;
7743 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7744 && SYMBOL_REF_ANCHOR_P (op)
7745 && SYMBOL_REF_BLOCK (op) != NULL)
7747 struct object_block *block = SYMBOL_REF_BLOCK (op);
7749 dalign = block->alignment;
7750 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7752 else if (CONSTANT_POOL_ADDRESS_P (op))
7754 /* It would be nice to have get_pool_align().. */
7755 machine_mode cmode = get_pool_mode (op);
7757 dalign = GET_MODE_ALIGNMENT (cmode);
7760 else if (DECL_P (decl))
7762 dalign = DECL_ALIGN (decl);
7764 if (dsize == 0)
7766 /* Allow BLKmode when the entire object is known to not
7767 cross a 32k boundary. */
7768 if (!DECL_SIZE_UNIT (decl))
7769 return false;
7771 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7772 return false;
7774 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7775 if (dsize > 32768)
7776 return false;
7778 dalign /= BITS_PER_UNIT;
7779 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7780 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7781 return dalign >= dsize;
7784 else
7785 gcc_unreachable ();
7787 /* Find how many bits of the alignment we know for this access. */
7788 dalign /= BITS_PER_UNIT;
7789 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7790 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7791 mask = dalign - 1;
7792 lsb = offset & -offset;
7793 mask &= lsb - 1;
7794 dalign = mask + 1;
7796 return dalign >= dsize;
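/* Editorial worked example (not in the original source) of the mask
   arithmetic above, with the TOC-capped dalign = 8 and offset = 4:

     lsb    = 4 & -4            = 4
     mask   = (8 - 1) & (4 - 1) = 3
     dalign = 3 + 1             = 4

   i.e. only 4 bytes of alignment are known at that offset, so only an
   access with dsize <= 4 is guaranteed not to straddle a 32k
   boundary.  */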
7799 static bool
7800 constant_pool_expr_p (rtx op)
7802 rtx base, offset;
7804 split_const (op, &base, &offset);
7805 return (SYMBOL_REF_P (base)
7806 && CONSTANT_POOL_ADDRESS_P (base)
7807 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7810 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7811 use that as the register to put the HIGH value into if register allocation
7812 is already done. */
7814 rtx
7815 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7817 rtx tocrel, tocreg, hi;
7819 gcc_assert (TARGET_TOC);
7821 if (TARGET_DEBUG_ADDR)
7823 if (SYMBOL_REF_P (symbol))
7824 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7825 XSTR (symbol, 0));
7826 else
7828 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7829 GET_RTX_NAME (GET_CODE (symbol)));
7830 debug_rtx (symbol);
7834 if (!can_create_pseudo_p ())
7835 df_set_regs_ever_live (TOC_REGISTER, true);
7837 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7838 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7839 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7840 return tocrel;
7842 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7843 if (largetoc_reg != NULL)
7845 emit_move_insn (largetoc_reg, hi);
7846 hi = largetoc_reg;
7848 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
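/* Editorial sketch (not in the original source), assuming 64-bit ELF
   and a symbol SYM with a TOC entry; registers are illustrative:

     -mcmodel=small     ld    r3,SYM@toc(r2)
     -mcmodel=medium    addis r9,r2,SYM@toc@ha
			ld    r3,SYM@toc@l(r9)

   the second form being the split HIGH/LO_SUM pair built above.  */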
7851 /* These are only used to pass through from print_operand/print_operand_address
7852 to rs6000_output_addr_const_extra over the intervening function
7853 output_addr_const which is not target code. */
7854 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7856 /* Return true if OP is a toc pointer relative address (the output
7857 of create_TOC_reference). If STRICT, do not match non-split
7858 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7859 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7860 TOCREL_OFFSET_RET respectively. */
7862 bool
7863 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7864 const_rtx *tocrel_offset_ret)
7866 if (!TARGET_TOC)
7867 return false;
7869 if (TARGET_CMODEL != CMODEL_SMALL)
7871 /* When strict, ensure we have everything tidy. */
7872 if (strict
7873 && !(GET_CODE (op) == LO_SUM
7874 && REG_P (XEXP (op, 0))
7875 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7876 return false;
7878 /* When not strict, allow non-split TOC addresses and also allow
7879 (lo_sum (high ..)) TOC addresses created during reload. */
7880 if (GET_CODE (op) == LO_SUM)
7881 op = XEXP (op, 1);
7884 const_rtx tocrel_base = op;
7885 const_rtx tocrel_offset = const0_rtx;
7887 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7889 tocrel_base = XEXP (op, 0);
7890 tocrel_offset = XEXP (op, 1);
7893 if (tocrel_base_ret)
7894 *tocrel_base_ret = tocrel_base;
7895 if (tocrel_offset_ret)
7896 *tocrel_offset_ret = tocrel_offset;
7898 return (GET_CODE (tocrel_base) == UNSPEC
7899 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7900 && REG_P (XVECEXP (tocrel_base, 0, 1))
7901 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7904 /* Return true if X is a constant pool address, and also for cmodel=medium
7905 if X is a toc-relative address known to be offsettable within MODE. */
7907 bool
7908 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7909 bool strict)
7911 const_rtx tocrel_base, tocrel_offset;
7912 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7913 && (TARGET_CMODEL != CMODEL_MEDIUM
7914 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7915 || mode == QImode
7916 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7917 INTVAL (tocrel_offset), mode)));
7920 static bool
7921 legitimate_small_data_p (machine_mode mode, rtx x)
7923 return (DEFAULT_ABI == ABI_V4
7924 && !flag_pic && !TARGET_TOC
7925 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7926 && small_data_operand (x, mode));
7929 bool
7930 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7931 bool strict, bool worst_case)
7933 unsigned HOST_WIDE_INT offset;
7934 unsigned int extra;
7936 if (GET_CODE (x) != PLUS)
7937 return false;
7938 if (!REG_P (XEXP (x, 0)))
7939 return false;
7940 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7941 return false;
7942 if (mode_supports_dq_form (mode))
7943 return quad_address_p (x, mode, strict);
7944 if (!reg_offset_addressing_ok_p (mode))
7945 return virtual_stack_registers_memory_p (x);
7946 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7947 return true;
7948 if (!CONST_INT_P (XEXP (x, 1)))
7949 return false;
7951 offset = INTVAL (XEXP (x, 1));
7952 extra = 0;
7953 switch (mode)
7955 case E_DFmode:
7956 case E_DDmode:
7957 case E_DImode:
7958 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7959 addressing. */
7960 if (VECTOR_MEM_VSX_P (mode))
7961 return false;
7963 if (!worst_case)
7964 break;
7965 if (!TARGET_POWERPC64)
7966 extra = 4;
7967 else if (offset & 3)
7968 return false;
7969 break;
7971 case E_TFmode:
7972 case E_IFmode:
7973 case E_KFmode:
7974 case E_TDmode:
7975 case E_TImode:
7976 case E_PTImode:
7977 extra = 8;
7978 if (!worst_case)
7979 break;
7980 if (!TARGET_POWERPC64)
7981 extra = 12;
7982 else if (offset & 3)
7983 return false;
7984 break;
7986 default:
7987 break;
7990 if (TARGET_PREFIXED)
7991 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7992 else
7993 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
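/* Editorial example (not in the original source): with WORST_CASE set,
   a TFmode value on 32-bit may occupy four GPR words, so EXTRA = 12
   makes the test require that both OFFSET and OFFSET + 12 fit in a
   signed 16-bit displacement, keeping every word addressable.  */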
7996 bool
7997 legitimate_indexed_address_p (rtx x, int strict)
7999 rtx op0, op1;
8001 if (GET_CODE (x) != PLUS)
8002 return false;
8004 op0 = XEXP (x, 0);
8005 op1 = XEXP (x, 1);
8007 return (REG_P (op0) && REG_P (op1)
8008 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8009 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8010 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8011 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8014 bool
8015 avoiding_indexed_address_p (machine_mode mode)
8017 /* Avoid indexed addressing for modes that have non-indexed
8018 load/store instruction forms. */
8019 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8022 bool
8023 legitimate_indirect_address_p (rtx x, int strict)
8025 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8028 bool
8029 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8031 if (!TARGET_MACHO || !flag_pic
8032 || mode != SImode || !MEM_P (x))
8033 return false;
8034 x = XEXP (x, 0);
8036 if (GET_CODE (x) != LO_SUM)
8037 return false;
8038 if (!REG_P (XEXP (x, 0)))
8039 return false;
8040 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8041 return false;
8042 x = XEXP (x, 1);
8044 return CONSTANT_P (x);
8047 static bool
8048 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8050 if (GET_CODE (x) != LO_SUM)
8051 return false;
8052 if (!REG_P (XEXP (x, 0)))
8053 return false;
8054 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8055 return false;
8056 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8057 if (mode_supports_dq_form (mode))
8058 return false;
8059 x = XEXP (x, 1);
8061 if (TARGET_ELF || TARGET_MACHO)
8063 bool large_toc_ok;
8065 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8066 return false;
8067 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8068 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8069 recognizes some LO_SUM addresses as valid although this
8070 function says the opposite. In most cases LRA can, through
8071 various transformations, generate correct code for address
8072 reloads; only some LO_SUM cases defeat it. So we need to add
8073 code here saying that those addresses are still valid. */
8074 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8075 && small_toc_ref (x, VOIDmode));
8076 if (TARGET_TOC && ! large_toc_ok)
8077 return false;
8078 if (GET_MODE_NUNITS (mode) != 1)
8079 return false;
8080 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8081 && !(/* ??? Assume floating point reg based on mode? */
8082 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8083 return false;
8085 return CONSTANT_P (x) || large_toc_ok;
8088 return false;
8092 /* Try machine-dependent ways of modifying an illegitimate address
8093 to be legitimate. If we find one, return the new, valid address.
8094 This is used from only one place: `memory_address' in explow.c.
8096 OLDX is the address as it was before break_out_memory_refs was
8097 called. In some cases it is useful to look at this to decide what
8098 needs to be done.
8100 It is always safe for this function to do nothing. It exists to
8101 recognize opportunities to optimize the output.
8103 On RS/6000, first check for the sum of a register with a constant
8104 integer that is out of range. If so, generate code to add the
8105 constant with the low-order 16 bits masked to the register and force
8106 this result into another register (this can be done with `cau').
8107 Then generate an address of REG+(CONST&0xffff), allowing for the
8108 possibility of bit 16 being a one.
8110 Then check for the sum of a register and something not constant, try to
8111 load the other things into a register and return the sum. */
8113 static rtx
8114 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8115 machine_mode mode)
8117 unsigned int extra;
8119 if (!reg_offset_addressing_ok_p (mode)
8120 || mode_supports_dq_form (mode))
8122 if (virtual_stack_registers_memory_p (x))
8123 return x;
8125 /* In theory we should not be seeing addresses of the form reg+0,
8126 but just in case it is generated, optimize it away. */
8127 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8128 return force_reg (Pmode, XEXP (x, 0));
8130 /* For TImode with load/store quad, restrict addresses to just a single
8131 pointer, so it works with both GPRs and VSX registers. */
8132 /* Make sure both operands are registers. */
8133 else if (GET_CODE (x) == PLUS
8134 && (mode != TImode || !TARGET_VSX))
8135 return gen_rtx_PLUS (Pmode,
8136 force_reg (Pmode, XEXP (x, 0)),
8137 force_reg (Pmode, XEXP (x, 1)));
8138 else
8139 return force_reg (Pmode, x);
8141 if (SYMBOL_REF_P (x))
8143 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8144 if (model != 0)
8145 return rs6000_legitimize_tls_address (x, model);
8148 extra = 0;
8149 switch (mode)
8151 case E_TFmode:
8152 case E_TDmode:
8153 case E_TImode:
8154 case E_PTImode:
8155 case E_IFmode:
8156 case E_KFmode:
8157 /* As in legitimate_offset_address_p we do not assume
8158 worst-case. The mode here is just a hint as to the registers
8159 used. A TImode is usually in gprs, but may actually be in
8160 fprs. Leave worst-case scenario for reload to handle via
8161 insn constraints. PTImode is only GPRs. */
8162 extra = 8;
8163 break;
8164 default:
8165 break;
8168 if (GET_CODE (x) == PLUS
8169 && REG_P (XEXP (x, 0))
8170 && CONST_INT_P (XEXP (x, 1))
8171 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8172 >= 0x10000 - extra))
8174 HOST_WIDE_INT high_int, low_int;
8175 rtx sum;
8176 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8177 if (low_int >= 0x8000 - extra)
8178 low_int = 0;
8179 high_int = INTVAL (XEXP (x, 1)) - low_int;
8180 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8181 GEN_INT (high_int)), 0);
8182 return plus_constant (Pmode, sum, low_int);
8184 else if (GET_CODE (x) == PLUS
8185 && REG_P (XEXP (x, 0))
8186 && !CONST_INT_P (XEXP (x, 1))
8187 && GET_MODE_NUNITS (mode) == 1
8188 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8189 || (/* ??? Assume floating point reg based on mode? */
8190 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8191 && !avoiding_indexed_address_p (mode))
8193 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8194 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8196 else if ((TARGET_ELF
8197 #if TARGET_MACHO
8198 || !MACHO_DYNAMIC_NO_PIC_P
8199 #endif
8201 && TARGET_32BIT
8202 && TARGET_NO_TOC_OR_PCREL
8203 && !flag_pic
8204 && !CONST_INT_P (x)
8205 && !CONST_WIDE_INT_P (x)
8206 && !CONST_DOUBLE_P (x)
8207 && CONSTANT_P (x)
8208 && GET_MODE_NUNITS (mode) == 1
8209 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8210 || (/* ??? Assume floating point reg based on mode? */
8211 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8213 rtx reg = gen_reg_rtx (Pmode);
8214 if (TARGET_ELF)
8215 emit_insn (gen_elf_high (reg, x));
8216 else
8217 emit_insn (gen_macho_high (Pmode, reg, x));
8218 return gen_rtx_LO_SUM (Pmode, reg, x);
8220 else if (TARGET_TOC
8221 && SYMBOL_REF_P (x)
8222 && constant_pool_expr_p (x)
8223 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8224 return create_TOC_reference (x, NULL_RTX);
8225 else
8226 return x;
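/* Editorial worked example (not in the original source) of the
   high/low split above, for (plus (reg) (const_int 0x12340)) and
   extra = 0:

     low_int  = ((0x2340 ^ 0x8000) - 0x8000) = 0x2340
     high_int = 0x12340 - 0x2340             = 0x10000

   which is emitted roughly as (registers illustrative):

     addis r10,r9,1        -- r10 = r9 + 0x10000
     ...   0x2340(r10)     -- the remaining 16-bit offset  */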
8229 /* Debug version of rs6000_legitimize_address. */
8230 static rtx
8231 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8233 rtx ret;
8234 rtx_insn *insns;
8236 start_sequence ();
8237 ret = rs6000_legitimize_address (x, oldx, mode);
8238 insns = get_insns ();
8239 end_sequence ();
8241 if (ret != x)
8243 fprintf (stderr,
8244 "\nrs6000_legitimize_address: mode %s, old code %s, "
8245 "new code %s, modified\n",
8246 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8247 GET_RTX_NAME (GET_CODE (ret)));
8249 fprintf (stderr, "Original address:\n");
8250 debug_rtx (x);
8252 fprintf (stderr, "oldx:\n");
8253 debug_rtx (oldx);
8255 fprintf (stderr, "New address:\n");
8256 debug_rtx (ret);
8258 if (insns)
8260 fprintf (stderr, "Insns added:\n");
8261 debug_rtx_list (insns, 20);
8264 else
8266 fprintf (stderr,
8267 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8268 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8270 debug_rtx (x);
8273 if (insns)
8274 emit_insn (insns);
8276 return ret;
8279 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8280 We need to emit DTP-relative relocations. */
8282 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8283 static void
8284 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8286 switch (size)
8288 case 4:
8289 fputs ("\t.long\t", file);
8290 break;
8291 case 8:
8292 fputs (DOUBLE_INT_ASM_OP, file);
8293 break;
8294 default:
8295 gcc_unreachable ();
8297 output_addr_const (file, x);
8298 if (TARGET_ELF)
8299 fputs ("@dtprel+0x8000", file);
8300 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8302 switch (SYMBOL_REF_TLS_MODEL (x))
8304 case 0:
8305 break;
8306 case TLS_MODEL_LOCAL_EXEC:
8307 fputs ("@le", file);
8308 break;
8309 case TLS_MODEL_INITIAL_EXEC:
8310 fputs ("@ie", file);
8311 break;
8312 case TLS_MODEL_GLOBAL_DYNAMIC:
8313 case TLS_MODEL_LOCAL_DYNAMIC:
8314 fputs ("@m", file);
8315 break;
8316 default:
8317 gcc_unreachable ();
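/* Editorial example (not in the original source): for SIZE == 8 on
   64-bit ELF, where DOUBLE_INT_ASM_OP is commonly "\t.quad\t", the
   function above would emit something like

     .quad	x@dtprel+0x8000

   the 0x8000 being the usual PowerPC DTP offset bias.  */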
8322 /* Return true if X is a symbol that refers to real (rather than emulated)
8323 TLS. */
8325 static bool
8326 rs6000_real_tls_symbol_ref_p (rtx x)
8328 return (SYMBOL_REF_P (x)
8329 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8332 /* In the name of slightly smaller debug output, and to cater to
8333 general assembler lossage, recognize various UNSPEC sequences
8334 and turn them back into a direct symbol reference. */
8336 static rtx
8337 rs6000_delegitimize_address (rtx orig_x)
8339 rtx x, y, offset;
8341 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8342 orig_x = XVECEXP (orig_x, 0, 0);
8344 orig_x = delegitimize_mem_from_attrs (orig_x);
8346 x = orig_x;
8347 if (MEM_P (x))
8348 x = XEXP (x, 0);
8350 y = x;
8351 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8352 y = XEXP (y, 1);
8354 offset = NULL_RTX;
8355 if (GET_CODE (y) == PLUS
8356 && GET_MODE (y) == Pmode
8357 && CONST_INT_P (XEXP (y, 1)))
8359 offset = XEXP (y, 1);
8360 y = XEXP (y, 0);
8363 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8365 y = XVECEXP (y, 0, 0);
8367 #ifdef HAVE_AS_TLS
8368 /* Do not associate thread-local symbols with the original
8369 constant pool symbol. */
8370 if (TARGET_XCOFF
8371 && SYMBOL_REF_P (y)
8372 && CONSTANT_POOL_ADDRESS_P (y)
8373 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8374 return orig_x;
8375 #endif
8377 if (offset != NULL_RTX)
8378 y = gen_rtx_PLUS (Pmode, y, offset);
8379 if (!MEM_P (orig_x))
8380 return y;
8381 else
8382 return replace_equiv_address_nv (orig_x, y);
8385 if (TARGET_MACHO
8386 && GET_CODE (orig_x) == LO_SUM
8387 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8389 y = XEXP (XEXP (orig_x, 1), 0);
8390 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8391 return XVECEXP (y, 0, 0);
8394 return orig_x;
8397 /* Return true if X shouldn't be emitted into the debug info.
8398 The linker doesn't like .toc section references from
8399 .debug_* sections, so reject .toc section symbols. */
8401 static bool
8402 rs6000_const_not_ok_for_debug_p (rtx x)
8404 if (GET_CODE (x) == UNSPEC)
8405 return true;
8406 if (SYMBOL_REF_P (x)
8407 && CONSTANT_POOL_ADDRESS_P (x))
8409 rtx c = get_pool_constant (x);
8410 machine_mode cmode = get_pool_mode (x);
8411 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8412 return true;
8415 return false;
8418 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8420 static bool
8421 rs6000_legitimate_combined_insn (rtx_insn *insn)
8423 int icode = INSN_CODE (insn);
8425 /* Reject creating doloop insns. Combine should not be allowed
8426 to create these for a number of reasons:
8427 1) In a nested loop, if combine creates one of these in an
8428 outer loop and the register allocator happens to allocate ctr
8429 to the outer loop insn, then the inner loop can't use ctr.
8430 Inner loops ought to be more highly optimized.
8431 2) Combine often wants to create one of these from what was
8432 originally a three insn sequence, first combining the three
8433 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8434 allocated ctr, the splitter takes us back to the three insn
8435 sequence. It's better to stop combine at the two insn
8436 sequence.
8437 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8438 insns, the register allocator sometimes uses floating point
8439 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8440 jump insn and output reloads are not implemented for jumps,
8441 the ctrsi/ctrdi splitters need to handle all possible cases.
8442 That's a pain, and it gets to be seriously difficult when a
8443 splitter that runs after reload needs memory to transfer from
8444 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8445 for the difficult case. It's better to not create problems
8446 in the first place. */
8447 if (icode != CODE_FOR_nothing
8448 && (icode == CODE_FOR_bdz_si
8449 || icode == CODE_FOR_bdz_di
8450 || icode == CODE_FOR_bdnz_si
8451 || icode == CODE_FOR_bdnz_di
8452 || icode == CODE_FOR_bdztf_si
8453 || icode == CODE_FOR_bdztf_di
8454 || icode == CODE_FOR_bdnztf_si
8455 || icode == CODE_FOR_bdnztf_di))
8456 return false;
8458 return true;
8461 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8463 static GTY(()) rtx rs6000_tls_symbol;
8464 static rtx
8465 rs6000_tls_get_addr (void)
8467 if (!rs6000_tls_symbol)
8468 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8470 return rs6000_tls_symbol;
8473 /* Construct the SYMBOL_REF for TLS GOT references. */
8475 static GTY(()) rtx rs6000_got_symbol;
8476 static rtx
8477 rs6000_got_sym (void)
8479 if (!rs6000_got_symbol)
8481 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8482 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8483 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8486 return rs6000_got_symbol;
8489 /* AIX Thread-Local Address support. */
8491 static rtx
8492 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8494 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8495 const char *name;
8496 char *tlsname;
8498 name = XSTR (addr, 0);
8499 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8500 or the symbol will be in TLS private data section. */
8501 if (name[strlen (name) - 1] != ']'
8502 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8503 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8505 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8506 strcpy (tlsname, name);
8507 strcat (tlsname,
8508 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8509 tlsaddr = copy_rtx (addr);
8510 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8512 else
8513 tlsaddr = addr;
8515 /* Place addr into TOC constant pool. */
8516 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8518 /* Output the TOC entry and create the MEM referencing the value. */
8519 if (constant_pool_expr_p (XEXP (sym, 0))
8520 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8522 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8523 mem = gen_const_mem (Pmode, tocref);
8524 set_mem_alias_set (mem, get_TOC_alias_set ());
8526 else
8527 return sym;
8529 /* Use global-dynamic for local-dynamic. */
8530 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8531 || model == TLS_MODEL_LOCAL_DYNAMIC)
8533 /* Create new TOC reference for @m symbol. */
8534 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8535 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8536 strcpy (tlsname, "*LCM");
8537 strcat (tlsname, name + 3);
8538 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8539 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8540 tocref = create_TOC_reference (modaddr, NULL_RTX);
8541 rtx modmem = gen_const_mem (Pmode, tocref);
8542 set_mem_alias_set (modmem, get_TOC_alias_set ());
8544 rtx modreg = gen_reg_rtx (Pmode);
8545 emit_insn (gen_rtx_SET (modreg, modmem));
8547 tmpreg = gen_reg_rtx (Pmode);
8548 emit_insn (gen_rtx_SET (tmpreg, mem));
8550 dest = gen_reg_rtx (Pmode);
8551 if (TARGET_32BIT)
8552 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8553 else
8554 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8555 return dest;
8557 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8558 else if (TARGET_32BIT)
8560 tlsreg = gen_reg_rtx (SImode);
8561 emit_insn (gen_tls_get_tpointer (tlsreg));
8563 else
8564 tlsreg = gen_rtx_REG (DImode, 13);
8566 /* Load the TOC value into temporary register. */
8567 tmpreg = gen_reg_rtx (Pmode);
8568 emit_insn (gen_rtx_SET (tmpreg, mem));
8569 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8570 gen_rtx_MINUS (Pmode, addr, tlsreg));
8572 /* Add TOC symbol value to TLS pointer. */
8573 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8575 return dest;
8578 /* Passes the tls arg value for global dynamic and local dynamic
8579 emit_library_call_value in rs6000_legitimize_tls_address to
8580 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8581 marker relocs put on __tls_get_addr calls. */
8582 static rtx global_tlsarg;
8584 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8585 this (thread-local) address. */
8587 static rtx
8588 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8590 rtx dest, insn;
8592 if (TARGET_XCOFF)
8593 return rs6000_legitimize_tls_address_aix (addr, model);
8595 dest = gen_reg_rtx (Pmode);
8596 if (model == TLS_MODEL_LOCAL_EXEC
8597 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8599 rtx tlsreg;
8601 if (TARGET_64BIT)
8603 tlsreg = gen_rtx_REG (Pmode, 13);
8604 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8606 else
8608 tlsreg = gen_rtx_REG (Pmode, 2);
8609 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8611 emit_insn (insn);
8613 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8615 rtx tlsreg, tmp;
8617 tmp = gen_reg_rtx (Pmode);
8618 if (TARGET_64BIT)
8620 tlsreg = gen_rtx_REG (Pmode, 13);
8621 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8623 else
8625 tlsreg = gen_rtx_REG (Pmode, 2);
8626 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8628 emit_insn (insn);
8629 if (TARGET_64BIT)
8630 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8631 else
8632 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8633 emit_insn (insn);
8635 else
8637 rtx got, tga, tmp1, tmp2;
8639 /* We currently use relocations like @got@tlsgd for tls, which
8640 means the linker will handle allocation of tls entries, placing
8641 them in the .got section. So use a pointer to the .got section,
8642 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8643 or to secondary GOT sections used by 32-bit -fPIC. */
8644 if (rs6000_pcrel_p (cfun))
8645 got = const0_rtx;
8646 else if (TARGET_64BIT)
8647 got = gen_rtx_REG (Pmode, 2);
8648 else
8650 if (flag_pic == 1)
8651 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8652 else
8654 rtx gsym = rs6000_got_sym ();
8655 got = gen_reg_rtx (Pmode);
8656 if (flag_pic == 0)
8657 rs6000_emit_move (got, gsym, Pmode);
8658 else
8660 rtx mem, lab;
8662 tmp1 = gen_reg_rtx (Pmode);
8663 tmp2 = gen_reg_rtx (Pmode);
8664 mem = gen_const_mem (Pmode, tmp1);
8665 lab = gen_label_rtx ();
8666 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8667 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8668 if (TARGET_LINK_STACK)
8669 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8670 emit_move_insn (tmp2, mem);
8671 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8672 set_unique_reg_note (last, REG_EQUAL, gsym);
8677 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8679 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8680 UNSPEC_TLSGD);
8681 tga = rs6000_tls_get_addr ();
8682 rtx argreg = gen_rtx_REG (Pmode, 3);
8683 emit_insn (gen_rtx_SET (argreg, arg));
8684 global_tlsarg = arg;
8685 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8686 global_tlsarg = NULL_RTX;
8688 /* Make a note so that the result of this call can be CSEd. */
8689 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8690 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8691 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8693 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8695 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8696 tga = rs6000_tls_get_addr ();
8697 tmp1 = gen_reg_rtx (Pmode);
8698 rtx argreg = gen_rtx_REG (Pmode, 3);
8699 emit_insn (gen_rtx_SET (argreg, arg));
8700 global_tlsarg = arg;
8701 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8702 global_tlsarg = NULL_RTX;
8704 /* Make a note so that the result of this call can be CSEd. */
8705 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8706 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8707 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8709 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8711 if (TARGET_64BIT)
8712 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8713 else
8714 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8716 else if (rs6000_tls_size == 32)
8718 tmp2 = gen_reg_rtx (Pmode);
8719 if (TARGET_64BIT)
8720 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8721 else
8722 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8723 emit_insn (insn);
8724 if (TARGET_64BIT)
8725 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8726 else
8727 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8729 else
8731 tmp2 = gen_reg_rtx (Pmode);
8732 if (TARGET_64BIT)
8733 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8734 else
8735 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8736 emit_insn (insn);
8737 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8739 emit_insn (insn);
8741 else
8743 /* IE, or 64-bit offset LE. */
8744 tmp2 = gen_reg_rtx (Pmode);
8745 if (TARGET_64BIT)
8746 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8747 else
8748 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8749 emit_insn (insn);
8750 if (rs6000_pcrel_p (cfun))
8752 if (TARGET_64BIT)
8753 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8754 else
8755 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8757 else if (TARGET_64BIT)
8758 insn = gen_tls_tls_64 (dest, tmp2, addr);
8759 else
8760 insn = gen_tls_tls_32 (dest, tmp2, addr);
8761 emit_insn (insn);
8765 return dest;
8768 /* Only create the global variable for the stack protect guard if we are using
8769 the global flavor of that guard. */
8770 static tree
8771 rs6000_init_stack_protect_guard (void)
8773 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8774 return default_stack_protect_guard ();
8776 return NULL_TREE;
8779 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8781 static bool
8782 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8784 if (GET_CODE (x) == HIGH
8785 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8786 return true;
8788 /* A TLS symbol in the TOC cannot contain a sum. */
8789 if (GET_CODE (x) == CONST
8790 && GET_CODE (XEXP (x, 0)) == PLUS
8791 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8792 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8793 return true;
8795 /* Do not place an ELF TLS symbol in the constant pool. */
8796 return TARGET_ELF && tls_referenced_p (x);
8799 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8800 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8801 can be addressed relative to the toc pointer. */
8803 static bool
8804 use_toc_relative_ref (rtx sym, machine_mode mode)
8806 return ((constant_pool_expr_p (sym)
8807 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8808 get_pool_mode (sym)))
8809 || (TARGET_CMODEL == CMODEL_MEDIUM
8810 && SYMBOL_REF_LOCAL_P (sym)
8811 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8814 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8815 that is a valid memory address for an instruction.
8816 The MODE argument is the machine mode for the MEM expression
8817 that wants to use this address.
8819 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8820 refers to a constant pool entry of an address (or the sum of it
8821 plus a constant), a short (16-bit signed) constant plus a register,
8822 the sum of two registers, or a register indirect, possibly with an
8823 auto-increment. For DFmode, DDmode and DImode with a constant plus
8824 register, we must ensure that both words are addressable or PowerPC64
8825 with offset word aligned.
8827 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8828 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8829 because adjacent memory cells are accessed by adding word-sized offsets
8830 during assembly output. */
8831 static bool
8832 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8834 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8835 bool quad_offset_p = mode_supports_dq_form (mode);
8837 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8838 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
8839 && GET_CODE (x) == AND
8840 && CONST_INT_P (XEXP (x, 1))
8841 && INTVAL (XEXP (x, 1)) == -16)
8842 x = XEXP (x, 0);
8844 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8845 return 0;
8846 if (legitimate_indirect_address_p (x, reg_ok_strict))
8847 return 1;
8848 if (TARGET_UPDATE
8849 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8850 && mode_supports_pre_incdec_p (mode)
8851 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8852 return 1;
8854 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8855 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8856 return 1;
8858 /* Handle restricted vector d-form offsets in ISA 3.0. */
8859 if (quad_offset_p)
8861 if (quad_address_p (x, mode, reg_ok_strict))
8862 return 1;
8864 else if (virtual_stack_registers_memory_p (x))
8865 return 1;
8867 else if (reg_offset_p)
8869 if (legitimate_small_data_p (mode, x))
8870 return 1;
8871 if (legitimate_constant_pool_address_p (x, mode,
8872 reg_ok_strict || lra_in_progress))
8873 return 1;
8876 /* For TImode, if we have TImode in VSX registers, only allow register
8877 indirect addresses. This will allow the values to go in either GPRs
8878 or VSX registers without reloading. The vector types would tend to
8879 go into VSX registers, so we allow REG+REG, while TImode seems
8880 somewhat split, in that some uses are GPR based, and some VSX based. */
8881 /* FIXME: We could loosen this by changing the following to
8882 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8883 but currently we cannot allow REG+REG addressing for TImode. See
8884 PR72827 for complete details on how this ends up hoodwinking DSE. */
8885 if (mode == TImode && TARGET_VSX)
8886 return 0;
8887 /* If not REG_OK_STRICT (i.e. before reload), let any stack offset pass. */
8888 if (! reg_ok_strict
8889 && reg_offset_p
8890 && GET_CODE (x) == PLUS
8891 && REG_P (XEXP (x, 0))
8892 && (XEXP (x, 0) == virtual_stack_vars_rtx
8893 || XEXP (x, 0) == arg_pointer_rtx)
8894 && CONST_INT_P (XEXP (x, 1)))
8895 return 1;
8896 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8897 return 1;
8898 if (!FLOAT128_2REG_P (mode)
8899 && (TARGET_HARD_FLOAT
8900 || TARGET_POWERPC64
8901 || (mode != DFmode && mode != DDmode))
8902 && (TARGET_POWERPC64 || mode != DImode)
8903 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8904 && mode != PTImode
8905 && !avoiding_indexed_address_p (mode)
8906 && legitimate_indexed_address_p (x, reg_ok_strict))
8907 return 1;
8908 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8909 && mode_supports_pre_modify_p (mode)
8910 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8911 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8912 reg_ok_strict, false)
8913 || (!avoiding_indexed_address_p (mode)
8914 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8915 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8917 /* There is no prefixed version of the load/store with update. */
8918 rtx addr = XEXP (x, 1);
8919 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8921 if (reg_offset_p && !quad_offset_p
8922 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8923 return 1;
8924 return 0;
8927 /* Debug version of rs6000_legitimate_address_p. */
8928 static bool
8929 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8930 bool reg_ok_strict)
8932 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8933 fprintf (stderr,
8934 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8935 "strict = %d, reload = %s, code = %s\n",
8936 ret ? "true" : "false",
8937 GET_MODE_NAME (mode),
8938 reg_ok_strict,
8939 (reload_completed ? "after" : "before"),
8940 GET_RTX_NAME (GET_CODE (x)));
8941 debug_rtx (x);
8943 return ret;
8946 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8948 static bool
8949 rs6000_mode_dependent_address_p (const_rtx addr,
8950 addr_space_t as ATTRIBUTE_UNUSED)
8952 return rs6000_mode_dependent_address_ptr (addr);
8955 /* Go to LABEL if ADDR (a legitimate address expression)
8956 has an effect that depends on the machine mode it is used for.
8958 On the RS/6000 this is true of all integral offsets (since AltiVec
8959 and VSX modes don't allow them), and of pre-increment and decrement.
8961 ??? Except that due to conceptual problems in offsettable_address_p
8962 we can't really report the problems of integral offsets. So leave
8963 this assuming that the adjustable offset must be valid for the
8964 sub-words of a TFmode operand, which is what we had before. */
8966 static bool
8967 rs6000_mode_dependent_address (const_rtx addr)
8969 switch (GET_CODE (addr))
8971 case PLUS:
8972 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8973 is considered a legitimate address before reload, so there
8974 are no offset restrictions in that case. Note that this
8975 condition is safe in strict mode because any address involving
8976 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8977 been rejected as illegitimate. */
8978 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8979 && XEXP (addr, 0) != arg_pointer_rtx
8980 && CONST_INT_P (XEXP (addr, 1)))
8982 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8983 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8984 if (TARGET_PREFIXED)
8985 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8986 else
8987 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8989 break;
8991 case LO_SUM:
8992 /* Anything in the constant pool is sufficiently aligned that
8993 all bytes have the same high part address. */
8994 return !legitimate_constant_pool_address_p (addr, QImode, false);
8996 /* Auto-increment cases are now treated generically in recog.c. */
8997 case PRE_MODIFY:
8998 return TARGET_UPDATE;
9000 /* AND is only allowed in Altivec loads. */
9001 case AND:
9002 return true;
9004 default:
9005 break;
9008 return false;
9011 /* Debug version of rs6000_mode_dependent_address. */
9012 static bool
9013 rs6000_debug_mode_dependent_address (const_rtx addr)
9015 bool ret = rs6000_mode_dependent_address (addr);
9017 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9018 ret ? "true" : "false");
9019 debug_rtx (addr);
9021 return ret;
9024 /* Implement FIND_BASE_TERM. */
9026 rtx
9027 rs6000_find_base_term (rtx op)
9029 rtx base;
9031 base = op;
9032 if (GET_CODE (base) == CONST)
9033 base = XEXP (base, 0);
9034 if (GET_CODE (base) == PLUS)
9035 base = XEXP (base, 0);
9036 if (GET_CODE (base) == UNSPEC)
9037 switch (XINT (base, 1))
9039 case UNSPEC_TOCREL:
9040 case UNSPEC_MACHOPIC_OFFSET:
9041 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9042 for aliasing purposes. */
9043 return XVECEXP (base, 0, 0);
9046 return op;
9049 /* More elaborate version of recog's offsettable_memref_p predicate
9050 that works around the ??? note of rs6000_mode_dependent_address.
9051 In particular it accepts
9053 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9055 in 32-bit mode, which the recog predicate rejects. */
9057 static bool
9058 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9060 bool worst_case;
9062 if (!MEM_P (op))
9063 return false;
9065 /* First mimic offsettable_memref_p. */
9066 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9067 return true;
9069 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9070 the latter predicate knows nothing about the mode of the memory
9071 reference and, therefore, assumes that it is the largest supported
9072 mode (TFmode). As a consequence, legitimate offsettable memory
9073 references are rejected. rs6000_legitimate_offset_address_p contains
9074 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9075 at least with a little bit of help here given that we know the
9076 actual registers used. */
9077 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9078 || GET_MODE_SIZE (reg_mode) == 4);
9079 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9080 strict, worst_case);
9083 /* Determine the reassociation width to be used in reassociate_bb.
9084 This takes into account how many parallel operations we
9085 can actually do of a given type, and also the latency.
9087 int add/sub 6/cycle
9088 mul 2/cycle
9089 vect add/sub/mul 2/cycle
9090 fp add/sub/mul 2/cycle
9091 dfp 1/cycle
9094 static int
9095 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9096 machine_mode mode)
9098 switch (rs6000_tune)
9100 case PROCESSOR_POWER8:
9101 case PROCESSOR_POWER9:
9102 case PROCESSOR_FUTURE:
9103 if (DECIMAL_FLOAT_MODE_P (mode))
9104 return 1;
9105 if (VECTOR_MODE_P (mode))
9106 return 4;
9107 if (INTEGRAL_MODE_P (mode))
9108 return 1;
9109 if (FLOAT_MODE_P (mode))
9110 return 4;
9111 break;
9112 default:
9113 break;
9115 return 1;
9118 /* Change register usage conditional on target flags. */
9119 static void
9120 rs6000_conditional_register_usage (void)
9122 int i;
9124 if (TARGET_DEBUG_TARGET)
9125 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9127 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9128 if (TARGET_64BIT)
9129 fixed_regs[13] = call_used_regs[13] = 1;
9131 /* Conditionally disable FPRs. */
9132 if (TARGET_SOFT_FLOAT)
9133 for (i = 32; i < 64; i++)
9134 fixed_regs[i] = call_used_regs[i] = 1;
9136 /* The TOC register is not killed across calls in a way that is
9137 visible to the compiler. */
9138 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9139 call_used_regs[2] = 0;
9141 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9142 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9144 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9145 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9146 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9148 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9149 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9150 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9152 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9153 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9155 if (!TARGET_ALTIVEC && !TARGET_VSX)
9157 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9158 fixed_regs[i] = call_used_regs[i] = 1;
9159 call_used_regs[VRSAVE_REGNO] = 1;
9162 if (TARGET_ALTIVEC || TARGET_VSX)
9163 global_regs[VSCR_REGNO] = 1;
9165 if (TARGET_ALTIVEC_ABI)
9167 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9168 call_used_regs[i] = 1;
9170 /* AIX reserves VR20:31 in non-extended ABI mode. */
9171 if (TARGET_XCOFF)
9172 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9173 fixed_regs[i] = call_used_regs[i] = 1;
9178 /* Output insns to set DEST equal to the constant SOURCE as a series of
9179 lis, ori and shl instructions and return TRUE. */
9181 bool
9182 rs6000_emit_set_const (rtx dest, rtx source)
9184 machine_mode mode = GET_MODE (dest);
9185 rtx temp, set;
9186 rtx_insn *insn;
9187 HOST_WIDE_INT c;
9189 gcc_checking_assert (CONST_INT_P (source));
9190 c = INTVAL (source);
9191 switch (mode)
9193 case E_QImode:
9194 case E_HImode:
9195 emit_insn (gen_rtx_SET (dest, source));
9196 return true;
9198 case E_SImode:
9199 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9201 emit_insn (gen_rtx_SET (copy_rtx (temp),
9202 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9203 emit_insn (gen_rtx_SET (dest,
9204 gen_rtx_IOR (SImode, copy_rtx (temp),
9205 GEN_INT (c & 0xffff))));
9206 break;
9208 case E_DImode:
9209 if (!TARGET_POWERPC64)
9211 rtx hi, lo;
9213 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9214 DImode);
9215 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9216 DImode);
9217 emit_move_insn (hi, GEN_INT (c >> 32));
9218 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9219 emit_move_insn (lo, GEN_INT (c));
9221 else
9222 rs6000_emit_set_long_const (dest, c);
9223 break;
9225 default:
9226 gcc_unreachable ();
9229 insn = get_last_insn ();
9230 set = single_set (insn);
9231 if (! CONSTANT_P (SET_SRC (set)))
9232 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9234 return true;
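/* Illustrative sketch, not part of GCC: how the SImode case above maps to
   machine instructions.  The first SET becomes an lis (load the high 16
   bits into position) and the IOR becomes an ori (OR in the low 16 bits).
   The helper below (a made-up name, standalone C) just computes the two
   immediates; e.g. c = 0x12345678 yields "lis rD,0x1234" followed by
   "ori rD,rD,0x5678".  */

static void
example_split_si_const (unsigned int c, unsigned int *lis_imm,
			unsigned int *ori_imm)
{
  *lis_imm = (c >> 16) & 0xffff;	/* immediate for lis */
  *ori_imm = c & 0xffff;		/* immediate for ori */
}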
9237 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9238 Output insns to set DEST equal to the constant C as a series of
9239 lis, ori and shl instructions. */
9241 static void
9242 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9244 rtx temp;
9245 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9247 ud1 = c & 0xffff;
9248 c = c >> 16;
9249 ud2 = c & 0xffff;
9250 c = c >> 16;
9251 ud3 = c & 0xffff;
9252 c = c >> 16;
9253 ud4 = c & 0xffff;
9255 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9256 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9257 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9259 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9260 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9262 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9264 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9265 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9266 if (ud1 != 0)
9267 emit_move_insn (dest,
9268 gen_rtx_IOR (DImode, copy_rtx (temp),
9269 GEN_INT (ud1)));
9271 else if (ud3 == 0 && ud4 == 0)
9273 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9275 gcc_assert (ud2 & 0x8000);
9276 emit_move_insn (copy_rtx (temp),
9277 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9278 if (ud1 != 0)
9279 emit_move_insn (copy_rtx (temp),
9280 gen_rtx_IOR (DImode, copy_rtx (temp),
9281 GEN_INT (ud1)));
9282 emit_move_insn (dest,
9283 gen_rtx_ZERO_EXTEND (DImode,
9284 gen_lowpart (SImode,
9285 copy_rtx (temp))));
9287 else if (ud1 == ud3 && ud2 == ud4)
9289 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9290 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9291 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9292 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9293 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9294 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9296 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9297 || (ud4 == 0 && ! (ud3 & 0x8000)))
9299 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9301 emit_move_insn (copy_rtx (temp),
9302 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9303 if (ud2 != 0)
9304 emit_move_insn (copy_rtx (temp),
9305 gen_rtx_IOR (DImode, copy_rtx (temp),
9306 GEN_INT (ud2)));
9307 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9308 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9309 GEN_INT (16)));
9310 if (ud1 != 0)
9311 emit_move_insn (dest,
9312 gen_rtx_IOR (DImode, copy_rtx (temp),
9313 GEN_INT (ud1)));
9315 else
9317 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9319 emit_move_insn (copy_rtx (temp),
9320 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9321 if (ud3 != 0)
9322 emit_move_insn (copy_rtx (temp),
9323 gen_rtx_IOR (DImode, copy_rtx (temp),
9324 GEN_INT (ud3)));
9326 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9327 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9328 GEN_INT (32)));
9329 if (ud2 != 0)
9330 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9331 gen_rtx_IOR (DImode, copy_rtx (temp),
9332 GEN_INT (ud2 << 16)));
9333 if (ud1 != 0)
9334 emit_move_insn (dest,
9335 gen_rtx_IOR (DImode, copy_rtx (temp),
9336 GEN_INT (ud1)));
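/* Worked example for the general (final) case above; the assembly shown is
   an illustration of the emitted RTL, not text taken from this file.  For
   c = 0x123456789abcdef0 we get ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc,
   ud1 = 0xdef0, and the five moves correspond to:

	lis   rD,0x1234		# load ud4 into bits 16..31
	ori   rD,rD,0x5678	# OR in ud3
	sldi  rD,rD,32		# shift ud4:ud3 into the high half
	oris  rD,rD,0x9abc	# OR in ud2
	ori   rD,rD,0xdef0	# OR in ud1  */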
9340 /* Helper for the following. Get rid of [r+r] memory refs
9341 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9343 static void
9344 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9346 if (MEM_P (operands[0])
9347 && !REG_P (XEXP (operands[0], 0))
9348 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9349 GET_MODE (operands[0]), false))
9350 operands[0]
9351 = replace_equiv_address (operands[0],
9352 copy_addr_to_reg (XEXP (operands[0], 0)));
9354 if (MEM_P (operands[1])
9355 && !REG_P (XEXP (operands[1], 0))
9356 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9357 GET_MODE (operands[1]), false))
9358 operands[1]
9359 = replace_equiv_address (operands[1],
9360 copy_addr_to_reg (XEXP (operands[1], 0)));
9363 /* Generate a vector of constants to permute MODE for a little-endian
9364 storage operation by swapping the two halves of a vector. */
9365 static rtvec
9366 rs6000_const_vec (machine_mode mode)
9368 int i, subparts;
9369 rtvec v;
9371 switch (mode)
9373 case E_V1TImode:
9374 subparts = 1;
9375 break;
9376 case E_V2DFmode:
9377 case E_V2DImode:
9378 subparts = 2;
9379 break;
9380 case E_V4SFmode:
9381 case E_V4SImode:
9382 subparts = 4;
9383 break;
9384 case E_V8HImode:
9385 subparts = 8;
9386 break;
9387 case E_V16QImode:
9388 subparts = 16;
9389 break;
9390 default:
9391 gcc_unreachable();
9394 v = rtvec_alloc (subparts);
9396 for (i = 0; i < subparts / 2; ++i)
9397 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9398 for (i = subparts / 2; i < subparts; ++i)
9399 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9401 return v;
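/* Concrete example of the vector built above: for V4SImode, subparts is 4,
   so the selection indices are { 2, 3, 0, 1 }; a VEC_SELECT with those
   lane numbers swaps the two 64-bit halves of the vector, which is the
   permutation an lxvd2x or stxvd2x performs on a little-endian target.  */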
9404 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9405 store operation. */
9406 void
9407 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9409 /* Scalar permutations are easier to express in integer modes than in
9410 floating-point modes, so cast them here. We use V1TImode instead
9411 of TImode to ensure that the values don't go through GPRs. */
9412 if (FLOAT128_VECTOR_P (mode))
9414 dest = gen_lowpart (V1TImode, dest);
9415 source = gen_lowpart (V1TImode, source);
9416 mode = V1TImode;
9419 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9420 scalar. */
9421 if (mode == TImode || mode == V1TImode)
9422 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9423 GEN_INT (64))));
9424 else
9426 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9427 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9431 /* Emit a little-endian load from vector memory location SOURCE to VSX
9432 register DEST in mode MODE. The load is done with two permuting
9433 insns that represent an lxvd2x and an xxpermdi. */
9434 void
9435 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9437 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9438 V1TImode). */
9439 if (mode == TImode || mode == V1TImode)
9441 mode = V2DImode;
9442 dest = gen_lowpart (V2DImode, dest);
9443 source = adjust_address (source, V2DImode, 0);
9446 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9447 rs6000_emit_le_vsx_permute (tmp, source, mode);
9448 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9451 /* Emit a little-endian store to vector memory location DEST from VSX
9452 register SOURCE in mode MODE. The store is done with two permuting
9453 insns that represent an xxpermdi and an stxvd2x. */
9454 void
9455 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9457 /* This should never be called during or after LRA, because it does
9458 not re-permute the source register. It is intended only for use
9459 during expand. */
9460 gcc_assert (!lra_in_progress && !reload_completed);
9462 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9463 V1TImode). */
9464 if (mode == TImode || mode == V1TImode)
9466 mode = V2DImode;
9467 dest = adjust_address (dest, V2DImode, 0);
9468 source = gen_lowpart (V2DImode, source);
9471 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9472 rs6000_emit_le_vsx_permute (tmp, source, mode);
9473 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9476 /* Emit a sequence representing a little-endian VSX load or store,
9477 moving data from SOURCE to DEST in mode MODE. This is done
9478 separately from rs6000_emit_move to ensure it is called only
9479 during expand. LE VSX loads and stores introduced later are
9480 handled with a split. The expand-time RTL generation allows
9481 us to optimize away redundant pairs of register-permutes. */
9482 void
9483 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9485 gcc_assert (!BYTES_BIG_ENDIAN
9486 && VECTOR_MEM_VSX_P (mode)
9487 && !TARGET_P9_VECTOR
9488 && !gpr_or_gpr_p (dest, source)
9489 && (MEM_P (source) ^ MEM_P (dest)));
9491 if (MEM_P (source))
9493 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9494 rs6000_emit_le_vsx_load (dest, source, mode);
9496 else
9498 if (!REG_P (source))
9499 source = force_reg (mode, source);
9500 rs6000_emit_le_vsx_store (dest, source, mode);
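/* Why two permutes suffice (a sketch of the reasoning, with illustrative
   assembly): without Power9 vector instructions, lxvd2x and stxvd2x
   transfer the two doublewords in the opposite order from what a
   little-endian program expects.  Each rs6000_emit_le_vsx_permute above
   is one doubleword swap, so the swap implied by the memory instruction
   and the explicit swap compose to the identity:

	lxvd2x   vsD,0,rA	# doublewords arrive swapped
	xxpermdi vsD,vsD,vsD,2	# swap them back  */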
9504 /* Return whether an SFmode or SImode move can be done without converting one
9505 mode to another. This arises when we have:
9507 (SUBREG:SF (REG:SI ...))
9508 (SUBREG:SI (REG:SF ...))
9510 and one of the values is in a floating point/vector register, where SFmode
9511 scalars are stored in DFmode format. */
9513 bool
9514 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9516 if (TARGET_ALLOW_SF_SUBREG)
9517 return true;
9519 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9520 return true;
9522 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9523 return true;
9525 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9526 if (SUBREG_P (dest))
9528 rtx dest_subreg = SUBREG_REG (dest);
9529 rtx src_subreg = SUBREG_REG (src);
9530 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9533 return false;
9537 /* Helper function to change moves with:
9539 (SUBREG:SF (REG:SI)) and
9540 (SUBREG:SI (REG:SF))
9542 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9543 values are stored as DFmode values in the VSX registers. We need to convert
9544 the bits before we can use a direct move or operate on the bits in the
9545 vector register as an integer type.
9547 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9549 static bool
9550 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9552 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9553 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9554 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9556 rtx inner_source = SUBREG_REG (source);
9557 machine_mode inner_mode = GET_MODE (inner_source);
9559 if (mode == SImode && inner_mode == SFmode)
9561 emit_insn (gen_movsi_from_sf (dest, inner_source));
9562 return true;
9565 if (mode == SFmode && inner_mode == SImode)
9567 emit_insn (gen_movsf_from_si (dest, inner_source));
9568 return true;
9572 return false;
9575 /* Emit a move from SOURCE to DEST in mode MODE. */
9576 void
9577 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9579 rtx operands[2];
9580 operands[0] = dest;
9581 operands[1] = source;
9583 if (TARGET_DEBUG_ADDR)
9585 fprintf (stderr,
9586 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9587 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9588 GET_MODE_NAME (mode),
9589 lra_in_progress,
9590 reload_completed,
9591 can_create_pseudo_p ());
9592 debug_rtx (dest);
9593 fprintf (stderr, "source:\n");
9594 debug_rtx (source);
9597 /* Check that we get CONST_WIDE_INT only when we should. */
9598 if (CONST_WIDE_INT_P (operands[1])
9599 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9600 gcc_unreachable ();
9602 #ifdef HAVE_AS_GNU_ATTRIBUTE
9603 /* If we use a long double type, set the flags in .gnu_attribute that say
9604 what the long double type is. This is to allow the linker's warning
9605 message for the wrong long double to be useful, even if the function does
9606 not do a call (for example, doing a 128-bit add on power9 if the long
9607 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
9608 used when they aren't the default long double type. */
9609 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9611 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9612 rs6000_passes_float = rs6000_passes_long_double = true;
9614 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9615 rs6000_passes_float = rs6000_passes_long_double = true;
9617 #endif
9619 /* See if we need to special case SImode/SFmode SUBREG moves. */
9620 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9621 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9622 return;
9624 /* Check if GCC is setting up a block move that will end up using FP
9625 registers as temporaries. We must make sure this is acceptable. */
9626 if (MEM_P (operands[0])
9627 && MEM_P (operands[1])
9628 && mode == DImode
9629 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9630 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9631 && ! (rs6000_slow_unaligned_access (SImode,
9632 (MEM_ALIGN (operands[0]) > 32
9633 ? 32 : MEM_ALIGN (operands[0])))
9634 || rs6000_slow_unaligned_access (SImode,
9635 (MEM_ALIGN (operands[1]) > 32
9636 ? 32 : MEM_ALIGN (operands[1]))))
9637 && ! MEM_VOLATILE_P (operands [0])
9638 && ! MEM_VOLATILE_P (operands [1]))
9640 emit_move_insn (adjust_address (operands[0], SImode, 0),
9641 adjust_address (operands[1], SImode, 0));
9642 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9643 adjust_address (copy_rtx (operands[1]), SImode, 4));
9644 return;
9647 if (can_create_pseudo_p () && MEM_P (operands[0])
9648 && !gpc_reg_operand (operands[1], mode))
9649 operands[1] = force_reg (mode, operands[1]);
9651 /* Recognize the case where operand[1] is a reference to thread-local
9652 data and load its address to a register. */
9653 if (tls_referenced_p (operands[1]))
9655 enum tls_model model;
9656 rtx tmp = operands[1];
9657 rtx addend = NULL;
9659 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9661 addend = XEXP (XEXP (tmp, 0), 1);
9662 tmp = XEXP (XEXP (tmp, 0), 0);
9665 gcc_assert (SYMBOL_REF_P (tmp));
9666 model = SYMBOL_REF_TLS_MODEL (tmp);
9667 gcc_assert (model != 0);
9669 tmp = rs6000_legitimize_tls_address (tmp, model);
9670 if (addend)
9672 tmp = gen_rtx_PLUS (mode, tmp, addend);
9673 tmp = force_operand (tmp, operands[0]);
9675 operands[1] = tmp;
9678 /* 128-bit constant floating-point values on Darwin should really be loaded
9679 as two parts. However, this premature splitting is a problem when DFmode
9680 values can go into Altivec registers. */
9681 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9682 && !reg_addr[DFmode].scalar_in_vmx_p)
9684 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9685 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9686 DFmode);
9687 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9688 GET_MODE_SIZE (DFmode)),
9689 simplify_gen_subreg (DFmode, operands[1], mode,
9690 GET_MODE_SIZE (DFmode)),
9691 DFmode);
9692 return;
9695 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9696 p1:SD) if p1 is not of floating point class and p0 is spilled, as
9697 we can have no analogous movsd_store for this. */
9698 if (lra_in_progress && mode == DDmode
9699 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9700 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9701 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9702 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9704 enum reg_class cl;
9705 int regno = REGNO (SUBREG_REG (operands[1]));
9707 if (!HARD_REGISTER_NUM_P (regno))
9709 cl = reg_preferred_class (regno);
9710 regno = reg_renumber[regno];
9711 if (regno < 0)
9712 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9714 if (regno >= 0 && ! FP_REGNO_P (regno))
9716 mode = SDmode;
9717 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9718 operands[1] = SUBREG_REG (operands[1]);
9721 if (lra_in_progress
9722 && mode == SDmode
9723 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9724 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9725 && (REG_P (operands[1])
9726 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9728 int regno = reg_or_subregno (operands[1]);
9729 enum reg_class cl;
9731 if (!HARD_REGISTER_NUM_P (regno))
9733 cl = reg_preferred_class (regno);
9734 gcc_assert (cl != NO_REGS);
9735 regno = reg_renumber[regno];
9736 if (regno < 0)
9737 regno = ira_class_hard_regs[cl][0];
9739 if (FP_REGNO_P (regno))
9741 if (GET_MODE (operands[0]) != DDmode)
9742 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9743 emit_insn (gen_movsd_store (operands[0], operands[1]));
9745 else if (INT_REGNO_P (regno))
9746 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9747 else
9748 gcc_unreachable();
9749 return;
9751 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9752 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
9753 we can have no analogous movsd_load for this. */
9754 if (lra_in_progress && mode == DDmode
9755 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9756 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9757 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9758 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9760 enum reg_class cl;
9761 int regno = REGNO (SUBREG_REG (operands[0]));
9763 if (!HARD_REGISTER_NUM_P (regno))
9765 cl = reg_preferred_class (regno);
9766 regno = reg_renumber[regno];
9767 if (regno < 0)
9768 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9770 if (regno >= 0 && ! FP_REGNO_P (regno))
9772 mode = SDmode;
9773 operands[0] = SUBREG_REG (operands[0]);
9774 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9777 if (lra_in_progress
9778 && mode == SDmode
9779 && (REG_P (operands[0])
9780 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9781 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9782 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9784 int regno = reg_or_subregno (operands[0]);
9785 enum reg_class cl;
9787 if (!HARD_REGISTER_NUM_P (regno))
9789 cl = reg_preferred_class (regno);
9790 gcc_assert (cl != NO_REGS);
9791 regno = reg_renumber[regno];
9792 if (regno < 0)
9793 regno = ira_class_hard_regs[cl][0];
9795 if (FP_REGNO_P (regno))
9797 if (GET_MODE (operands[1]) != DDmode)
9798 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9799 emit_insn (gen_movsd_load (operands[0], operands[1]));
9801 else if (INT_REGNO_P (regno))
9802 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9803 else
9804 gcc_unreachable();
9805 return;
9808 /* FIXME: In the long term, this switch statement should go away
9809 and be replaced by a sequence of tests based on things like
9810 mode == Pmode. */
9811 switch (mode)
9813 case E_HImode:
9814 case E_QImode:
9815 if (CONSTANT_P (operands[1])
9816 && !CONST_INT_P (operands[1]))
9817 operands[1] = force_const_mem (mode, operands[1]);
9818 break;
9820 case E_TFmode:
9821 case E_TDmode:
9822 case E_IFmode:
9823 case E_KFmode:
9824 if (FLOAT128_2REG_P (mode))
9825 rs6000_eliminate_indexed_memrefs (operands);
9826 /* fall through */
9828 case E_DFmode:
9829 case E_DDmode:
9830 case E_SFmode:
9831 case E_SDmode:
9832 if (CONSTANT_P (operands[1])
9833 && ! easy_fp_constant (operands[1], mode))
9834 operands[1] = force_const_mem (mode, operands[1]);
9835 break;
9837 case E_V16QImode:
9838 case E_V8HImode:
9839 case E_V4SFmode:
9840 case E_V4SImode:
9841 case E_V2DFmode:
9842 case E_V2DImode:
9843 case E_V1TImode:
9844 if (CONSTANT_P (operands[1])
9845 && !easy_vector_constant (operands[1], mode))
9846 operands[1] = force_const_mem (mode, operands[1]);
9847 break;
9849 case E_SImode:
9850 case E_DImode:
9851 /* Use the default pattern for the address of ELF small data. */
9852 if (TARGET_ELF
9853 && mode == Pmode
9854 && DEFAULT_ABI == ABI_V4
9855 && (SYMBOL_REF_P (operands[1])
9856 || GET_CODE (operands[1]) == CONST)
9857 && small_data_operand (operands[1], mode))
9859 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9860 return;
9863 /* Use the default pattern for loading up PC-relative addresses. */
9864 if (TARGET_PCREL && mode == Pmode
9865 && pcrel_local_or_external_address (operands[1], Pmode))
9867 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9868 return;
9871 if (DEFAULT_ABI == ABI_V4
9872 && mode == Pmode && mode == SImode
9873 && flag_pic == 1 && got_operand (operands[1], mode))
9875 emit_insn (gen_movsi_got (operands[0], operands[1]));
9876 return;
9879 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9880 && TARGET_NO_TOC_OR_PCREL
9881 && ! flag_pic
9882 && mode == Pmode
9883 && CONSTANT_P (operands[1])
9884 && GET_CODE (operands[1]) != HIGH
9885 && !CONST_INT_P (operands[1]))
9887 rtx target = (!can_create_pseudo_p ()
9888 ? operands[0]
9889 : gen_reg_rtx (mode));
9891 /* If this is a function address on -mcall-aixdesc,
9892 convert it to the address of the descriptor. */
9893 if (DEFAULT_ABI == ABI_AIX
9894 && SYMBOL_REF_P (operands[1])
9895 && XSTR (operands[1], 0)[0] == '.')
9897 const char *name = XSTR (operands[1], 0);
9898 rtx new_ref;
9899 while (*name == '.')
9900 name++;
9901 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9902 CONSTANT_POOL_ADDRESS_P (new_ref)
9903 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9904 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9905 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9906 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9907 operands[1] = new_ref;
9910 if (DEFAULT_ABI == ABI_DARWIN)
9912 #if TARGET_MACHO
9913 /* This is not PIC code, but could require the subset of
9914 indirections used by mdynamic-no-pic. */
9915 if (MACHO_DYNAMIC_NO_PIC_P)
9917 /* Take care of any required data indirection. */
9918 operands[1] = rs6000_machopic_legitimize_pic_address (
9919 operands[1], mode, operands[0]);
9920 if (operands[0] != operands[1])
9921 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9922 return;
9924 #endif
9925 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9926 emit_insn (gen_macho_low (Pmode, operands[0],
9927 target, operands[1]));
9928 return;
9931 emit_insn (gen_elf_high (target, operands[1]));
9932 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9933 return;
9936 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9937 and we have put it in the TOC, we just need to make a TOC-relative
9938 reference to it. */
9939 if (TARGET_TOC
9940 && SYMBOL_REF_P (operands[1])
9941 && use_toc_relative_ref (operands[1], mode))
9942 operands[1] = create_TOC_reference (operands[1], operands[0]);
9943 else if (mode == Pmode
9944 && CONSTANT_P (operands[1])
9945 && GET_CODE (operands[1]) != HIGH
9946 && ((REG_P (operands[0])
9947 && FP_REGNO_P (REGNO (operands[0])))
9948 || !CONST_INT_P (operands[1])
9949 || (num_insns_constant (operands[1], mode)
9950 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9951 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9952 && (TARGET_CMODEL == CMODEL_SMALL
9953 || can_create_pseudo_p ()
9954 || (REG_P (operands[0])
9955 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9958 #if TARGET_MACHO
9959 /* Darwin uses a special PIC legitimizer. */
9960 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9962 operands[1] =
9963 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9964 operands[0]);
9965 if (operands[0] != operands[1])
9966 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9967 return;
9969 #endif
9971 /* If we are to limit the number of things we put in the TOC and
9972 this is a symbol plus a constant we can add in one insn,
9973 just put the symbol in the TOC and add the constant. */
9974 if (GET_CODE (operands[1]) == CONST
9975 && TARGET_NO_SUM_IN_TOC
9976 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9977 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9978 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9979 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9980 && ! side_effects_p (operands[0]))
9982 rtx sym =
9983 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9984 rtx other = XEXP (XEXP (operands[1], 0), 1);
9986 sym = force_reg (mode, sym);
9987 emit_insn (gen_add3_insn (operands[0], sym, other));
9988 return;
9991 operands[1] = force_const_mem (mode, operands[1]);
9993 if (TARGET_TOC
9994 && SYMBOL_REF_P (XEXP (operands[1], 0))
9995 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9997 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9998 operands[0]);
9999 operands[1] = gen_const_mem (mode, tocref);
10000 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10003 break;
10005 case E_TImode:
10006 if (!VECTOR_MEM_VSX_P (TImode))
10007 rs6000_eliminate_indexed_memrefs (operands);
10008 break;
10010 case E_PTImode:
10011 rs6000_eliminate_indexed_memrefs (operands);
10012 break;
10014 default:
10015 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10018 /* Above, we may have called force_const_mem which may have returned
10019 an invalid address. If we can, fix this up; otherwise, reload will
10020 have to deal with it. */
10021 if (MEM_P (operands[1]))
10022 operands[1] = validize_mem (operands[1]);
10024 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10028 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10029 static void
10030 init_float128_ibm (machine_mode mode)
10032 if (!TARGET_XL_COMPAT)
10034 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10035 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10036 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10037 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10039 if (!TARGET_HARD_FLOAT)
10041 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10042 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10043 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10044 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10045 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10046 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10047 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10048 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10050 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10051 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10052 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10053 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10054 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10055 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10056 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10057 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10060 else
10062 set_optab_libfunc (add_optab, mode, "_xlqadd");
10063 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10064 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10065 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10068 /* Add various conversions for IFmode to use the traditional TFmode
10069 names. */
10070 if (mode == IFmode)
10072 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10073 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10074 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10075 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10076 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10077 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10079 if (TARGET_POWERPC64)
10081 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10082 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10083 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10084 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
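/* Usage sketch, assuming the default (!TARGET_XL_COMPAT) names registered
   above and a libgcc that provides them: with IBM extended double, basic
   long double arithmetic is lowered to these libcalls rather than inline
   code, e.g.

	long double
	example_qadd (long double a, long double b)
	{
	  return a + b;		// lowered to a call to __gcc_qadd
	}  */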
10089 /* Create a decl for either complex long double multiply or complex long double
10090 divide when long double is IEEE 128-bit floating point. We can't use
10091 __multc3 and __divtc3 because those names were already taken by the original
10092 IBM extended double long double. The complex multiply/divide functions are
10093 encoded as builtin functions with a complex result and 4 scalar inputs. */
10095 static void
10096 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10098 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10099 name, NULL_TREE);
10101 set_builtin_decl (fncode, fndecl, true);
10103 if (TARGET_DEBUG_BUILTIN)
10104 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10106 return;
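/* For reference, the effective signature of the functions declared here,
   reconstructed from the FNTYPE its caller builds below (an illustration,
   not a declaration copied from libgcc):

	_Complex long double
	__mulkc3 (long double a, long double b, long double c, long double d);

   i.e. a complex result computed from the four scalar parts of (a + bi)
   and (c + di), for use when long double is IEEE 128-bit.  */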
10109 /* Set up IEEE 128-bit floating point routines. Use different names if the
10110 arguments can be passed in a vector register. The historical PowerPC
10111 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10112 continue to use that if we aren't using vector registers to pass IEEE
10113 128-bit floating point. */
10115 static void
10116 init_float128_ieee (machine_mode mode)
10118 if (FLOAT128_VECTOR_P (mode))
10120 static bool complex_muldiv_init_p = false;
10122 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10123 we have clone or target attributes, this will be called a second
10124 time. We want to create the built-in function only once. */
10125 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10127 complex_muldiv_init_p = true;
10128 built_in_function fncode_mul =
10129 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10130 - MIN_MODE_COMPLEX_FLOAT);
10131 built_in_function fncode_div =
10132 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10133 - MIN_MODE_COMPLEX_FLOAT);
10135 tree fntype = build_function_type_list (complex_long_double_type_node,
10136 long_double_type_node,
10137 long_double_type_node,
10138 long_double_type_node,
10139 long_double_type_node,
10140 NULL_TREE);
10142 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10143 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10146 set_optab_libfunc (add_optab, mode, "__addkf3");
10147 set_optab_libfunc (sub_optab, mode, "__subkf3");
10148 set_optab_libfunc (neg_optab, mode, "__negkf2");
10149 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10150 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10151 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10152 set_optab_libfunc (abs_optab, mode, "__abskf2");
10153 set_optab_libfunc (powi_optab, mode, "__powikf2");
10155 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10156 set_optab_libfunc (ne_optab, mode, "__nekf2");
10157 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10158 set_optab_libfunc (ge_optab, mode, "__gekf2");
10159 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10160 set_optab_libfunc (le_optab, mode, "__lekf2");
10161 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10163 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10164 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10165 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10166 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10168 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10169 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10170 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10172 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10173 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10174 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10176 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10177 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10178 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10179 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10180 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10181 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10183 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10184 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10185 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10186 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10188 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10189 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10190 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10191 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10193 if (TARGET_POWERPC64)
10195 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10196 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10197 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10198 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10202 else
10204 set_optab_libfunc (add_optab, mode, "_q_add");
10205 set_optab_libfunc (sub_optab, mode, "_q_sub");
10206 set_optab_libfunc (neg_optab, mode, "_q_neg");
10207 set_optab_libfunc (smul_optab, mode, "_q_mul");
10208 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10209 if (TARGET_PPC_GPOPT)
10210 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10212 set_optab_libfunc (eq_optab, mode, "_q_feq");
10213 set_optab_libfunc (ne_optab, mode, "_q_fne");
10214 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10215 set_optab_libfunc (ge_optab, mode, "_q_fge");
10216 set_optab_libfunc (lt_optab, mode, "_q_flt");
10217 set_optab_libfunc (le_optab, mode, "_q_fle");
10219 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10220 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10221 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10222 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10223 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10224 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10225 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10226 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
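/* Usage sketch, assuming TARGET_FLOAT128_TYPE and the KFmode names
   registered above: a __float128 operation with no hardware instruction
   available is lowered to one of these libcalls, e.g.

	__float128
	example_addkf (__float128 a, __float128 b)
	{
	  return a + b;		// without IEEE 128-bit hardware, __addkf3
	}  */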
10230 static void
10231 rs6000_init_libfuncs (void)
10233 /* __float128 support. */
10234 if (TARGET_FLOAT128_TYPE)
10236 init_float128_ibm (IFmode);
10237 init_float128_ieee (KFmode);
10240 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10241 if (TARGET_LONG_DOUBLE_128)
10243 if (!TARGET_IEEEQUAD)
10244 init_float128_ibm (TFmode);
10246 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10247 else
10248 init_float128_ieee (TFmode);
10252 /* Emit a potentially record-form instruction, setting DST from SRC.
10253 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10254 signed comparison of DST with zero. If DOT is 1, the generated RTL
10255 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10256 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10257 a separate COMPARE. */
10259 void
10260 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10262 if (dot == 0)
10264 emit_move_insn (dst, src);
10265 return;
10268 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10270 emit_move_insn (dst, src);
10271 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10272 return;
10275 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10276 if (dot == 1)
10278 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10279 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10281 else
10283 rtx set = gen_rtx_SET (dst, src);
10284 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
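/* Concrete example of the three cases (the assembly is illustrative):
   with DST = r3, SRC = (plus r4 r5) and CCREG = cr0,

	dot == 0:  add  r3,r4,r5	(no compare at all)
	dot == 1:  add. r3,r4,r5	(the RTL treats r3 as clobbered)
	dot == 2:  add. r3,r4,r5	(both r3 and cr0 are used)

   and if CCREG is some CR field other than cr0, the compare must be a
   separate cmpdi DST,0 after a plain add.  */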
10289 /* A validation routine: say whether CODE, a condition code, and MODE
10290 match. The other alternatives either don't make sense or should
10291 never be generated. */
10293 void
10294 validate_condition_mode (enum rtx_code code, machine_mode mode)
10296 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10297 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10298 && GET_MODE_CLASS (mode) == MODE_CC);
10300 /* These don't make sense. */
10301 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10302 || mode != CCUNSmode);
10304 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10305 || mode == CCUNSmode);
10307 gcc_assert (mode == CCFPmode
10308 || (code != ORDERED && code != UNORDERED
10309 && code != UNEQ && code != LTGT
10310 && code != UNGT && code != UNLT
10311 && code != UNGE && code != UNLE));
10313 /* These are invalid; the information is not there. */
10314 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10318 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10319 rldicl, rldicr, or rldic instruction in mode MODE. If so, and E is
10320 non-null, store there the bit offset (counted from the right) where
10321 the single stretch of 1 bits begins; and similarly for B, the bit
10322 offset where it ends. */
10324 bool
10325 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10327 unsigned HOST_WIDE_INT val = INTVAL (mask);
10328 unsigned HOST_WIDE_INT bit;
10329 int nb, ne;
10330 int n = GET_MODE_PRECISION (mode);
10332 if (mode != DImode && mode != SImode)
10333 return false;
10335 if (INTVAL (mask) >= 0)
10337 bit = val & -val;
10338 ne = exact_log2 (bit);
10339 nb = exact_log2 (val + bit);
10341 else if (val + 1 == 0)
10343 nb = n;
10344 ne = 0;
10346 else if (val & 1)
10348 val = ~val;
10349 bit = val & -val;
10350 nb = exact_log2 (bit);
10351 ne = exact_log2 (val + bit);
10353 else
10355 bit = val & -val;
10356 ne = exact_log2 (bit);
10357 if (val + bit == 0)
10358 nb = n;
10359 else
10360 nb = 0;
10363 nb--;
10365 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10366 return false;
10368 if (b)
10369 *b = nb;
10370 if (e)
10371 *e = ne;
10373 return true;
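/* Standalone sketch of the bit tricks above (illustrative; the name is
   made up and the function is not used by GCC).  VAL & -VAL isolates the
   lowest set bit; for a single non-wrapping stretch of ones, VAL plus
   that bit carries out of the whole stretch, so the test below detects a
   contiguous mask and the count builtins recover its bounds.  E.g.
   VAL = 0x0ff0 gives ne = 4 and nb = 11.  The real predicate above also
   accepts wrap-around masks (for rotates); this sketch handles only the
   simple case.  */

static bool
example_single_stretch_bounds (unsigned long long val, int *nb, int *ne)
{
  unsigned long long bit = val & -val;		/* lowest set bit */
  if (val == 0 || ((val + bit) & val) != 0)	/* not one stretch of ones */
    return false;
  *ne = __builtin_ctzll (val);			/* offset where ones begin */
  *nb = 63 - __builtin_clzll (val);		/* offset where ones end */
  return true;
}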
10376 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10377 or rldicr instruction, to implement an AND with it in mode MODE. */
10379 bool
10380 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10382 int nb, ne;
10384 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10385 return false;
10387 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10388 does not wrap. */
10389 if (mode == DImode)
10390 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10392 /* For SImode, rlwinm can do everything. */
10393 if (mode == SImode)
10394 return (nb < 32 && ne < 32);
10396 return false;
10399 /* Return the instruction template for an AND with mask in mode MODE, with
10400 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10402 const char *
10403 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10405 int nb, ne;
10407 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10408 gcc_unreachable ();
10410 if (mode == DImode && ne == 0)
10412 operands[3] = GEN_INT (63 - nb);
10413 if (dot)
10414 return "rldicl. %0,%1,0,%3";
10415 return "rldicl %0,%1,0,%3";
10418 if (mode == DImode && nb == 63)
10420 operands[3] = GEN_INT (63 - ne);
10421 if (dot)
10422 return "rldicr. %0,%1,0,%3";
10423 return "rldicr %0,%1,0,%3";
10426 if (nb < 32 && ne < 32)
10428 operands[3] = GEN_INT (31 - nb);
10429 operands[4] = GEN_INT (31 - ne);
10430 if (dot)
10431 return "rlwinm. %0,%1,0,%3,%4";
10432 return "rlwinm %0,%1,0,%3,%4";
10435 gcc_unreachable ();
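/* Worked example for the templates above (illustrative): ANDing a DImode
   register with 0xff gives ne = 0 and nb = 7, so the first case applies
   with operands[3] = 63 - 7 = 56, producing

	rldicl rD,rS,0,56

   i.e. rotate by zero and clear the 56 high bits, keeping the low 8.  */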
10438 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10439 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10440 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10442 bool
10443 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10445 int nb, ne;
10447 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10448 return false;
10450 int n = GET_MODE_PRECISION (mode);
10451 int sh = -1;
10453 if (CONST_INT_P (XEXP (shift, 1)))
10455 sh = INTVAL (XEXP (shift, 1));
10456 if (sh < 0 || sh >= n)
10457 return false;
10460 rtx_code code = GET_CODE (shift);
10462 /* Convert any shift by 0 to a rotate, to simplify below code. */
10463 if (sh == 0)
10464 code = ROTATE;
10466 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10467 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10468 code = ASHIFT;
10469 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10471 code = LSHIFTRT;
10472 sh = n - sh;
10475 /* DImode rotates need rld*. */
10476 if (mode == DImode && code == ROTATE)
10477 return (nb == 63 || ne == 0 || ne == sh);
10479 /* SImode rotates need rlw*. */
10480 if (mode == SImode && code == ROTATE)
10481 return (nb < 32 && ne < 32 && sh < 32);
10483 /* Wrap-around masks are only okay for rotates. */
10484 if (ne > nb)
10485 return false;
10487 /* Variable shifts are only okay for rotates. */
10488 if (sh < 0)
10489 return false;
10491 /* Don't allow ASHIFT if the mask is wrong for that. */
10492 if (code == ASHIFT && ne < sh)
10493 return false;
10495 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10496 if the mask is wrong for that. */
10497 if (nb < 32 && ne < 32 && sh < 32
10498 && !(code == LSHIFTRT && nb >= 32 - sh))
10499 return true;
10501 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10502 if the mask is wrong for that. */
10503 if (code == LSHIFTRT)
10504 sh = 64 - sh;
10505 if (nb == 63 || ne == 0 || ne == sh)
10506 return !(code == LSHIFTRT && nb >= sh);
10508 return false;
10511 /* Return the instruction template for a shift with mask in mode MODE, with
10512 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10514 const char *
10515 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10517 int nb, ne;
10519 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10520 gcc_unreachable ();
10522 if (mode == DImode && ne == 0)
10524 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10525 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10526 operands[3] = GEN_INT (63 - nb);
10527 if (dot)
10528 return "rld%I2cl. %0,%1,%2,%3";
10529 return "rld%I2cl %0,%1,%2,%3";
10532 if (mode == DImode && nb == 63)
10534 operands[3] = GEN_INT (63 - ne);
10535 if (dot)
10536 return "rld%I2cr. %0,%1,%2,%3";
10537 return "rld%I2cr %0,%1,%2,%3";
10540 if (mode == DImode
10541 && GET_CODE (operands[4]) != LSHIFTRT
10542 && CONST_INT_P (operands[2])
10543 && ne == INTVAL (operands[2]))
10545 operands[3] = GEN_INT (63 - nb);
10546 if (dot)
10547 return "rld%I2c. %0,%1,%2,%3";
10548 return "rld%I2c %0,%1,%2,%3";
10551 if (nb < 32 && ne < 32)
10553 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10554 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10555 operands[3] = GEN_INT (31 - nb);
10556 operands[4] = GEN_INT (31 - ne);
10557 /* This insn can also be a 64-bit rotate with mask that really makes
10558 it just a shift right (with mask); the %h below are to adjust for
10559 that situation (shift count is >= 32 in that case). */
10560 if (dot)
10561 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10562 return "rlw%I2nm %0,%1,%h2,%3,%4";
10565 gcc_unreachable ();
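/* Worked example (illustrative): for the SImode expression
   (X >> 3) & 0x1f the mask gives nb = 4 and ne = 0, so the rlw*nm case
   applies; the LSHIFTRT count 3 becomes a left rotate of 32 - 3 = 29 and
   the mask bounds become 31 - 4 = 27 and 31 - 0 = 31, producing

	rlwinm rD,rS,29,27,31

   which rotates right by 3 and keeps the low 5 bits.  */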
10568 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10569 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10570 ASHIFT, or LSHIFTRT) in mode MODE. */
10572 bool
10573 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10575 int nb, ne;
10577 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10578 return false;
10580 int n = GET_MODE_PRECISION (mode);
10582 int sh = INTVAL (XEXP (shift, 1));
10583 if (sh < 0 || sh >= n)
10584 return false;
10586 rtx_code code = GET_CODE (shift);
10588 /* Convert any shift by 0 to a rotate, to simplify below code. */
10589 if (sh == 0)
10590 code = ROTATE;
10592 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10593 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10594 code = ASHIFT;
10595 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10597 code = LSHIFTRT;
10598 sh = n - sh;
10601 /* DImode rotates need rldimi. */
10602 if (mode == DImode && code == ROTATE)
10603 return (ne == sh);
10605 /* SImode rotates need rlwimi. */
10606 if (mode == SImode && code == ROTATE)
10607 return (nb < 32 && ne < 32 && sh < 32);
10609 /* Wrap-around masks are only okay for rotates. */
10610 if (ne > nb)
10611 return false;
10613 /* Don't allow ASHIFT if the mask is wrong for that. */
10614 if (code == ASHIFT && ne < sh)
10615 return false;
10617 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10618 if the mask is wrong for that. */
10619 if (nb < 32 && ne < 32 && sh < 32
10620 && !(code == LSHIFTRT && nb >= 32 - sh))
10621 return true;
10623 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10624 if the mask is wrong for that. */
10625 if (code == LSHIFTRT)
10626 sh = 64 - sh;
10627 if (ne == sh)
10628 return !(code == LSHIFTRT && nb >= sh);
10630 return false;
10633 /* Return the instruction template for an insert with mask in mode MODE, with
10634 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10636 const char *
10637 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10639 int nb, ne;
10641 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10642 gcc_unreachable ();
10644 /* Prefer rldimi because rlwimi is cracked. */
10645 if (TARGET_POWERPC64
10646 && (!dot || mode == DImode)
10647 && GET_CODE (operands[4]) != LSHIFTRT
10648 && ne == INTVAL (operands[2]))
10650 operands[3] = GEN_INT (63 - nb);
10651 if (dot)
10652 return "rldimi. %0,%1,%2,%3";
10653 return "rldimi %0,%1,%2,%3";
10656 if (nb < 32 && ne < 32)
10658 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10659 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10660 operands[3] = GEN_INT (31 - nb);
10661 operands[4] = GEN_INT (31 - ne);
10662 if (dot)
10663 return "rlwimi. %0,%1,%2,%3,%4";
10664 return "rlwimi %0,%1,%2,%3,%4";
10667 gcc_unreachable ();
10670 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10671 using two machine instructions. */
10673 bool
10674 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10676 /* There are two kinds of AND we can handle with two insns:
10677 1) those we can do with two rl* insn;
10678 2) ori[s];xori[s].
10680 We do not handle that last case yet. */
10682 /* If there is just one stretch of ones, we can do it. */
10683 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10684 return true;
10686 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10687 one insn, we can do the whole thing with two. */
10688 unsigned HOST_WIDE_INT val = INTVAL (c);
10689 unsigned HOST_WIDE_INT bit1 = val & -val;
10690 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10691 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10692 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10693 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
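/* Worked example of the hole-filling test (illustrative): for
   C = 0x00ff00ff, bit1 = 0x1 is the lowest set bit, bit2 = 0x100 is the
   lowest bit of the lowest hole, val1 = 0x00ff0000 is the group of ones
   above that hole, and bit3 = 0x00010000 is its lowest bit.  Then
   VAL + bit3 - bit2 = 0x00ffffff, a single stretch of ones, so the AND
   can be done as two rlwinm instructions.  */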
10696 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10697 If EXPAND is true, split rotate-and-mask instructions we generate to
10698 their constituent parts as well (this is used during expand); if DOT
10699 is 1, make the last insn a record-form instruction clobbering the
10700 destination GPR and setting the CC reg (from operands[3]); if 2, set
10701 that GPR as well as the CC reg. */
10703 void
10704 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10706 gcc_assert (!(expand && dot));
10708 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10710 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10711 shift right. This generates better code than doing the masks without
10712 shifts, or shifting first right and then left. */
10713 int nb, ne;
10714 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10716 gcc_assert (mode == DImode);
10718 int shift = 63 - nb;
10719 if (expand)
10721 rtx tmp1 = gen_reg_rtx (DImode);
10722 rtx tmp2 = gen_reg_rtx (DImode);
10723 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10724 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10725 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10727 else
10729 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10730 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10731 emit_move_insn (operands[0], tmp);
10732 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10733 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10735 return;
10738 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10739 that does the rest. */
10740 unsigned HOST_WIDE_INT bit1 = val & -val;
10741 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10742 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10743 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10745 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10746 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10748 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10750 /* Two "no-rotate"-and-mask instructions, for SImode. */
10751 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10753 gcc_assert (mode == SImode);
10755 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10756 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10757 emit_move_insn (reg, tmp);
10758 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10759 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10760 return;
10763 gcc_assert (mode == DImode);
10765 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10766 insns; we have to do the first in SImode, because it wraps. */
10767 if (mask2 <= 0xffffffff
10768 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10770 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10771 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10772 GEN_INT (mask1));
10773 rtx reg_low = gen_lowpart (SImode, reg);
10774 emit_move_insn (reg_low, tmp);
10775 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10776 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10777 return;
10780 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10781 at the top end), rotate back and clear the other hole. */
10782 int right = exact_log2 (bit3);
10783 int left = 64 - right;
10785 /* Rotate the mask too. */
10786 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10788 if (expand)
10790 rtx tmp1 = gen_reg_rtx (DImode);
10791 rtx tmp2 = gen_reg_rtx (DImode);
10792 rtx tmp3 = gen_reg_rtx (DImode);
10793 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10794 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10795 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10796 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10798 else
10800 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10801 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10802 emit_move_insn (operands[0], tmp);
10803 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10804 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10805 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10809 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10810 for lfq and stfq insns iff the registers are hard registers. */
10812 int
10813 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10815 /* We might have been passed a SUBREG. */
10816 if (!REG_P (reg1) || !REG_P (reg2))
10817 return 0;
10819 /* We might have been passed non-floating-point registers. */
10820 if (!FP_REGNO_P (REGNO (reg1))
10821 || !FP_REGNO_P (REGNO (reg2)))
10822 return 0;
10824 return (REGNO (reg1) == REGNO (reg2) - 1);
10827 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10828 addr1 and addr2 must be in consecutive memory locations
10829 (addr2 == addr1 + 8). */
10831 int
10832 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10834 rtx addr1, addr2;
10835 unsigned int reg1, reg2;
10836 int offset1, offset2;
10838 /* The mems cannot be volatile. */
10839 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10840 return 0;
10842 addr1 = XEXP (mem1, 0);
10843 addr2 = XEXP (mem2, 0);
10845 /* Extract an offset (if used) from the first addr. */
10846 if (GET_CODE (addr1) == PLUS)
10848 /* If not a REG, return zero. */
10849 if (!REG_P (XEXP (addr1, 0)))
10850 return 0;
10851 else
10853 reg1 = REGNO (XEXP (addr1, 0));
10854 /* The offset must be constant! */
10855 if (!CONST_INT_P (XEXP (addr1, 1)))
10856 return 0;
10857 offset1 = INTVAL (XEXP (addr1, 1));
10860 else if (!REG_P (addr1))
10861 return 0;
10862 else
10864 reg1 = REGNO (addr1);
10865 /* This was a simple (mem (reg)) expression. Offset is 0. */
10866 offset1 = 0;
10869 /* And now for the second addr. */
10870 if (GET_CODE (addr2) == PLUS)
10872 /* If not a REG, return zero. */
10873 if (!REG_P (XEXP (addr2, 0)))
10874 return 0;
10875 else
10877 reg2 = REGNO (XEXP (addr2, 0));
10878 /* The offset must be constant. */
10879 if (!CONST_INT_P (XEXP (addr2, 1)))
10880 return 0;
10881 offset2 = INTVAL (XEXP (addr2, 1));
10884 else if (!REG_P (addr2))
10885 return 0;
10886 else
10888 reg2 = REGNO (addr2);
10889 /* This was a simple (mem (reg)) expression. Offset is 0. */
10890 offset2 = 0;
10893 /* Both of these must have the same base register. */
10894 if (reg1 != reg2)
10895 return 0;
10897 /* The offset for the second addr must be 8 more than the first addr. */
10898 if (offset2 != offset1 + 8)
10899 return 0;
10901 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10902 instructions. */
10903 return 1;
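/* Example of a pair this function accepts (illustrative):
   mem1 = (mem:DF (reg r9)) and mem2 = (mem:DF (plus (reg r9)
   (const_int 8))) give reg1 == reg2 and offset2 == offset1 + 8, so the
   peephole may combine the two adjacent accesses into one lfq or stfq.  */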
10906 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
10907 need to use DDmode, in all other cases we can use the same mode. */
10908 static machine_mode
10909 rs6000_secondary_memory_needed_mode (machine_mode mode)
10911 if (lra_in_progress && mode == SDmode)
10912 return DDmode;
10913 return mode;
10916 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10917 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10918 only work on the traditional altivec registers, note if an altivec register
10919 was chosen. */
10921 static enum rs6000_reg_type
10922 register_to_reg_type (rtx reg, bool *is_altivec)
10924 HOST_WIDE_INT regno;
10925 enum reg_class rclass;
10927 if (SUBREG_P (reg))
10928 reg = SUBREG_REG (reg);
10930 if (!REG_P (reg))
10931 return NO_REG_TYPE;
10933 regno = REGNO (reg);
10934 if (!HARD_REGISTER_NUM_P (regno))
10936 if (!lra_in_progress && !reload_completed)
10937 return PSEUDO_REG_TYPE;
10939 regno = true_regnum (reg);
10940 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10941 return PSEUDO_REG_TYPE;
10944 gcc_assert (regno >= 0);
10946 if (is_altivec && ALTIVEC_REGNO_P (regno))
10947 *is_altivec = true;
10949 rclass = rs6000_regno_regclass[regno];
10950 return reg_class_to_reg_type[(int)rclass];
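/* Usage sketch (hypothetical, not part of the build): classifying a hard
   Altivec register also reports it through *IS_ALTIVEC, which is what lets
   callers pick VMRGOW/VMRGEW over FMRGOW/FMRGEW.  */
#if 0
static void
example_register_to_reg_type (void)
{
  bool is_altivec = false;
  rtx v2 = gen_rtx_REG (V4SImode, FIRST_ALTIVEC_REGNO + 2);
  enum rs6000_reg_type type = register_to_reg_type (v2, &is_altivec);
  /* With VSX enabled this is VSX_REG_TYPE, otherwise ALTIVEC_REG_TYPE;
     either way is_altivec is now true.  */
  gcc_assert (is_altivec && type != NO_REG_TYPE);
}
#endif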
10953 /* Helper function to return the cost of adding a TOC entry address. */
10955 static inline int
10956 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10958 int ret;
10960 if (TARGET_CMODEL != CMODEL_SMALL)
10961 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10963 else
10964 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10966 return ret;
10969 /* Helper function for rs6000_secondary_reload to determine whether the memory
10970 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10971 needs reloading. Return negative if the memory is not handled by the
10972 memory helper functions (so a different reload method should be tried),
10973 0 if no additional instructions are needed, and positive to give the
10974 extra cost for the memory. */
10976 static int
10977 rs6000_secondary_reload_memory (rtx addr,
10978 enum reg_class rclass,
10979 machine_mode mode)
10981 int extra_cost = 0;
10982 rtx reg, and_arg, plus_arg0, plus_arg1;
10983 addr_mask_type addr_mask;
10984 const char *type = NULL;
10985 const char *fail_msg = NULL;
10987 if (GPR_REG_CLASS_P (rclass))
10988 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10990 else if (rclass == FLOAT_REGS)
10991 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10993 else if (rclass == ALTIVEC_REGS)
10994 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10996 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10997 else if (rclass == VSX_REGS)
10998 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10999 & ~RELOAD_REG_AND_M16);
11001 /* If the register allocator hasn't made up its mind yet on the register
11002 class to use, settle on defaults to use. */
11003 else if (rclass == NO_REGS)
11005 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11006 & ~RELOAD_REG_AND_M16);
11008 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11009 addr_mask &= ~(RELOAD_REG_INDEXED
11010 | RELOAD_REG_PRE_INCDEC
11011 | RELOAD_REG_PRE_MODIFY);
11014 else
11015 addr_mask = 0;
11017 /* If the mode isn't valid in this register class, just return now. */
11018 if ((addr_mask & RELOAD_REG_VALID) == 0)
11020 if (TARGET_DEBUG_ADDR)
11022 fprintf (stderr,
11023 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11024 "not valid in class\n",
11025 GET_MODE_NAME (mode), reg_class_names[rclass]);
11026 debug_rtx (addr);
11029 return -1;
11032 switch (GET_CODE (addr))
11034 /* Does the register class support auto update forms for this mode? We
11035 don't need a scratch register, since the powerpc only supports
11036 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11037 case PRE_INC:
11038 case PRE_DEC:
11039 reg = XEXP (addr, 0);
11040 if (!base_reg_operand (reg, GET_MODE (reg)))
11042 fail_msg = "no base register #1";
11043 extra_cost = -1;
11046 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11048 extra_cost = 1;
11049 type = "update";
11051 break;
11053 case PRE_MODIFY:
11054 reg = XEXP (addr, 0);
11055 plus_arg1 = XEXP (addr, 1);
11056 if (!base_reg_operand (reg, GET_MODE (reg))
11057 || GET_CODE (plus_arg1) != PLUS
11058 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11060 fail_msg = "bad PRE_MODIFY";
11061 extra_cost = -1;
11064 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11066 extra_cost = 1;
11067 type = "update";
11069 break;
11071 /* Do we need to simulate AND -16 to clear the bottom address bits used
11072 in VMX load/stores? Only allow the AND for vector sizes. */
11073 case AND:
11074 and_arg = XEXP (addr, 0);
11075 if (GET_MODE_SIZE (mode) != 16
11076 || !CONST_INT_P (XEXP (addr, 1))
11077 || INTVAL (XEXP (addr, 1)) != -16)
11079 fail_msg = "bad Altivec AND #1";
11080 extra_cost = -1;
11083 if (rclass != ALTIVEC_REGS)
11085 if (legitimate_indirect_address_p (and_arg, false))
11086 extra_cost = 1;
11088 else if (legitimate_indexed_address_p (and_arg, false))
11089 extra_cost = 2;
11091 else
11093 fail_msg = "bad Altivec AND #2";
11094 extra_cost = -1;
11097 type = "and";
11099 break;
11101 /* If this is an indirect address, make sure it is a base register. */
11102 case REG:
11103 case SUBREG:
11104 if (!legitimate_indirect_address_p (addr, false))
11106 extra_cost = 1;
11107 type = "move";
11109 break;
11111 /* If this is an indexed address, make sure the register class can handle
11112 indexed addresses for this mode. */
11113 case PLUS:
11114 plus_arg0 = XEXP (addr, 0);
11115 plus_arg1 = XEXP (addr, 1);
11117 /* (plus (plus (reg) (constant)) (constant)) is generated during
11118 push_reload processing, so handle it now. */
11119 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11121 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11123 extra_cost = 1;
11124 type = "offset";
11128 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11129 push_reload processing, so handle it now. */
11130 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11132 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11134 extra_cost = 1;
11135 type = "indexed #2";
11139 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11141 fail_msg = "no base register #2";
11142 extra_cost = -1;
11145 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11147 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11148 || !legitimate_indexed_address_p (addr, false))
11150 extra_cost = 1;
11151 type = "indexed";
11155 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11156 && CONST_INT_P (plus_arg1))
11158 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11160 extra_cost = 1;
11161 type = "vector d-form offset";
11165 /* Make sure the register class can handle offset addresses. */
11166 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11168 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11170 extra_cost = 1;
11171 type = "offset #2";
11175 else
11177 fail_msg = "bad PLUS";
11178 extra_cost = -1;
11181 break;
11183 case LO_SUM:
11184 /* Quad offsets are restricted and can't handle normal addresses. */
11185 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11187 extra_cost = -1;
11188 type = "vector d-form lo_sum";
11191 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11193 fail_msg = "bad LO_SUM";
11194 extra_cost = -1;
11197 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11199 extra_cost = 1;
11200 type = "lo_sum";
11202 break;
11204 /* Static addresses need to create a TOC entry. */
11205 case CONST:
11206 case SYMBOL_REF:
11207 case LABEL_REF:
11208 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11210 extra_cost = -1;
11211 type = "vector d-form lo_sum #2";
11214 else
11216 type = "address";
11217 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11219 break;
11221 /* TOC references look like offsettable memory. */
11222 case UNSPEC:
11223 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11225 fail_msg = "bad UNSPEC";
11226 extra_cost = -1;
11229 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11231 extra_cost = -1;
11232 type = "vector d-form lo_sum #3";
11235 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11237 extra_cost = 1;
11238 type = "toc reference";
11240 break;
11242 default:
11244 fail_msg = "bad address";
11245 extra_cost = -1;
11249 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11251 if (extra_cost < 0)
11252 fprintf (stderr,
11253 "rs6000_secondary_reload_memory error: mode = %s, "
11254 "class = %s, addr_mask = '%s', %s\n",
11255 GET_MODE_NAME (mode),
11256 reg_class_names[rclass],
11257 rs6000_debug_addr_mask (addr_mask, false),
11258 (fail_msg != NULL) ? fail_msg : "<bad address>");
11260 else
11261 fprintf (stderr,
11262 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11263 "addr_mask = '%s', extra cost = %d, %s\n",
11264 GET_MODE_NAME (mode),
11265 reg_class_names[rclass],
11266 rs6000_debug_addr_mask (addr_mask, false),
11267 extra_cost,
11268 (type) ? type : "<none>");
11270 debug_rtx (addr);
11273 return extra_cost;
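/* Illustrative call (hypothetical, not part of the build): a plain
   indirect address is already valid for a GPR reload of a DImode value,
   so no extra instructions are costed.  */
#if 0
static void
example_reload_memory_cost (void)
{
  rtx addr = gen_rtx_REG (Pmode, 9);                 /* i.e. (mem (reg 9)) */
  int cost = rs6000_secondary_reload_memory (addr, GENERAL_REGS, DImode);
  /* Expected: 0 (no extra insns).  A PRE_INC address in a class without
     update forms would cost 1; an unrecognized address returns -1 so that
     reload falls back to another strategy.  */
  (void) cost;
}
#endif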
11276 /* Helper function for rs6000_secondary_reload to return true if a move to a
11277 different register class is really a simple move. */
11279 static bool
11280 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11281 enum rs6000_reg_type from_type,
11282 machine_mode mode)
11284 int size = GET_MODE_SIZE (mode);
11286 /* Add support for various direct moves available. In this function, we only
11287 look at cases where we don't need any extra registers, and one or more
11288 simple move insns are issued. Historically, small integers are not allowed
11289 in FPR/VSX registers. Single precision binary floating point is not a
11290 simple move because we need to convert to the single precision memory layout.
11291 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11292 need special direct move handling, which we do not support yet. */
11293 if (TARGET_DIRECT_MOVE
11294 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11295 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11297 if (TARGET_POWERPC64)
11299 /* ISA 2.07: MTVSRD or MFVSRD. */
11300 if (size == 8)
11301 return true;
11303 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11304 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11305 return true;
11308 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11309 if (TARGET_P8_VECTOR)
11311 if (mode == SImode)
11312 return true;
11314 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11315 return true;
11318 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11319 if (mode == SDmode)
11320 return true;
11323 /* Move to/from SPR. */
11324 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11325 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11326 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11327 return true;
11329 return false;
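/* Sketch (assumes a 64-bit target with ISA 2.07 direct moves; hypothetical
   helper, not part of the build): an 8-byte DImode transfer between a GPR
   and a VSX register is "simple", a single mtvsrd or mfvsrd, so no scratch
   register is required.  */
#if 0
static void
example_simple_move_p (void)
{
  bool simple = rs6000_secondary_reload_simple_move (VSX_REG_TYPE,
                                                     GPR_REG_TYPE, DImode);
  /* True when TARGET_DIRECT_MOVE and TARGET_POWERPC64 are both set.  */
  (void) simple;
}
#endif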
11332 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11333 special direct moves that involve allocating an extra register. Return true
11334 if there is a helper function for the move (recording its insn code and
11335 extra cost in SRI), and false if not. */
11337 static bool
11338 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11339 enum rs6000_reg_type from_type,
11340 machine_mode mode,
11341 secondary_reload_info *sri,
11342 bool altivec_p)
11344 bool ret = false;
11345 enum insn_code icode = CODE_FOR_nothing;
11346 int cost = 0;
11347 int size = GET_MODE_SIZE (mode);
11349 if (TARGET_POWERPC64 && size == 16)
11351 /* Handle moving 128-bit values from GPRs to VSX registers on
11352 ISA 2.07 (power8, power9) when running in 64-bit mode using
11353 XXPERMDI to glue the two 64-bit values back together. */
11354 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11356 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11357 icode = reg_addr[mode].reload_vsx_gpr;
11360 /* Handle moving 128-bit values from VSX registers to GPRs on
11361 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11362 bottom 64-bit value. */
11363 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11365 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11366 icode = reg_addr[mode].reload_gpr_vsx;
11370 else if (TARGET_POWERPC64 && mode == SFmode)
11372 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11374 cost = 3; /* xscvdpspn, mfvsrd, and. */
11375 icode = reg_addr[mode].reload_gpr_vsx;
11378 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11380 cost = 2; /* mtvsrwz, xscvspdpn. */
11381 icode = reg_addr[mode].reload_vsx_gpr;
11385 else if (!TARGET_POWERPC64 && size == 8)
11387 /* Handle moving 64-bit values from GPRs to floating point registers on
11388 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11389 32-bit values back together. Altivec register classes must be handled
11390 specially since a different instruction is used, and the secondary
11391 reload support requires a single instruction class in the scratch
11392 register constraint. However, right now TFmode is not allowed in
11393 Altivec registers, so the pattern will never match. */
11394 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11396 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11397 icode = reg_addr[mode].reload_fpr_gpr;
11401 if (icode != CODE_FOR_nothing)
11403 ret = true;
11404 if (sri)
11406 sri->icode = icode;
11407 sri->extra_cost = cost;
11411 return ret;
11414 /* Return whether a move between two register classes can be done either
11415 directly (simple move) or via a pattern that uses a single extra temporary
11416 (using ISA 2.07's direct move in this case). */
11418 static bool
11419 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11420 enum rs6000_reg_type from_type,
11421 machine_mode mode,
11422 secondary_reload_info *sri,
11423 bool altivec_p)
11425 /* Fall back to load/store reloads if either type is not a register. */
11426 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11427 return false;
11429 /* If we haven't allocated registers yet, assume the move can be done for the
11430 standard register types. */
11431 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11432 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11433 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11434 return true;
11436 /* A move within the same set of registers is a simple move for
11437 non-specialized registers. */
11438 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11439 return true;
11441 /* Check whether a simple move can be done directly. */
11442 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11444 if (sri)
11446 sri->icode = CODE_FOR_nothing;
11447 sri->extra_cost = 0;
11449 return true;
11452 /* Now check if we can do it in a few steps. */
11453 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11454 altivec_p);
11457 /* Inform reload about cases where moving X with a mode MODE to a register in
11458 RCLASS requires an extra scratch or immediate register. Return the class
11459 needed for the immediate register.
11461 For VSX and Altivec, we may need a register to convert sp+offset into
11462 reg+sp.
11464 For misaligned 64-bit gpr loads and stores we need a register to
11465 convert an offset address to indirect. */
11467 static reg_class_t
11468 rs6000_secondary_reload (bool in_p,
11469 rtx x,
11470 reg_class_t rclass_i,
11471 machine_mode mode,
11472 secondary_reload_info *sri)
11474 enum reg_class rclass = (enum reg_class) rclass_i;
11475 reg_class_t ret = ALL_REGS;
11476 enum insn_code icode;
11477 bool default_p = false;
11478 bool done_p = false;
11480 /* Allow subreg of memory before/during reload. */
11481 bool memory_p = (MEM_P (x)
11482 || (!reload_completed && SUBREG_P (x)
11483 && MEM_P (SUBREG_REG (x))));
11485 sri->icode = CODE_FOR_nothing;
11486 sri->t_icode = CODE_FOR_nothing;
11487 sri->extra_cost = 0;
11488 icode = ((in_p)
11489 ? reg_addr[mode].reload_load
11490 : reg_addr[mode].reload_store);
11492 if (REG_P (x) || register_operand (x, mode))
11494 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11495 bool altivec_p = (rclass == ALTIVEC_REGS);
11496 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11498 if (!in_p)
11499 std::swap (to_type, from_type);
11501 /* Can we do a direct move of some sort? */
11502 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11503 altivec_p))
11505 icode = (enum insn_code)sri->icode;
11506 default_p = false;
11507 done_p = true;
11508 ret = NO_REGS;
11512 /* Make sure 0.0 is not reloaded or forced into memory. */
11513 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11515 ret = NO_REGS;
11516 default_p = false;
11517 done_p = true;
11520 /* If this is a scalar floating point value and we want to load it into the
11521 traditional Altivec registers, route the move through a traditional floating
11522 point register, unless we have D-form addressing. Also make sure that
11523 non-zero constants use an FPR. */
11524 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11525 && !mode_supports_vmx_dform (mode)
11526 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11527 && (memory_p || CONST_DOUBLE_P (x)))
11529 ret = FLOAT_REGS;
11530 default_p = false;
11531 done_p = true;
11534 /* Handle reload of load/stores if we have reload helper functions. */
11535 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11537 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11538 mode);
11540 if (extra_cost >= 0)
11542 done_p = true;
11543 ret = NO_REGS;
11544 if (extra_cost > 0)
11546 sri->extra_cost = extra_cost;
11547 sri->icode = icode;
11552 /* Handle unaligned loads and stores of integer registers. */
11553 if (!done_p && TARGET_POWERPC64
11554 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11555 && memory_p
11556 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11558 rtx addr = XEXP (x, 0);
11559 rtx off = address_offset (addr);
11561 if (off != NULL_RTX)
11563 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11564 unsigned HOST_WIDE_INT offset = INTVAL (off);
11566 /* We need a secondary reload when our legitimate_address_p
11567 says the address is good (as otherwise the entire address
11568 will be reloaded), and the offset is not a multiple of
11569 four or we have an address wrap. Address wrap will only
11570 occur for LO_SUMs since legitimate_offset_address_p
11571 rejects addresses for 16-byte mems that will wrap. */
11572 if (GET_CODE (addr) == LO_SUM
11573 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11574 && ((offset & 3) != 0
11575 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11576 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11577 && (offset & 3) != 0))
11579 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11580 if (in_p)
11581 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11582 : CODE_FOR_reload_di_load);
11583 else
11584 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11585 : CODE_FOR_reload_di_store);
11586 sri->extra_cost = 2;
11587 ret = NO_REGS;
11588 done_p = true;
11590 else
11591 default_p = true;
11593 else
11594 default_p = true;
11597 if (!done_p && !TARGET_POWERPC64
11598 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11599 && memory_p
11600 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11602 rtx addr = XEXP (x, 0);
11603 rtx off = address_offset (addr);
11605 if (off != NULL_RTX)
11607 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11608 unsigned HOST_WIDE_INT offset = INTVAL (off);
11610 /* We need a secondary reload when our legitimate_address_p
11611 says the address is good (as otherwise the entire address
11612 will be reloaded), and we have a wrap.
11614 legitimate_lo_sum_address_p allows LO_SUM addresses to
11615 have any offset so test for wrap in the low 16 bits.
11617 legitimate_offset_address_p checks for the range
11618 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11619 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11620 [0x7ff4,0x7fff] respectively, so test for the
11621 intersection of these ranges, [0x7ffc,0x7fff] and
11622 [0x7ff4,0x7ff7] respectively.
11624 Note that the address we see here may have been
11625 manipulated by legitimize_reload_address. */
11626 if (GET_CODE (addr) == LO_SUM
11627 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11628 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11630 if (in_p)
11631 sri->icode = CODE_FOR_reload_si_load;
11632 else
11633 sri->icode = CODE_FOR_reload_si_store;
11634 sri->extra_cost = 2;
11635 ret = NO_REGS;
11636 done_p = true;
11638 else
11639 default_p = true;
11641 else
11642 default_p = true;
11645 if (!done_p)
11646 default_p = true;
11648 if (default_p)
11649 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11651 gcc_assert (ret != ALL_REGS);
11653 if (TARGET_DEBUG_ADDR)
11655 fprintf (stderr,
11656 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11657 "mode = %s",
11658 reg_class_names[ret],
11659 in_p ? "true" : "false",
11660 reg_class_names[rclass],
11661 GET_MODE_NAME (mode));
11663 if (reload_completed)
11664 fputs (", after reload", stderr);
11666 if (!done_p)
11667 fputs (", done_p not set", stderr);
11669 if (default_p)
11670 fputs (", default secondary reload", stderr);
11672 if (sri->icode != CODE_FOR_nothing)
11673 fprintf (stderr, ", reload func = %s, extra cost = %d",
11674 insn_data[sri->icode].name, sri->extra_cost);
11676 else if (sri->extra_cost > 0)
11677 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11679 fputs ("\n", stderr);
11680 debug_rtx (x);
11683 return ret;
11686 /* Better tracing for rs6000_secondary_reload_inner. */
11688 static void
11689 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11690 bool store_p)
11692 rtx set, clobber;
11694 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11696 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11697 store_p ? "store" : "load");
11699 if (store_p)
11700 set = gen_rtx_SET (mem, reg);
11701 else
11702 set = gen_rtx_SET (reg, mem);
11704 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11705 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11708 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11709 ATTRIBUTE_NORETURN;
11711 static void
11712 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11713 bool store_p)
11715 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11716 gcc_unreachable ();
11719 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11720 reload helper functions. These were identified in
11721 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11722 reload, it calls the insns:
11723 reload_<RELOAD:mode>_<P:mptrsize>_store
11724 reload_<RELOAD:mode>_<P:mptrsize>_load
11726 which in turn call this function to do whatever is necessary to create
11727 valid addresses. */
11729 void
11730 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11732 int regno = true_regnum (reg);
11733 machine_mode mode = GET_MODE (reg);
11734 addr_mask_type addr_mask;
11735 rtx addr;
11736 rtx new_addr;
11737 rtx op_reg, op0, op1;
11738 rtx and_op;
11739 rtx cc_clobber;
11740 rtvec rv;
11742 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11743 || !base_reg_operand (scratch, GET_MODE (scratch)))
11744 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11746 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11747 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11749 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11750 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11752 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11753 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11755 else
11756 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11758 /* Make sure the mode is valid in this register class. */
11759 if ((addr_mask & RELOAD_REG_VALID) == 0)
11760 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11762 if (TARGET_DEBUG_ADDR)
11763 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11765 new_addr = addr = XEXP (mem, 0);
11766 switch (GET_CODE (addr))
11768 /* Does the register class support auto update forms for this mode? If
11769 not, do the update now. We don't need a scratch register, since the
11770 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11771 case PRE_INC:
11772 case PRE_DEC:
11773 op_reg = XEXP (addr, 0);
11774 if (!base_reg_operand (op_reg, Pmode))
11775 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11777 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11779 int delta = GET_MODE_SIZE (mode);
11780 if (GET_CODE (addr) == PRE_DEC)
11781 delta = -delta;
11782 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11783 new_addr = op_reg;
11785 break;
11787 case PRE_MODIFY:
11788 op0 = XEXP (addr, 0);
11789 op1 = XEXP (addr, 1);
11790 if (!base_reg_operand (op0, Pmode)
11791 || GET_CODE (op1) != PLUS
11792 || !rtx_equal_p (op0, XEXP (op1, 0)))
11793 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11795 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11797 emit_insn (gen_rtx_SET (op0, op1));
11798 new_addr = reg;
11800 break;
11802 /* Do we need to simulate AND -16 to clear the bottom address bits used
11803 in VMX load/stores? */
11804 case AND:
11805 op0 = XEXP (addr, 0);
11806 op1 = XEXP (addr, 1);
11807 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11809 if (REG_P (op0) || SUBREG_P (op0))
11810 op_reg = op0;
11812 else if (GET_CODE (op1) == PLUS)
11814 emit_insn (gen_rtx_SET (scratch, op1));
11815 op_reg = scratch;
11818 else
11819 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11821 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11822 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11823 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11824 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11825 new_addr = scratch;
11827 break;
11829 /* If this is an indirect address, make sure it is a base register. */
11830 case REG:
11831 case SUBREG:
11832 if (!base_reg_operand (addr, GET_MODE (addr)))
11834 emit_insn (gen_rtx_SET (scratch, addr));
11835 new_addr = scratch;
11837 break;
11839 /* If this is an indexed address, make sure the register class can handle
11840 indexed addresses for this mode. */
11841 case PLUS:
11842 op0 = XEXP (addr, 0);
11843 op1 = XEXP (addr, 1);
11844 if (!base_reg_operand (op0, Pmode))
11845 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11847 else if (int_reg_operand (op1, Pmode))
11849 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11851 emit_insn (gen_rtx_SET (scratch, addr));
11852 new_addr = scratch;
11856 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11858 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11859 || !quad_address_p (addr, mode, false))
11861 emit_insn (gen_rtx_SET (scratch, addr));
11862 new_addr = scratch;
11866 /* Make sure the register class can handle offset addresses. */
11867 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11869 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11871 emit_insn (gen_rtx_SET (scratch, addr));
11872 new_addr = scratch;
11876 else
11877 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11879 break;
11881 case LO_SUM:
11882 op0 = XEXP (addr, 0);
11883 op1 = XEXP (addr, 1);
11884 if (!base_reg_operand (op0, Pmode))
11885 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11887 else if (int_reg_operand (op1, Pmode))
11889 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11891 emit_insn (gen_rtx_SET (scratch, addr));
11892 new_addr = scratch;
11896 /* Quad offsets are restricted and can't handle normal addresses. */
11897 else if (mode_supports_dq_form (mode))
11899 emit_insn (gen_rtx_SET (scratch, addr));
11900 new_addr = scratch;
11903 /* Make sure the register class can handle offset addresses. */
11904 else if (legitimate_lo_sum_address_p (mode, addr, false))
11906 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11908 emit_insn (gen_rtx_SET (scratch, addr));
11909 new_addr = scratch;
11913 else
11914 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11916 break;
11918 case SYMBOL_REF:
11919 case CONST:
11920 case LABEL_REF:
11921 rs6000_emit_move (scratch, addr, Pmode);
11922 new_addr = scratch;
11923 break;
11925 default:
11926 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11929 /* Adjust the address if it changed. */
11930 if (addr != new_addr)
11932 mem = replace_equiv_address_nv (mem, new_addr);
11933 if (TARGET_DEBUG_ADDR)
11934 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11937 /* Now create the move. */
11938 if (store_p)
11939 emit_insn (gen_rtx_SET (mem, reg));
11940 else
11941 emit_insn (gen_rtx_SET (reg, mem));
11943 return;
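/* Illustrative transformation (assumed behavior): when the register class
   cannot do update forms, a PRE_DEC access such as

     (mem:DF (pre_dec:DI (reg:DI 9)))

   is handled by emitting the update separately (e.g. addi 9,9,-8) and then
   using the plain (mem:DF (reg:DI 9)) for the actual load or store.  */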
11946 /* Convert reloads involving 64-bit gprs and misaligned offset
11947 addressing, or multiple 32-bit gprs and offsets that are too large,
11948 to use indirect addressing. */
11950 void
11951 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11953 int regno = true_regnum (reg);
11954 enum reg_class rclass;
11955 rtx addr;
11956 rtx scratch_or_premodify = scratch;
11958 if (TARGET_DEBUG_ADDR)
11960 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11961 store_p ? "store" : "load");
11962 fprintf (stderr, "reg:\n");
11963 debug_rtx (reg);
11964 fprintf (stderr, "mem:\n");
11965 debug_rtx (mem);
11966 fprintf (stderr, "scratch:\n");
11967 debug_rtx (scratch);
11970 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11971 gcc_assert (MEM_P (mem));
11972 rclass = REGNO_REG_CLASS (regno);
11973 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11974 addr = XEXP (mem, 0);
11976 if (GET_CODE (addr) == PRE_MODIFY)
11978 gcc_assert (REG_P (XEXP (addr, 0))
11979 && GET_CODE (XEXP (addr, 1)) == PLUS
11980 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11981 scratch_or_premodify = XEXP (addr, 0);
11982 addr = XEXP (addr, 1);
11984 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11986 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11988 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11990 /* Now create the move. */
11991 if (store_p)
11992 emit_insn (gen_rtx_SET (mem, reg));
11993 else
11994 emit_insn (gen_rtx_SET (reg, mem));
11996 return;
11999 /* Given an rtx X being reloaded into a reg required to be
12000 in class CLASS, return the class of reg to actually use.
12001 In general this is just CLASS; but on some machines
12002 in some cases it is preferable to use a more restrictive class.
12004 On the RS/6000, we have to return NO_REGS when we want to reload a
12005 floating-point CONST_DOUBLE to force it to be copied to memory.
12007 We also don't want to reload integer values into floating-point
12008 registers if we can at all help it. In fact, this can
12009 cause reload to die, if it tries to generate a reload of CTR
12010 into a FP register and discovers it doesn't have the memory location
12011 required.
12013 ??? Would it be a good idea to have reload do the converse, that is
12014 try to reload floating modes into FP registers if possible? */
12017 static enum reg_class
12018 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12020 machine_mode mode = GET_MODE (x);
12021 bool is_constant = CONSTANT_P (x);
12023 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12024 reload class for it. */
12025 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12026 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12027 return NO_REGS;
12029 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12030 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12031 return NO_REGS;
12033 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12034 the reloading of address expressions using PLUS into floating point
12035 registers. */
12036 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12038 if (is_constant)
12040 /* Zero is always allowed in all VSX registers. */
12041 if (x == CONST0_RTX (mode))
12042 return rclass;
12044 /* If this is a vector constant that can be formed with a few Altivec
12045 instructions, we want altivec registers. */
12046 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12047 return ALTIVEC_REGS;
12049 /* If this is an integer constant that can easily be loaded into
12050 vector registers, allow it. */
12051 if (CONST_INT_P (x))
12053 HOST_WIDE_INT value = INTVAL (x);
12055 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12056 2.06 can generate it in the Altivec registers with
12057 VSPLTI<x>. */
12058 if (value == -1)
12060 if (TARGET_P8_VECTOR)
12061 return rclass;
12062 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12063 return ALTIVEC_REGS;
12064 else
12065 return NO_REGS;
12068 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12069 a sign extend in the Altivec registers. */
12070 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12071 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12072 return ALTIVEC_REGS;
12075 /* Force constant to memory. */
12076 return NO_REGS;
12079 /* D-form addressing can easily reload the value. */
12080 if (mode_supports_vmx_dform (mode)
12081 || mode_supports_dq_form (mode))
12082 return rclass;
12084 /* If this is a scalar floating point value and we don't have D-form
12085 addressing, prefer the traditional floating point registers so that we
12086 can use D-form (register+offset) addressing. */
12087 if (rclass == VSX_REGS
12088 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12089 return FLOAT_REGS;
12091 /* Prefer the Altivec registers if Altivec is handling the vector
12092 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12093 loads. */
12094 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12095 || mode == V1TImode)
12096 return ALTIVEC_REGS;
12098 return rclass;
12101 if (is_constant || GET_CODE (x) == PLUS)
12103 if (reg_class_subset_p (GENERAL_REGS, rclass))
12104 return GENERAL_REGS;
12105 if (reg_class_subset_p (BASE_REGS, rclass))
12106 return BASE_REGS;
12107 return NO_REGS;
12110 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12111 return GENERAL_REGS;
12113 return rclass;
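/* Worked examples (assuming ISA 3.0): reloading the integer constant 5
   into a VSX class prefers ALTIVEC_REGS, since XXSPLTIB can materialize
   -128..127 there; the constant -1 keeps the full class on ISA 2.07+
   because XXLORC can generate it in any VSX register; most other constants
   return NO_REGS, forcing them to memory.  */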
12116 /* Debug version of rs6000_preferred_reload_class. */
12117 static enum reg_class
12118 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12120 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12122 fprintf (stderr,
12123 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12124 "mode = %s, x:\n",
12125 reg_class_names[ret], reg_class_names[rclass],
12126 GET_MODE_NAME (GET_MODE (x)));
12127 debug_rtx (x);
12129 return ret;
12132 /* If we are copying between FP or AltiVec registers and anything else, we need
12133 a memory location. The exception is when we are targeting ppc64 and the
12134 fpr/gpr move instructions are available. Also, under VSX, you
12135 can copy vector registers from the FP register set to the Altivec register
12136 set and vice versa. */
12138 static bool
12139 rs6000_secondary_memory_needed (machine_mode mode,
12140 reg_class_t from_class,
12141 reg_class_t to_class)
12143 enum rs6000_reg_type from_type, to_type;
12144 bool altivec_p = ((from_class == ALTIVEC_REGS)
12145 || (to_class == ALTIVEC_REGS));
12147 /* If a simple/direct move is available, we don't need secondary memory. */
12148 from_type = reg_class_to_reg_type[(int)from_class];
12149 to_type = reg_class_to_reg_type[(int)to_class];
12151 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12152 (secondary_reload_info *)0, altivec_p))
12153 return false;
12155 /* If we have a floating point or vector register class, we need to use
12156 memory to transfer the data. */
12157 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12158 return true;
12160 return false;
12163 /* Debug version of rs6000_secondary_memory_needed. */
12164 static bool
12165 rs6000_debug_secondary_memory_needed (machine_mode mode,
12166 reg_class_t from_class,
12167 reg_class_t to_class)
12169 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12171 fprintf (stderr,
12172 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12173 "to_class = %s, mode = %s\n",
12174 ret ? "true" : "false",
12175 reg_class_names[from_class],
12176 reg_class_names[to_class],
12177 GET_MODE_NAME (mode));
12179 return ret;
12182 /* Return the register class of a scratch register needed to copy IN into
12183 or out of a register in RCLASS in MODE. If it can be done directly,
12184 NO_REGS is returned. */
12186 static enum reg_class
12187 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12188 rtx in)
12190 int regno;
12192 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12193 #if TARGET_MACHO
12194 && MACHOPIC_INDIRECT
12195 #endif
12196 ))
12198 /* We cannot copy a symbolic operand directly into anything
12199 other than BASE_REGS for TARGET_ELF. So indicate that a
12200 register from BASE_REGS is needed as an intermediate
12201 register.
12203 On Darwin, pic addresses require a load from memory, which
12204 needs a base register. */
12205 if (rclass != BASE_REGS
12206 && (SYMBOL_REF_P (in)
12207 || GET_CODE (in) == HIGH
12208 || GET_CODE (in) == LABEL_REF
12209 || GET_CODE (in) == CONST))
12210 return BASE_REGS;
12213 if (REG_P (in))
12215 regno = REGNO (in);
12216 if (!HARD_REGISTER_NUM_P (regno))
12218 regno = true_regnum (in);
12219 if (!HARD_REGISTER_NUM_P (regno))
12220 regno = -1;
12223 else if (SUBREG_P (in))
12225 regno = true_regnum (in);
12226 if (!HARD_REGISTER_NUM_P (regno))
12227 regno = -1;
12229 else
12230 regno = -1;
12232 /* If we have VSX register moves, prefer moving scalar values between
12233 Altivec registers and GPR by going via an FPR (and then via memory)
12234 instead of reloading the secondary memory address for Altivec moves. */
12235 if (TARGET_VSX
12236 && GET_MODE_SIZE (mode) < 16
12237 && !mode_supports_vmx_dform (mode)
12238 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12239 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12240 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12241 && (regno >= 0 && INT_REGNO_P (regno)))))
12242 return FLOAT_REGS;
12244 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12245 into anything. */
12246 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12247 || (regno >= 0 && INT_REGNO_P (regno)))
12248 return NO_REGS;
12250 /* Constants, memory, and VSX registers can go into VSX registers (both the
12251 traditional floating point and the altivec registers). */
12252 if (rclass == VSX_REGS
12253 && (regno == -1 || VSX_REGNO_P (regno)))
12254 return NO_REGS;
12256 /* Constants, memory, and FP registers can go into FP registers. */
12257 if ((regno == -1 || FP_REGNO_P (regno))
12258 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12259 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12261 /* Memory and AltiVec registers can go into AltiVec registers. */
12262 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12263 && rclass == ALTIVEC_REGS)
12264 return NO_REGS;
12266 /* We can copy among the CR registers. */
12267 if ((rclass == CR_REGS || rclass == CR0_REGS)
12268 && regno >= 0 && CR_REGNO_P (regno))
12269 return NO_REGS;
12271 /* Otherwise, we need GENERAL_REGS. */
12272 return GENERAL_REGS;
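/* Examples (ELF target assumed): copying a SYMBOL_REF into FLOAT_REGS
   reports BASE_REGS as the needed intermediate class, while a GPR-to-GPR
   copy needs no scratch at all and returns NO_REGS.  */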
12275 /* Debug version of rs6000_secondary_reload_class. */
12276 static enum reg_class
12277 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12278 machine_mode mode, rtx in)
12280 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12281 fprintf (stderr,
12282 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12283 "mode = %s, input rtx:\n",
12284 reg_class_names[ret], reg_class_names[rclass],
12285 GET_MODE_NAME (mode));
12286 debug_rtx (in);
12288 return ret;
12291 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12293 static bool
12294 rs6000_can_change_mode_class (machine_mode from,
12295 machine_mode to,
12296 reg_class_t rclass)
12298 unsigned from_size = GET_MODE_SIZE (from);
12299 unsigned to_size = GET_MODE_SIZE (to);
12301 if (from_size != to_size)
12303 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12305 if (reg_classes_intersect_p (xclass, rclass))
12307 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12308 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12309 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12310 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12312 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12313 single register under VSX because the scalar part of the register
12314 is in the upper 64-bits, and not the lower 64-bits. Types like
12315 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12316 IEEE floating point can't overlap, and neither can small
12317 values. */
12319 if (to_float128_vector_p && from_float128_vector_p)
12320 return true;
12322 else if (to_float128_vector_p || from_float128_vector_p)
12323 return false;
12325 /* TDmode in floating-mode registers must always go into a register
12326 pair with the most significant word in the even-numbered register
12327 to match ISA requirements. In little-endian mode, this does not
12328 match subreg numbering, so we cannot allow subregs. */
12329 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12330 return false;
12332 /* Allow SD<->DD changes, since SDmode values are stored in
12333 the low half of the DDmode, just like target-independent
12334 code expects. We need to allow at least SD->DD since
12335 rs6000_secondary_memory_needed_mode asks for that change
12336 to be made for SD reloads. */
12337 if ((to == DDmode && from == SDmode)
12338 || (to == SDmode && from == DDmode))
12339 return true;
12341 if (from_size < 8 || to_size < 8)
12342 return false;
12344 if (from_size == 8 && (8 * to_nregs) != to_size)
12345 return false;
12347 if (to_size == 8 && (8 * from_nregs) != from_size)
12348 return false;
12350 return true;
12352 else
12353 return true;
12356 /* Since the VSX register set includes traditional floating point registers
12357 and altivec registers, just check for the size being different instead of
12358 trying to check whether the modes are vector modes. Otherwise it won't
12359 allow say DF and DI to change classes. For types like TFmode and TDmode
12360 that take 2 64-bit registers, rather than a single 128-bit register, don't
12361 allow subregs of those types to other 128-bit types. */
12362 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12364 unsigned num_regs = (from_size + 15) / 16;
12365 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12366 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12367 return false;
12369 return (from_size == 8 || from_size == 16);
12372 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12373 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12374 return false;
12376 return true;
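/* Usage sketch (hypothetical, not part of the build): SD<->DD subregs are
   explicitly permitted in the FP/VSX classes, while punning an 8-byte DF
   value to the 16-byte IEEE 128-bit KFmode is rejected.  */
#if 0
static void
example_can_change_mode_class (void)
{
  gcc_assert (rs6000_can_change_mode_class (SDmode, DDmode, FLOAT_REGS));
  if (TARGET_FLOAT128_TYPE)
    gcc_assert (!rs6000_can_change_mode_class (DFmode, KFmode, VSX_REGS));
}
#endif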
12379 /* Debug version of rs6000_can_change_mode_class. */
12380 static bool
12381 rs6000_debug_can_change_mode_class (machine_mode from,
12382 machine_mode to,
12383 reg_class_t rclass)
12385 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12387 fprintf (stderr,
12388 "rs6000_can_change_mode_class, return %s, from = %s, "
12389 "to = %s, rclass = %s\n",
12390 ret ? "true" : "false",
12391 GET_MODE_NAME (from), GET_MODE_NAME (to),
12392 reg_class_names[rclass]);
12394 return ret;
12397 /* Return a string to do a move operation of 128 bits of data. */
12399 const char *
12400 rs6000_output_move_128bit (rtx operands[])
12402 rtx dest = operands[0];
12403 rtx src = operands[1];
12404 machine_mode mode = GET_MODE (dest);
12405 int dest_regno;
12406 int src_regno;
12407 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12408 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12410 if (REG_P (dest))
12412 dest_regno = REGNO (dest);
12413 dest_gpr_p = INT_REGNO_P (dest_regno);
12414 dest_fp_p = FP_REGNO_P (dest_regno);
12415 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12416 dest_vsx_p = dest_fp_p | dest_vmx_p;
12418 else
12420 dest_regno = -1;
12421 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12424 if (REG_P (src))
12426 src_regno = REGNO (src);
12427 src_gpr_p = INT_REGNO_P (src_regno);
12428 src_fp_p = FP_REGNO_P (src_regno);
12429 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12430 src_vsx_p = src_fp_p | src_vmx_p;
12432 else
12434 src_regno = -1;
12435 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12438 /* Register moves. */
12439 if (dest_regno >= 0 && src_regno >= 0)
12441 if (dest_gpr_p)
12443 if (src_gpr_p)
12444 return "#";
12446 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12447 return (WORDS_BIG_ENDIAN
12448 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12449 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12451 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12452 return "#";
12455 else if (TARGET_VSX && dest_vsx_p)
12457 if (src_vsx_p)
12458 return "xxlor %x0,%x1,%x1";
12460 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12461 return (WORDS_BIG_ENDIAN
12462 ? "mtvsrdd %x0,%1,%L1"
12463 : "mtvsrdd %x0,%L1,%1");
12465 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12466 return "#";
12469 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12470 return "vor %0,%1,%1";
12472 else if (dest_fp_p && src_fp_p)
12473 return "#";
12476 /* Loads. */
12477 else if (dest_regno >= 0 && MEM_P (src))
12479 if (dest_gpr_p)
12481 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12482 return "lq %0,%1";
12483 else
12484 return "#";
12487 else if (TARGET_ALTIVEC && dest_vmx_p
12488 && altivec_indexed_or_indirect_operand (src, mode))
12489 return "lvx %0,%y1";
12491 else if (TARGET_VSX && dest_vsx_p)
12493 if (mode_supports_dq_form (mode)
12494 && quad_address_p (XEXP (src, 0), mode, true))
12495 return "lxv %x0,%1";
12497 else if (TARGET_P9_VECTOR)
12498 return "lxvx %x0,%y1";
12500 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12501 return "lxvw4x %x0,%y1";
12503 else
12504 return "lxvd2x %x0,%y1";
12507 else if (TARGET_ALTIVEC && dest_vmx_p)
12508 return "lvx %0,%y1";
12510 else if (dest_fp_p)
12511 return "#";
12514 /* Stores. */
12515 else if (src_regno >= 0 && MEM_P (dest))
12517 if (src_gpr_p)
12519 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12520 return "stq %1,%0";
12521 else
12522 return "#";
12525 else if (TARGET_ALTIVEC && src_vmx_p
12526 && altivec_indexed_or_indirect_operand (dest, mode))
12527 return "stvx %1,%y0";
12529 else if (TARGET_VSX && src_vsx_p)
12531 if (mode_supports_dq_form (mode)
12532 && quad_address_p (XEXP (dest, 0), mode, true))
12533 return "stxv %x1,%0";
12535 else if (TARGET_P9_VECTOR)
12536 return "stxvx %x1,%y0";
12538 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12539 return "stxvw4x %x1,%y0";
12541 else
12542 return "stxvd2x %x1,%y0";
12545 else if (TARGET_ALTIVEC && src_vmx_p)
12546 return "stvx %1,%y0";
12548 else if (src_fp_p)
12549 return "#";
12552 /* Constants. */
12553 else if (dest_regno >= 0
12554 && (CONST_INT_P (src)
12555 || CONST_WIDE_INT_P (src)
12556 || CONST_DOUBLE_P (src)
12557 || GET_CODE (src) == CONST_VECTOR))
12559 if (dest_gpr_p)
12560 return "#";
12562 else if ((dest_vmx_p && TARGET_ALTIVEC)
12563 || (dest_vsx_p && TARGET_VSX))
12564 return output_vec_const_move (operands);
12567 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
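/* Example outputs (assuming VSX/Altivec targets): a VSX-to-VSX register
   move prints "xxlor %x0,%x1,%x1", an Altivec-to-Altivec move prints
   "vor %0,%1,%1", and a GPR-to-GPR move returns "#" so that the insn is
   split into word-sized moves after reload.  */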
12570 /* Validate a 128-bit move. */
12571 bool
12572 rs6000_move_128bit_ok_p (rtx operands[])
12574 machine_mode mode = GET_MODE (operands[0]);
12575 return (gpc_reg_operand (operands[0], mode)
12576 || gpc_reg_operand (operands[1], mode));
12579 /* Return true if a 128-bit move needs to be split. */
12580 bool
12581 rs6000_split_128bit_ok_p (rtx operands[])
12583 if (!reload_completed)
12584 return false;
12586 if (!gpr_or_gpr_p (operands[0], operands[1]))
12587 return false;
12589 if (quad_load_store_p (operands[0], operands[1]))
12590 return false;
12592 return true;
12596 /* Given a comparison operation, return the bit number in CCR to test. We
12597 know this is a valid comparison.
12599 SCC_P is 1 if this is for an scc. That means that %D will have been
12600 used instead of %C, so the bits will be in different places.
12602 Return -1 if OP isn't a valid comparison for some reason. */
12604 int
12605 ccr_bit (rtx op, int scc_p)
12607 enum rtx_code code = GET_CODE (op);
12608 machine_mode cc_mode;
12609 int cc_regnum;
12610 int base_bit;
12611 rtx reg;
12613 if (!COMPARISON_P (op))
12614 return -1;
12616 reg = XEXP (op, 0);
12618 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12619 return -1;
12621 cc_mode = GET_MODE (reg);
12622 cc_regnum = REGNO (reg);
12623 base_bit = 4 * (cc_regnum - CR0_REGNO);
12625 validate_condition_mode (code, cc_mode);
12627 /* When generating a sCOND operation, only positive conditions are
12628 allowed. */
12629 if (scc_p)
12630 switch (code)
12632 case EQ:
12633 case GT:
12634 case LT:
12635 case UNORDERED:
12636 case GTU:
12637 case LTU:
12638 break;
12639 default:
12640 return -1;
12643 switch (code)
12645 case NE:
12646 return scc_p ? base_bit + 3 : base_bit + 2;
12647 case EQ:
12648 return base_bit + 2;
12649 case GT: case GTU: case UNLE:
12650 return base_bit + 1;
12651 case LT: case LTU: case UNGE:
12652 return base_bit;
12653 case ORDERED: case UNORDERED:
12654 return base_bit + 3;
12656 case GE: case GEU:
12657 /* If scc, we will have done a cror to put the bit in the
12658 unordered position. So test that bit. For integer, this is ! LT
12659 unless this is an scc insn. */
12660 return scc_p ? base_bit + 3 : base_bit;
12662 case LE: case LEU:
12663 return scc_p ? base_bit + 3 : base_bit + 1;
12665 default:
12666 return -1;
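/* Worked example: for (eq (reg CR2) (const_int 0)), base_bit is
   4 * (cr2 - cr0) = 8 and EQ tests base_bit + 2, so ccr_bit returns 10.
   With SCC_P set, NE/GE/GEU/LE/LEU instead test base_bit + 3, where a
   preceding cror has parked the desired bit.  */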
12670 /* Return the GOT register. */
12672 rtx
12673 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12675 /* The second flow pass currently (June 1999) can't update
12676 regs_ever_live without disturbing other parts of the compiler, so
12677 update it here to make the prolog/epilogue code happy. */
12678 if (!can_create_pseudo_p ()
12679 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12680 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12682 crtl->uses_pic_offset_table = 1;
12684 return pic_offset_table_rtx;
12687 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12689 /* Write out a function code label. */
12691 void
12692 rs6000_output_function_entry (FILE *file, const char *fname)
12694 if (fname[0] != '.')
12696 switch (DEFAULT_ABI)
12698 default:
12699 gcc_unreachable ();
12701 case ABI_AIX:
12702 if (DOT_SYMBOLS)
12703 putc ('.', file);
12704 else
12705 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12706 break;
12708 case ABI_ELFv2:
12709 case ABI_V4:
12710 case ABI_DARWIN:
12711 break;
12715 RS6000_OUTPUT_BASENAME (file, fname);
12718 /* Print an operand. Recognize special options, documented below. */
12720 #if TARGET_ELF
12721 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12722 only introduced by the linker, when applying the sda21
12723 relocation. */
12724 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12725 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12726 #else
12727 #define SMALL_DATA_RELOC "sda21"
12728 #define SMALL_DATA_REG 0
12729 #endif
12731 void
12732 print_operand (FILE *file, rtx x, int code)
12734 int i;
12735 unsigned HOST_WIDE_INT uval;
12737 switch (code)
12739 /* %a is output_address. */
12741 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12742 output_operand. */
12744 case 'D':
12745 /* Like 'J' but get to the GT bit only. */
12746 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12748 output_operand_lossage ("invalid %%D value");
12749 return;
12752 /* Bit 1 is GT bit. */
12753 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12755 /* Add one for shift count in rlinm for scc. */
12756 fprintf (file, "%d", i + 1);
12757 return;
12759 case 'e':
12760 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12761 if (! INT_P (x))
12763 output_operand_lossage ("invalid %%e value");
12764 return;
12767 uval = INTVAL (x);
12768 if ((uval & 0xffff) == 0 && uval != 0)
12769 putc ('s', file);
12770 return;
12772 case 'E':
12773 /* X is a CR register. Print the number of the EQ bit of the CR. */
12774 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12775 output_operand_lossage ("invalid %%E value");
12776 else
12777 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12778 return;
12780 case 'f':
12781 /* X is a CR register. Print the shift count needed to move it
12782 to the high-order four bits. */
12783 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12784 output_operand_lossage ("invalid %%f value");
12785 else
12786 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12787 return;
12789 case 'F':
12790 /* Similar, but print the count for the rotate in the opposite
12791 direction. */
12792 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12793 output_operand_lossage ("invalid %%F value");
12794 else
12795 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12796 return;
12798 case 'G':
12799 /* X is a constant integer. If it is negative, print "m",
12800 otherwise print "z". This is to make an aze or ame insn. */
12801 if (!CONST_INT_P (x))
12802 output_operand_lossage ("invalid %%G value");
12803 else if (INTVAL (x) >= 0)
12804 putc ('z', file);
12805 else
12806 putc ('m', file);
12807 return;
12809 case 'h':
12810 /* If constant, output low-order five bits. Otherwise, write
12811 normally. */
12812 if (INT_P (x))
12813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12814 else
12815 print_operand (file, x, 0);
12816 return;
12818 case 'H':
12819 /* If constant, output low-order six bits. Otherwise, write
12820 normally. */
12821 if (INT_P (x))
12822 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12823 else
12824 print_operand (file, x, 0);
12825 return;
12827 case 'I':
12828 /* Print `i' if this is a constant, else nothing. */
12829 if (INT_P (x))
12830 putc ('i', file);
12831 return;
12833 case 'j':
12834 /* Write the bit number in CCR for jump. */
12835 i = ccr_bit (x, 0);
12836 if (i == -1)
12837 output_operand_lossage ("invalid %%j code");
12838 else
12839 fprintf (file, "%d", i);
12840 return;
12842 case 'J':
12843 /* Similar, but add one for shift count in rlinm for scc and pass
12844 scc flag to `ccr_bit'. */
12845 i = ccr_bit (x, 1);
12846 if (i == -1)
12847 output_operand_lossage ("invalid %%J code");
12848 else
12849 /* If we want bit 31, write a shift count of zero, not 32. */
12850 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12851 return;
12853 case 'k':
12854 /* X must be a constant. Write the 1's complement of the
12855 constant. */
12856 if (! INT_P (x))
12857 output_operand_lossage ("invalid %%k value");
12858 else
12859 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12860 return;
12862 case 'K':
12863 /* X must be a symbolic constant on ELF. Write an
12864 expression suitable for an 'addi' that adds in the low 16
12865 bits of the MEM. */
12866 if (GET_CODE (x) == CONST)
12868 if (GET_CODE (XEXP (x, 0)) != PLUS
12869 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12870 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12871 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12872 output_operand_lossage ("invalid %%K value");
12874 print_operand_address (file, x);
12875 fputs ("@l", file);
12876 return;
12878 /* %l is output_asm_label. */
12880 case 'L':
12881 /* Write second word of DImode or DFmode reference. Works on register
12882 or non-indexed memory only. */
12883 if (REG_P (x))
12884 fputs (reg_names[REGNO (x) + 1], file);
12885 else if (MEM_P (x))
12887 machine_mode mode = GET_MODE (x);
12888 /* Handle possible auto-increment. Since it is pre-increment and
12889 we have already done it, we can just use an offset of word. */
12890 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12891 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12892 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12893 UNITS_PER_WORD));
12894 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12895 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12896 UNITS_PER_WORD));
12897 else
12898 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12899 UNITS_PER_WORD),
12900 0));
12902 if (small_data_operand (x, GET_MODE (x)))
12903 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12904 reg_names[SMALL_DATA_REG]);
12906 return;
12908 case 'N': /* Unused */
12909 /* Write the number of elements in the vector times 4. */
12910 if (GET_CODE (x) != PARALLEL)
12911 output_operand_lossage ("invalid %%N value");
12912 else
12913 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12914 return;
12916 case 'O': /* Unused */
12917 /* Similar, but subtract 1 first. */
12918 if (GET_CODE (x) != PARALLEL)
12919 output_operand_lossage ("invalid %%O value");
12920 else
12921 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12922 return;
12924 case 'p':
12925 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12926 if (! INT_P (x)
12927 || INTVAL (x) < 0
12928 || (i = exact_log2 (INTVAL (x))) < 0)
12929 output_operand_lossage ("invalid %%p value");
12930 else
12931 fprintf (file, "%d", i);
12932 return;
12934 case 'P':
12935 /* The operand must be an indirect memory reference. The result
12936 is the register name. */
12937 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12938 || REGNO (XEXP (x, 0)) >= 32)
12939 output_operand_lossage ("invalid %%P value");
12940 else
12941 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12942 return;
12944 case 'q':
12945 /* This outputs the logical code corresponding to a boolean
12946 expression. The expression may have one or both operands
12947 negated (if one, only the first one). For condition register
12948 logical operations, it will also treat the negated
12949 CR codes as NOTs, but not handle NOTs of them. */
12951 const char *const *t = 0;
12952 const char *s;
12953 enum rtx_code code = GET_CODE (x);
12954 static const char * const tbl[3][3] = {
12955 { "and", "andc", "nor" },
12956 { "or", "orc", "nand" },
12957 { "xor", "eqv", "xor" } };
12959 if (code == AND)
12960 t = tbl[0];
12961 else if (code == IOR)
12962 t = tbl[1];
12963 else if (code == XOR)
12964 t = tbl[2];
12965 else
12966 output_operand_lossage ("invalid %%q value");
12968 if (GET_CODE (XEXP (x, 0)) != NOT)
12969 s = t[0];
12970 else
12972 if (GET_CODE (XEXP (x, 1)) == NOT)
12973 s = t[2];
12974 else
12975 s = t[1];
12978 fputs (s, file);
12980 return;
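/* Added note (illustrative, not from the original sources): with the
   table above, (and (not A) B) prints "andc", (ior (not A) (not B))
   prints "nand", and an AND of two negated operands prints "nor";
   with no negated operands the plain "and"/"or"/"xor" entry is used.  */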
12982 case 'Q':
12983 if (! TARGET_MFCRF)
12984 return;
12985 fputc (',', file);
12986 /* FALLTHRU */
12988 case 'R':
12989 /* X is a CR register. Print the mask for `mtcrf'. */
12990 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12991 output_operand_lossage ("invalid %%R value");
12992 else
12993 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12994 return;
12996 case 's':
12997 /* Low 5 bits of 32 - value */
12998 if (! INT_P (x))
12999 output_operand_lossage ("invalid %%s value");
13000 else
13001 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13002 return;
13004 case 't':
13005 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13006 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13008 output_operand_lossage ("invalid %%t value");
13009 return;
13012 /* Bit 3 is OV bit. */
13013 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13015 /* If we want bit 31, write a shift count of zero, not 32. */
13016 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13017 return;
13019 case 'T':
13020 /* Print the symbolic name of a branch target register. */
13021 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13022 x = XVECEXP (x, 0, 0);
13023 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13024 && REGNO (x) != CTR_REGNO))
13025 output_operand_lossage ("invalid %%T value");
13026 else if (REGNO (x) == LR_REGNO)
13027 fputs ("lr", file);
13028 else
13029 fputs ("ctr", file);
13030 return;
13032 case 'u':
13033 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13034 for use in unsigned operand. */
13035 if (! INT_P (x))
13037 output_operand_lossage ("invalid %%u value");
13038 return;
13041 uval = INTVAL (x);
13042 if ((uval & 0xffff) == 0)
13043 uval >>= 16;
13045 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13046 return;
13048 case 'v':
13049 /* High-order 16 bits of constant for use in signed operand. */
13050 if (! INT_P (x))
13051 output_operand_lossage ("invalid %%v value");
13052 else
13053 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13054 (INTVAL (x) >> 16) & 0xffff);
13055 return;
13057 case 'U':
13058 /* Print `u' if this has an auto-increment or auto-decrement. */
13059 if (MEM_P (x)
13060 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13061 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13062 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13063 putc ('u', file);
13064 return;
13066 case 'V':
13067 /* Print the trap code for this operand. */
13068 switch (GET_CODE (x))
13070 case EQ:
13071 fputs ("eq", file); /* 4 */
13072 break;
13073 case NE:
13074 fputs ("ne", file); /* 24 */
13075 break;
13076 case LT:
13077 fputs ("lt", file); /* 16 */
13078 break;
13079 case LE:
13080 fputs ("le", file); /* 20 */
13081 break;
13082 case GT:
13083 fputs ("gt", file); /* 8 */
13084 break;
13085 case GE:
13086 fputs ("ge", file); /* 12 */
13087 break;
13088 case LTU:
13089 fputs ("llt", file); /* 2 */
13090 break;
13091 case LEU:
13092 fputs ("lle", file); /* 6 */
13093 break;
13094 case GTU:
13095 fputs ("lgt", file); /* 1 */
13096 break;
13097 case GEU:
13098 fputs ("lge", file); /* 5 */
13099 break;
13100 default:
13101 output_operand_lossage ("invalid %%V value");
13103 break;
13105 case 'w':
13106 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13107 normally. */
13108 if (INT_P (x))
13109 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13110 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13111 else
13112 print_operand (file, x, 0);
13113 return;
13115 case 'x':
13116 /* X is a FPR or Altivec register used in a VSX context. */
13117 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13118 output_operand_lossage ("invalid %%x value");
13119 else
13121 int reg = REGNO (x);
13122 int vsx_reg = (FP_REGNO_P (reg)
13123 ? reg - 32
13124 : reg - FIRST_ALTIVEC_REGNO + 32);
13126 #ifdef TARGET_REGNAMES
13127 if (TARGET_REGNAMES)
13128 fprintf (file, "%%vs%d", vsx_reg);
13129 else
13130 #endif
13131 fprintf (file, "%d", vsx_reg);
13133 return;
13135 case 'X':
13136 if (MEM_P (x)
13137 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13138 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13139 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13140 putc ('x', file);
13141 return;
13143 case 'Y':
13144 /* Like 'L', for third word of TImode/PTImode */
13145 if (REG_P (x))
13146 fputs (reg_names[REGNO (x) + 2], file);
13147 else if (MEM_P (x))
13149 machine_mode mode = GET_MODE (x);
13150 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13151 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13152 output_address (mode, plus_constant (Pmode,
13153 XEXP (XEXP (x, 0), 0), 8));
13154 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13155 output_address (mode, plus_constant (Pmode,
13156 XEXP (XEXP (x, 0), 0), 8));
13157 else
13158 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13159 if (small_data_operand (x, GET_MODE (x)))
13160 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13161 reg_names[SMALL_DATA_REG]);
13163 return;
13165 case 'z':
13166 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13167 x = XVECEXP (x, 0, 1);
13168 /* X is a SYMBOL_REF. Write out the name preceded by a
13169 period and without any trailing data in brackets. Used for function
13170 names. If we are configured for System V (or the embedded ABI) on
13171 the PowerPC, do not emit the period, since those systems do not use
13172 TOCs and the like. */
13173 if (!SYMBOL_REF_P (x))
13175 output_operand_lossage ("invalid %%z value");
13176 return;
13179 /* For macho, check to see if we need a stub. */
13180 if (TARGET_MACHO)
13182 const char *name = XSTR (x, 0);
13183 #if TARGET_MACHO
13184 if (darwin_symbol_stubs
13185 && MACHOPIC_INDIRECT
13186 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13187 name = machopic_indirection_name (x, /*stub_p=*/true);
13188 #endif
13189 assemble_name (file, name);
13191 else if (!DOT_SYMBOLS)
13192 assemble_name (file, XSTR (x, 0));
13193 else
13194 rs6000_output_function_entry (file, XSTR (x, 0));
13195 return;
13197 case 'Z':
13198 /* Like 'L', for last word of TImode/PTImode. */
13199 if (REG_P (x))
13200 fputs (reg_names[REGNO (x) + 3], file);
13201 else if (MEM_P (x))
13203 machine_mode mode = GET_MODE (x);
13204 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13205 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13206 output_address (mode, plus_constant (Pmode,
13207 XEXP (XEXP (x, 0), 0), 12));
13208 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13209 output_address (mode, plus_constant (Pmode,
13210 XEXP (XEXP (x, 0), 0), 12));
13211 else
13212 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13213 if (small_data_operand (x, GET_MODE (x)))
13214 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13215 reg_names[SMALL_DATA_REG]);
13217 return;
13219 /* Print AltiVec memory operand. */
13220 case 'y':
13222 rtx tmp;
13224 gcc_assert (MEM_P (x));
13226 tmp = XEXP (x, 0);
13228 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13229 && GET_CODE (tmp) == AND
13230 && CONST_INT_P (XEXP (tmp, 1))
13231 && INTVAL (XEXP (tmp, 1)) == -16)
13232 tmp = XEXP (tmp, 0);
13233 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13234 && GET_CODE (tmp) == PRE_MODIFY)
13235 tmp = XEXP (tmp, 1);
13236 if (REG_P (tmp))
13237 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13238 else
13240 if (GET_CODE (tmp) != PLUS
13241 || !REG_P (XEXP (tmp, 0))
13242 || !REG_P (XEXP (tmp, 1)))
13244 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13245 break;
13248 if (REGNO (XEXP (tmp, 0)) == 0)
13249 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13250 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13251 else
13252 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13253 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13255 break;
13258 case 0:
13259 if (REG_P (x))
13260 fprintf (file, "%s", reg_names[REGNO (x)]);
13261 else if (MEM_P (x))
13263 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13264 know the width from the mode. */
13265 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13266 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13267 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13268 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13269 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13270 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13271 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13272 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13273 else
13274 output_address (GET_MODE (x), XEXP (x, 0));
13276 else if (toc_relative_expr_p (x, false,
13277 &tocrel_base_oac, &tocrel_offset_oac))
13278 /* This hack along with a corresponding hack in
13279 rs6000_output_addr_const_extra arranges to output addends
13280 where the assembler expects to find them. eg.
13281 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13282 without this hack would be output as "x@toc+4". We
13283 want "x+4@toc". */
13284 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13285 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13286 output_addr_const (file, XVECEXP (x, 0, 0));
13287 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13288 output_addr_const (file, XVECEXP (x, 0, 1));
13289 else
13290 output_addr_const (file, x);
13291 return;
13293 case '&':
13294 if (const char *name = get_some_local_dynamic_name ())
13295 assemble_name (file, name);
13296 else
13297 output_operand_lossage ("'%%&' used without any "
13298 "local dynamic TLS references");
13299 return;
13301 default:
13302 output_operand_lossage ("invalid %%xn code");
13306 /* Print the address of an operand. */
13308 void
13309 print_operand_address (FILE *file, rtx x)
13311 if (REG_P (x))
13312 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13314 /* Is it a PC-relative address? */
13315 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13317 HOST_WIDE_INT offset;
13319 if (GET_CODE (x) == CONST)
13320 x = XEXP (x, 0);
13322 if (GET_CODE (x) == PLUS)
13324 offset = INTVAL (XEXP (x, 1));
13325 x = XEXP (x, 0);
13327 else
13328 offset = 0;
13330 output_addr_const (file, x);
13332 if (offset)
13333 fprintf (file, "%+" PRId64, offset);
13335 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13336 fprintf (file, "@got");
13338 fprintf (file, "@pcrel");
13340 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13341 || GET_CODE (x) == LABEL_REF)
13343 output_addr_const (file, x);
13344 if (small_data_operand (x, GET_MODE (x)))
13345 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13346 reg_names[SMALL_DATA_REG]);
13347 else
13348 gcc_assert (!TARGET_TOC);
13350 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13351 && REG_P (XEXP (x, 1)))
13353 if (REGNO (XEXP (x, 0)) == 0)
13354 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13355 reg_names[ REGNO (XEXP (x, 0)) ]);
13356 else
13357 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13358 reg_names[ REGNO (XEXP (x, 1)) ]);
13360 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13361 && CONST_INT_P (XEXP (x, 1)))
13362 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13363 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13364 #if TARGET_MACHO
13365 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13366 && CONSTANT_P (XEXP (x, 1)))
13368 fprintf (file, "lo16(");
13369 output_addr_const (file, XEXP (x, 1));
13370 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13372 #endif
13373 #if TARGET_ELF
13374 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13375 && CONSTANT_P (XEXP (x, 1)))
13377 output_addr_const (file, XEXP (x, 1));
13378 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13380 #endif
13381 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13383 /* This hack along with a corresponding hack in
13384 rs6000_output_addr_const_extra arranges to output addends
13385 where the assembler expects to find them. eg.
13386 (lo_sum (reg 9)
13387 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13388 without this hack would be output as "x@toc+8@l(9)". We
13389 want "x+8@toc@l(9)". */
13390 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13391 if (GET_CODE (x) == LO_SUM)
13392 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13393 else
13394 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13396 else
13397 output_addr_const (file, x);
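/* Added summary (illustrative, not from the original sources) of the
   address forms printed above, using made-up register numbers:
     (reg 9)                            ->  "0(9)"
     (plus (reg 9) (const_int 16))      ->  "16(9)"
     (plus (reg 9) (reg 10))            ->  "9,10"
     (lo_sum (reg 9) (symbol_ref "x"))  ->  "x@l(9)"   [ELF]
   With -mregnames the register names print as "%r9" and so on.  */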
13400 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13402 bool
13403 rs6000_output_addr_const_extra (FILE *file, rtx x)
13405 if (GET_CODE (x) == UNSPEC)
13406 switch (XINT (x, 1))
13408 case UNSPEC_TOCREL:
13409 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13410 && REG_P (XVECEXP (x, 0, 1))
13411 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13412 output_addr_const (file, XVECEXP (x, 0, 0));
13413 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13415 if (INTVAL (tocrel_offset_oac) >= 0)
13416 fprintf (file, "+");
13417 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13419 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13421 putc ('-', file);
13422 assemble_name (file, toc_label_name);
13423 need_toc_init = 1;
13425 else if (TARGET_ELF)
13426 fputs ("@toc", file);
13427 return true;
13429 #if TARGET_MACHO
13430 case UNSPEC_MACHOPIC_OFFSET:
13431 output_addr_const (file, XVECEXP (x, 0, 0));
13432 putc ('-', file);
13433 machopic_output_function_base_name (file);
13434 return true;
13435 #endif
13437 return false;
13440 /* Target hook for assembling integer objects. The PowerPC version has
13441 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13442 is defined. It also needs to handle DI-mode objects on 64-bit
13443 targets. */
13445 static bool
13446 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13448 #ifdef RELOCATABLE_NEEDS_FIXUP
13449 /* Special handling for SI values. */
13450 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13452 static int recurse = 0;
13454 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13455 the .fixup section. Since the TOC section is already relocated, we
13456 don't need to mark it here. We used to skip the text section, but it
13457 should never be valid for relocated addresses to be placed in the text
13458 section. */
13459 if (DEFAULT_ABI == ABI_V4
13460 && (TARGET_RELOCATABLE || flag_pic > 1)
13461 && in_section != toc_section
13462 && !recurse
13463 && !CONST_SCALAR_INT_P (x)
13464 && CONSTANT_P (x))
13466 char buf[256];
13468 recurse = 1;
13469 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13470 fixuplabelno++;
13471 ASM_OUTPUT_LABEL (asm_out_file, buf);
13472 fprintf (asm_out_file, "\t.long\t(");
13473 output_addr_const (asm_out_file, x);
13474 fprintf (asm_out_file, ")@fixup\n");
13475 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13476 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13477 fprintf (asm_out_file, "\t.long\t");
13478 assemble_name (asm_out_file, buf);
13479 fprintf (asm_out_file, "\n\t.previous\n");
13480 recurse = 0;
13481 return true;
13483 /* Remove initial .'s to turn a -mcall-aixdesc function
13484 address into the address of the descriptor, not the function
13485 itself. */
13486 else if (SYMBOL_REF_P (x)
13487 && XSTR (x, 0)[0] == '.'
13488 && DEFAULT_ABI == ABI_AIX)
13490 const char *name = XSTR (x, 0);
13491 while (*name == '.')
13492 name++;
13494 fprintf (asm_out_file, "\t.long\t%s\n", name);
13495 return true;
13498 #endif /* RELOCATABLE_NEEDS_FIXUP */
13499 return default_assemble_integer (x, size, aligned_p);
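/* Added sketch of the -mrelocatable fixup output above (label and
   symbol names are made up):

       .LCP1:
	       .long	(sym)@fixup
	       .section	".fixup","aw"
	       .align 2
	       .long	.LCP1
	       .previous

   The .fixup section thus lists every word that must be relocated at
   load time.  */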
13502 /* Return a template string for assembly to emit when making an
13503 external call. FUNOP is the call mem argument operand number. */
13505 static const char *
13506 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13508 /* -Wformat-overflow workaround, without which gcc thinks that %u
13509 might produce 10 digits. */
13510 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13512 char arg[12];
13513 arg[0] = 0;
13514 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13516 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13517 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13518 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13519 sprintf (arg, "(%%&@tlsld)");
13522 /* The magic 32768 offset here corresponds to the offset of
13523 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13524 char z[11];
13525 sprintf (z, "%%z%u%s", funop,
13526 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13527 ? "+32768" : ""));
13529 static char str[32]; /* 1 spare */
13530 if (rs6000_pcrel_p (cfun))
13531 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13532 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13533 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13534 sibcall ? "" : "\n\tnop");
13535 else if (DEFAULT_ABI == ABI_V4)
13536 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13537 flag_pic ? "@plt" : "");
13538 #if TARGET_MACHO
13539 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13540 else if (DEFAULT_ABI == ABI_DARWIN)
13542 /* The cookie is in operand func+2. */
13543 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13544 int cookie = INTVAL (operands[funop + 2]);
13545 if (cookie & CALL_LONG)
13547 tree funname = get_identifier (XSTR (operands[funop], 0));
13548 tree labelname = get_prev_label (funname);
13549 gcc_checking_assert (labelname && !sibcall);
13551 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13552 instruction will reach 'foo', otherwise link as 'bl L42'".
13553 "L42" should be a 'branch island', that will do a far jump to
13554 'foo'. Branch islands are generated in
13555 macho_branch_islands(). */
13556 sprintf (str, "jbsr %%z%u,%.10s", funop,
13557 IDENTIFIER_POINTER (labelname));
13559 else
13560 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13561 after the call. */
13562 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13564 #endif
13565 else
13566 gcc_unreachable ();
13567 return str;
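/* Added examples of the call templates built above ("foo" stands in
   for the %z operand):
     pcrel:              "bl foo@notoc"
     AIX/ELFv2:          "bl foo" + "\n\tnop"   (TOC restore slot)
     SysV with -fPIC:    "bl foo@plt"
   Sibcalls drop the "l", and on AIX/ELFv2 also the nop.  */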
13570 const char *
13571 rs6000_call_template (rtx *operands, unsigned int funop)
13573 return rs6000_call_template_1 (operands, funop, false);
13576 const char *
13577 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13579 return rs6000_call_template_1 (operands, funop, true);
13582 /* As above, for indirect calls. */
13584 static const char *
13585 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13586 bool sibcall)
13588 /* -Wformat-overflow workaround, without which gcc thinks that %u
13589 might produce 10 digits. Note that -Wformat-overflow will not
13590 currently warn here for str[], so do not rely on a warning to
13591 ensure str[] is correctly sized. */
13592 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13594 /* Currently, funop is either 0 or 1. The maximum string is always
13595 a !speculate 64-bit __tls_get_addr call.
13597 ABI_ELFv2, pcrel:
13598 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13599 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13600 . 9 crset 2\n\t
13601 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13602 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13603 . 8 beq%T1l-
13604 .---
13605 .142
13607 ABI_AIX:
13608 . 9 ld 2,%3\n\t
13609 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13610 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13611 . 9 crset 2\n\t
13612 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13613 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13614 . 10 beq%T1l-\n\t
13615 . 10 ld 2,%4(1)
13616 .---
13617 .151
13619 ABI_ELFv2:
13620 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13621 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13622 . 9 crset 2\n\t
13623 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13624 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13625 . 10 beq%T1l-\n\t
13626 . 10 ld 2,%3(1)
13627 .---
13628 .142
13630 ABI_V4:
13631 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13632 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13633 . 9 crset 2\n\t
13634 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13635 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13636 . 8 beq%T1l-
13637 .---
13638 .141 */
13639 static char str[160]; /* 8 spare */
13640 char *s = str;
13641 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13643 if (DEFAULT_ABI == ABI_AIX)
13644 s += sprintf (s,
13645 "l%s 2,%%%u\n\t",
13646 ptrload, funop + 3);
13648 /* We don't need the extra code to stop indirect call speculation if
13649 calling via LR. */
13650 bool speculate = (TARGET_MACHO
13651 || rs6000_speculate_indirect_jumps
13652 || (REG_P (operands[funop])
13653 && REGNO (operands[funop]) == LR_REGNO));
13655 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13657 const char *rel64 = TARGET_64BIT ? "64" : "";
13658 char tls[29];
13659 tls[0] = 0;
13660 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13662 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13663 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13664 rel64, funop + 1);
13665 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13666 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13667 rel64);
13670 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13671 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13672 && flag_pic == 2 ? "+32768" : "");
13673 if (!speculate)
13675 s += sprintf (s,
13676 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13677 tls, rel64, notoc, funop, addend);
13678 s += sprintf (s, "crset 2\n\t");
13680 s += sprintf (s,
13681 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13682 tls, rel64, notoc, funop, addend);
13684 else if (!speculate)
13685 s += sprintf (s, "crset 2\n\t");
13687 if (rs6000_pcrel_p (cfun))
13689 if (speculate)
13690 sprintf (s, "b%%T%ul", funop);
13691 else
13692 sprintf (s, "beq%%T%ul-", funop);
13694 else if (DEFAULT_ABI == ABI_AIX)
13696 if (speculate)
13697 sprintf (s,
13698 "b%%T%ul\n\t"
13699 "l%s 2,%%%u(1)",
13700 funop, ptrload, funop + 4);
13701 else
13702 sprintf (s,
13703 "beq%%T%ul-\n\t"
13704 "l%s 2,%%%u(1)",
13705 funop, ptrload, funop + 4);
13707 else if (DEFAULT_ABI == ABI_ELFv2)
13709 if (speculate)
13710 sprintf (s,
13711 "b%%T%ul\n\t"
13712 "l%s 2,%%%u(1)",
13713 funop, ptrload, funop + 3);
13714 else
13715 sprintf (s,
13716 "beq%%T%ul-\n\t"
13717 "l%s 2,%%%u(1)",
13718 funop, ptrload, funop + 3);
13720 else
13722 if (speculate)
13723 sprintf (s,
13724 "b%%T%u%s",
13725 funop, sibcall ? "" : "l");
13726 else
13727 sprintf (s,
13728 "beq%%T%u%s-%s",
13729 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13731 return str;
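/* Added illustration: on ELFv2 with indirect-branch speculation
   allowed, the template is roughly "bctrl" followed by "ld 2,24(1)"
   (call through CTR, then reload the TOC pointer); with
   -mno-speculate-indirect-jumps it becomes "crset 2" plus a hinted
   "beqctrl-" so the branch cannot execute speculatively.  */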
13734 const char *
13735 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13737 return rs6000_indirect_call_template_1 (operands, funop, false);
13740 const char *
13741 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13743 return rs6000_indirect_call_template_1 (operands, funop, true);
13746 #if HAVE_AS_PLTSEQ
13747 /* Output indirect call insns. WHICH identifies the type of sequence. */
13748 const char *
13749 rs6000_pltseq_template (rtx *operands, int which)
13751 const char *rel64 = TARGET_64BIT ? "64" : "";
13752 char tls[30];
13753 tls[0] = 0;
13754 if (GET_CODE (operands[3]) == UNSPEC)
13756 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13757 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13758 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13759 off, rel64);
13760 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13761 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13762 off, rel64);
13765 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13766 static char str[96]; /* 10 spare */
13767 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13768 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13769 && flag_pic == 2 ? "+32768" : "");
13770 switch (which)
13772 case RS6000_PLTSEQ_TOCSAVE:
13773 sprintf (str,
13774 "st%s\n\t"
13775 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13776 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13777 tls, rel64);
13778 break;
13779 case RS6000_PLTSEQ_PLT16_HA:
13780 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13781 sprintf (str,
13782 "lis %%0,0\n\t"
13783 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13784 tls, off, rel64);
13785 else
13786 sprintf (str,
13787 "addis %%0,%%1,0\n\t"
13788 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13789 tls, off, rel64, addend);
13790 break;
13791 case RS6000_PLTSEQ_PLT16_LO:
13792 sprintf (str,
13793 "l%s %%0,0(%%1)\n\t"
13794 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13795 TARGET_64BIT ? "d" : "wz",
13796 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13797 break;
13798 case RS6000_PLTSEQ_MTCTR:
13799 sprintf (str,
13800 "mtctr %%1\n\t"
13801 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13802 tls, rel64, addend);
13803 break;
13804 case RS6000_PLTSEQ_PLT_PCREL34:
13805 sprintf (str,
13806 "pl%s %%0,0(0),1\n\t"
13807 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13808 TARGET_64BIT ? "d" : "wz",
13809 tls, rel64);
13810 break;
13811 default:
13812 gcc_unreachable ();
13814 return str;
13816 #endif
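/* Added example (assuming 32-bit -msecure-plt PIC code; symbol and
   registers are made up): the PLT16_HA/PLT16_LO cases above emit
       addis 9,30,0
	.reloc .-2,R_PPC_PLT16_HA,foo+32768
       lwz 9,0(9)
	.reloc .-2,R_PPC_PLT16_LO,foo+32768
   where .-2 is the big-endian offset of the immediate field (.-4 on
   little-endian, per the `off' computation above).  */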
13818 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13819 /* Emit an assembler directive to set symbol visibility for DECL to
13820 VISIBILITY_TYPE. */
13822 static void
13823 rs6000_assemble_visibility (tree decl, int vis)
13825 if (TARGET_XCOFF)
13826 return;
13828 /* Functions need to have their entry point symbol visibility set as
13829 well as their descriptor symbol visibility. */
13830 if (DEFAULT_ABI == ABI_AIX
13831 && DOT_SYMBOLS
13832 && TREE_CODE (decl) == FUNCTION_DECL)
13834 static const char * const visibility_types[] = {
13835 NULL, "protected", "hidden", "internal"
13838 const char *name, *type;
13840 name = ((* targetm.strip_name_encoding)
13841 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13842 type = visibility_types[vis];
13844 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13845 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13847 else
13848 default_assemble_visibility (decl, vis);
13850 #endif
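/* Added example: for a hidden function `foo' under the AIX ABI with
   dot-symbols, both the descriptor and the entry point are marked:
	.hidden foo
	.hidden .foo  */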
13852 enum rtx_code
13853 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13855 /* Reversal of FP compares requires care -- an ordered compare
13856 becomes an unordered compare and vice versa. */
13857 if (mode == CCFPmode
13858 && (!flag_finite_math_only
13859 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13860 || code == UNEQ || code == LTGT))
13861 return reverse_condition_maybe_unordered (code);
13862 else
13863 return reverse_condition (code);
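/* Added example: reversing GE in CCFPmode yields UNLT rather than LT,
   so a NaN operand still ends up on the reversed side; the plain
   reversal is only used under -ffinite-math-only, and even then not
   for the codes listed above that are themselves unordered.  */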
13866 /* Generate a compare for CODE. Return a brand-new rtx that
13867 represents the result of the compare. */
13869 static rtx
13870 rs6000_generate_compare (rtx cmp, machine_mode mode)
13872 machine_mode comp_mode;
13873 rtx compare_result;
13874 enum rtx_code code = GET_CODE (cmp);
13875 rtx op0 = XEXP (cmp, 0);
13876 rtx op1 = XEXP (cmp, 1);
13878 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13879 comp_mode = CCmode;
13880 else if (FLOAT_MODE_P (mode))
13881 comp_mode = CCFPmode;
13882 else if (code == GTU || code == LTU
13883 || code == GEU || code == LEU)
13884 comp_mode = CCUNSmode;
13885 else if ((code == EQ || code == NE)
13886 && unsigned_reg_p (op0)
13887 && (unsigned_reg_p (op1)
13888 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13889 /* These are unsigned values, perhaps there will be a later
13890 ordering compare that can be shared with this one. */
13891 comp_mode = CCUNSmode;
13892 else
13893 comp_mode = CCmode;
13895 /* If we have an unsigned compare, make sure we don't have a signed value as
13896 an immediate. */
13897 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13898 && INTVAL (op1) < 0)
13900 op0 = copy_rtx_if_shared (op0);
13901 op1 = force_reg (GET_MODE (op0), op1);
13902 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13905 /* First, the compare. */
13906 compare_result = gen_reg_rtx (comp_mode);
13908 /* IEEE 128-bit support in VSX registers when we do not have hardware
13909 support. */
13910 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13912 rtx libfunc = NULL_RTX;
13913 bool check_nan = false;
13914 rtx dest;
13916 switch (code)
13918 case EQ:
13919 case NE:
13920 libfunc = optab_libfunc (eq_optab, mode);
13921 break;
13923 case GT:
13924 case GE:
13925 libfunc = optab_libfunc (ge_optab, mode);
13926 break;
13928 case LT:
13929 case LE:
13930 libfunc = optab_libfunc (le_optab, mode);
13931 break;
13933 case UNORDERED:
13934 case ORDERED:
13935 libfunc = optab_libfunc (unord_optab, mode);
13936 code = (code == UNORDERED) ? NE : EQ;
13937 break;
13939 case UNGE:
13940 case UNGT:
13941 check_nan = true;
13942 libfunc = optab_libfunc (ge_optab, mode);
13943 code = (code == UNGE) ? GE : GT;
13944 break;
13946 case UNLE:
13947 case UNLT:
13948 check_nan = true;
13949 libfunc = optab_libfunc (le_optab, mode);
13950 code = (code == UNLE) ? LE : LT;
13951 break;
13953 case UNEQ:
13954 case LTGT:
13955 check_nan = true;
13956 libfunc = optab_libfunc (eq_optab, mode);
13957 code = (code == UNEQ) ? EQ : NE;
13958 break;
13960 default:
13961 gcc_unreachable ();
13964 gcc_assert (libfunc);
13966 if (!check_nan)
13967 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13968 SImode, op0, mode, op1, mode);
13970 /* The library signals an exception for signalling NaNs, so we need to
13971 handle isgreater, etc. by first checking isordered. */
13972 else
13974 rtx ne_rtx, normal_dest, unord_dest;
13975 rtx unord_func = optab_libfunc (unord_optab, mode);
13976 rtx join_label = gen_label_rtx ();
13977 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13978 rtx unord_cmp = gen_reg_rtx (comp_mode);
13981 /* Test for either value being a NaN. */
13982 gcc_assert (unord_func);
13983 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13984 SImode, op0, mode, op1, mode);
13986 /* Set the result to 1 if either value is a NaN, and jump to the join
13987 label. */
13988 dest = gen_reg_rtx (SImode);
13989 emit_move_insn (dest, const1_rtx);
13990 emit_insn (gen_rtx_SET (unord_cmp,
13991 gen_rtx_COMPARE (comp_mode, unord_dest,
13992 const0_rtx)));
13994 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13995 emit_jump_insn (gen_rtx_SET (pc_rtx,
13996 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13997 join_ref,
13998 pc_rtx)));
14000 /* Do the normal comparison, knowing that the values are not
14001 NaNs. */
14002 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14003 SImode, op0, mode, op1, mode);
14005 emit_insn (gen_cstoresi4 (dest,
14006 gen_rtx_fmt_ee (code, SImode, normal_dest,
14007 const0_rtx),
14008 normal_dest, const0_rtx));
14010 /* Join NaN and non-NaN paths. Compare dest against 0. */
14011 emit_label (join_label);
14012 code = NE;
14015 emit_insn (gen_rtx_SET (compare_result,
14016 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14019 else
14021 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14022 CLOBBERs to match cmptf_internal2 pattern. */
14023 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14024 && FLOAT128_IBM_P (GET_MODE (op0))
14025 && TARGET_HARD_FLOAT)
14026 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14027 gen_rtvec (10,
14028 gen_rtx_SET (compare_result,
14029 gen_rtx_COMPARE (comp_mode, op0, op1)),
14030 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14031 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14032 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14033 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14034 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14035 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14036 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14037 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14038 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14039 else if (GET_CODE (op1) == UNSPEC
14040 && XINT (op1, 1) == UNSPEC_SP_TEST)
14042 rtx op1b = XVECEXP (op1, 0, 0);
14043 comp_mode = CCEQmode;
14044 compare_result = gen_reg_rtx (CCEQmode);
14045 if (TARGET_64BIT)
14046 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14047 else
14048 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14050 else
14051 emit_insn (gen_rtx_SET (compare_result,
14052 gen_rtx_COMPARE (comp_mode, op0, op1)));
14055 validate_condition_mode (code, GET_MODE (compare_result));
14057 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
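/* Added sketch: without -mfloat128-hardware, a KFmode UNGT ("unordered
   or greater") first calls __unordkf2; a nonzero result makes the
   comparison true immediately, otherwise __gekf2 is used with code GT
   on the now known-ordered values.  The libcall names follow the
   usual libgcc IEEE-128 naming and are given for illustration.  */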
14061 /* Return the diagnostic message string if the binary operation OP is
14062 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14064 static const char*
14065 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14066 const_tree type1,
14067 const_tree type2)
14069 machine_mode mode1 = TYPE_MODE (type1);
14070 machine_mode mode2 = TYPE_MODE (type2);
14072 /* For complex modes, use the inner type. */
14073 if (COMPLEX_MODE_P (mode1))
14074 mode1 = GET_MODE_INNER (mode1);
14076 if (COMPLEX_MODE_P (mode2))
14077 mode2 = GET_MODE_INNER (mode2);
14079 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14080 double to intermix unless -mfloat128-convert. */
14081 if (mode1 == mode2)
14082 return NULL;
14084 if (!TARGET_FLOAT128_CVT)
14086 if ((mode1 == KFmode && mode2 == IFmode)
14087 || (mode1 == IFmode && mode2 == KFmode))
14088 return N_("__float128 and __ibm128 cannot be used in the same "
14089 "expression");
14091 if (TARGET_IEEEQUAD
14092 && ((mode1 == IFmode && mode2 == TFmode)
14093 || (mode1 == TFmode && mode2 == IFmode)))
14094 return N_("__ibm128 and long double cannot be used in the same "
14095 "expression");
14097 if (!TARGET_IEEEQUAD
14098 && ((mode1 == KFmode && mode2 == TFmode)
14099 || (mode1 == TFmode && mode2 == KFmode)))
14100 return N_("__float128 and long double cannot be used in the same "
14101 "expression");
14104 return NULL;
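/* Added example of user code this hook rejects (hypothetical):

       __float128 a;
       __ibm128 b;
       ... a + b ...

   draws "__float128 and __ibm128 cannot be used in the same
   expression" unless -mfloat128-convert is in effect.  */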
14108 /* Expand floating point conversion to/from __float128 and __ibm128. */
14110 void
14111 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14113 machine_mode dest_mode = GET_MODE (dest);
14114 machine_mode src_mode = GET_MODE (src);
14115 convert_optab cvt = unknown_optab;
14116 bool do_move = false;
14117 rtx libfunc = NULL_RTX;
14118 rtx dest2;
14119 typedef rtx (*rtx_2func_t) (rtx, rtx);
14120 rtx_2func_t hw_convert = (rtx_2func_t)0;
14121 size_t kf_or_tf;
14123 struct hw_conv_t {
14124 rtx_2func_t from_df;
14125 rtx_2func_t from_sf;
14126 rtx_2func_t from_si_sign;
14127 rtx_2func_t from_si_uns;
14128 rtx_2func_t from_di_sign;
14129 rtx_2func_t from_di_uns;
14130 rtx_2func_t to_df;
14131 rtx_2func_t to_sf;
14132 rtx_2func_t to_si_sign;
14133 rtx_2func_t to_si_uns;
14134 rtx_2func_t to_di_sign;
14135 rtx_2func_t to_di_uns;
14136 } hw_conversions[2] = {
14137 /* conversions to/from KFmode */
14139 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14140 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14141 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14142 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14143 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14144 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14145 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14146 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14147 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14148 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14149 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14150 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14153 /* conversions to/from TFmode */
14155 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14156 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14157 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14158 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14159 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14160 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14161 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14162 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14163 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14164 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14165 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14166 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14170 if (dest_mode == src_mode)
14171 gcc_unreachable ();
14173 /* Eliminate memory operations. */
14174 if (MEM_P (src))
14175 src = force_reg (src_mode, src);
14177 if (MEM_P (dest))
14179 rtx tmp = gen_reg_rtx (dest_mode);
14180 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14181 rs6000_emit_move (dest, tmp, dest_mode);
14182 return;
14185 /* Convert to IEEE 128-bit floating point. */
14186 if (FLOAT128_IEEE_P (dest_mode))
14188 if (dest_mode == KFmode)
14189 kf_or_tf = 0;
14190 else if (dest_mode == TFmode)
14191 kf_or_tf = 1;
14192 else
14193 gcc_unreachable ();
14195 switch (src_mode)
14197 case E_DFmode:
14198 cvt = sext_optab;
14199 hw_convert = hw_conversions[kf_or_tf].from_df;
14200 break;
14202 case E_SFmode:
14203 cvt = sext_optab;
14204 hw_convert = hw_conversions[kf_or_tf].from_sf;
14205 break;
14207 case E_KFmode:
14208 case E_IFmode:
14209 case E_TFmode:
14210 if (FLOAT128_IBM_P (src_mode))
14211 cvt = sext_optab;
14212 else
14213 do_move = true;
14214 break;
14216 case E_SImode:
14217 if (unsigned_p)
14219 cvt = ufloat_optab;
14220 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14222 else
14224 cvt = sfloat_optab;
14225 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14227 break;
14229 case E_DImode:
14230 if (unsigned_p)
14232 cvt = ufloat_optab;
14233 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14235 else
14237 cvt = sfloat_optab;
14238 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14240 break;
14242 default:
14243 gcc_unreachable ();
14247 /* Convert from IEEE 128-bit floating point. */
14248 else if (FLOAT128_IEEE_P (src_mode))
14250 if (src_mode == KFmode)
14251 kf_or_tf = 0;
14252 else if (src_mode == TFmode)
14253 kf_or_tf = 1;
14254 else
14255 gcc_unreachable ();
14257 switch (dest_mode)
14259 case E_DFmode:
14260 cvt = trunc_optab;
14261 hw_convert = hw_conversions[kf_or_tf].to_df;
14262 break;
14264 case E_SFmode:
14265 cvt = trunc_optab;
14266 hw_convert = hw_conversions[kf_or_tf].to_sf;
14267 break;
14269 case E_KFmode:
14270 case E_IFmode:
14271 case E_TFmode:
14272 if (FLOAT128_IBM_P (dest_mode))
14273 cvt = trunc_optab;
14274 else
14275 do_move = true;
14276 break;
14278 case E_SImode:
14279 if (unsigned_p)
14281 cvt = ufix_optab;
14282 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14284 else
14286 cvt = sfix_optab;
14287 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14289 break;
14291 case E_DImode:
14292 if (unsigned_p)
14294 cvt = ufix_optab;
14295 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14297 else
14299 cvt = sfix_optab;
14300 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14302 break;
14304 default:
14305 gcc_unreachable ();
14309 /* Both IBM format. */
14310 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14311 do_move = true;
14313 else
14314 gcc_unreachable ();
14316 /* Handle conversion between TFmode/KFmode/IFmode. */
14317 if (do_move)
14318 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14320 /* Handle conversion if we have hardware support. */
14321 else if (TARGET_FLOAT128_HW && hw_convert)
14322 emit_insn ((hw_convert) (dest, src));
14324 /* Call an external function to do the conversion. */
14325 else if (cvt != unknown_optab)
14327 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14328 gcc_assert (libfunc != NULL_RTX);
14330 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14331 src, src_mode);
14333 gcc_assert (dest2 != NULL_RTX);
14334 if (!rtx_equal_p (dest, dest2))
14335 emit_move_insn (dest, dest2);
14338 else
14339 gcc_unreachable ();
14341 return;
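/* Added sketch: a signed DImode -> KFmode conversion uses
   gen_float_kfdi2_hw when ISA 3.0 IEEE-128 hardware is available, and
   otherwise falls back to the sfloat_optab libcall (__floatdikf in
   libgcc); KFmode <-> TFmode when both are IEEE, like IBM <-> IBM,
   is handled as a plain move.  */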
14345 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14346 can be used as that dest register. Return the dest register. */
14349 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14351 if (op2 == const0_rtx)
14352 return op1;
14354 if (GET_CODE (scratch) == SCRATCH)
14355 scratch = gen_reg_rtx (mode);
14357 if (logical_operand (op2, mode))
14358 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14359 else
14360 emit_insn (gen_rtx_SET (scratch,
14361 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14363 return scratch;
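/* Added example: with op1 in r9 and op2 == 5 (a logical_operand) this
   emits scratch = r9 ^ 5, so scratch is zero exactly when r9 == 5;
   constants outside the logical range use scratch = op1 - op2
   instead.  */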
14366 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14367 requires this. The result is mode MODE. */
14369 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14371 rtx cond[2];
14372 int n = 0;
14373 if (code == LTGT || code == LE || code == UNLT)
14374 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14375 if (code == LTGT || code == GE || code == UNGT)
14376 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14377 if (code == LE || code == GE || code == UNEQ)
14378 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14379 if (code == UNLT || code == UNGT || code == UNEQ)
14380 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14382 gcc_assert (n == 2);
14384 rtx cc = gen_reg_rtx (CCEQmode);
14385 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14386 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14388 return cc;
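/* Added example: for code == LE the conditions collected above are LT
   and EQ, and the cror folds them into one CCEQ bit, synthesizing the
   FP "less than or equal" test that has no single CR bit.  */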
14391 void
14392 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14394 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14395 rtx_code cond_code = GET_CODE (condition_rtx);
14397 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14398 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14400 else if (cond_code == NE
14401 || cond_code == GE || cond_code == LE
14402 || cond_code == GEU || cond_code == LEU
14403 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14405 rtx not_result = gen_reg_rtx (CCEQmode);
14406 rtx not_op, rev_cond_rtx;
14407 machine_mode cc_mode;
14409 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14411 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14412 SImode, XEXP (condition_rtx, 0), const0_rtx);
14413 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14414 emit_insn (gen_rtx_SET (not_result, not_op));
14415 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14418 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14419 if (op_mode == VOIDmode)
14420 op_mode = GET_MODE (XEXP (operands[1], 1));
14422 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14424 PUT_MODE (condition_rtx, DImode);
14425 convert_move (operands[0], condition_rtx, 0);
14427 else
14429 PUT_MODE (condition_rtx, SImode);
14430 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14434 /* Emit a conditional branch: compare per operands[0] and branch to the label in operands[3]. */
14436 void
14437 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14439 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14440 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14441 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14442 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14445 /* Return the string to output a conditional branch to LABEL, which is
14446 the operand template of the label, or NULL if the branch is really a
14447 conditional return.
14449 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14450 condition code register and its mode specifies what kind of
14451 comparison we made.
14453 REVERSED is nonzero if we should reverse the sense of the comparison.
14455 INSN is the insn. */
14457 char *
14458 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14460 static char string[64];
14461 enum rtx_code code = GET_CODE (op);
14462 rtx cc_reg = XEXP (op, 0);
14463 machine_mode mode = GET_MODE (cc_reg);
14464 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14465 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14466 int really_reversed = reversed ^ need_longbranch;
14467 char *s = string;
14468 const char *ccode;
14469 const char *pred;
14470 rtx note;
14472 validate_condition_mode (code, mode);
14474 /* Work out which way this really branches. We could use
14475 reverse_condition_maybe_unordered here always but this
14476 makes the resulting assembler clearer. */
14477 if (really_reversed)
14479 /* Reversal of FP compares requires care -- an ordered compare
14480 becomes an unordered compare and vice versa. */
14481 if (mode == CCFPmode)
14482 code = reverse_condition_maybe_unordered (code);
14483 else
14484 code = reverse_condition (code);
14487 switch (code)
14489 /* Not all of these are actually distinct opcodes, but
14490 we distinguish them for clarity of the resulting assembler. */
14491 case NE: case LTGT:
14492 ccode = "ne"; break;
14493 case EQ: case UNEQ:
14494 ccode = "eq"; break;
14495 case GE: case GEU:
14496 ccode = "ge"; break;
14497 case GT: case GTU: case UNGT:
14498 ccode = "gt"; break;
14499 case LE: case LEU:
14500 ccode = "le"; break;
14501 case LT: case LTU: case UNLT:
14502 ccode = "lt"; break;
14503 case UNORDERED: ccode = "un"; break;
14504 case ORDERED: ccode = "nu"; break;
14505 case UNGE: ccode = "nl"; break;
14506 case UNLE: ccode = "ng"; break;
14507 default:
14508 gcc_unreachable ();
14511 /* Maybe we have a guess as to how likely the branch is. */
14512 pred = "";
14513 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14514 if (note != NULL_RTX)
14516 /* PROB is the difference from 50%. */
14517 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14518 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14520 /* Only hint for highly probable/improbable branches on newer cpus when
14521 we have real profile data, as static prediction overrides processor
14522 dynamic prediction. For older cpus we may as well always hint, but
14523 assume not taken for branches that are very close to 50% as a
14524 mispredicted taken branch is more expensive than a
14525 mispredicted not-taken branch. */
14526 if (rs6000_always_hint
14527 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14528 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14529 && br_prob_note_reliable_p (note)))
14531 if (abs (prob) > REG_BR_PROB_BASE / 20
14532 && ((prob > 0) ^ need_longbranch))
14533 pred = "+";
14534 else
14535 pred = "-";
14539 if (label == NULL)
14540 s += sprintf (s, "b%slr%s ", ccode, pred);
14541 else
14542 s += sprintf (s, "b%s%s ", ccode, pred);
14544 /* We need to escape any '%' characters in the reg_names string.
14545 Assume they'd only be the first character.... */
14546 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14547 *s++ = '%';
14548 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14550 if (label != NULL)
14552 /* If the branch distance was too far, we may have to use an
14553 unconditional branch to go the distance. */
14554 if (need_longbranch)
14555 s += sprintf (s, ",$+8\n\tb %s", label);
14556 else
14557 s += sprintf (s, ",%s", label);
14560 return string;
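/* Added illustration: a predicted-taken EQ branch on cr7 comes out as
   "beq+ 7,.L42"; if the target is too far for a conditional branch,
   the sense is inverted and an unconditional branch covers the
   distance: "bne- 7,$+8" then "b .L42".  */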
14563 /* Return insn for VSX or Altivec comparisons. */
14565 static rtx
14566 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14568 rtx mask;
14569 machine_mode mode = GET_MODE (op0);
14571 switch (code)
14573 default:
14574 break;
14576 case GE:
14577 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14578 return NULL_RTX;
14579 /* FALLTHRU */
14581 case EQ:
14582 case GT:
14583 case GTU:
14584 case ORDERED:
14585 case UNORDERED:
14586 case UNEQ:
14587 case LTGT:
14588 mask = gen_reg_rtx (mode);
14589 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14590 return mask;
14593 return NULL_RTX;
14596 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14597 DMODE is expected destination mode. This is a recursive function. */
14599 static rtx
14600 rs6000_emit_vector_compare (enum rtx_code rcode,
14601 rtx op0, rtx op1,
14602 machine_mode dmode)
14604 rtx mask;
14605 bool swap_operands = false;
14606 bool try_again = false;
14608 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14609 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14611 /* See if the comparison works as is. */
14612 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14613 if (mask)
14614 return mask;
14616 switch (rcode)
14618 case LT:
14619 rcode = GT;
14620 swap_operands = true;
14621 try_again = true;
14622 break;
14623 case LTU:
14624 rcode = GTU;
14625 swap_operands = true;
14626 try_again = true;
14627 break;
14628 case NE:
14629 case UNLE:
14630 case UNLT:
14631 case UNGE:
14632 case UNGT:
14633 /* Invert condition and try again.
14634 e.g., A != B becomes ~(A==B). */
14636 enum rtx_code rev_code;
14637 enum insn_code nor_code;
14638 rtx mask2;
14640 rev_code = reverse_condition_maybe_unordered (rcode);
14641 if (rev_code == UNKNOWN)
14642 return NULL_RTX;
14644 nor_code = optab_handler (one_cmpl_optab, dmode);
14645 if (nor_code == CODE_FOR_nothing)
14646 return NULL_RTX;
14648 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14649 if (!mask2)
14650 return NULL_RTX;
14652 mask = gen_reg_rtx (dmode);
14653 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14654 return mask;
14656 break;
14657 case GE:
14658 case GEU:
14659 case LE:
14660 case LEU:
14661 /* Try GT/GTU/LT/LTU OR EQ */
14663 rtx c_rtx, eq_rtx;
14664 enum insn_code ior_code;
14665 enum rtx_code new_code;
14667 switch (rcode)
14669 case GE:
14670 new_code = GT;
14671 break;
14673 case GEU:
14674 new_code = GTU;
14675 break;
14677 case LE:
14678 new_code = LT;
14679 break;
14681 case LEU:
14682 new_code = LTU;
14683 break;
14685 default:
14686 gcc_unreachable ();
14689 ior_code = optab_handler (ior_optab, dmode);
14690 if (ior_code == CODE_FOR_nothing)
14691 return NULL_RTX;
14693 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14694 if (!c_rtx)
14695 return NULL_RTX;
14697 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14698 if (!eq_rtx)
14699 return NULL_RTX;
14701 mask = gen_reg_rtx (dmode);
14702 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14703 return mask;
14705 break;
14706 default:
14707 return NULL_RTX;
14710 if (try_again)
14712 if (swap_operands)
14713 std::swap (op0, op1);
14715 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14716 if (mask)
14717 return mask;
14720 /* You only get two chances. */
14721 return NULL_RTX;
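/* Added example: the fallbacks above synthesize A <= B as
   (A < B) | (A == B) and A != B as ~(A == B), while LT/LTU are
   handled by swapping the operands and retrying as GT/GTU.  */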
14724 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14725 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14726 operands for the relation operation COND. */
14729 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14730 rtx cond, rtx cc_op0, rtx cc_op1)
14732 machine_mode dest_mode = GET_MODE (dest);
14733 machine_mode mask_mode = GET_MODE (cc_op0);
14734 enum rtx_code rcode = GET_CODE (cond);
14735 machine_mode cc_mode = CCmode;
14736 rtx mask;
14737 rtx cond2;
14738 bool invert_move = false;
14740 if (VECTOR_UNIT_NONE_P (dest_mode))
14741 return 0;
14743 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14744 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14746 switch (rcode)
14748 /* Swap operands if we can, and fall back to doing the operation as
14749 specified, and doing a NOR to invert the test. */
14750 case NE:
14751 case UNLE:
14752 case UNLT:
14753 case UNGE:
14754 case UNGT:
14755 /* Invert condition and try again.
14756 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14757 invert_move = true;
14758 rcode = reverse_condition_maybe_unordered (rcode);
14759 if (rcode == UNKNOWN)
14760 return 0;
14761 break;
14763 case GE:
14764 case LE:
14765 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14767 /* Invert condition to avoid compound test. */
14768 invert_move = true;
14769 rcode = reverse_condition (rcode);
14771 break;
14773 case GTU:
14774 case GEU:
14775 case LTU:
14776 case LEU:
14777 /* Mark unsigned tests with CCUNSmode. */
14778 cc_mode = CCUNSmode;
14780 /* Invert condition to avoid compound test if necessary. */
14781 if (rcode == GEU || rcode == LEU)
14783 invert_move = true;
14784 rcode = reverse_condition (rcode);
14786 break;
14788 default:
14789 break;
14792 /* Get the vector mask for the given relational operations. */
14793 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14795 if (!mask)
14796 return 0;
14798 if (invert_move)
14799 std::swap (op_true, op_false);
14801 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
14802 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14803 && (GET_CODE (op_true) == CONST_VECTOR
14804 || GET_CODE (op_false) == CONST_VECTOR))
14806 rtx constant_0 = CONST0_RTX (dest_mode);
14807 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14809 if (op_true == constant_m1 && op_false == constant_0)
14811 emit_move_insn (dest, mask);
14812 return 1;
14815 else if (op_true == constant_0 && op_false == constant_m1)
14817 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14818 return 1;
14821 /* If we can't use the vector comparison directly, perhaps we can use
14822 the mask for the true or false fields, instead of loading up a
14823 constant. */
14824 if (op_true == constant_m1)
14825 op_true = mask;
14827 if (op_false == constant_0)
14828 op_false = mask;
14831 if (!REG_P (op_true) && !SUBREG_P (op_true))
14832 op_true = force_reg (dest_mode, op_true);
14834 if (!REG_P (op_false) && !SUBREG_P (op_false))
14835 op_false = force_reg (dest_mode, op_false);
14837 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14838 CONST0_RTX (dest_mode));
14839 emit_insn (gen_rtx_SET (dest,
14840 gen_rtx_IF_THEN_ELSE (dest_mode,
14841 cond2,
14842 op_true,
14843 op_false)));
14844 return 1;
14847 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
14848 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
14849 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
14850 0 if the hardware has no such operation. */
14852 static int
14853 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14855 enum rtx_code code = GET_CODE (op);
14856 rtx op0 = XEXP (op, 0);
14857 rtx op1 = XEXP (op, 1);
14858 machine_mode compare_mode = GET_MODE (op0);
14859 machine_mode result_mode = GET_MODE (dest);
14860 bool max_p = false;
14862 if (result_mode != compare_mode)
14863 return 0;
14865 if (code == GE || code == GT)
14866 max_p = true;
14867 else if (code == LE || code == LT)
14868 max_p = false;
14869 else
14870 return 0;
14872 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14875 /* Only when NaNs and signed-zeros are not in effect, smax could be
14876 used for `op0 < op1 ? op1 : op0`, and smin could be used for
14877 `op0 > op1 ? op1 : op0`. */
14878 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
14879 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
14880 max_p = !max_p;
14882 else
14883 return 0;
14885 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14886 return 1;
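/* Added example: for DFmode `d = (a >= b) ? a : b' the rtx_equal_p
   tests above hold, max_p stays true, and a single SMAX is emitted,
   i.e. the xsmaxcdp instruction named in the comment.  */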
14889 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14890 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
14891 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
14892 zero/false. Return 0 if the hardware has no such operation. */
14894 static int
14895 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14897 enum rtx_code code = GET_CODE (op);
14898 rtx op0 = XEXP (op, 0);
14899 rtx op1 = XEXP (op, 1);
14900 machine_mode result_mode = GET_MODE (dest);
14901 rtx compare_rtx;
14902 rtx cmove_rtx;
14903 rtx clobber_rtx;
14905 if (!can_create_pseudo_p ())
14906 return 0;
14908 switch (code)
14910 case EQ:
14911 case GE:
14912 case GT:
14913 break;
14915 case NE:
14916 case LT:
14917 case LE:
14918 code = swap_condition (code);
14919 std::swap (op0, op1);
14920 break;
14922 default:
14923 return 0;
14926 /* Generate: [(parallel [(set (dest)
14927 (if_then_else (op (cmp1) (cmp2))
14928 (true)
14929 (false)))
14930 (clobber (scratch))])]. */
14932 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14933 cmove_rtx = gen_rtx_SET (dest,
14934 gen_rtx_IF_THEN_ELSE (result_mode,
14935 compare_rtx,
14936 true_cond,
14937 false_cond));
14939 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14940 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14941 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14943 return 1;
14946 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
14947 operands of the last comparison is nonzero/true, FALSE_COND if it
14948 is zero/false. Return 0 if the hardware has no such operation. */
14951 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14953 enum rtx_code code = GET_CODE (op);
14954 rtx op0 = XEXP (op, 0);
14955 rtx op1 = XEXP (op, 1);
14956 machine_mode compare_mode = GET_MODE (op0);
14957 machine_mode result_mode = GET_MODE (dest);
14958 rtx temp;
14959 bool is_against_zero;
14961 /* These modes should always match. */
14962 if (GET_MODE (op1) != compare_mode
14963 /* In the isel case however, we can use a compare immediate, so
14964 op1 may be a small constant. */
14965 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14966 return 0;
14967 if (GET_MODE (true_cond) != result_mode)
14968 return 0;
14969 if (GET_MODE (false_cond) != result_mode)
14970 return 0;
14972 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14973 if (TARGET_P9_MINMAX
14974 && (compare_mode == SFmode || compare_mode == DFmode)
14975 && (result_mode == SFmode || result_mode == DFmode))
14977 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14978 return 1;
14980 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14981 return 1;
14984 /* Don't allow using floating point comparisons for integer results for
14985 now. */
14986 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14987 return 0;
14989 /* First, work out if the hardware can do this at all, or
14990 if it's too slow.... */
14991 if (!FLOAT_MODE_P (compare_mode))
14993 if (TARGET_ISEL)
14994 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14995 return 0;
14998 is_against_zero = op1 == CONST0_RTX (compare_mode);
15000 /* A floating-point subtract might overflow, underflow, or produce
15001 an inexact result, thus changing the floating-point flags, so it
15002 can't be generated if we care about that. It's safe if one side
15003 of the construct is zero, since then no subtract will be
15004 generated. */
15005 if (SCALAR_FLOAT_MODE_P (compare_mode)
15006 && flag_trapping_math && ! is_against_zero)
15007 return 0;
15009 /* Eliminate half of the comparisons by switching operands; this
15010 makes the remaining code simpler. */
15011 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15012 || code == LTGT || code == LT || code == UNLE)
15014 code = reverse_condition_maybe_unordered (code);
15015 temp = true_cond;
15016 true_cond = false_cond;
15017 false_cond = temp;
15020 /* UNEQ and LTGT take four instructions for a comparison with zero,
15021 so it'll probably be faster to use a branch here too. */
15022 if (code == UNEQ && HONOR_NANS (compare_mode))
15023 return 0;
15025 /* We're going to try to implement comparisons by performing
15026 a subtract, then comparing against zero. Unfortunately,
15027 Inf - Inf is NaN which is not zero, and so if we don't
15028 know that the operand is finite and the comparison
15029 would treat EQ differently from UNORDERED, we can't do it. */
15030 if (HONOR_INFINITIES (compare_mode)
15031 && code != GT && code != UNGE
15032 && (!CONST_DOUBLE_P (op1)
15033 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15034 /* Constructs of the form (a OP b ? a : b) are safe. */
15035 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15036 || (! rtx_equal_p (op0, true_cond)
15037 && ! rtx_equal_p (op1, true_cond))))
15038 return 0;
15040 /* At this point we know we can use fsel. */
15042 /* Don't allow compare_mode other than SFmode or DFmode; for other
15043 modes there is no fsel instruction. */
15044 if (compare_mode != SFmode && compare_mode != DFmode)
15045 return 0;
15047 /* Reduce the comparison to a comparison against zero. */
15048 if (! is_against_zero)
15050 temp = gen_reg_rtx (compare_mode);
15051 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15052 op0 = temp;
15053 op1 = CONST0_RTX (compare_mode);
15056 /* If we don't care about NaNs we can reduce some of the comparisons
15057 down to faster ones. */
15058 if (! HONOR_NANS (compare_mode))
15059 switch (code)
15061 case GT:
15062 code = LE;
15063 temp = true_cond;
15064 true_cond = false_cond;
15065 false_cond = temp;
15066 break;
15067 case UNGE:
15068 code = GE;
15069 break;
15070 case UNEQ:
15071 code = EQ;
15072 break;
15073 default:
15074 break;
15077 /* Now, reduce everything down to a GE. */
15078 switch (code)
15080 case GE:
15081 break;
15083 case LE:
15084 temp = gen_reg_rtx (compare_mode);
15085 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15086 op0 = temp;
15087 break;
15089 case ORDERED:
15090 temp = gen_reg_rtx (compare_mode);
15091 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15092 op0 = temp;
15093 break;
15095 case EQ:
15096 temp = gen_reg_rtx (compare_mode);
15097 emit_insn (gen_rtx_SET (temp,
15098 gen_rtx_NEG (compare_mode,
15099 gen_rtx_ABS (compare_mode, op0))));
15100 op0 = temp;
15101 break;
15103 case UNGE:
15104 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15105 temp = gen_reg_rtx (result_mode);
15106 emit_insn (gen_rtx_SET (temp,
15107 gen_rtx_IF_THEN_ELSE (result_mode,
15108 gen_rtx_GE (VOIDmode,
15109 op0, op1),
15110 true_cond, false_cond)));
15111 false_cond = true_cond;
15112 true_cond = temp;
15114 temp = gen_reg_rtx (compare_mode);
15115 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15116 op0 = temp;
15117 break;
15119 case GT:
15120 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15121 temp = gen_reg_rtx (result_mode);
15122 emit_insn (gen_rtx_SET (temp,
15123 gen_rtx_IF_THEN_ELSE (result_mode,
15124 gen_rtx_GE (VOIDmode,
15125 op0, op1),
15126 true_cond, false_cond)));
15127 true_cond = false_cond;
15128 false_cond = temp;
15130 temp = gen_reg_rtx (compare_mode);
15131 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15132 op0 = temp;
15133 break;
15135 default:
15136 gcc_unreachable ();
15139 emit_insn (gen_rtx_SET (dest,
15140 gen_rtx_IF_THEN_ELSE (result_mode,
15141 gen_rtx_GE (VOIDmode,
15142 op0, op1),
15143 true_cond, false_cond)));
15144 return 1;
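/* For reference: the fsel instruction targeted above computes
   FRT = (FRA >= 0.0) ? FRC : FRB, which is why every comparison is first
   reduced to a GE against zero.  For example, the LE case is handled by
   negating op0, turning "a <= 0 ? x : y" into "-a >= 0 ? x : y".  */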
15147 /* Same as above, but for ints (isel). */
15150 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15152 rtx condition_rtx, cr;
15153 machine_mode mode = GET_MODE (dest);
15154 enum rtx_code cond_code;
15155 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15156 bool signedp;
15158 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15159 return 0;
15161 /* We still have to do the compare, because isel doesn't do a
15162 compare; it just looks at the CRx bits set by a previous compare
15163 instruction. */
15164 condition_rtx = rs6000_generate_compare (op, mode);
15165 cond_code = GET_CODE (condition_rtx);
15166 cr = XEXP (condition_rtx, 0);
15167 signedp = GET_MODE (cr) == CCmode;
15169 isel_func = (mode == SImode
15170 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15171 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15173 switch (cond_code)
15175 case LT: case GT: case LTU: case GTU: case EQ:
15176 /* isel handles these directly. */
15177 break;
15179 default:
15180 /* We need to swap the sense of the comparison. */
15182 std::swap (false_cond, true_cond);
15183 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15185 break;
15188 false_cond = force_reg (mode, false_cond);
15189 if (true_cond != const0_rtx)
15190 true_cond = force_reg (mode, true_cond);
15192 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15194 return 1;
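/* Illustrative example (not from this file): on a target with isel
   enabled (e.g. with -misel), a simple integer select such as

       int
       pick (int a, int b, int c)
       {
         return a < 0 ? b : c;
       }

   can expand through rs6000_emit_int_cmove into a compare followed by a
   single isel, avoiding a conditional branch.  */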
15197 void
15198 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15200 machine_mode mode = GET_MODE (op0);
15201 enum rtx_code c;
15202 rtx target;
15204 /* VSX/altivec have direct min/max insns. */
15205 if ((code == SMAX || code == SMIN)
15206 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15207 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15209 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15210 return;
15213 if (code == SMAX || code == SMIN)
15214 c = GE;
15215 else
15216 c = GEU;
15218 if (code == SMAX || code == UMAX)
15219 target = emit_conditional_move (dest, c, op0, op1, mode,
15220 op0, op1, mode, 0);
15221 else
15222 target = emit_conditional_move (dest, c, op0, op1, mode,
15223 op1, op0, mode, 0);
15224 gcc_assert (target);
15225 if (target != dest)
15226 emit_move_insn (dest, target);
15229 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15230 COND is true. Mark the jump as unlikely to be taken. */
15232 static void
15233 emit_unlikely_jump (rtx cond, rtx label)
15235 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15236 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15237 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15240 /* A subroutine of the atomic operation splitters. Emit a load-locked
15241 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15242 the zero_extend operation. */
15244 static void
15245 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15247 rtx (*fn) (rtx, rtx) = NULL;
15249 switch (mode)
15251 case E_QImode:
15252 fn = gen_load_lockedqi;
15253 break;
15254 case E_HImode:
15255 fn = gen_load_lockedhi;
15256 break;
15257 case E_SImode:
15258 if (GET_MODE (mem) == QImode)
15259 fn = gen_load_lockedqi_si;
15260 else if (GET_MODE (mem) == HImode)
15261 fn = gen_load_lockedhi_si;
15262 else
15263 fn = gen_load_lockedsi;
15264 break;
15265 case E_DImode:
15266 fn = gen_load_lockeddi;
15267 break;
15268 case E_TImode:
15269 fn = gen_load_lockedti;
15270 break;
15271 default:
15272 gcc_unreachable ();
15274 emit_insn (fn (reg, mem));
15277 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15278 instruction in MODE. */
15280 static void
15281 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15283 rtx (*fn) (rtx, rtx, rtx) = NULL;
15285 switch (mode)
15287 case E_QImode:
15288 fn = gen_store_conditionalqi;
15289 break;
15290 case E_HImode:
15291 fn = gen_store_conditionalhi;
15292 break;
15293 case E_SImode:
15294 fn = gen_store_conditionalsi;
15295 break;
15296 case E_DImode:
15297 fn = gen_store_conditionaldi;
15298 break;
15299 case E_TImode:
15300 fn = gen_store_conditionalti;
15301 break;
15302 default:
15303 gcc_unreachable ();
15306 /* Emit sync before stwcx. to address PPC405 Erratum. */
15307 if (PPC405_ERRATUM77)
15308 emit_insn (gen_hwsync ());
15310 emit_insn (fn (res, mem, val));
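/* Together, emit_load_locked and emit_store_conditional give the classic
   PowerPC reservation loop.  As an illustration only, an SImode atomic
   exchange typically expands to something like:

       1: lwarx  r9,0,r3     # load word, set reservation
          stwcx. r4,0,r3     # store iff reservation still held
          bne-   1b          # reservation lost: retry
*/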
15313 /* Expand barriers before and after a load_locked/store_cond sequence. */
15315 static rtx
15316 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15318 rtx addr = XEXP (mem, 0);
15320 if (!legitimate_indirect_address_p (addr, reload_completed)
15321 && !legitimate_indexed_address_p (addr, reload_completed))
15323 addr = force_reg (Pmode, addr);
15324 mem = replace_equiv_address_nv (mem, addr);
15327 switch (model)
15329 case MEMMODEL_RELAXED:
15330 case MEMMODEL_CONSUME:
15331 case MEMMODEL_ACQUIRE:
15332 break;
15333 case MEMMODEL_RELEASE:
15334 case MEMMODEL_ACQ_REL:
15335 emit_insn (gen_lwsync ());
15336 break;
15337 case MEMMODEL_SEQ_CST:
15338 emit_insn (gen_hwsync ());
15339 break;
15340 default:
15341 gcc_unreachable ();
15343 return mem;
15346 static void
15347 rs6000_post_atomic_barrier (enum memmodel model)
15349 switch (model)
15351 case MEMMODEL_RELAXED:
15352 case MEMMODEL_CONSUME:
15353 case MEMMODEL_RELEASE:
15354 break;
15355 case MEMMODEL_ACQUIRE:
15356 case MEMMODEL_ACQ_REL:
15357 case MEMMODEL_SEQ_CST:
15358 emit_insn (gen_isync ());
15359 break;
15360 default:
15361 gcc_unreachable ();
15365 /* A subroutine of the various atomic expanders. For sub-word operations,
15366 we must adjust things to operate on SImode. Given the original MEM,
15367 return a new aligned memory. Also build and return the quantities by
15368 which to shift and mask. */
15370 static rtx
15371 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15373 rtx addr, align, shift, mask, mem;
15374 HOST_WIDE_INT shift_mask;
15375 machine_mode mode = GET_MODE (orig_mem);
15377 /* For smaller modes, we have to implement this via SImode. */
15378 shift_mask = (mode == QImode ? 0x18 : 0x10);
15380 addr = XEXP (orig_mem, 0);
15381 addr = force_reg (GET_MODE (addr), addr);
15383 /* Aligned memory containing subword. Generate a new memory. We
15384 do not want any of the existing MEM_ATTR data, as we're now
15385 accessing memory outside the original object. */
15386 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15387 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15388 mem = gen_rtx_MEM (SImode, align);
15389 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15390 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15391 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15393 /* Shift amount for subword relative to aligned word. */
15394 shift = gen_reg_rtx (SImode);
15395 addr = gen_lowpart (SImode, addr);
15396 rtx tmp = gen_reg_rtx (SImode);
15397 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15398 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15399 if (BYTES_BIG_ENDIAN)
15400 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15401 shift, 1, OPTAB_LIB_WIDEN);
15402 *pshift = shift;
15404 /* Mask for insertion. */
15405 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15406 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15407 *pmask = mask;
15409 return mem;
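/* Worked example (little-endian, HImode): for a halfword at byte offset 2
   in its aligned word, SHIFT = (addr << 3) & 0x10 = 16 and
   MASK = 0xffff << 16, i.e. the subword lives in the upper half of the
   SImode word.  On big-endian the XOR with shift_mask flips this to
   SHIFT = 0, matching the reversed byte order.  */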
15412 /* A subroutine of the various atomic expanders. For sub-word operands,
15413 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15415 static rtx
15416 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15418 rtx x;
15420 x = gen_reg_rtx (SImode);
15421 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15422 gen_rtx_NOT (SImode, mask),
15423 oldval)));
15425 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15427 return x;
15430 /* A subroutine of the various atomic expanders. For sub-word operands,
15431 extract WIDE to NARROW via SHIFT. */
15433 static void
15434 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15436 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15437 wide, 1, OPTAB_LIB_WIDEN);
15438 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15441 /* Expand an atomic compare and swap operation. */
15443 void
15444 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15446 rtx boolval, retval, mem, oldval, newval, cond;
15447 rtx label1, label2, x, mask, shift;
15448 machine_mode mode, orig_mode;
15449 enum memmodel mod_s, mod_f;
15450 bool is_weak;
15452 boolval = operands[0];
15453 retval = operands[1];
15454 mem = operands[2];
15455 oldval = operands[3];
15456 newval = operands[4];
15457 is_weak = (INTVAL (operands[5]) != 0);
15458 mod_s = memmodel_base (INTVAL (operands[6]));
15459 mod_f = memmodel_base (INTVAL (operands[7]));
15460 orig_mode = mode = GET_MODE (mem);
15462 mask = shift = NULL_RTX;
15463 if (mode == QImode || mode == HImode)
15465 /* Before power8, we didn't have access to lbarx/lharx, so we generate a
15466 lwarx and use shift/mask operations. With power8, we need to do the
15467 comparison in SImode, but the store is still done in QI/HImode. */
15468 oldval = convert_modes (SImode, mode, oldval, 1);
15470 if (!TARGET_SYNC_HI_QI)
15472 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15474 /* Shift and mask OLDVAL into position within the word. */
15475 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15476 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15478 /* Shift and mask NEWVAL into position within the word. */
15479 newval = convert_modes (SImode, mode, newval, 1);
15480 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15481 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15484 /* Prepare to adjust the return value. */
15485 retval = gen_reg_rtx (SImode);
15486 mode = SImode;
15488 else if (reg_overlap_mentioned_p (retval, oldval))
15489 oldval = copy_to_reg (oldval);
15491 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15492 oldval = copy_to_mode_reg (mode, oldval);
15494 if (reg_overlap_mentioned_p (retval, newval))
15495 newval = copy_to_reg (newval);
15497 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15499 label1 = NULL_RTX;
15500 if (!is_weak)
15502 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15503 emit_label (XEXP (label1, 0));
15505 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15507 emit_load_locked (mode, retval, mem);
15509 x = retval;
15510 if (mask)
15511 x = expand_simple_binop (SImode, AND, retval, mask,
15512 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15514 cond = gen_reg_rtx (CCmode);
15515 /* If we have TImode, synthesize a comparison. */
15516 if (mode != TImode)
15517 x = gen_rtx_COMPARE (CCmode, x, oldval);
15518 else
15520 rtx xor1_result = gen_reg_rtx (DImode);
15521 rtx xor2_result = gen_reg_rtx (DImode);
15522 rtx or_result = gen_reg_rtx (DImode);
15523 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15524 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15525 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15526 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15528 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15529 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15530 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15531 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15534 emit_insn (gen_rtx_SET (cond, x));
15536 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15537 emit_unlikely_jump (x, label2);
15539 x = newval;
15540 if (mask)
15541 x = rs6000_mask_atomic_subword (retval, newval, mask);
15543 emit_store_conditional (orig_mode, cond, mem, x);
15545 if (!is_weak)
15547 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15548 emit_unlikely_jump (x, label1);
15551 if (!is_mm_relaxed (mod_f))
15552 emit_label (XEXP (label2, 0));
15554 rs6000_post_atomic_barrier (mod_s);
15556 if (is_mm_relaxed (mod_f))
15557 emit_label (XEXP (label2, 0));
15559 if (shift)
15560 rs6000_finish_atomic_subword (operands[1], retval, shift);
15561 else if (mode != GET_MODE (operands[1]))
15562 convert_move (operands[1], retval, 1);
15564 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15565 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15566 emit_insn (gen_rtx_SET (boolval, x));
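/* Illustrative example (not from this file): a strong compare-and-swap on
   a char reaches this expander via the __atomic builtins; on pre-power8
   targets it goes through the subword path above (lwarx on the containing
   word plus mask/merge), while with TARGET_SYNC_HI_QI it can use lbarx
   directly:

       _Bool
       cas_char (char *p, char *expected, char desired)
       {
         return __atomic_compare_exchange_n (p, expected, desired, 0,
                                             __ATOMIC_SEQ_CST,
                                             __ATOMIC_SEQ_CST);
       }
*/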
15569 /* Expand an atomic exchange operation. */
15571 void
15572 rs6000_expand_atomic_exchange (rtx operands[])
15574 rtx retval, mem, val, cond;
15575 machine_mode mode;
15576 enum memmodel model;
15577 rtx label, x, mask, shift;
15579 retval = operands[0];
15580 mem = operands[1];
15581 val = operands[2];
15582 model = memmodel_base (INTVAL (operands[3]));
15583 mode = GET_MODE (mem);
15585 mask = shift = NULL_RTX;
15586 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15588 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15590 /* Shift and mask VAL into position within the word. */
15591 val = convert_modes (SImode, mode, val, 1);
15592 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15593 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15595 /* Prepare to adjust the return value. */
15596 retval = gen_reg_rtx (SImode);
15597 mode = SImode;
15600 mem = rs6000_pre_atomic_barrier (mem, model);
15602 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15603 emit_label (XEXP (label, 0));
15605 emit_load_locked (mode, retval, mem);
15607 x = val;
15608 if (mask)
15609 x = rs6000_mask_atomic_subword (retval, val, mask);
15611 cond = gen_reg_rtx (CCmode);
15612 emit_store_conditional (mode, cond, mem, x);
15614 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15615 emit_unlikely_jump (x, label);
15617 rs6000_post_atomic_barrier (model);
15619 if (shift)
15620 rs6000_finish_atomic_subword (operands[0], retval, shift);
15623 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15624 to perform. MEM is the memory on which to operate. VAL is the second
15625 operand of the binary operator. BEFORE and AFTER are optional locations to
15626 return the value of MEM either before or after the operation. MODEL_RTX
15627 is a CONST_INT containing the memory model to use. */
15629 void
15630 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15631 rtx orig_before, rtx orig_after, rtx model_rtx)
15633 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15634 machine_mode mode = GET_MODE (mem);
15635 machine_mode store_mode = mode;
15636 rtx label, x, cond, mask, shift;
15637 rtx before = orig_before, after = orig_after;
15639 mask = shift = NULL_RTX;
15640 /* On power8, we want to use SImode for the operation. On previous systems,
15641 do the operation on the containing SImode word and shift/mask to get the
15642 proper byte or halfword. */
15643 if (mode == QImode || mode == HImode)
15645 if (TARGET_SYNC_HI_QI)
15647 val = convert_modes (SImode, mode, val, 1);
15649 /* Prepare to adjust the return value. */
15650 before = gen_reg_rtx (SImode);
15651 if (after)
15652 after = gen_reg_rtx (SImode);
15653 mode = SImode;
15655 else
15657 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15659 /* Shift and mask VAL into position within the word. */
15660 val = convert_modes (SImode, mode, val, 1);
15661 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15664 switch (code)
15666 case IOR:
15667 case XOR:
15668 /* We've already zero-extended VAL. That is sufficient to
15669 make certain that it does not affect other bits. */
15670 mask = NULL;
15671 break;
15673 case AND:
15674 /* If we make certain that all of the other bits in VAL are
15675 set, that will be sufficient to not affect other bits. */
15676 x = gen_rtx_NOT (SImode, mask);
15677 x = gen_rtx_IOR (SImode, x, val);
15678 emit_insn (gen_rtx_SET (val, x));
15679 mask = NULL;
15680 break;
15682 case NOT:
15683 case PLUS:
15684 case MINUS:
15685 /* These will all affect bits outside the field and need
15686 adjustment via MASK within the loop. */
15687 break;
15689 default:
15690 gcc_unreachable ();
15693 /* Prepare to adjust the return value. */
15694 before = gen_reg_rtx (SImode);
15695 if (after)
15696 after = gen_reg_rtx (SImode);
15697 store_mode = mode = SImode;
15701 mem = rs6000_pre_atomic_barrier (mem, model);
15703 label = gen_label_rtx ();
15704 emit_label (label);
15705 label = gen_rtx_LABEL_REF (VOIDmode, label);
15707 if (before == NULL_RTX)
15708 before = gen_reg_rtx (mode);
15710 emit_load_locked (mode, before, mem);
15712 if (code == NOT)
15714 x = expand_simple_binop (mode, AND, before, val,
15715 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15716 after = expand_simple_unop (mode, NOT, x, after, 1);
15718 else
15720 after = expand_simple_binop (mode, code, before, val,
15721 after, 1, OPTAB_LIB_WIDEN);
15724 x = after;
15725 if (mask)
15727 x = expand_simple_binop (SImode, AND, after, mask,
15728 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15729 x = rs6000_mask_atomic_subword (before, x, mask);
15731 else if (store_mode != mode)
15732 x = convert_modes (store_mode, mode, x, 1);
15734 cond = gen_reg_rtx (CCmode);
15735 emit_store_conditional (store_mode, cond, mem, x);
15737 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15738 emit_unlikely_jump (x, label);
15740 rs6000_post_atomic_barrier (model);
15742 if (shift)
15744 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15745 then do the calculations in an SImode register. */
15746 if (orig_before)
15747 rs6000_finish_atomic_subword (orig_before, before, shift);
15748 if (orig_after)
15749 rs6000_finish_atomic_subword (orig_after, after, shift);
15751 else if (store_mode != mode)
15753 /* QImode/HImode on machines with lbarx/lharx where we do the native
15754 operation and then do the calculations in an SImode register. */
15755 if (orig_before)
15756 convert_move (orig_before, before, 1);
15757 if (orig_after)
15758 convert_move (orig_after, after, 1);
15760 else if (orig_after && after != orig_after)
15761 emit_move_insn (orig_after, after);
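/* Illustrative example (not from this file): a fetch-and-add on a short
   expands through this function; without lharx the PLUS is performed on
   the containing SImode word and the result is masked back into place
   inside the retry loop:

       short
       add_short (short *p, short v)
       {
         return __atomic_fetch_add (p, v, __ATOMIC_RELAXED);
       }
*/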
15764 /* Emit instructions to move SRC to DST. Called by splitters for
15765 multi-register moves. It will emit at most one instruction for
15766 each register that is accessed; that is, it won't emit li/lis pairs
15767 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15768 register. */
15770 void
15771 rs6000_split_multireg_move (rtx dst, rtx src)
15773 /* The register number of the first register being moved. */
15774 int reg;
15775 /* The mode that is to be moved. */
15776 machine_mode mode;
15777 /* The mode that the move is being done in, and its size. */
15778 machine_mode reg_mode;
15779 int reg_mode_size;
15780 /* The number of registers that will be moved. */
15781 int nregs;
15783 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15784 mode = GET_MODE (dst);
15785 nregs = hard_regno_nregs (reg, mode);
15786 if (FP_REGNO_P (reg))
15787 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15788 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15789 else if (ALTIVEC_REGNO_P (reg))
15790 reg_mode = V16QImode;
15791 else
15792 reg_mode = word_mode;
15793 reg_mode_size = GET_MODE_SIZE (reg_mode);
15795 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15797 /* TDmode residing in FP registers is special, since the ISA requires that
15798 the lower-numbered word of a register pair is always the most significant
15799 word, even in little-endian mode. This does not match the usual subreg
15800 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15801 the appropriate constituent registers "by hand" in little-endian mode.
15803 Note we do not need to check for destructive overlap here since TDmode
15804 can only reside in even/odd register pairs. */
15805 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15807 rtx p_src, p_dst;
15808 int i;
15810 for (i = 0; i < nregs; i++)
15812 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15813 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15814 else
15815 p_src = simplify_gen_subreg (reg_mode, src, mode,
15816 i * reg_mode_size);
15818 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15819 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15820 else
15821 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15822 i * reg_mode_size);
15824 emit_insn (gen_rtx_SET (p_dst, p_src));
15827 return;
15830 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15832 /* Move register range backwards, if we might have destructive
15833 overlap. */
15834 int i;
15835 for (i = nregs - 1; i >= 0; i--)
15836 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15837 i * reg_mode_size),
15838 simplify_gen_subreg (reg_mode, src, mode,
15839 i * reg_mode_size)));
15841 else
15843 int i;
15844 int j = -1;
15845 bool used_update = false;
15846 rtx restore_basereg = NULL_RTX;
15848 if (MEM_P (src) && INT_REGNO_P (reg))
15850 rtx breg;
15852 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15853 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15855 rtx delta_rtx;
15856 breg = XEXP (XEXP (src, 0), 0);
15857 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15858 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15859 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15860 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15861 src = replace_equiv_address (src, breg);
15863 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15865 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15867 rtx basereg = XEXP (XEXP (src, 0), 0);
15868 if (TARGET_UPDATE)
15870 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15871 emit_insn (gen_rtx_SET (ndst,
15872 gen_rtx_MEM (reg_mode,
15873 XEXP (src, 0))));
15874 used_update = true;
15876 else
15877 emit_insn (gen_rtx_SET (basereg,
15878 XEXP (XEXP (src, 0), 1)));
15879 src = replace_equiv_address (src, basereg);
15881 else
15883 rtx basereg = gen_rtx_REG (Pmode, reg);
15884 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15885 src = replace_equiv_address (src, basereg);
15889 breg = XEXP (src, 0);
15890 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15891 breg = XEXP (breg, 0);
15893 /* If the base register we are using to address memory is
15894 also a destination reg, then change that register last. */
15895 if (REG_P (breg)
15896 && REGNO (breg) >= REGNO (dst)
15897 && REGNO (breg) < REGNO (dst) + nregs)
15898 j = REGNO (breg) - REGNO (dst);
15900 else if (MEM_P (dst) && INT_REGNO_P (reg))
15902 rtx breg;
15904 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15905 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15907 rtx delta_rtx;
15908 breg = XEXP (XEXP (dst, 0), 0);
15909 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15910 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15911 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15913 /* We have to update the breg before doing the store.
15914 Use store with update, if available. */
15916 if (TARGET_UPDATE)
15918 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15919 emit_insn (TARGET_32BIT
15920 ? (TARGET_POWERPC64
15921 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15922 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15923 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15924 used_update = true;
15926 else
15927 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15928 dst = replace_equiv_address (dst, breg);
15930 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15931 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15933 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15935 rtx basereg = XEXP (XEXP (dst, 0), 0);
15936 if (TARGET_UPDATE)
15938 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15939 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15940 XEXP (dst, 0)),
15941 nsrc));
15942 used_update = true;
15944 else
15945 emit_insn (gen_rtx_SET (basereg,
15946 XEXP (XEXP (dst, 0), 1)));
15947 dst = replace_equiv_address (dst, basereg);
15949 else
15951 rtx basereg = XEXP (XEXP (dst, 0), 0);
15952 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15953 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15954 && REG_P (basereg)
15955 && REG_P (offsetreg)
15956 && REGNO (basereg) != REGNO (offsetreg));
15957 if (REGNO (basereg) == 0)
15959 rtx tmp = offsetreg;
15960 offsetreg = basereg;
15961 basereg = tmp;
15963 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15964 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15965 dst = replace_equiv_address (dst, basereg);
15968 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15969 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15972 for (i = 0; i < nregs; i++)
15974 /* Calculate index to next subword. */
15975 ++j;
15976 if (j == nregs)
15977 j = 0;
15979 /* If the compiler already emitted the move of the first word by
15980 store with update, no need to do anything. */
15981 if (j == 0 && used_update)
15982 continue;
15984 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15985 j * reg_mode_size),
15986 simplify_gen_subreg (reg_mode, src, mode,
15987 j * reg_mode_size)));
15989 if (restore_basereg != NULL_RTX)
15990 emit_insn (restore_basereg);
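/* Illustrative example (not from this file): on a 64-bit target, copying
   an unsigned __int128 between a register pair and memory is split here
   into two DImode moves; the backwards loop above is what keeps a
   GPR-to-GPR copy correct when the source and destination register
   ranges overlap.  */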
15994 static GTY(()) alias_set_type TOC_alias_set = -1;
15996 alias_set_type
15997 get_TOC_alias_set (void)
15999 if (TOC_alias_set == -1)
16000 TOC_alias_set = new_alias_set ();
16001 return TOC_alias_set;
16004 /* The mode the ABI uses for a word. This is not the same as word_mode
16005 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16007 static scalar_int_mode
16008 rs6000_abi_word_mode (void)
16010 return TARGET_32BIT ? SImode : DImode;
16013 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16014 static char *
16015 rs6000_offload_options (void)
16017 if (TARGET_64BIT)
16018 return xstrdup ("-foffload-abi=lp64");
16019 else
16020 return xstrdup ("-foffload-abi=ilp32");
16024 /* A quick summary of the various types of 'constant-pool tables'
16025 under PowerPC:
16027 Target       Flags            Name             One table per
16028 AIX          (none)           AIX TOC          object file
16029 AIX          -mfull-toc       AIX TOC          object file
16030 AIX          -mminimal-toc    AIX minimal TOC  translation unit
16031 SVR4/EABI    (none)           SVR4 SDATA       object file
16032 SVR4/EABI    -fpic            SVR4 pic         object file
16033 SVR4/EABI    -fPIC            SVR4 PIC         translation unit
16034 SVR4/EABI    -mrelocatable    EABI TOC         function
16035 SVR4/EABI    -maix            AIX TOC          object file
16036 SVR4/EABI    -maix -mminimal-toc
16037                               AIX minimal TOC  translation unit
16039 Name             Reg.  Set by  Entries made by  Addrs?  FP?      Sum?
16042 AIX TOC          2     crt0    as               Y       option   option
16043 AIX minimal TOC  30    prolog  gcc              Y       Y        option
16044 SVR4 SDATA       13    crt0    gcc              N       Y        N
16045 SVR4 pic         30    prolog  ld               Y       not yet  N
16046 SVR4 PIC         30    prolog  gcc              Y       option   option
16047 EABI TOC         30    prolog  gcc              Y       option   option
16051 /* Hash functions for the hash table. */
16053 static unsigned
16054 rs6000_hash_constant (rtx k)
16056 enum rtx_code code = GET_CODE (k);
16057 machine_mode mode = GET_MODE (k);
16058 unsigned result = (code << 3) ^ mode;
16059 const char *format;
16060 int flen, fidx;
16062 format = GET_RTX_FORMAT (code);
16063 flen = strlen (format);
16064 fidx = 0;
16066 switch (code)
16068 case LABEL_REF:
16069 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16071 case CONST_WIDE_INT:
16073 int i;
16074 flen = CONST_WIDE_INT_NUNITS (k);
16075 for (i = 0; i < flen; i++)
16076 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16077 return result;
16080 case CONST_DOUBLE:
16081 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16083 case CODE_LABEL:
16084 fidx = 3;
16085 break;
16087 default:
16088 break;
16091 for (; fidx < flen; fidx++)
16092 switch (format[fidx])
16094 case 's':
16096 unsigned i, len;
16097 const char *str = XSTR (k, fidx);
16098 len = strlen (str);
16099 result = result * 613 + len;
16100 for (i = 0; i < len; i++)
16101 result = result * 613 + (unsigned) str[i];
16102 break;
16104 case 'u':
16105 case 'e':
16106 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16107 break;
16108 case 'i':
16109 case 'n':
16110 result = result * 613 + (unsigned) XINT (k, fidx);
16111 break;
16112 case 'w':
16113 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16114 result = result * 613 + (unsigned) XWINT (k, fidx);
16115 else
16117 size_t i;
16118 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16119 result = result * 613 + (unsigned) (XWINT (k, fidx)
16120 >> CHAR_BIT * i);
16122 break;
16123 case '0':
16124 break;
16125 default:
16126 gcc_unreachable ();
16129 return result;
16132 hashval_t
16133 toc_hasher::hash (toc_hash_struct *thc)
16135 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16138 /* Compare H1 and H2 for equivalence. */
16140 bool
16141 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16143 rtx r1 = h1->key;
16144 rtx r2 = h2->key;
16146 if (h1->key_mode != h2->key_mode)
16147 return 0;
16149 return rtx_equal_p (r1, r2);
16152 /* These are the names given by the C++ front-end to vtables, and
16153 vtable-like objects. Ideally, this logic should not be here;
16154 instead, there should be some programmatic way of inquiring as
16155 to whether or not an object is a vtable. */
16157 #define VTABLE_NAME_P(NAME) \
16158 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16159 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16160 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16161 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16162 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
16164 #ifdef NO_DOLLAR_IN_LABEL
16165 /* Return a GGC-allocated character string translating dollar signs in
16166 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16168 const char *
16169 rs6000_xcoff_strip_dollar (const char *name)
16171 char *strip, *p;
16172 const char *q;
16173 size_t len;
16175 q = (const char *) strchr (name, '$');
16177 if (q == 0 || q == name)
16178 return name;
16180 len = strlen (name);
16181 strip = XALLOCAVEC (char, len + 1);
16182 strcpy (strip, name);
16183 p = strip + (q - name);
16184 while (p)
16186 *p = '_';
16187 p = strchr (p + 1, '$');
16190 return ggc_alloc_string (strip, len);
16192 #endif
16194 void
16195 rs6000_output_symbol_ref (FILE *file, rtx x)
16197 const char *name = XSTR (x, 0);
16199 /* Currently C++ toc references to vtables can be emitted before it
16200 is decided whether the vtable is public or private. If this is
16201 the case, then the linker will eventually complain that there is
16202 a reference to an unknown section. Thus, for vtables only,
16203 we emit the TOC reference to reference the identifier and not the
16204 symbol. */
16205 if (VTABLE_NAME_P (name))
16207 RS6000_OUTPUT_BASENAME (file, name);
16209 else
16210 assemble_name (file, name);
16213 /* Output a TOC entry. We derive the entry name from what is being
16214 written. */
16216 void
16217 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16219 char buf[256];
16220 const char *name = buf;
16221 rtx base = x;
16222 HOST_WIDE_INT offset = 0;
16224 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16226 /* When the linker won't eliminate them, don't output duplicate
16227 TOC entries (this happens on AIX if there is any kind of TOC,
16228 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16229 CODE_LABELs. */
16230 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16232 struct toc_hash_struct *h;
16234 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16235 time because GGC is not initialized at that point. */
16236 if (toc_hash_table == NULL)
16237 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16239 h = ggc_alloc<toc_hash_struct> ();
16240 h->key = x;
16241 h->key_mode = mode;
16242 h->labelno = labelno;
16244 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16245 if (*found == NULL)
16246 *found = h;
16247 else /* This is indeed a duplicate.
16248 Set this label equal to that label. */
16250 fputs ("\t.set ", file);
16251 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16252 fprintf (file, "%d,", labelno);
16253 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16254 fprintf (file, "%d\n", ((*found)->labelno));
16256 #ifdef HAVE_AS_TLS
16257 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16258 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16259 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16261 fputs ("\t.set ", file);
16262 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16263 fprintf (file, "%d,", labelno);
16264 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16265 fprintf (file, "%d\n", ((*found)->labelno));
16267 #endif
16268 return;
16272 /* If we're going to put a double constant in the TOC, make sure it's
16273 aligned properly when strict alignment is on. */
16274 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16275 && STRICT_ALIGNMENT
16276 && GET_MODE_BITSIZE (mode) >= 64
16277 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
16278 ASM_OUTPUT_ALIGN (file, 3);
16281 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16283 /* Handle FP constants specially. Note that if we have a minimal
16284 TOC, things we put here aren't actually in the TOC, so we can allow
16285 FP constants. */
16286 if (CONST_DOUBLE_P (x)
16287 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16288 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16290 long k[4];
16292 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16293 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16294 else
16295 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16297 if (TARGET_64BIT)
16299 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16300 fputs (DOUBLE_INT_ASM_OP, file);
16301 else
16302 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16303 k[0] & 0xffffffff, k[1] & 0xffffffff,
16304 k[2] & 0xffffffff, k[3] & 0xffffffff);
16305 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16306 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16307 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16308 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16309 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16310 return;
16312 else
16314 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16315 fputs ("\t.long ", file);
16316 else
16317 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16318 k[0] & 0xffffffff, k[1] & 0xffffffff,
16319 k[2] & 0xffffffff, k[3] & 0xffffffff);
16320 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16321 k[0] & 0xffffffff, k[1] & 0xffffffff,
16322 k[2] & 0xffffffff, k[3] & 0xffffffff);
16323 return;
16326 else if (CONST_DOUBLE_P (x)
16327 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16329 long k[2];
16331 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16332 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16333 else
16334 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16336 if (TARGET_64BIT)
16338 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16339 fputs (DOUBLE_INT_ASM_OP, file);
16340 else
16341 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16342 k[0] & 0xffffffff, k[1] & 0xffffffff);
16343 fprintf (file, "0x%lx%08lx\n",
16344 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16345 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16346 return;
16348 else
16350 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16351 fputs ("\t.long ", file);
16352 else
16353 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16354 k[0] & 0xffffffff, k[1] & 0xffffffff);
16355 fprintf (file, "0x%lx,0x%lx\n",
16356 k[0] & 0xffffffff, k[1] & 0xffffffff);
16357 return;
16360 else if (CONST_DOUBLE_P (x)
16361 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16363 long l;
16365 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16366 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16367 else
16368 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16370 if (TARGET_64BIT)
16372 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16373 fputs (DOUBLE_INT_ASM_OP, file);
16374 else
16375 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16376 if (WORDS_BIG_ENDIAN)
16377 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16378 else
16379 fprintf (file, "0x%lx\n", l & 0xffffffff);
16380 return;
16382 else
16384 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16385 fputs ("\t.long ", file);
16386 else
16387 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16388 fprintf (file, "0x%lx\n", l & 0xffffffff);
16389 return;
16392 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16394 unsigned HOST_WIDE_INT low;
16395 HOST_WIDE_INT high;
16397 low = INTVAL (x) & 0xffffffff;
16398 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16400 /* TOC entries are always Pmode-sized, so when big-endian
16401 smaller integer constants in the TOC need to be padded.
16402 (This is still a win over putting the constants in
16403 a separate constant pool, because then we'd have
16404 to have both a TOC entry _and_ the actual constant.)
16406 For a 32-bit target, CONST_INT values are loaded and shifted
16407 entirely within `low' and can be stored in one TOC entry. */
16409 /* It would be easy to make this work, but it doesn't now. */
16410 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16412 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16414 low |= high << 32;
16415 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16416 high = (HOST_WIDE_INT) low >> 32;
16417 low &= 0xffffffff;
16420 if (TARGET_64BIT)
16422 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16423 fputs (DOUBLE_INT_ASM_OP, file);
16424 else
16425 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16426 (long) high & 0xffffffff, (long) low & 0xffffffff);
16427 fprintf (file, "0x%lx%08lx\n",
16428 (long) high & 0xffffffff, (long) low & 0xffffffff);
16429 return;
16431 else
16433 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16435 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16436 fputs ("\t.long ", file);
16437 else
16438 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16439 (long) high & 0xffffffff, (long) low & 0xffffffff);
16440 fprintf (file, "0x%lx,0x%lx\n",
16441 (long) high & 0xffffffff, (long) low & 0xffffffff);
16443 else
16445 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16446 fputs ("\t.long ", file);
16447 else
16448 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16449 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16451 return;
16455 if (GET_CODE (x) == CONST)
16457 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16458 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16460 base = XEXP (XEXP (x, 0), 0);
16461 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16464 switch (GET_CODE (base))
16466 case SYMBOL_REF:
16467 name = XSTR (base, 0);
16468 break;
16470 case LABEL_REF:
16471 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16472 CODE_LABEL_NUMBER (XEXP (base, 0)));
16473 break;
16475 case CODE_LABEL:
16476 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16477 break;
16479 default:
16480 gcc_unreachable ();
16483 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16484 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16485 else
16487 fputs ("\t.tc ", file);
16488 RS6000_OUTPUT_BASENAME (file, name);
16490 if (offset < 0)
16491 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16492 else if (offset)
16493 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16495 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16496 after other TOC symbols, reducing overflow of small TOC access
16497 to [TC] symbols. */
16498 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16499 ? "[TE]," : "[TC],", file);
16502 /* Currently C++ toc references to vtables can be emitted before it
16503 is decided whether the vtable is public or private. If this is
16504 the case, then the linker will eventually complain that there is
16505 a TOC reference to an unknown section. Thus, for vtables only,
16506 we emit the TOC reference to reference the symbol and not the
16507 section. */
16508 if (VTABLE_NAME_P (name))
16510 RS6000_OUTPUT_BASENAME (file, name);
16511 if (offset < 0)
16512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16513 else if (offset > 0)
16514 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16516 else
16517 output_addr_const (file, x);
16519 #if HAVE_AS_TLS
16520 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16522 switch (SYMBOL_REF_TLS_MODEL (base))
16524 case 0:
16525 break;
16526 case TLS_MODEL_LOCAL_EXEC:
16527 fputs ("@le", file);
16528 break;
16529 case TLS_MODEL_INITIAL_EXEC:
16530 fputs ("@ie", file);
16531 break;
16532 /* Use global-dynamic for local-dynamic. */
16533 case TLS_MODEL_GLOBAL_DYNAMIC:
16534 case TLS_MODEL_LOCAL_DYNAMIC:
16535 putc ('\n', file);
16536 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16537 fputs ("\t.tc .", file);
16538 RS6000_OUTPUT_BASENAME (file, name);
16539 fputs ("[TC],", file);
16540 output_addr_const (file, x);
16541 fputs ("@m", file);
16542 break;
16543 default:
16544 gcc_unreachable ();
16547 #endif
16549 putc ('\n', file);
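/* Worked example (64-bit ELF, illustration only): for the DFmode constant
   1.0 the function above emits roughly

       .LC0:
               .quad 0x3ff0000000000000

   where DOUBLE_INT_ASM_OP supplies the ".quad" pseudo-op and the two
   32-bit halves of the REAL_VALUE_TO_TARGET_DOUBLE image are glued
   together in word order.  */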
16552 /* Output an assembler pseudo-op to write an ASCII string of N characters
16553 starting at P to FILE.
16555 On the RS/6000, we have to do this using the .byte operation and
16556 write out special characters outside the quoted string.
16557 Also, the assembler is broken; very long strings are truncated,
16558 so we must artificially break them up early. */
16560 void
16561 output_ascii (FILE *file, const char *p, int n)
16563 char c;
16564 int i, count_string;
16565 const char *for_string = "\t.byte \"";
16566 const char *for_decimal = "\t.byte ";
16567 const char *to_close = NULL;
16569 count_string = 0;
16570 for (i = 0; i < n; i++)
16572 c = *p++;
16573 if (c >= ' ' && c < 0177)
16575 if (for_string)
16576 fputs (for_string, file);
16577 putc (c, file);
16579 /* Write two quotes to get one. */
16580 if (c == '"')
16582 putc (c, file);
16583 ++count_string;
16586 for_string = NULL;
16587 for_decimal = "\"\n\t.byte ";
16588 to_close = "\"\n";
16589 ++count_string;
16591 if (count_string >= 512)
16593 fputs (to_close, file);
16595 for_string = "\t.byte \"";
16596 for_decimal = "\t.byte ";
16597 to_close = NULL;
16598 count_string = 0;
16601 else
16603 if (for_decimal)
16604 fputs (for_decimal, file);
16605 fprintf (file, "%d", c);
16607 for_string = "\n\t.byte \"";
16608 for_decimal = ", ";
16609 to_close = "\n";
16610 count_string = 0;
16614 /* Now close the string if we have written one. Then end the line. */
16615 if (to_close)
16616 fputs (to_close, file);
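/* Worked example: output_ascii (file, "A\n", 2) emits

       .byte "A"
       .byte 10

   The printable 'A' goes into a quoted string; the newline forces the
   string closed and is written as a decimal byte on a new directive.  */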
16619 /* Generate a unique section name for FILENAME for a section type
16620 represented by SECTION_DESC. Output goes into BUF.
16622 SECTION_DESC can be any string, as long as it is different for each
16623 possible section type.
16625 We name the section in the same manner as xlc. The name begins with an
16626 underscore followed by the filename (after stripping any leading directory
16627 names) with the last period replaced by the string SECTION_DESC. If
16628 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16629 the name. */
16631 void
16632 rs6000_gen_section_name (char **buf, const char *filename,
16633 const char *section_desc)
16635 const char *q, *after_last_slash, *last_period = 0;
16636 char *p;
16637 int len;
16639 after_last_slash = filename;
16640 for (q = filename; *q; q++)
16642 if (*q == '/')
16643 after_last_slash = q + 1;
16644 else if (*q == '.')
16645 last_period = q;
16648 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16649 *buf = (char *) xmalloc (len);
16651 p = *buf;
16652 *p++ = '_';
16654 for (q = after_last_slash; *q; q++)
16656 if (q == last_period)
16658 strcpy (p, section_desc);
16659 p += strlen (section_desc);
16660 break;
16663 else if (ISALNUM (*q))
16664 *p++ = *q;
16667 if (last_period == 0)
16668 strcpy (p, section_desc);
16669 else
16670 *p = '\0';
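/* Worked example (hypothetical arguments): a call such as
   rs6000_gen_section_name (&buf, "dir/foo.c", ".bss_") strips the
   directory, keeps the alphanumeric stem, and substitutes the descriptor
   at the last period, producing "_foo.bss_".  */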
16673 /* Emit profile function. */
16675 void
16676 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16678 /* Non-standard profiling for kernels, which just saves LR then calls
16679 _mcount without worrying about arg saves. The idea is to change
16680 the function prologue as little as possible as it isn't easy to
16681 account for arg save/restore code added just for _mcount. */
16682 if (TARGET_PROFILE_KERNEL)
16683 return;
16685 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16687 #ifndef NO_PROFILE_COUNTERS
16688 # define NO_PROFILE_COUNTERS 0
16689 #endif
16690 if (NO_PROFILE_COUNTERS)
16691 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16692 LCT_NORMAL, VOIDmode);
16693 else
16695 char buf[30];
16696 const char *label_name;
16697 rtx fun;
16699 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16700 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16701 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16703 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16704 LCT_NORMAL, VOIDmode, fun, Pmode);
16707 else if (DEFAULT_ABI == ABI_DARWIN)
16709 const char *mcount_name = RS6000_MCOUNT;
16710 int caller_addr_regno = LR_REGNO;
16712 /* Be conservative and always set this, at least for now. */
16713 crtl->uses_pic_offset_table = 1;
16715 #if TARGET_MACHO
16716 /* For PIC code, set up a stub and collect the caller's address
16717 from r0, which is where the prologue puts it. */
16718 if (MACHOPIC_INDIRECT
16719 && crtl->uses_pic_offset_table)
16720 caller_addr_regno = 0;
16721 #endif
16722 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16723 LCT_NORMAL, VOIDmode,
16724 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16728 /* Write function profiler code. */
16730 void
16731 output_function_profiler (FILE *file, int labelno)
16733 char buf[100];
16735 switch (DEFAULT_ABI)
16737 default:
16738 gcc_unreachable ();
16740 case ABI_V4:
16741 if (!TARGET_32BIT)
16743 warning (0, "no profiling of 64-bit code for this ABI");
16744 return;
16746 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16747 fprintf (file, "\tmflr %s\n", reg_names[0]);
16748 if (NO_PROFILE_COUNTERS)
16750 asm_fprintf (file, "\tstw %s,4(%s)\n",
16751 reg_names[0], reg_names[1]);
16753 else if (TARGET_SECURE_PLT && flag_pic)
16755 if (TARGET_LINK_STACK)
16757 char name[32];
16758 get_ppc476_thunk_name (name);
16759 asm_fprintf (file, "\tbl %s\n", name);
16761 else
16762 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16763 asm_fprintf (file, "\tstw %s,4(%s)\n",
16764 reg_names[0], reg_names[1]);
16765 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16766 asm_fprintf (file, "\taddis %s,%s,",
16767 reg_names[12], reg_names[12]);
16768 assemble_name (file, buf);
16769 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16770 assemble_name (file, buf);
16771 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16773 else if (flag_pic == 1)
16775 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16776 asm_fprintf (file, "\tstw %s,4(%s)\n",
16777 reg_names[0], reg_names[1]);
16778 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16779 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16780 assemble_name (file, buf);
16781 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16783 else if (flag_pic > 1)
16785 asm_fprintf (file, "\tstw %s,4(%s)\n",
16786 reg_names[0], reg_names[1]);
16787 /* Now, we need to get the address of the label. */
16788 if (TARGET_LINK_STACK)
16790 char name[32];
16791 get_ppc476_thunk_name (name);
16792 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16793 assemble_name (file, buf);
16794 fputs ("-.\n1:", file);
16795 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16796 asm_fprintf (file, "\taddi %s,%s,4\n",
16797 reg_names[11], reg_names[11]);
16799 else
16801 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16802 assemble_name (file, buf);
16803 fputs ("-.\n1:", file);
16804 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16806 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16807 reg_names[0], reg_names[11]);
16808 asm_fprintf (file, "\tadd %s,%s,%s\n",
16809 reg_names[0], reg_names[0], reg_names[11]);
16811 else
16813 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16814 assemble_name (file, buf);
16815 fputs ("@ha\n", file);
16816 asm_fprintf (file, "\tstw %s,4(%s)\n",
16817 reg_names[0], reg_names[1]);
16818 asm_fprintf (file, "\tla %s,", reg_names[0]);
16819 assemble_name (file, buf);
16820 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16823 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16824 fprintf (file, "\tbl %s%s\n",
16825 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16826 break;
16828 case ABI_AIX:
16829 case ABI_ELFv2:
16830 case ABI_DARWIN:
16831 /* Don't do anything, done in output_profile_hook (). */
16832 break;
16838 /* The following variable value is the last issued insn. */
16840 static rtx_insn *last_scheduled_insn;
16842 /* The following variable helps to balance issuing of load and
16843 store instructions. */
16845 static int load_store_pendulum;
16847 /* The following variable helps pair divide insns during scheduling. */
16848 static int divide_cnt;
16849 /* The following variable helps pair and alternate vector and vector load
16850 insns during scheduling. */
16851 static int vec_pairing;
16854 /* Power4 load update and store update instructions are cracked into a
16855 load or store and an integer insn which are executed in the same cycle.
16856 Branches have their own dispatch slot which does not count against the
16857 GCC issue rate, but it changes the program flow so there are no other
16858 instructions to issue in this cycle. */
16860 static int
16861 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16863 last_scheduled_insn = insn;
16864 if (GET_CODE (PATTERN (insn)) == USE
16865 || GET_CODE (PATTERN (insn)) == CLOBBER)
16867 cached_can_issue_more = more;
16868 return cached_can_issue_more;
16871 if (insn_terminates_group_p (insn, current_group))
16873 cached_can_issue_more = 0;
16874 return cached_can_issue_more;
16877 /* If the insn has no reservation but we still reach here, just return MORE. */
16878 if (recog_memoized (insn) < 0)
16879 return more;
16881 if (rs6000_sched_groups)
16883 if (is_microcoded_insn (insn))
16884 cached_can_issue_more = 0;
16885 else if (is_cracked_insn (insn))
16886 cached_can_issue_more = more > 2 ? more - 2 : 0;
16887 else
16888 cached_can_issue_more = more - 1;
16890 return cached_can_issue_more;
16893 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16894 return 0;
16896 cached_can_issue_more = more - 1;
16897 return cached_can_issue_more;
16900 static int
16901 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16903 int r = rs6000_variable_issue_1 (insn, more);
16904 if (verbose)
16905 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16906 return r;
16909 /* Adjust the cost of a scheduling dependency. Return the new cost of
16910 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16912 static int
16913 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16914 unsigned int)
16916 enum attr_type attr_type;
16918 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16919 return cost;
16921 switch (dep_type)
16923 case REG_DEP_TRUE:
16925 /* Data dependency; DEP_INSN writes a register that INSN reads
16926 some cycles later. */
16928 /* Separate a load from a narrower, dependent store. */
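/* For example: a 4-byte store followed by a dependent 8-byte load of
   the same data triggers the 14-cycle separation below, presumably
   because the wider load cannot be forwarded from the narrower store
   queue entry. */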
16929 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16930 || rs6000_tune == PROCESSOR_FUTURE)
16931 && GET_CODE (PATTERN (insn)) == SET
16932 && GET_CODE (PATTERN (dep_insn)) == SET
16933 && MEM_P (XEXP (PATTERN (insn), 1))
16934 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16935 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16936 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16937 return cost + 14;
16939 attr_type = get_attr_type (insn);
16941 switch (attr_type)
16943 case TYPE_JMPREG:
16944 /* Tell the first scheduling pass about the latency between
16945 a mtctr and bctr (and mtlr and br/blr). The first
16946 scheduling pass will not know about this latency since
16947 the mtctr instruction, which has the latency associated
16948 to it, will be generated by reload. */
16949 return 4;
16950 case TYPE_BRANCH:
16951 /* Leave some extra cycles between a compare and its
16952 dependent branch, to inhibit expensive mispredicts. */
16953 if ((rs6000_tune == PROCESSOR_PPC603
16954 || rs6000_tune == PROCESSOR_PPC604
16955 || rs6000_tune == PROCESSOR_PPC604e
16956 || rs6000_tune == PROCESSOR_PPC620
16957 || rs6000_tune == PROCESSOR_PPC630
16958 || rs6000_tune == PROCESSOR_PPC750
16959 || rs6000_tune == PROCESSOR_PPC7400
16960 || rs6000_tune == PROCESSOR_PPC7450
16961 || rs6000_tune == PROCESSOR_PPCE5500
16962 || rs6000_tune == PROCESSOR_PPCE6500
16963 || rs6000_tune == PROCESSOR_POWER4
16964 || rs6000_tune == PROCESSOR_POWER5
16965 || rs6000_tune == PROCESSOR_POWER7
16966 || rs6000_tune == PROCESSOR_POWER8
16967 || rs6000_tune == PROCESSOR_POWER9
16968 || rs6000_tune == PROCESSOR_FUTURE
16969 || rs6000_tune == PROCESSOR_CELL)
16970 && recog_memoized (dep_insn)
16971 && (INSN_CODE (dep_insn) >= 0))
16973 switch (get_attr_type (dep_insn))
16975 case TYPE_CMP:
16976 case TYPE_FPCOMPARE:
16977 case TYPE_CR_LOGICAL:
16978 return cost + 2;
16979 case TYPE_EXTS:
16980 case TYPE_MUL:
16981 if (get_attr_dot (dep_insn) == DOT_YES)
16982 return cost + 2;
16983 else
16984 break;
16985 case TYPE_SHIFT:
16986 if (get_attr_dot (dep_insn) == DOT_YES
16987 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16988 return cost + 2;
16989 else
16990 break;
16991 default:
16992 break;
16994 break;
16996 case TYPE_STORE:
16997 case TYPE_FPSTORE:
16998 if ((rs6000_tune == PROCESSOR_POWER6)
16999 && recog_memoized (dep_insn)
17000 && (INSN_CODE (dep_insn) >= 0))
17003 if (GET_CODE (PATTERN (insn)) != SET)
17004 /* If this happens, we have to extend this to schedule
17005 optimally. Return default for now. */
17006 return cost;
17008 /* Adjust the cost for the case where the value written
17009 by a fixed point operation is used as the address
17010 gen value on a store. */
17011 switch (get_attr_type (dep_insn))
17013 case TYPE_LOAD:
17014 case TYPE_CNTLZ:
17016 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17017 return get_attr_sign_extend (dep_insn)
17018 == SIGN_EXTEND_YES ? 6 : 4;
17019 break;
17021 case TYPE_SHIFT:
17023 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17024 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17025 6 : 3;
17026 break;
17028 case TYPE_INTEGER:
17029 case TYPE_ADD:
17030 case TYPE_LOGICAL:
17031 case TYPE_EXTS:
17032 case TYPE_INSERT:
17034 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17035 return 3;
17036 break;
17038 case TYPE_STORE:
17039 case TYPE_FPLOAD:
17040 case TYPE_FPSTORE:
17042 if (get_attr_update (dep_insn) == UPDATE_YES
17043 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17044 return 3;
17045 break;
17047 case TYPE_MUL:
17049 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17050 return 17;
17051 break;
17053 case TYPE_DIV:
17055 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17056 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17057 break;
17059 default:
17060 break;
17063 break;
17065 case TYPE_LOAD:
17066 if ((rs6000_tune == PROCESSOR_POWER6)
17067 && recog_memoized (dep_insn)
17068 && (INSN_CODE (dep_insn) >= 0))
17071 /* Adjust the cost for the case where the value written
17072 by a fixed point instruction is used within the address
17073 gen portion of a subsequent load(u)(x). */
17074 switch (get_attr_type (dep_insn))
17076 case TYPE_LOAD:
17077 case TYPE_CNTLZ:
17079 if (set_to_load_agen (dep_insn, insn))
17080 return get_attr_sign_extend (dep_insn)
17081 == SIGN_EXTEND_YES ? 6 : 4;
17082 break;
17084 case TYPE_SHIFT:
17086 if (set_to_load_agen (dep_insn, insn))
17087 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17088 6 : 3;
17089 break;
17091 case TYPE_INTEGER:
17092 case TYPE_ADD:
17093 case TYPE_LOGICAL:
17094 case TYPE_EXTS:
17095 case TYPE_INSERT:
17097 if (set_to_load_agen (dep_insn, insn))
17098 return 3;
17099 break;
17101 case TYPE_STORE:
17102 case TYPE_FPLOAD:
17103 case TYPE_FPSTORE:
17105 if (get_attr_update (dep_insn) == UPDATE_YES
17106 && set_to_load_agen (dep_insn, insn))
17107 return 3;
17108 break;
17110 case TYPE_MUL:
17112 if (set_to_load_agen (dep_insn, insn))
17113 return 17;
17114 break;
17116 case TYPE_DIV:
17118 if (set_to_load_agen (dep_insn, insn))
17119 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17120 break;
17122 default:
17123 break;
17126 break;
17128 case TYPE_FPLOAD:
17129 if ((rs6000_tune == PROCESSOR_POWER6)
17130 && get_attr_update (insn) == UPDATE_NO
17131 && recog_memoized (dep_insn)
17132 && (INSN_CODE (dep_insn) >= 0)
17133 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17134 return 2;
17136 default:
17137 break;
17140 /* Fall out to return default cost. */
17142 break;
17144 case REG_DEP_OUTPUT:
17145 /* Output dependency; DEP_INSN writes a register that INSN writes some
17146 cycles later. */
17147 if ((rs6000_tune == PROCESSOR_POWER6)
17148 && recog_memoized (dep_insn)
17149 && (INSN_CODE (dep_insn) >= 0))
17151 attr_type = get_attr_type (insn);
17153 switch (attr_type)
17155 case TYPE_FP:
17156 case TYPE_FPSIMPLE:
17157 if (get_attr_type (dep_insn) == TYPE_FP
17158 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17159 return 1;
17160 break;
17161 case TYPE_FPLOAD:
17162 if (get_attr_update (insn) == UPDATE_NO
17163 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17164 return 2;
17165 break;
17166 default:
17167 break;
17170 /* Fall through, no cost for output dependency. */
17171 /* FALLTHRU */
17173 case REG_DEP_ANTI:
17174 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17175 cycles later. */
17176 return 0;
17178 default:
17179 gcc_unreachable ();
17182 return cost;
17185 /* Debug version of rs6000_adjust_cost. */
17187 static int
17188 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17189 int cost, unsigned int dw)
17191 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17193 if (ret != cost)
17195 const char *dep;
17197 switch (dep_type)
17199 default: dep = "unknown dependency"; break;
17200 case REG_DEP_TRUE: dep = "data dependency"; break;
17201 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17202 case REG_DEP_ANTI: dep = "anti dependency"; break;
17205 fprintf (stderr,
17206 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17207 "%s, insn:\n", ret, cost, dep);
17209 debug_rtx (insn);
17212 return ret;
17215 /* Return true if INSN is microcoded, false otherwise. */
17218 static bool
17219 is_microcoded_insn (rtx_insn *insn)
17221 if (!insn || !NONDEBUG_INSN_P (insn)
17222 || GET_CODE (PATTERN (insn)) == USE
17223 || GET_CODE (PATTERN (insn)) == CLOBBER)
17224 return false;
17226 if (rs6000_tune == PROCESSOR_CELL)
17227 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17229 if (rs6000_sched_groups
17230 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17232 enum attr_type type = get_attr_type (insn);
17233 if ((type == TYPE_LOAD
17234 && get_attr_update (insn) == UPDATE_YES
17235 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17236 || ((type == TYPE_LOAD || type == TYPE_STORE)
17237 && get_attr_update (insn) == UPDATE_YES
17238 && get_attr_indexed (insn) == INDEXED_YES)
17239 || type == TYPE_MFCR)
17240 return true;
17243 return false;
17246 /* The function returns true if INSN is cracked into 2 instructions
17247 by the processor (and therefore occupies 2 issue slots). */
17249 static bool
17250 is_cracked_insn (rtx_insn *insn)
17252 if (!insn || !NONDEBUG_INSN_P (insn)
17253 || GET_CODE (PATTERN (insn)) == USE
17254 || GET_CODE (PATTERN (insn)) == CLOBBER)
17255 return false;
17257 if (rs6000_sched_groups
17258 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17260 enum attr_type type = get_attr_type (insn);
17261 if ((type == TYPE_LOAD
17262 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17263 && get_attr_update (insn) == UPDATE_NO)
17264 || (type == TYPE_LOAD
17265 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17266 && get_attr_update (insn) == UPDATE_YES
17267 && get_attr_indexed (insn) == INDEXED_NO)
17268 || (type == TYPE_STORE
17269 && get_attr_update (insn) == UPDATE_YES
17270 && get_attr_indexed (insn) == INDEXED_NO)
17271 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17272 && get_attr_update (insn) == UPDATE_YES)
17273 || (type == TYPE_CR_LOGICAL
17274 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17275 || (type == TYPE_EXTS
17276 && get_attr_dot (insn) == DOT_YES)
17277 || (type == TYPE_SHIFT
17278 && get_attr_dot (insn) == DOT_YES
17279 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17280 || (type == TYPE_MUL
17281 && get_attr_dot (insn) == DOT_YES)
17282 || type == TYPE_DIV
17283 || (type == TYPE_INSERT
17284 && get_attr_size (insn) == SIZE_32))
17285 return true;
17288 return false;
17291 /* The function returns true if INSN can be issued only from
17292 the branch slot. */
17294 static bool
17295 is_branch_slot_insn (rtx_insn *insn)
17297 if (!insn || !NONDEBUG_INSN_P (insn)
17298 || GET_CODE (PATTERN (insn)) == USE
17299 || GET_CODE (PATTERN (insn)) == CLOBBER)
17300 return false;
17302 if (rs6000_sched_groups)
17304 enum attr_type type = get_attr_type (insn);
17305 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17306 return true;
17307 return false;
17310 return false;
17313 /* Return true if OUT_INSN sets a value that is used in the address
17314 generation computation of IN_INSN. */
17315 static bool
17316 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17318 rtx out_set, in_set;
17320 /* For performance reasons, only handle the simple case where
17321 both insns are a single_set. */
17322 out_set = single_set (out_insn);
17323 if (out_set)
17325 in_set = single_set (in_insn);
17326 if (in_set)
17327 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17330 return false;
17333 /* Try to determine base/offset/size parts of the given MEM.
17334 Return true if successful, false if any of the values couldn't
17335 be determined.
17337 This function only looks for REG or REG+CONST address forms.
17338 The REG+REG address form will return false. */
17340 static bool
17341 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17342 HOST_WIDE_INT *size)
17344 rtx addr_rtx;
17345 if (MEM_SIZE_KNOWN_P (mem))
17346 *size = MEM_SIZE (mem);
17347 else
17348 return false;
17350 addr_rtx = (XEXP (mem, 0));
17351 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17352 addr_rtx = XEXP (addr_rtx, 1);
17354 *offset = 0;
17355 while (GET_CODE (addr_rtx) == PLUS
17356 && CONST_INT_P (XEXP (addr_rtx, 1)))
17358 *offset += INTVAL (XEXP (addr_rtx, 1));
17359 addr_rtx = XEXP (addr_rtx, 0);
17361 if (!REG_P (addr_rtx))
17362 return false;
17364 *base = addr_rtx;
17365 return true;
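/* A worked example of the accepted address forms (register numbers
   are illustrative):

     (mem (reg 9))                               base r9, offset 0
     (mem (plus (reg 9) (const_int 16)))         base r9, offset 16
     (mem (plus (plus (reg 9) (const_int 16))
                (const_int 4)))                  base r9, offset 20
     (mem (plus (reg 9) (reg 10)))               rejected (REG+REG)

   In each accepted case *size is taken from MEM_SIZE, so the MEM must
   carry known size attributes. */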
17368 /* Return true if the target storage location of MEM1 is adjacent
17369 to the target storage location of MEM2. */
17372 static bool
17373 adjacent_mem_locations (rtx mem1, rtx mem2)
17375 rtx reg1, reg2;
17376 HOST_WIDE_INT off1, size1, off2, size2;
17378 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17379 && get_memref_parts (mem2, &reg2, &off2, &size2))
17380 return ((REGNO (reg1) == REGNO (reg2))
17381 && ((off1 + size1 == off2)
17382 || (off2 + size2 == off1)));
17384 return false;
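/* For example (illustrative numbers): an 8-byte access at r3+8 covers
   bytes [8,16) and another 8-byte access at r3+16 begins exactly where
   it ends, so off1 + size1 == off2 and they are adjacent. Accesses
   with different base registers are never reported adjacent, even if
   the registers happen to hold nearby addresses. */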
17387 /* This function returns true if it can be determined that the two MEM
17388 locations overlap by at least 1 byte based on base reg/offset/size. */
17390 static bool
17391 mem_locations_overlap (rtx mem1, rtx mem2)
17393 rtx reg1, reg2;
17394 HOST_WIDE_INT off1, size1, off2, size2;
17396 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17397 && get_memref_parts (mem2, &reg2, &off2, &size2))
17398 return ((REGNO (reg1) == REGNO (reg2))
17399 && (((off1 <= off2) && (off1 + size1 > off2))
17400 || ((off2 <= off1) && (off2 + size2 > off1))));
17402 return false;
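/* For example: offset 8/size 8 covers bytes 8..15 and offset 12/size 4
   covers bytes 12..15, so they overlap; offset 8/size 8 and offset
   16/size 8 merely touch and do not. As with adjacency, nothing can
   be concluded when the base registers differ. */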
17405 /* A C statement (sans semicolon) to update the integer scheduling
17406 priority INSN_PRIORITY (INSN). Increase the priority to execute the
17407 INSN earlier, reduce the priority to execute INSN later. Do not
17408 define this macro if you do not need to adjust the scheduling
17409 priorities of insns. */
17411 static int
17412 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17414 rtx load_mem, str_mem;
17415 /* On machines (like the 750) which have asymmetric integer units,
17416 where one integer unit can do multiplies and divides and the other
17417 can't, reduce the priority of multiply/divide so it is scheduled
17418 after other integer operations. */
17420 #if 0
17421 if (! INSN_P (insn))
17422 return priority;
17424 if (GET_CODE (PATTERN (insn)) == USE)
17425 return priority;
17427 switch (rs6000_tune) {
17428 case PROCESSOR_PPC750:
17429 switch (get_attr_type (insn))
17431 default:
17432 break;
17434 case TYPE_MUL:
17435 case TYPE_DIV:
17436 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17437 priority, priority);
17438 if (priority >= 0 && priority < 0x01000000)
17439 priority >>= 3;
17440 break;
17443 #endif
17445 if (insn_must_be_first_in_group (insn)
17446 && reload_completed
17447 && current_sched_info->sched_max_insns_priority
17448 && rs6000_sched_restricted_insns_priority)
17451 /* Prioritize insns that can be dispatched only in the first
17452 dispatch slot. */
17453 if (rs6000_sched_restricted_insns_priority == 1)
17454 /* Attach highest priority to insn. This means that in
17455 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17456 precede 'priority' (critical path) considerations. */
17457 return current_sched_info->sched_max_insns_priority;
17458 else if (rs6000_sched_restricted_insns_priority == 2)
17459 /* Increase priority of insn by a minimal amount. This means that in
17460 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17461 considerations precede dispatch-slot restriction considerations. */
17462 return (priority + 1);
17465 if (rs6000_tune == PROCESSOR_POWER6
17466 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17467 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17468 /* Attach highest priority to insn if the scheduler has just issued two
17469 stores and this instruction is a load, or two loads and this instruction
17470 is a store. Power6 wants loads and stores scheduled alternately
17471 when possible. */
17472 return current_sched_info->sched_max_insns_priority;
17474 return priority;
17477 /* Return true if the instruction is nonpipelined on the Cell. */
17478 static bool
17479 is_nonpipeline_insn (rtx_insn *insn)
17481 enum attr_type type;
17482 if (!insn || !NONDEBUG_INSN_P (insn)
17483 || GET_CODE (PATTERN (insn)) == USE
17484 || GET_CODE (PATTERN (insn)) == CLOBBER)
17485 return false;
17487 type = get_attr_type (insn);
17488 if (type == TYPE_MUL
17489 || type == TYPE_DIV
17490 || type == TYPE_SDIV
17491 || type == TYPE_DDIV
17492 || type == TYPE_SSQRT
17493 || type == TYPE_DSQRT
17494 || type == TYPE_MFCR
17495 || type == TYPE_MFCRF
17496 || type == TYPE_MFJMPR)
17498 return true;
17500 return false;
17504 /* Return how many instructions the machine can issue per cycle. */
17506 static int
17507 rs6000_issue_rate (void)
17509 /* Unless scheduling for register pressure, use an issue rate of 1 for
17510 the first scheduling pass to decrease degradation. */
17511 if (!reload_completed && !flag_sched_pressure)
17512 return 1;
17514 switch (rs6000_tune) {
17515 case PROCESSOR_RS64A:
17516 case PROCESSOR_PPC601: /* ? */
17517 case PROCESSOR_PPC7450:
17518 return 3;
17519 case PROCESSOR_PPC440:
17520 case PROCESSOR_PPC603:
17521 case PROCESSOR_PPC750:
17522 case PROCESSOR_PPC7400:
17523 case PROCESSOR_PPC8540:
17524 case PROCESSOR_PPC8548:
17525 case PROCESSOR_CELL:
17526 case PROCESSOR_PPCE300C2:
17527 case PROCESSOR_PPCE300C3:
17528 case PROCESSOR_PPCE500MC:
17529 case PROCESSOR_PPCE500MC64:
17530 case PROCESSOR_PPCE5500:
17531 case PROCESSOR_PPCE6500:
17532 case PROCESSOR_TITAN:
17533 return 2;
17534 case PROCESSOR_PPC476:
17535 case PROCESSOR_PPC604:
17536 case PROCESSOR_PPC604e:
17537 case PROCESSOR_PPC620:
17538 case PROCESSOR_PPC630:
17539 return 4;
17540 case PROCESSOR_POWER4:
17541 case PROCESSOR_POWER5:
17542 case PROCESSOR_POWER6:
17543 case PROCESSOR_POWER7:
17544 return 5;
17545 case PROCESSOR_POWER8:
17546 return 7;
17547 case PROCESSOR_POWER9:
17548 case PROCESSOR_FUTURE:
17549 return 6;
17550 default:
17551 return 1;
17555 /* Return how many instructions to look ahead for better insn
17556 scheduling. */
17558 static int
17559 rs6000_use_sched_lookahead (void)
17561 switch (rs6000_tune)
17563 case PROCESSOR_PPC8540:
17564 case PROCESSOR_PPC8548:
17565 return 4;
17567 case PROCESSOR_CELL:
17568 return (reload_completed ? 8 : 0);
17570 default:
17571 return 0;
17575 /* We are choosing insn from the ready queue. Return zero if INSN can be
17576 chosen. */
17577 static int
17578 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17580 if (ready_index == 0)
17581 return 0;
17583 if (rs6000_tune != PROCESSOR_CELL)
17584 return 0;
17586 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17588 if (!reload_completed
17589 || is_nonpipeline_insn (insn)
17590 || is_microcoded_insn (insn))
17591 return 1;
17593 return 0;
17596 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17597 and return true. */
17599 static bool
17600 find_mem_ref (rtx pat, rtx *mem_ref)
17602 const char * fmt;
17603 int i, j;
17605 /* stack_tie does not produce any real memory traffic. */
17606 if (tie_operand (pat, VOIDmode))
17607 return false;
17609 if (MEM_P (pat))
17611 *mem_ref = pat;
17612 return true;
17615 /* Recursively process the pattern. */
17616 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17618 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17620 if (fmt[i] == 'e')
17622 if (find_mem_ref (XEXP (pat, i), mem_ref))
17623 return true;
17625 else if (fmt[i] == 'E')
17626 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17628 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17629 return true;
17633 return false;
17636 /* Determine if PAT is a PATTERN of a load insn. */
17638 static bool
17639 is_load_insn1 (rtx pat, rtx *load_mem)
17641 if (!pat)
17642 return false;
17644 if (GET_CODE (pat) == SET)
17645 return find_mem_ref (SET_SRC (pat), load_mem);
17647 if (GET_CODE (pat) == PARALLEL)
17649 int i;
17651 for (i = 0; i < XVECLEN (pat, 0); i++)
17652 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17653 return true;
17656 return false;
17659 /* Determine if INSN loads from memory. */
17661 static bool
17662 is_load_insn (rtx insn, rtx *load_mem)
17664 if (!insn || !INSN_P (insn))
17665 return false;
17667 if (CALL_P (insn))
17668 return false;
17670 return is_load_insn1 (PATTERN (insn), load_mem);
17673 /* Determine if PAT is a PATTERN of a store insn. */
17675 static bool
17676 is_store_insn1 (rtx pat, rtx *str_mem)
17678 if (!pat)
17679 return false;
17681 if (GET_CODE (pat) == SET)
17682 return find_mem_ref (SET_DEST (pat), str_mem);
17684 if (GET_CODE (pat) == PARALLEL)
17686 int i;
17688 for (i = 0; i < XVECLEN (pat, 0); i++)
17689 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17690 return true;
17693 return false;
17696 /* Determine if INSN stores to memory. */
17698 static bool
17699 is_store_insn (rtx insn, rtx *str_mem)
17701 if (!insn || !INSN_P (insn))
17702 return false;
17704 return is_store_insn1 (PATTERN (insn), str_mem);
17707 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17709 static bool
17710 is_power9_pairable_vec_type (enum attr_type type)
17712 switch (type)
17714 case TYPE_VECSIMPLE:
17715 case TYPE_VECCOMPLEX:
17716 case TYPE_VECDIV:
17717 case TYPE_VECCMP:
17718 case TYPE_VECPERM:
17719 case TYPE_VECFLOAT:
17720 case TYPE_VECFDIV:
17721 case TYPE_VECDOUBLE:
17722 return true;
17723 default:
17724 break;
17726 return false;
17729 /* Returns whether the dependence between INSN and NEXT is considered
17730 costly by the given target. */
17732 static bool
17733 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17735 rtx insn;
17736 rtx next;
17737 rtx load_mem, str_mem;
17739 /* If the flag is not enabled - no dependence is considered costly;
17740 allow all dependent insns in the same group.
17741 This is the most aggressive option. */
17742 if (rs6000_sched_costly_dep == no_dep_costly)
17743 return false;
17745 /* If the flag is set to 1 - a dependence is always considered costly;
17746 do not allow dependent instructions in the same group.
17747 This is the most conservative option. */
17748 if (rs6000_sched_costly_dep == all_deps_costly)
17749 return true;
17751 insn = DEP_PRO (dep);
17752 next = DEP_CON (dep);
17754 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17755 && is_load_insn (next, &load_mem)
17756 && is_store_insn (insn, &str_mem))
17757 /* Prevent load after store in the same group. */
17758 return true;
17760 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17761 && is_load_insn (next, &load_mem)
17762 && is_store_insn (insn, &str_mem)
17763 && DEP_TYPE (dep) == REG_DEP_TRUE
17764 && mem_locations_overlap(str_mem, load_mem))
17765 /* Prevent load after store in the same group if it is a true
17766 dependence. */
17767 return true;
17769 /* The flag is set to X; dependences with latency >= X are considered costly,
17770 and will not be scheduled in the same group. */
17771 if (rs6000_sched_costly_dep <= max_dep_latency
17772 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17773 return true;
17775 return false;
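/* For example, setting -msched-costly-dep to 2 makes any dependence
   whose remaining latency (cost - distance) is at least 2 costly,
   while the store_to_load_dep_costly setting rejects every
   store-then-load pair regardless of latency. */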
17778 /* Return the next insn after INSN that is found before TAIL is reached,
17779 skipping any "non-active" insns - insns that will not actually occupy
17780 an issue slot. Return NULL_RTX if such an insn is not found. */
17782 static rtx_insn *
17783 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17785 if (insn == NULL_RTX || insn == tail)
17786 return NULL;
17788 while (1)
17790 insn = NEXT_INSN (insn);
17791 if (insn == NULL_RTX || insn == tail)
17792 return NULL;
17794 if (CALL_P (insn)
17795 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17796 || (NONJUMP_INSN_P (insn)
17797 && GET_CODE (PATTERN (insn)) != USE
17798 && GET_CODE (PATTERN (insn)) != CLOBBER
17799 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17800 break;
17802 return insn;
17805 /* Move instruction at POS to the end of the READY list. */
17807 static void
17808 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17810 rtx_insn *tmp;
17811 int i;
17813 tmp = ready[pos];
17814 for (i = pos; i < lastpos; i++)
17815 ready[i] = ready[i + 1];
17816 ready[lastpos] = tmp;
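/* For example, with ready = {A, B, C, D}, pos = 1 and lastpos = 3, the
   loop slides C and D down one slot and stores B last, giving
   {A, C, D, B}. The scheduler picks insns from the high end of the
   ready list, so the moved insn becomes the next candidate. */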
17819 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17821 static int
17822 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17824 /* For Power6, we need to handle some special cases to try and keep the
17825 store queue from overflowing and triggering expensive flushes.
17827 This code monitors how load and store instructions are being issued
17828 and skews the ready list one way or the other to increase the likelihood
17829 that a desired instruction is issued at the proper time.
17831 A couple of things are done. First, we maintain a "load_store_pendulum"
17832 to track the current state of load/store issue.
17834 - If the pendulum is at zero, then no loads or stores have been
17835 issued in the current cycle so we do nothing.
17837 - If the pendulum is 1, then a single load has been issued in this
17838 cycle and we attempt to locate another load in the ready list to
17839 issue with it.
17841 - If the pendulum is -2, then two stores have already been
17842 issued in this cycle, so we increase the priority of the first load
17843 in the ready list to increase its likelihood of being chosen first
17844 in the next cycle.
17846 - If the pendulum is -1, then a single store has been issued in this
17847 cycle and we attempt to locate another store in the ready list to
17848 issue with it, preferring a store to an adjacent memory location to
17849 facilitate store pairing in the store queue.
17851 - If the pendulum is 2, then two loads have already been
17852 issued in this cycle, so we increase the priority of the first store
17853 in the ready list to increase its likelihood of being chosen first
17854 in the next cycle.
17856 - If the pendulum < -2 or > 2, then do nothing.
17858 Note: This code covers the most common scenarios. There exist
17859 non-load/store instructions which make use of the LSU and which
17860 would need to be accounted for to strictly model the behavior
17861 of the machine. Those instructions are currently unaccounted
17862 for, to help minimize the compile-time overhead of this code.
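/* A short illustrative trace: two stores issued in one cycle drive the
   pendulum to -2, so the first load on the ready list gets a priority
   boost for the next cycle; a single load issued (pendulum 1) instead
   makes us scan the list for a second load to issue alongside it. */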
17864 int pos;
17865 rtx load_mem, str_mem;
17867 if (is_store_insn (last_scheduled_insn, &str_mem))
17868 /* Issuing a store, swing the load_store_pendulum to the left. */
17869 load_store_pendulum--;
17870 else if (is_load_insn (last_scheduled_insn, &load_mem))
17871 /* Issuing a load, swing the load_store_pendulum to the right. */
17872 load_store_pendulum++;
17873 else
17874 return cached_can_issue_more;
17876 /* If the pendulum is balanced, or there is only one instruction on
17877 the ready list, then all is well, so return. */
17878 if ((load_store_pendulum == 0) || (lastpos <= 0))
17879 return cached_can_issue_more;
17881 if (load_store_pendulum == 1)
17883 /* A load has been issued in this cycle. Scan the ready list
17884 for another load to issue with it. */
17885 pos = lastpos;
17887 while (pos >= 0)
17889 if (is_load_insn (ready[pos], &load_mem))
17891 /* Found a load. Move it to the head of the ready list,
17892 and adjust its priority so that it is more likely to
17893 stay there. */
17894 move_to_end_of_ready (ready, pos, lastpos);
17896 if (!sel_sched_p ()
17897 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17898 INSN_PRIORITY (ready[lastpos])++;
17899 break;
17901 pos--;
17904 else if (load_store_pendulum == -2)
17906 /* Two stores have been issued in this cycle. Increase the
17907 priority of the first load in the ready list to favor it for
17908 issuing in the next cycle. */
17909 pos = lastpos;
17911 while (pos >= 0)
17913 if (is_load_insn (ready[pos], &load_mem)
17914 && !sel_sched_p ()
17915 && INSN_PRIORITY_KNOWN (ready[pos]))
17917 INSN_PRIORITY (ready[pos])++;
17919 /* Adjust the pendulum to account for the fact that a load
17920 was found and increased in priority. This is to prevent
17921 increasing the priority of multiple loads. */
17922 load_store_pendulum--;
17924 break;
17926 pos--;
17929 else if (load_store_pendulum == -1)
17931 /* A store has been issued in this cycle. Scan the ready list for
17932 another store to issue with it, preferring a store to an adjacent
17933 memory location. */
17934 int first_store_pos = -1;
17936 pos = lastpos;
17938 while (pos >= 0)
17940 if (is_store_insn (ready[pos], &str_mem))
17942 rtx str_mem2;
17943 /* Maintain the index of the first store found on the
17944 list. */
17945 if (first_store_pos == -1)
17946 first_store_pos = pos;
17948 if (is_store_insn (last_scheduled_insn, &str_mem2)
17949 && adjacent_mem_locations (str_mem, str_mem2))
17951 /* Found an adjacent store. Move it to the head of the
17952 ready list, and adjust its priority so that it is
17953 more likely to stay there. */
17954 move_to_end_of_ready (ready, pos, lastpos);
17956 if (!sel_sched_p ()
17957 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17958 INSN_PRIORITY (ready[lastpos])++;
17960 first_store_pos = -1;
17962 break;
17965 pos--;
17968 if (first_store_pos >= 0)
17970 /* An adjacent store wasn't found, but a non-adjacent store was,
17971 so move the non-adjacent store to the front of the ready
17972 list, and adjust its priority so that it is more likely to
17973 stay there. */
17974 move_to_end_of_ready (ready, first_store_pos, lastpos);
17975 if (!sel_sched_p ()
17976 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17977 INSN_PRIORITY (ready[lastpos])++;
17980 else if (load_store_pendulum == 2)
17982 /* Two loads have been issued in this cycle. Increase the priority
17983 of the first store in the ready list to favor it for issuing in
17984 the next cycle. */
17985 pos = lastpos;
17987 while (pos >= 0)
17989 if (is_store_insn (ready[pos], &str_mem)
17990 && !sel_sched_p ()
17991 && INSN_PRIORITY_KNOWN (ready[pos]))
17993 INSN_PRIORITY (ready[pos])++;
17995 /* Adjust the pendulum to account for the fact that a store
17996 was found and increased in priority. This is to prevent
17997 increasing the priority of multiple stores. */
17998 load_store_pendulum++;
18000 break;
18002 pos--;
18006 return cached_can_issue_more;
18009 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18011 static int
18012 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18014 int pos;
18015 enum attr_type type, type2;
18017 type = get_attr_type (last_scheduled_insn);
18019 /* Try to issue fixed point divides back-to-back in pairs so they will be
18020 routed to separate execution units and execute in parallel. */
18021 if (type == TYPE_DIV && divide_cnt == 0)
18023 /* First divide has been scheduled. */
18024 divide_cnt = 1;
18026 /* Scan the ready list looking for another divide; if found, move it
18027 to the end of the list so it is chosen next. */
18028 pos = lastpos;
18029 while (pos >= 0)
18031 if (recog_memoized (ready[pos]) >= 0
18032 && get_attr_type (ready[pos]) == TYPE_DIV)
18034 move_to_end_of_ready (ready, pos, lastpos);
18035 break;
18037 pos--;
18040 else
18042 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18043 divide_cnt = 0;
18045 /* The best dispatch throughput for vector and vector load insns can be
18046 achieved by interleaving a vector and vector load such that they'll
18047 dispatch to the same superslice. If this pairing cannot be achieved
18048 then it is best to pair vector insns together and vector load insns
18049 together.
18051 To aid in this pairing, vec_pairing maintains the current state with
18052 the following values:
18054 0 : Initial state, no vecload/vector pairing has been started.
18056 1 : A vecload or vector insn has been issued and a candidate for
18057 pairing has been found and moved to the end of the ready
18058 list. */
18059 if (type == TYPE_VECLOAD)
18061 /* Issued a vecload. */
18062 if (vec_pairing == 0)
18064 int vecload_pos = -1;
18065 /* We issued a single vecload, look for a vector insn to pair it
18066 with. If one isn't found, try to pair another vecload. */
18067 pos = lastpos;
18068 while (pos >= 0)
18070 if (recog_memoized (ready[pos]) >= 0)
18072 type2 = get_attr_type (ready[pos]);
18073 if (is_power9_pairable_vec_type (type2))
18075 /* Found a vector insn to pair with, move it to the
18076 end of the ready list so it is scheduled next. */
18077 move_to_end_of_ready (ready, pos, lastpos);
18078 vec_pairing = 1;
18079 return cached_can_issue_more;
18081 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18082 /* Remember position of first vecload seen. */
18083 vecload_pos = pos;
18085 pos--;
18087 if (vecload_pos >= 0)
18089 /* Didn't find a vector to pair with but did find a vecload,
18090 move it to the end of the ready list. */
18091 move_to_end_of_ready (ready, vecload_pos, lastpos);
18092 vec_pairing = 1;
18093 return cached_can_issue_more;
18097 else if (is_power9_pairable_vec_type (type))
18099 /* Issued a vector operation. */
18100 if (vec_pairing == 0)
18102 int vec_pos = -1;
18103 /* We issued a single vector insn, look for a vecload to pair it
18104 with. If one isn't found, try to pair another vector. */
18105 pos = lastpos;
18106 while (pos >= 0)
18108 if (recog_memoized (ready[pos]) >= 0)
18110 type2 = get_attr_type (ready[pos]);
18111 if (type2 == TYPE_VECLOAD)
18113 /* Found a vecload insn to pair with, move it to the
18114 end of the ready list so it is scheduled next. */
18115 move_to_end_of_ready (ready, pos, lastpos);
18116 vec_pairing = 1;
18117 return cached_can_issue_more;
18119 else if (is_power9_pairable_vec_type (type2)
18120 && vec_pos == -1)
18121 /* Remember position of first vector insn seen. */
18122 vec_pos = pos;
18124 pos--;
18126 if (vec_pos >= 0)
18128 /* Didn't find a vecload to pair with but did find a vector
18129 insn, move it to the end of the ready list. */
18130 move_to_end_of_ready (ready, vec_pos, lastpos);
18131 vec_pairing = 1;
18132 return cached_can_issue_more;
18137 /* We've either finished a vec/vecload pair, couldn't find an insn to
18138 continue the current pair, or the last insn had nothing to do
18139 with pairing. In any case, reset the state.
18140 vec_pairing = 0;
18143 return cached_can_issue_more;
18146 /* We are about to begin issuing insns for this clock cycle. */
18148 static int
18149 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18150 rtx_insn **ready ATTRIBUTE_UNUSED,
18151 int *pn_ready ATTRIBUTE_UNUSED,
18152 int clock_var ATTRIBUTE_UNUSED)
18154 int n_ready = *pn_ready;
18156 if (sched_verbose)
18157 fprintf (dump, "// rs6000_sched_reorder :\n");
18159 /* Reorder the ready list if the second-to-last ready insn
18160 is a nonpipeline insn. */
18161 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18163 if (is_nonpipeline_insn (ready[n_ready - 1])
18164 && (recog_memoized (ready[n_ready - 2]) > 0))
18165 /* Simply swap first two insns. */
18166 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18169 if (rs6000_tune == PROCESSOR_POWER6)
18170 load_store_pendulum = 0;
18172 return rs6000_issue_rate ();
18175 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18177 static int
18178 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18179 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18181 if (sched_verbose)
18182 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18184 /* Do Power6 dependent reordering if necessary. */
18185 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18186 return power6_sched_reorder2 (ready, *pn_ready - 1);
18188 /* Do Power9 dependent reordering if necessary. */
18189 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18190 && recog_memoized (last_scheduled_insn) >= 0)
18191 return power9_sched_reorder2 (ready, *pn_ready - 1);
18193 return cached_can_issue_more;
18196 /* Return whether the presence of INSN causes a dispatch group termination
18197 of group WHICH_GROUP.
18199 If WHICH_GROUP == current_group, this function will return true if INSN
18200 causes the termination of the current group (i.e., the dispatch group to
18201 which INSN belongs). This means that INSN will be the last insn in the
18202 group it belongs to.
18204 If WHICH_GROUP == previous_group, this function will return true if INSN
18205 causes the termination of the previous group (i.e., the dispatch group that
18206 precedes the group to which INSN belongs). This means that INSN will be
18207 the first insn in the group it belongs to. */
18209 static bool
18210 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18212 bool first, last;
18214 if (! insn)
18215 return false;
18217 first = insn_must_be_first_in_group (insn);
18218 last = insn_must_be_last_in_group (insn);
18220 if (first && last)
18221 return true;
18223 if (which_group == current_group)
18224 return last;
18225 else if (which_group == previous_group)
18226 return first;
18228 return false;
18232 static bool
18233 insn_must_be_first_in_group (rtx_insn *insn)
18235 enum attr_type type;
18237 if (!insn
18238 || NOTE_P (insn)
18239 || DEBUG_INSN_P (insn)
18240 || GET_CODE (PATTERN (insn)) == USE
18241 || GET_CODE (PATTERN (insn)) == CLOBBER)
18242 return false;
18244 switch (rs6000_tune)
18246 case PROCESSOR_POWER5:
18247 if (is_cracked_insn (insn))
18248 return true;
18249 /* FALLTHRU */
18250 case PROCESSOR_POWER4:
18251 if (is_microcoded_insn (insn))
18252 return true;
18254 if (!rs6000_sched_groups)
18255 return false;
18257 type = get_attr_type (insn);
18259 switch (type)
18261 case TYPE_MFCR:
18262 case TYPE_MFCRF:
18263 case TYPE_MTCR:
18264 case TYPE_CR_LOGICAL:
18265 case TYPE_MTJMPR:
18266 case TYPE_MFJMPR:
18267 case TYPE_DIV:
18268 case TYPE_LOAD_L:
18269 case TYPE_STORE_C:
18270 case TYPE_ISYNC:
18271 case TYPE_SYNC:
18272 return true;
18273 default:
18274 break;
18276 break;
18277 case PROCESSOR_POWER6:
18278 type = get_attr_type (insn);
18280 switch (type)
18282 case TYPE_EXTS:
18283 case TYPE_CNTLZ:
18284 case TYPE_TRAP:
18285 case TYPE_MUL:
18286 case TYPE_INSERT:
18287 case TYPE_FPCOMPARE:
18288 case TYPE_MFCR:
18289 case TYPE_MTCR:
18290 case TYPE_MFJMPR:
18291 case TYPE_MTJMPR:
18292 case TYPE_ISYNC:
18293 case TYPE_SYNC:
18294 case TYPE_LOAD_L:
18295 case TYPE_STORE_C:
18296 return true;
18297 case TYPE_SHIFT:
18298 if (get_attr_dot (insn) == DOT_NO
18299 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18300 return true;
18301 else
18302 break;
18303 case TYPE_DIV:
18304 if (get_attr_size (insn) == SIZE_32)
18305 return true;
18306 else
18307 break;
18308 case TYPE_LOAD:
18309 case TYPE_STORE:
18310 case TYPE_FPLOAD:
18311 case TYPE_FPSTORE:
18312 if (get_attr_update (insn) == UPDATE_YES)
18313 return true;
18314 else
18315 break;
18316 default:
18317 break;
18319 break;
18320 case PROCESSOR_POWER7:
18321 type = get_attr_type (insn);
18323 switch (type)
18325 case TYPE_CR_LOGICAL:
18326 case TYPE_MFCR:
18327 case TYPE_MFCRF:
18328 case TYPE_MTCR:
18329 case TYPE_DIV:
18330 case TYPE_ISYNC:
18331 case TYPE_LOAD_L:
18332 case TYPE_STORE_C:
18333 case TYPE_MFJMPR:
18334 case TYPE_MTJMPR:
18335 return true;
18336 case TYPE_MUL:
18337 case TYPE_SHIFT:
18338 case TYPE_EXTS:
18339 if (get_attr_dot (insn) == DOT_YES)
18340 return true;
18341 else
18342 break;
18343 case TYPE_LOAD:
18344 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18345 || get_attr_update (insn) == UPDATE_YES)
18346 return true;
18347 else
18348 break;
18349 case TYPE_STORE:
18350 case TYPE_FPLOAD:
18351 case TYPE_FPSTORE:
18352 if (get_attr_update (insn) == UPDATE_YES)
18353 return true;
18354 else
18355 break;
18356 default:
18357 break;
18359 break;
18360 case PROCESSOR_POWER8:
18361 type = get_attr_type (insn);
18363 switch (type)
18365 case TYPE_CR_LOGICAL:
18366 case TYPE_MFCR:
18367 case TYPE_MFCRF:
18368 case TYPE_MTCR:
18369 case TYPE_SYNC:
18370 case TYPE_ISYNC:
18371 case TYPE_LOAD_L:
18372 case TYPE_STORE_C:
18373 case TYPE_VECSTORE:
18374 case TYPE_MFJMPR:
18375 case TYPE_MTJMPR:
18376 return true;
18377 case TYPE_SHIFT:
18378 case TYPE_EXTS:
18379 case TYPE_MUL:
18380 if (get_attr_dot (insn) == DOT_YES)
18381 return true;
18382 else
18383 break;
18384 case TYPE_LOAD:
18385 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18386 || get_attr_update (insn) == UPDATE_YES)
18387 return true;
18388 else
18389 break;
18390 case TYPE_STORE:
18391 if (get_attr_update (insn) == UPDATE_YES
18392 && get_attr_indexed (insn) == INDEXED_YES)
18393 return true;
18394 else
18395 break;
18396 default:
18397 break;
18399 break;
18400 default:
18401 break;
18404 return false;
18407 static bool
18408 insn_must_be_last_in_group (rtx_insn *insn)
18410 enum attr_type type;
18412 if (!insn
18413 || NOTE_P (insn)
18414 || DEBUG_INSN_P (insn)
18415 || GET_CODE (PATTERN (insn)) == USE
18416 || GET_CODE (PATTERN (insn)) == CLOBBER)
18417 return false;
18419 switch (rs6000_tune) {
18420 case PROCESSOR_POWER4:
18421 case PROCESSOR_POWER5:
18422 if (is_microcoded_insn (insn))
18423 return true;
18425 if (is_branch_slot_insn (insn))
18426 return true;
18428 break;
18429 case PROCESSOR_POWER6:
18430 type = get_attr_type (insn);
18432 switch (type)
18434 case TYPE_EXTS:
18435 case TYPE_CNTLZ:
18436 case TYPE_TRAP:
18437 case TYPE_MUL:
18438 case TYPE_FPCOMPARE:
18439 case TYPE_MFCR:
18440 case TYPE_MTCR:
18441 case TYPE_MFJMPR:
18442 case TYPE_MTJMPR:
18443 case TYPE_ISYNC:
18444 case TYPE_SYNC:
18445 case TYPE_LOAD_L:
18446 case TYPE_STORE_C:
18447 return true;
18448 case TYPE_SHIFT:
18449 if (get_attr_dot (insn) == DOT_NO
18450 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18451 return true;
18452 else
18453 break;
18454 case TYPE_DIV:
18455 if (get_attr_size (insn) == SIZE_32)
18456 return true;
18457 else
18458 break;
18459 default:
18460 break;
18462 break;
18463 case PROCESSOR_POWER7:
18464 type = get_attr_type (insn);
18466 switch (type)
18468 case TYPE_ISYNC:
18469 case TYPE_SYNC:
18470 case TYPE_LOAD_L:
18471 case TYPE_STORE_C:
18472 return true;
18473 case TYPE_LOAD:
18474 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18475 && get_attr_update (insn) == UPDATE_YES)
18476 return true;
18477 else
18478 break;
18479 case TYPE_STORE:
18480 if (get_attr_update (insn) == UPDATE_YES
18481 && get_attr_indexed (insn) == INDEXED_YES)
18482 return true;
18483 else
18484 break;
18485 default:
18486 break;
18488 break;
18489 case PROCESSOR_POWER8:
18490 type = get_attr_type (insn);
18492 switch (type)
18494 case TYPE_MFCR:
18495 case TYPE_MTCR:
18496 case TYPE_ISYNC:
18497 case TYPE_SYNC:
18498 case TYPE_LOAD_L:
18499 case TYPE_STORE_C:
18500 return true;
18501 case TYPE_LOAD:
18502 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18503 && get_attr_update (insn) == UPDATE_YES)
18504 return true;
18505 else
18506 break;
18507 case TYPE_STORE:
18508 if (get_attr_update (insn) == UPDATE_YES
18509 && get_attr_indexed (insn) == INDEXED_YES)
18510 return true;
18511 else
18512 break;
18513 default:
18514 break;
18516 break;
18517 default:
18518 break;
18521 return false;
18524 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18525 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18527 static bool
18528 is_costly_group (rtx *group_insns, rtx next_insn)
18530 int i;
18531 int issue_rate = rs6000_issue_rate ();
18533 for (i = 0; i < issue_rate; i++)
18535 sd_iterator_def sd_it;
18536 dep_t dep;
18537 rtx insn = group_insns[i];
18539 if (!insn)
18540 continue;
18542 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18544 rtx next = DEP_CON (dep);
18546 if (next == next_insn
18547 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18548 return true;
18552 return false;
18555 /* Helper for the function redefine_groups.
18556 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18557 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18558 to keep it "far" (in a separate group) from GROUP_INSNS, following
18559 one of the following schemes, depending on the value of the flag
18560 -minsert-sched-nops = X:
18561 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18562 in order to force NEXT_INSN into a separate group.
18563 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18564 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18565 insertion (has a group just ended, how many vacant issue slots remain in the
18566 last group, and how many dispatch groups were encountered so far). */
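/* For example (slot counts assumed from a 5-wide dispatch group): with
   sched_finish_regroup_exact, 3 vacant slots and a non-branch NEXT_INSN,
   'can_issue_more - 1' = 2 nops are emitted and the remaining slot is
   the branch slot that no non-branch could use; on Power6/7/8 a single
   group-ending nop is emitted instead. */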
18568 static int
18569 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18570 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18571 int *group_count)
18573 rtx nop;
18574 bool force;
18575 int issue_rate = rs6000_issue_rate ();
18576 bool end = *group_end;
18577 int i;
18579 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18580 return can_issue_more;
18582 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18583 return can_issue_more;
18585 force = is_costly_group (group_insns, next_insn);
18586 if (!force)
18587 return can_issue_more;
18589 if (sched_verbose > 6)
18590 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
18591 *group_count ,can_issue_more);
18593 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18595 if (*group_end)
18596 can_issue_more = 0;
18598 /* Since only a branch can be issued in the last issue_slot, it is
18599 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18600 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18601 in this case the last nop will start a new group and the branch
18602 will be forced to the new group. */
18603 if (can_issue_more && !is_branch_slot_insn (next_insn))
18604 can_issue_more--;
18606 /* Do we have a special group ending nop? */
18607 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18608 || rs6000_tune == PROCESSOR_POWER8)
18610 nop = gen_group_ending_nop ();
18611 emit_insn_before (nop, next_insn);
18612 can_issue_more = 0;
18614 else
18615 while (can_issue_more > 0)
18617 nop = gen_nop ();
18618 emit_insn_before (nop, next_insn);
18619 can_issue_more--;
18622 *group_end = true;
18623 return 0;
18626 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18628 int n_nops = rs6000_sched_insert_nops;
18630 /* Nops can't be issued from the branch slot, so the effective
18631 issue_rate for nops is 'issue_rate - 1'. */
18632 if (can_issue_more == 0)
18633 can_issue_more = issue_rate;
18634 can_issue_more--;
18635 if (can_issue_more == 0)
18637 can_issue_more = issue_rate - 1;
18638 (*group_count)++;
18639 end = true;
18640 for (i = 0; i < issue_rate; i++)
18642 group_insns[i] = 0;
18646 while (n_nops > 0)
18648 nop = gen_nop ();
18649 emit_insn_before (nop, next_insn);
18650 if (can_issue_more == issue_rate - 1) /* new group begins */
18651 end = false;
18652 can_issue_more--;
18653 if (can_issue_more == 0)
18655 can_issue_more = issue_rate - 1;
18656 (*group_count)++;
18657 end = true;
18658 for (i = 0; i < issue_rate; i++)
18660 group_insns[i] = 0;
18663 n_nops--;
18666 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18667 can_issue_more++;
18669 /* Is next_insn going to start a new group? */
18670 *group_end
18671 = (end
18672 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18673 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18674 || (can_issue_more < issue_rate &&
18675 insn_terminates_group_p (next_insn, previous_group)));
18676 if (*group_end && end)
18677 (*group_count)--;
18679 if (sched_verbose > 6)
18680 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18681 *group_count, can_issue_more);
18682 return can_issue_more;
18685 return can_issue_more;
18688 /* This function tries to synch the dispatch groups that the compiler "sees"
18689 with the dispatch groups that the processor dispatcher is expected to
18690 form in practice. It tries to achieve this synchronization by forcing the
18691 estimated processor grouping on the compiler (as opposed to the function
18692 'pad_groups' which tries to force the scheduler's grouping on the processor).
18694 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18695 examines the (estimated) dispatch groups that will be formed by the processor
18696 dispatcher. It marks these group boundaries to reflect the estimated
18697 processor grouping, overriding the grouping that the scheduler had marked.
18698 Depending on the value of the flag '-minsert-sched-nops' this function can
18699 force certain insns into separate groups or force a certain distance between
18700 them by inserting nops, for example, if there exists a "costly dependence"
18701 between the insns.
18703 The function estimates the group boundaries that the processor will form as
18704 follows: It keeps track of how many vacant issue slots are available after
18705 each insn. A subsequent insn will start a new group if one of the following
18706 4 cases applies:
18707 - no more vacant issue slots remain in the current dispatch group.
18708 - only the last issue slot, which is the branch slot, is vacant, but the next
18709 insn is not a branch.
18710 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18711 which means that a cracked insn (which occupies two issue slots) can't be
18712 issued in this group.
18713 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18714 start a new group. */
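/* For example, on a 4-slot dispatcher: after three single-slot insns
   one vacant slot (the branch slot) remains, so a following non-branch
   starts a new group (second case above), and a cracked insn needing
   two slots starts one whenever fewer than two slots remain (third
   case). */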
18716 static int
18717 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18718 rtx_insn *tail)
18720 rtx_insn *insn, *next_insn;
18721 int issue_rate;
18722 int can_issue_more;
18723 int slot, i;
18724 bool group_end;
18725 int group_count = 0;
18726 rtx *group_insns;
18728 /* Initialize. */
18729 issue_rate = rs6000_issue_rate ();
18730 group_insns = XALLOCAVEC (rtx, issue_rate);
18731 for (i = 0; i < issue_rate; i++)
18733 group_insns[i] = 0;
18735 can_issue_more = issue_rate;
18736 slot = 0;
18737 insn = get_next_active_insn (prev_head_insn, tail);
18738 group_end = false;
18740 while (insn != NULL_RTX)
18742 slot = (issue_rate - can_issue_more);
18743 group_insns[slot] = insn;
18744 can_issue_more =
18745 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18746 if (insn_terminates_group_p (insn, current_group))
18747 can_issue_more = 0;
18749 next_insn = get_next_active_insn (insn, tail);
18750 if (next_insn == NULL_RTX)
18751 return group_count + 1;
18753 /* Is next_insn going to start a new group? */
18754 group_end
18755 = (can_issue_more == 0
18756 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18757 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18758 || (can_issue_more < issue_rate &&
18759 insn_terminates_group_p (next_insn, previous_group)));
18761 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18762 next_insn, &group_end, can_issue_more,
18763 &group_count);
18765 if (group_end)
18767 group_count++;
18768 can_issue_more = 0;
18769 for (i = 0; i < issue_rate; i++)
18771 group_insns[i] = 0;
18775 if (GET_MODE (next_insn) == TImode && can_issue_more)
18776 PUT_MODE (next_insn, VOIDmode);
18777 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18778 PUT_MODE (next_insn, TImode);
18780 insn = next_insn;
18781 if (can_issue_more == 0)
18782 can_issue_more = issue_rate;
18783 } /* while */
18785 return group_count;
18788 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18789 dispatch group boundaries that the scheduler had marked. Pad with nops
18790 any dispatch groups which have vacant issue slots, in order to force the
18791 scheduler's grouping on the processor dispatcher. The function
18792 returns the number of dispatch groups found. */
18794 static int
18795 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18796 rtx_insn *tail)
18798 rtx_insn *insn, *next_insn;
18799 rtx nop;
18800 int issue_rate;
18801 int can_issue_more;
18802 int group_end;
18803 int group_count = 0;
18805 /* Initialize issue_rate. */
18806 issue_rate = rs6000_issue_rate ();
18807 can_issue_more = issue_rate;
18809 insn = get_next_active_insn (prev_head_insn, tail);
18810 next_insn = get_next_active_insn (insn, tail);
18812 while (insn != NULL_RTX)
18814 can_issue_more =
18815 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18817 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18819 if (next_insn == NULL_RTX)
18820 break;
18822 if (group_end)
18824 /* If the scheduler had marked group termination at this location
18825 (between insn and next_insn), and neither insn nor next_insn will
18826 force group termination, pad the group with nops to force group
18827 termination. */
18828 if (can_issue_more
18829 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18830 && !insn_terminates_group_p (insn, current_group)
18831 && !insn_terminates_group_p (next_insn, previous_group))
18833 if (!is_branch_slot_insn (next_insn))
18834 can_issue_more--;
18836 while (can_issue_more)
18838 nop = gen_nop ();
18839 emit_insn_before (nop, next_insn);
18840 can_issue_more--;
18844 can_issue_more = issue_rate;
18845 group_count++;
18848 insn = next_insn;
18849 next_insn = get_next_active_insn (insn, tail);
18852 return group_count;
18855 /* We're beginning a new block. Initialize data structures as necessary. */
18857 static void
18858 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18859 int sched_verbose ATTRIBUTE_UNUSED,
18860 int max_ready ATTRIBUTE_UNUSED)
18862 last_scheduled_insn = NULL;
18863 load_store_pendulum = 0;
18864 divide_cnt = 0;
18865 vec_pairing = 0;
18868 /* The following function is called at the end of scheduling BB.
18869 After reload, it inserts nops to enforce insn group bundling. */
18871 static void
18872 rs6000_sched_finish (FILE *dump, int sched_verbose)
18874 int n_groups;
18876 if (sched_verbose)
18877 fprintf (dump, "=== Finishing schedule.\n");
18879 if (reload_completed && rs6000_sched_groups)
18881 /* Do not run sched_finish hook when selective scheduling enabled. */
18882 if (sel_sched_p ())
18883 return;
18885 if (rs6000_sched_insert_nops == sched_finish_none)
18886 return;
18888 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18889 n_groups = pad_groups (dump, sched_verbose,
18890 current_sched_info->prev_head,
18891 current_sched_info->next_tail);
18892 else
18893 n_groups = redefine_groups (dump, sched_verbose,
18894 current_sched_info->prev_head,
18895 current_sched_info->next_tail);
18897 if (sched_verbose >= 6)
18899 fprintf (dump, "ngroups = %d\n", n_groups);
18900 print_rtl (dump, current_sched_info->prev_head);
18901 fprintf (dump, "Done finish_sched\n");
18906 struct rs6000_sched_context
18908 short cached_can_issue_more;
18909 rtx_insn *last_scheduled_insn;
18910 int load_store_pendulum;
18911 int divide_cnt;
18912 int vec_pairing;
18915 typedef struct rs6000_sched_context rs6000_sched_context_def;
18916 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18918 /* Allocate storage for a new scheduling context. */
18919 static void *
18920 rs6000_alloc_sched_context (void)
18922 return xmalloc (sizeof (rs6000_sched_context_def));
18925 /* If CLEAN_P is true then initialize _SC with clean data;
18926 otherwise initialize it from the global context. */
18927 static void
18928 rs6000_init_sched_context (void *_sc, bool clean_p)
18930 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18932 if (clean_p)
18934 sc->cached_can_issue_more = 0;
18935 sc->last_scheduled_insn = NULL;
18936 sc->load_store_pendulum = 0;
18937 sc->divide_cnt = 0;
18938 sc->vec_pairing = 0;
18940 else
18942 sc->cached_can_issue_more = cached_can_issue_more;
18943 sc->last_scheduled_insn = last_scheduled_insn;
18944 sc->load_store_pendulum = load_store_pendulum;
18945 sc->divide_cnt = divide_cnt;
18946 sc->vec_pairing = vec_pairing;
18950 /* Sets the global scheduling context to the one pointed to by _SC. */
18951 static void
18952 rs6000_set_sched_context (void *_sc)
18954 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18956 gcc_assert (sc != NULL);
18958 cached_can_issue_more = sc->cached_can_issue_more;
18959 last_scheduled_insn = sc->last_scheduled_insn;
18960 load_store_pendulum = sc->load_store_pendulum;
18961 divide_cnt = sc->divide_cnt;
18962 vec_pairing = sc->vec_pairing;
18965 /* Free _SC. */
18966 static void
18967 rs6000_free_sched_context (void *_sc)
18969 gcc_assert (_sc != NULL);
18971 free (_sc);
18974 static bool
18975 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18977 switch (get_attr_type (insn))
18979 case TYPE_DIV:
18980 case TYPE_SDIV:
18981 case TYPE_DDIV:
18982 case TYPE_VECDIV:
18983 case TYPE_SSQRT:
18984 case TYPE_DSQRT:
18985 return false;
18987 default:
18988 return true;
18992 /* Length in bytes of the trampoline for entering a nested function. */
18995 rs6000_trampoline_size (void)
18997 int ret = 0;
18999 switch (DEFAULT_ABI)
19001 default:
19002 gcc_unreachable ();
19004 case ABI_AIX:
19005 ret = (TARGET_32BIT) ? 12 : 24;
19006 break;
19008 case ABI_ELFv2:
19009 gcc_assert (!TARGET_32BIT);
19010 ret = 32;
19011 break;
19013 case ABI_DARWIN:
19014 case ABI_V4:
19015 ret = (TARGET_32BIT) ? 40 : 48;
19016 break;
19019 return ret;
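/* For reference, a hypothetical piece of user code that requires a
   trampoline: taking the address of a nested function (a GNU C
   extension), so that the static chain can be materialized before
   control reaches the nested code.

     int outer (int x)
     {
       int inner (int y) { return x + y; }   // reads outer's frame
       int (*fp) (int) = inner;              // address taken: trampoline
       return fp (1);
     }
*/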
19022 /* Emit RTL insns to initialize the variable parts of a trampoline.
19023 FNADDR is an RTX for the address of the function's pure code.
19024 CXT is an RTX for the static chain value for the function. */
19026 static void
19027 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19029 int regsize = (TARGET_32BIT) ? 4 : 8;
19030 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19031 rtx ctx_reg = force_reg (Pmode, cxt);
19032 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19034 switch (DEFAULT_ABI)
19036 default:
19037 gcc_unreachable ();
19039 /* Under AIX, just build the three-word function descriptor. */
19040 case ABI_AIX:
19042 rtx fnmem, fn_reg, toc_reg;
19044 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19045 error ("you cannot take the address of a nested function if you use "
19046 "the %qs option", "-mno-pointers-to-nested-functions");
19048 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19049 fn_reg = gen_reg_rtx (Pmode);
19050 toc_reg = gen_reg_rtx (Pmode);
19052 /* Macro to shorten the code expansions below. */
19053 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19055 m_tramp = replace_equiv_address (m_tramp, addr);
19057 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19058 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19059 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19060 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19061 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19063 # undef MEM_PLUS
19065 break;
19067 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19068 case ABI_ELFv2:
19069 case ABI_DARWIN:
19070 case ABI_V4:
19071 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19072 LCT_NORMAL, VOIDmode,
19073 addr, Pmode,
19074 GEN_INT (rs6000_trampoline_size ()), SImode,
19075 fnaddr, Pmode,
19076 ctx_reg, Pmode);
19077 break;
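/* Illustrative sketch (not from the sources): the layout the AIX case
   above produces, assuming 64-bit code (regsize == 8).  The trampoline
   is itself a three-word function descriptor whose third word carries
   the static chain:

     m_tramp + 0   <- function entry address (word 0 of FNADDR's descriptor)
     m_tramp + 8   <- TOC pointer            (word 1 of FNADDR's descriptor)
     m_tramp + 16  <- static chain value     (CXT)  */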
19082 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19083 identifier as an argument, so the front end shouldn't look it up. */
19085 static bool
19086 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19088 return is_attribute_p ("altivec", attr_id);
19091 /* Handle the "altivec" attribute. The attribute may have
19092 arguments as follows:
19094 __attribute__((altivec(vector__)))
19095 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19096 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19098 and may appear more than once (e.g., 'vector bool char') in a
19099 given declaration. */
19101 static tree
19102 rs6000_handle_altivec_attribute (tree *node,
19103 tree name ATTRIBUTE_UNUSED,
19104 tree args,
19105 int flags ATTRIBUTE_UNUSED,
19106 bool *no_add_attrs)
19108 tree type = *node, result = NULL_TREE;
19109 machine_mode mode;
19110 int unsigned_p;
19111 char altivec_type
19112 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19113 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19114 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19115 : '?');
19117 while (POINTER_TYPE_P (type)
19118 || TREE_CODE (type) == FUNCTION_TYPE
19119 || TREE_CODE (type) == METHOD_TYPE
19120 || TREE_CODE (type) == ARRAY_TYPE)
19121 type = TREE_TYPE (type);
19123 mode = TYPE_MODE (type);
19125 /* Check for invalid AltiVec type qualifiers. */
19126 if (type == long_double_type_node)
19127 error ("use of %<long double%> in AltiVec types is invalid");
19128 else if (type == boolean_type_node)
19129 error ("use of boolean types in AltiVec types is invalid");
19130 else if (TREE_CODE (type) == COMPLEX_TYPE)
19131 error ("use of %<complex%> in AltiVec types is invalid");
19132 else if (DECIMAL_FLOAT_MODE_P (mode))
19133 error ("use of decimal floating point types in AltiVec types is invalid");
19134 else if (!TARGET_VSX)
19136 if (type == long_unsigned_type_node || type == long_integer_type_node)
19138 if (TARGET_64BIT)
19139 error ("use of %<long%> in AltiVec types is invalid for "
19140 "64-bit code without %qs", "-mvsx");
19141 else if (rs6000_warn_altivec_long)
19142 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19143 "use %<int%>");
19145 else if (type == long_long_unsigned_type_node
19146 || type == long_long_integer_type_node)
19147 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19148 "-mvsx");
19149 else if (type == double_type_node)
19150 error ("use of %<double%> in AltiVec types is invalid without %qs",
19151 "-mvsx");
19154 switch (altivec_type)
19156 case 'v':
19157 unsigned_p = TYPE_UNSIGNED (type);
19158 switch (mode)
19160 case E_TImode:
19161 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19162 break;
19163 case E_DImode:
19164 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19165 break;
19166 case E_SImode:
19167 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19168 break;
19169 case E_HImode:
19170 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19171 break;
19172 case E_QImode:
19173 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19174 break;
19175 case E_SFmode: result = V4SF_type_node; break;
19176 case E_DFmode: result = V2DF_type_node; break;
19177 /* If the user says 'vector int bool', we may be handed the 'bool'
19178 attribute _before_ the 'vector' attribute, and so select the
19179 proper type in the 'b' case below. */
19180 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19181 case E_V2DImode: case E_V2DFmode:
19182 result = type;
19183 default: break;
19185 break;
19186 case 'b':
19187 switch (mode)
19189 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19190 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19191 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19192 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19193 default: break;
19195 break;
19196 case 'p':
19197 switch (mode)
19199 case E_V8HImode: result = pixel_V8HI_type_node;
19200 default: break;
19202 default: break;
19205 /* Propagate qualifiers attached to the element type
19206 onto the vector type. */
19207 if (result && result != type && TYPE_QUALS (type))
19208 result = build_qualified_type (result, TYPE_QUALS (type));
19210 *no_add_attrs = true; /* No need to hang on to the attribute. */
19212 if (result)
19213 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19215 return NULL_TREE;
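/* Illustrative examples of what the handler above computes; the front
   end lowers the AltiVec keywords to this attribute roughly as:

     vector unsigned int v;   altivec(vector__) on unsigned int
                              -> unsigned_V4SI_type_node (from E_SImode)
     vector bool short b;     altivec(bool__)
                              -> bool_V8HI_type_node (from E_HImode)
     vector pixel p;          altivec(pixel__)
                              -> pixel_V8HI_type_node  */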
19218 /* AltiVec defines five built-in scalar types that serve as vector
19219 elements; we must teach the compiler how to mangle them. The 128-bit
19220 floating point mangling is target-specific as well. */
19222 static const char *
19223 rs6000_mangle_type (const_tree type)
19225 type = TYPE_MAIN_VARIANT (type);
19227 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19228 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19229 return NULL;
19231 if (type == bool_char_type_node) return "U6__boolc";
19232 if (type == bool_short_type_node) return "U6__bools";
19233 if (type == pixel_type_node) return "u7__pixel";
19234 if (type == bool_int_type_node) return "U6__booli";
19235 if (type == bool_long_long_type_node) return "U6__boolx";
19237 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19238 return "g";
19239 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19240 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19242 /* For all other types, use the default mangling. */
19243 return NULL;
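/* Mangling examples (illustrative): a C++ function void f (long double)
   built with IBM double-double long double mangles as _Z1fg, while
   void f (__ieee128) mangles as _Z1fu9__ieee128; vector bool int
   elements contribute "U6__booli" and vector pixel "u7__pixel".  */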
19246 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19247 struct attribute_spec.handler. */
19249 static tree
19250 rs6000_handle_longcall_attribute (tree *node, tree name,
19251 tree args ATTRIBUTE_UNUSED,
19252 int flags ATTRIBUTE_UNUSED,
19253 bool *no_add_attrs)
19255 if (TREE_CODE (*node) != FUNCTION_TYPE
19256 && TREE_CODE (*node) != FIELD_DECL
19257 && TREE_CODE (*node) != TYPE_DECL)
19259 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19260 name);
19261 *no_add_attrs = true;
19264 return NULL_TREE;
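/* Typical use (an illustrative declaration, not from the sources):

     extern void far_away (void) __attribute__ ((longcall));

   which forces calls through a register instead of assuming the callee
   is within reach of a direct bl instruction.  */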
19267 /* Set longcall attributes on all functions declared when
19268 rs6000_default_long_calls is true. */
19269 static void
19270 rs6000_set_default_type_attributes (tree type)
19272 if (rs6000_default_long_calls
19273 && (TREE_CODE (type) == FUNCTION_TYPE
19274 || TREE_CODE (type) == METHOD_TYPE))
19275 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19276 NULL_TREE,
19277 TYPE_ATTRIBUTES (type));
19279 #if TARGET_MACHO
19280 darwin_set_default_type_attributes (type);
19281 #endif
19284 /* Return a reference suitable for calling a function with the
19285 longcall attribute. */
19287 static rtx
19288 rs6000_longcall_ref (rtx call_ref, rtx arg)
19290 /* System V adds '.' to the internal name, so skip any leading dots. */
19291 const char *call_name = XSTR (call_ref, 0);
19292 if (*call_name == '.')
19294 while (*call_name == '.')
19295 call_name++;
19297 tree node = get_identifier (call_name);
19298 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19301 if (TARGET_PLTSEQ)
19303 rtx base = const0_rtx;
19304 int regno = 12;
19305 if (rs6000_pcrel_p (cfun))
19307 rtx reg = gen_rtx_REG (Pmode, regno);
19308 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19309 gen_rtvec (3, base, call_ref, arg),
19310 UNSPECV_PLT_PCREL);
19311 emit_insn (gen_rtx_SET (reg, u));
19312 return reg;
19315 if (DEFAULT_ABI == ABI_ELFv2)
19316 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19317 else
19319 if (flag_pic)
19320 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19321 regno = 11;
19323 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19324 may be used by a function global entry point. For SysV4, r11
19325 is used by __glink_PLTresolve lazy resolver entry. */
19326 rtx reg = gen_rtx_REG (Pmode, regno);
19327 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19328 UNSPEC_PLT16_HA);
19329 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
19330 gen_rtvec (3, reg, call_ref, arg),
19331 UNSPECV_PLT16_LO);
19332 emit_insn (gen_rtx_SET (reg, hi));
19333 emit_insn (gen_rtx_SET (reg, lo));
19334 return reg;
19337 return force_reg (Pmode, call_ref);
19340 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19341 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19342 #endif
19344 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19345 struct attribute_spec.handler. */
19346 static tree
19347 rs6000_handle_struct_attribute (tree *node, tree name,
19348 tree args ATTRIBUTE_UNUSED,
19349 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19351 tree *type = NULL;
19352 if (DECL_P (*node))
19354 if (TREE_CODE (*node) == TYPE_DECL)
19355 type = &TREE_TYPE (*node);
19357 else
19358 type = node;
19360 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19361 || TREE_CODE (*type) == UNION_TYPE)))
19363 warning (OPT_Wattributes, "%qE attribute ignored", name);
19364 *no_add_attrs = true;
19367 else if ((is_attribute_p ("ms_struct", name)
19368 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19369 || ((is_attribute_p ("gcc_struct", name)
19370 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19372 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19373 name);
19374 *no_add_attrs = true;
19377 return NULL_TREE;
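/* Usage sketch (illustrative): the two attributes pick the bit-field
   layout engine for a record and are mutually exclusive on one type:

     struct __attribute__ ((ms_struct))  A { char c; int i : 7; };
     struct __attribute__ ((gcc_struct)) B { char c; int i : 7; };  */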
19380 static bool
19381 rs6000_ms_bitfield_layout_p (const_tree record_type)
19383 return ((TARGET_USE_MS_BITFIELD_LAYOUT
19384 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19385 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19388 #ifdef USING_ELFOS_H
19390 /* A get_unnamed_section callback, used for switching to toc_section. */
19392 static void
19393 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19395 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19396 && TARGET_MINIMAL_TOC)
19398 if (!toc_initialized)
19400 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19401 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19402 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19403 fprintf (asm_out_file, "\t.tc ");
19404 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19405 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19406 fprintf (asm_out_file, "\n");
19408 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19409 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19410 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19411 fprintf (asm_out_file, " = .+32768\n");
19412 toc_initialized = 1;
19414 else
19415 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19417 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19419 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19420 if (!toc_initialized)
19422 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19423 toc_initialized = 1;
19426 else
19428 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19429 if (!toc_initialized)
19431 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19432 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19433 fprintf (asm_out_file, " = .+32768\n");
19434 toc_initialized = 1;
19439 /* Implement TARGET_ASM_INIT_SECTIONS. */
19441 static void
19442 rs6000_elf_asm_init_sections (void)
19444 toc_section
19445 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19447 sdata2_section
19448 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19449 SDATA2_SECTION_ASM_OP);
19452 /* Implement TARGET_SELECT_RTX_SECTION. */
19454 static section *
19455 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19456 unsigned HOST_WIDE_INT align)
19458 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19459 return toc_section;
19460 else
19461 return default_elf_select_rtx_section (mode, x, align);
19464 /* For a SYMBOL_REF, set generic flags and then perform some
19465 target-specific processing.
19467 When the AIX ABI is requested on a non-AIX system, replace the
19468 function name with the real name (with a leading .) rather than the
19469 function descriptor name. This saves a lot of overriding code to
19470 read the prefixes. */
19472 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19473 static void
19474 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19476 default_encode_section_info (decl, rtl, first);
19478 if (first
19479 && TREE_CODE (decl) == FUNCTION_DECL
19480 && !TARGET_AIX
19481 && DEFAULT_ABI == ABI_AIX)
19483 rtx sym_ref = XEXP (rtl, 0);
19484 size_t len = strlen (XSTR (sym_ref, 0));
19485 char *str = XALLOCAVEC (char, len + 2);
19486 str[0] = '.';
19487 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19488 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19492 static inline bool
19493 compare_section_name (const char *section, const char *templ)
19495 int len;
19497 len = strlen (templ);
19498 return (strncmp (section, templ, len) == 0
19499 && (section[len] == 0 || section[len] == '.'));
19502 bool
19503 rs6000_elf_in_small_data_p (const_tree decl)
19505 if (rs6000_sdata == SDATA_NONE)
19506 return false;
19508 /* We want to merge strings, so we never consider them small data. */
19509 if (TREE_CODE (decl) == STRING_CST)
19510 return false;
19512 /* Functions are never in the small data area. */
19513 if (TREE_CODE (decl) == FUNCTION_DECL)
19514 return false;
19516 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19518 const char *section = DECL_SECTION_NAME (decl);
19519 if (compare_section_name (section, ".sdata")
19520 || compare_section_name (section, ".sdata2")
19521 || compare_section_name (section, ".gnu.linkonce.s")
19522 || compare_section_name (section, ".sbss")
19523 || compare_section_name (section, ".sbss2")
19524 || compare_section_name (section, ".gnu.linkonce.sb")
19525 || strcmp (section, ".PPC.EMB.sdata0") == 0
19526 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19527 return true;
19529 else
19531 /* If we are told not to put readonly data in sdata, then don't. */
19532 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19533 && !rs6000_readonly_in_sdata)
19534 return false;
19536 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19538 if (size > 0
19539 && size <= g_switch_value
19540 /* If it's not public, and we're not going to reference it there,
19541 there's no need to put it in the small data section. */
19542 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19543 return true;
19546 return false;
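/* Illustrative declarations this predicate accepts when compiling with
   -msdata and, say, -G 8 (the names are hypothetical):

     int counter;                                      // size <= 8: small data
     int table[64];                                    // too large: regular data
     int pinned __attribute__ ((section (".sdata")));  // explicit small section  */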
19549 #endif /* USING_ELFOS_H */
19551 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19553 static bool
19554 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19556 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19559 /* Do not place thread-local symbols refs in the object blocks. */
19561 static bool
19562 rs6000_use_blocks_for_decl_p (const_tree decl)
19564 return !DECL_THREAD_LOCAL_P (decl);
19567 /* Return a REG that occurs in ADDR with coefficient 1.
19568 ADDR can be effectively incremented by incrementing REG.
19570 r0 is special and we must not select it as an address
19571 register by this routine since our caller will try to
19572 increment the returned register via an "la" instruction. */
19574 rtx
19575 find_addr_reg (rtx addr)
19577 while (GET_CODE (addr) == PLUS)
19579 if (REG_P (XEXP (addr, 0))
19580 && REGNO (XEXP (addr, 0)) != 0)
19581 addr = XEXP (addr, 0);
19582 else if (REG_P (XEXP (addr, 1))
19583 && REGNO (XEXP (addr, 1)) != 0)
19584 addr = XEXP (addr, 1);
19585 else if (CONSTANT_P (XEXP (addr, 0)))
19586 addr = XEXP (addr, 1);
19587 else if (CONSTANT_P (XEXP (addr, 1)))
19588 addr = XEXP (addr, 0);
19589 else
19590 gcc_unreachable ();
19592 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19593 return addr;
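/* Example (illustrative): for ADDR == (plus (reg 9) (const_int 16)) the
   loop above drops the constant and returns (reg 9).  r0 is rejected
   because as a base register it reads as literal zero, so "la" could
   not increment through it.  */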
19596 void
19597 rs6000_fatal_bad_address (rtx op)
19599 fatal_insn ("bad address", op);
19602 #if TARGET_MACHO
19604 vec<branch_island, va_gc> *branch_islands;
19606 /* Remember to generate a branch island for far calls to the given
19607 function. */
19609 static void
19610 add_compiler_branch_island (tree label_name, tree function_name,
19611 int line_number)
19613 branch_island bi = {function_name, label_name, line_number};
19614 vec_safe_push (branch_islands, bi);
19617 /* NO_PREVIOUS_DEF checks whether the function name is already in the
19618 branch-island list. */
19620 static int
19621 no_previous_def (tree function_name)
19623 branch_island *bi;
19624 unsigned ix;
19626 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19627 if (function_name == bi->function_name)
19628 return 0;
19629 return 1;
19632 /* GET_PREV_LABEL gets the label name from the previous definition of
19633 the function. */
19635 static tree
19636 get_prev_label (tree function_name)
19638 branch_island *bi;
19639 unsigned ix;
19641 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19642 if (function_name == bi->function_name)
19643 return bi->label_name;
19644 return NULL_TREE;
19647 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19649 void
19650 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19652 unsigned int length;
19653 char *symbol_name, *lazy_ptr_name;
19654 char *local_label_0;
19655 static unsigned label = 0;
19657 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19658 symb = (*targetm.strip_name_encoding) (symb);
19660 length = strlen (symb);
19661 symbol_name = XALLOCAVEC (char, length + 32);
19662 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19664 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19665 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19667 if (MACHOPIC_PURE)
19669 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19670 fprintf (file, "\t.align 5\n");
19672 fprintf (file, "%s:\n", stub);
19673 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19675 label++;
19676 local_label_0 = XALLOCAVEC (char, 16);
19677 sprintf (local_label_0, "L%u$spb", label);
19679 fprintf (file, "\tmflr r0\n");
19680 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19681 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19682 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19683 lazy_ptr_name, local_label_0);
19684 fprintf (file, "\tmtlr r0\n");
19685 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19686 (TARGET_64BIT ? "ldu" : "lwzu"),
19687 lazy_ptr_name, local_label_0);
19688 fprintf (file, "\tmtctr r12\n");
19689 fprintf (file, "\tbctr\n");
19691 else /* mdynamic-no-pic or mkernel. */
19693 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19694 fprintf (file, "\t.align 4\n");
19696 fprintf (file, "%s:\n", stub);
19697 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19699 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19700 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19701 (TARGET_64BIT ? "ldu" : "lwzu"),
19702 lazy_ptr_name);
19703 fprintf (file, "\tmtctr r12\n");
19704 fprintf (file, "\tbctr\n");
19707 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19708 fprintf (file, "%s:\n", lazy_ptr_name);
19709 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19710 fprintf (file, "%sdyld_stub_binding_helper\n",
19711 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19714 /* Legitimize PIC addresses. If the address is already
19715 position-independent, we return ORIG. Newly generated
19716 position-independent addresses go into a reg. This is REG if nonzero,
19717 otherwise we allocate register(s) as necessary. */
19719 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
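/* SMALL_INT accepts exactly the 16-bit signed displacements: adding
   0x8000 maps [-0x8000, 0x7fff] onto [0, 0xffff], so for example
   0x7fff and -0x8000 pass while 0x8000 does not.  */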
19721 rtx
19722 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19723 rtx reg)
19725 rtx base, offset;
19727 if (reg == NULL && !reload_completed)
19728 reg = gen_reg_rtx (Pmode);
19730 if (GET_CODE (orig) == CONST)
19732 rtx reg_temp;
19734 if (GET_CODE (XEXP (orig, 0)) == PLUS
19735 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19736 return orig;
19738 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19740 /* Use a different reg for the intermediate value, as
19741 it will be marked UNCHANGING. */
19742 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19743 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19744 Pmode, reg_temp);
19745 offset =
19746 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19747 Pmode, reg);
19749 if (CONST_INT_P (offset))
19751 if (SMALL_INT (offset))
19752 return plus_constant (Pmode, base, INTVAL (offset));
19753 else if (!reload_completed)
19754 offset = force_reg (Pmode, offset);
19755 else
19757 rtx mem = force_const_mem (Pmode, orig);
19758 return machopic_legitimize_pic_address (mem, Pmode, reg);
19761 return gen_rtx_PLUS (Pmode, base, offset);
19764 /* Fall back on generic machopic code. */
19765 return machopic_legitimize_pic_address (orig, mode, reg);
19768 /* Output a .machine directive for the Darwin assembler, and call
19769 the generic start_file routine. */
19771 static void
19772 rs6000_darwin_file_start (void)
19774 static const struct
19776 const char *arg;
19777 const char *name;
19778 HOST_WIDE_INT if_set;
19779 } mapping[] = {
19780 { "ppc64", "ppc64", MASK_64BIT },
19781 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19782 { "power4", "ppc970", 0 },
19783 { "G5", "ppc970", 0 },
19784 { "7450", "ppc7450", 0 },
19785 { "7400", "ppc7400", MASK_ALTIVEC },
19786 { "G4", "ppc7400", 0 },
19787 { "750", "ppc750", 0 },
19788 { "740", "ppc750", 0 },
19789 { "G3", "ppc750", 0 },
19790 { "604e", "ppc604e", 0 },
19791 { "604", "ppc604", 0 },
19792 { "603e", "ppc603", 0 },
19793 { "603", "ppc603", 0 },
19794 { "601", "ppc601", 0 },
19795 { NULL, "ppc", 0 } };
19796 const char *cpu_id = "";
19797 size_t i;
19799 rs6000_file_start ();
19800 darwin_file_start ();
19802 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19804 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19805 cpu_id = rs6000_default_cpu;
19807 if (global_options_set.x_rs6000_cpu_index)
19808 cpu_id = processor_target_table[rs6000_cpu_index].name;
19810 /* Look through the mapping array. Pick the first name that either
19811 matches the argument, has a bit set in IF_SET that is also set
19812 in the target flags, or has a NULL name. */
19814 i = 0;
19815 while (mapping[i].arg != NULL
19816 && strcmp (mapping[i].arg, cpu_id) != 0
19817 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19818 i++;
19820 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19823 #endif /* TARGET_MACHO */
19825 #if TARGET_ELF
19826 static int
19827 rs6000_elf_reloc_rw_mask (void)
19829 if (flag_pic)
19830 return 3;
19831 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19832 return 2;
19833 else
19834 return 0;
19837 /* Record an element in the table of global constructors. SYMBOL is
19838 a SYMBOL_REF of the function to be called; PRIORITY is a number
19839 between 0 and MAX_INIT_PRIORITY.
19841 This differs from default_named_section_asm_out_constructor in
19842 that we have special handling for -mrelocatable. */
19844 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19845 static void
19846 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19848 const char *section = ".ctors";
19849 char buf[18];
19851 if (priority != DEFAULT_INIT_PRIORITY)
19853 sprintf (buf, ".ctors.%.5u",
19854 /* Invert the numbering so the linker puts us in the proper
19855 order; constructors are run from right to left, and the
19856 linker sorts in increasing order. */
19857 MAX_INIT_PRIORITY - priority);
19858 section = buf;
19861 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19862 assemble_align (POINTER_SIZE);
19864 if (DEFAULT_ABI == ABI_V4
19865 && (TARGET_RELOCATABLE || flag_pic > 1))
19867 fputs ("\t.long (", asm_out_file);
19868 output_addr_const (asm_out_file, symbol);
19869 fputs (")@fixup\n", asm_out_file);
19871 else
19872 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
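/* Worked example: with MAX_INIT_PRIORITY == 65535, a constructor of
   priority 101 is placed in section ".ctors.65434" (65535 - 101), so
   the linker's increasing sort runs higher-priority constructors
   first.  */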
19875 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19876 static void
19877 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19879 const char *section = ".dtors";
19880 char buf[18];
19882 if (priority != DEFAULT_INIT_PRIORITY)
19884 sprintf (buf, ".dtors.%.5u",
19885 /* Invert the numbering so the linker puts us in the proper
19886 order; constructors are run from right to left, and the
19887 linker sorts in increasing order. */
19888 MAX_INIT_PRIORITY - priority);
19889 section = buf;
19892 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19893 assemble_align (POINTER_SIZE);
19895 if (DEFAULT_ABI == ABI_V4
19896 && (TARGET_RELOCATABLE || flag_pic > 1))
19898 fputs ("\t.long (", asm_out_file);
19899 output_addr_const (asm_out_file, symbol);
19900 fputs (")@fixup\n", asm_out_file);
19902 else
19903 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19906 void
19907 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19909 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19911 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19912 ASM_OUTPUT_LABEL (file, name);
19913 fputs (DOUBLE_INT_ASM_OP, file);
19914 rs6000_output_function_entry (file, name);
19915 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19916 if (DOT_SYMBOLS)
19918 fputs ("\t.size\t", file);
19919 assemble_name (file, name);
19920 fputs (",24\n\t.type\t.", file);
19921 assemble_name (file, name);
19922 fputs (",@function\n", file);
19923 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19925 fputs ("\t.globl\t.", file);
19926 assemble_name (file, name);
19927 putc ('\n', file);
19930 else
19931 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19932 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19933 rs6000_output_function_entry (file, name);
19934 fputs (":\n", file);
19935 return;
19938 int uses_toc;
19939 if (DEFAULT_ABI == ABI_V4
19940 && (TARGET_RELOCATABLE || flag_pic > 1)
19941 && !TARGET_SECURE_PLT
19942 && (!constant_pool_empty_p () || crtl->profile)
19943 && (uses_toc = uses_TOC ()))
19945 char buf[256];
19947 if (uses_toc == 2)
19948 switch_to_other_text_partition ();
19949 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19951 fprintf (file, "\t.long ");
19952 assemble_name (file, toc_label_name);
19953 need_toc_init = 1;
19954 putc ('-', file);
19955 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19956 assemble_name (file, buf);
19957 putc ('\n', file);
19958 if (uses_toc == 2)
19959 switch_to_other_text_partition ();
19962 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19963 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19965 if (TARGET_CMODEL == CMODEL_LARGE
19966 && rs6000_global_entry_point_prologue_needed_p ())
19968 char buf[256];
19970 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19972 fprintf (file, "\t.quad .TOC.-");
19973 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19974 assemble_name (file, buf);
19975 putc ('\n', file);
19978 if (DEFAULT_ABI == ABI_AIX)
19980 const char *desc_name, *orig_name;
19982 orig_name = (*targetm.strip_name_encoding) (name);
19983 desc_name = orig_name;
19984 while (*desc_name == '.')
19985 desc_name++;
19987 if (TREE_PUBLIC (decl))
19988 fprintf (file, "\t.globl %s\n", desc_name);
19990 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19991 fprintf (file, "%s:\n", desc_name);
19992 fprintf (file, "\t.long %s\n", orig_name);
19993 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19994 fputs ("\t.long 0\n", file);
19995 fprintf (file, "\t.previous\n");
19997 ASM_OUTPUT_LABEL (file, name);
20000 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20001 static void
20002 rs6000_elf_file_end (void)
20004 #ifdef HAVE_AS_GNU_ATTRIBUTE
20005 /* ??? The value emitted depends on options active at file end.
20006 Assume anyone using #pragma or attributes that might change
20007 options knows what they are doing. */
20008 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20009 && rs6000_passes_float)
20011 int fp;
20013 if (TARGET_HARD_FLOAT)
20014 fp = 1;
20015 else
20016 fp = 2;
20017 if (rs6000_passes_long_double)
20019 if (!TARGET_LONG_DOUBLE_128)
20020 fp |= 2 * 4;
20021 else if (TARGET_IEEEQUAD)
20022 fp |= 3 * 4;
20023 else
20024 fp |= 1 * 4;
20026 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
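/* For example (the values follow from the computation above): hard
   float with IEEE 128-bit long double emits ".gnu_attribute 4, 13"
   (1 | 3*4), while soft float with 64-bit long double emits
   ".gnu_attribute 4, 10" (2 | 2*4).  */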
20028 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20030 if (rs6000_passes_vector)
20031 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20032 (TARGET_ALTIVEC_ABI ? 2 : 1));
20033 if (rs6000_returns_struct)
20034 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20035 aix_struct_return ? 2 : 1);
20037 #endif
20038 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20039 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20040 file_end_indicate_exec_stack ();
20041 #endif
20043 if (flag_split_stack)
20044 file_end_indicate_split_stack ();
20046 if (cpu_builtin_p)
20048 /* We have expanded a CPU builtin, so we need to emit a reference to
20049 the special symbol that libc uses to declare that it supports the
20050 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 fields in the TCB. */
20051 switch_to_section (data_section);
20052 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20053 fprintf (asm_out_file, "\t%s %s\n",
20054 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20057 #endif
20059 #if TARGET_XCOFF
20061 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20062 #define HAVE_XCOFF_DWARF_EXTRAS 0
20063 #endif
20065 static enum unwind_info_type
20066 rs6000_xcoff_debug_unwind_info (void)
20068 return UI_NONE;
20071 static void
20072 rs6000_xcoff_asm_output_anchor (rtx symbol)
20074 char buffer[100];
20076 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20077 SYMBOL_REF_BLOCK_OFFSET (symbol));
20078 fprintf (asm_out_file, "%s", SET_ASM_OP);
20079 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20080 fprintf (asm_out_file, ",");
20081 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20082 fprintf (asm_out_file, "\n");
20085 static void
20086 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20088 fputs (GLOBAL_ASM_OP, stream);
20089 RS6000_OUTPUT_BASENAME (stream, name);
20090 putc ('\n', stream);
20093 /* A get_unnamed_decl callback, used for read-only sections. PTR
20094 points to the section string variable. */
20096 static void
20097 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20099 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20100 *(const char *const *) directive,
20101 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20104 /* Likewise for read-write sections. */
20106 static void
20107 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20109 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20110 *(const char *const *) directive,
20111 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20114 static void
20115 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20117 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20118 *(const char *const *) directive,
20119 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20122 /* A get_unnamed_section callback, used for switching to toc_section. */
20124 static void
20125 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20127 if (TARGET_MINIMAL_TOC)
20129 /* toc_section is always selected at least once from
20130 rs6000_xcoff_file_start, so this is guaranteed to be
20131 defined exactly once in each file. */
20132 if (!toc_initialized)
20134 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20135 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20136 toc_initialized = 1;
20138 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20139 (TARGET_32BIT ? "" : ",3"));
20141 else
20142 fputs ("\t.toc\n", asm_out_file);
20145 /* Implement TARGET_ASM_INIT_SECTIONS. */
20147 static void
20148 rs6000_xcoff_asm_init_sections (void)
20150 read_only_data_section
20151 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20152 &xcoff_read_only_section_name);
20154 private_data_section
20155 = get_unnamed_section (SECTION_WRITE,
20156 rs6000_xcoff_output_readwrite_section_asm_op,
20157 &xcoff_private_data_section_name);
20159 read_only_private_data_section
20160 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20161 &xcoff_private_rodata_section_name);
20163 tls_data_section
20164 = get_unnamed_section (SECTION_TLS,
20165 rs6000_xcoff_output_tls_section_asm_op,
20166 &xcoff_tls_data_section_name);
20168 tls_private_data_section
20169 = get_unnamed_section (SECTION_TLS,
20170 rs6000_xcoff_output_tls_section_asm_op,
20171 &xcoff_private_data_section_name);
20173 toc_section
20174 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20176 readonly_data_section = read_only_data_section;
20179 static int
20180 rs6000_xcoff_reloc_rw_mask (void)
20182 return 3;
20185 static void
20186 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20187 tree decl ATTRIBUTE_UNUSED)
20189 int smclass;
20190 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20192 if (flags & SECTION_EXCLUDE)
20193 smclass = 4;
20194 else if (flags & SECTION_DEBUG)
20196 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20197 return;
20199 else if (flags & SECTION_CODE)
20200 smclass = 0;
20201 else if (flags & SECTION_TLS)
20202 smclass = 3;
20203 else if (flags & SECTION_WRITE)
20204 smclass = 2;
20205 else
20206 smclass = 1;
20208 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20209 (flags & SECTION_CODE) ? "." : "",
20210 name, suffix[smclass], flags & SECTION_ENTSIZE);
20213 #define IN_NAMED_SECTION(DECL) \
20214 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20215 && DECL_SECTION_NAME (DECL) != NULL)
20217 static section *
20218 rs6000_xcoff_select_section (tree decl, int reloc,
20219 unsigned HOST_WIDE_INT align)
20221 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20222 a named section. */
20223 if (align > BIGGEST_ALIGNMENT)
20225 resolve_unique_section (decl, reloc, true);
20226 if (IN_NAMED_SECTION (decl))
20227 return get_named_section (decl, NULL, reloc);
20230 if (decl_readonly_section (decl, reloc))
20232 if (TREE_PUBLIC (decl))
20233 return read_only_data_section;
20234 else
20235 return read_only_private_data_section;
20237 else
20239 #if HAVE_AS_TLS
20240 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20242 if (TREE_PUBLIC (decl))
20243 return tls_data_section;
20244 else if (bss_initializer_p (decl))
20246 /* Convert to COMMON to emit in BSS. */
20247 DECL_COMMON (decl) = 1;
20248 return tls_comm_section;
20250 else
20251 return tls_private_data_section;
20253 else
20254 #endif
20255 if (TREE_PUBLIC (decl))
20256 return data_section;
20257 else
20258 return private_data_section;
20262 static void
20263 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20265 const char *name;
20267 /* Use select_section for private data and uninitialized data with
20268 alignment <= BIGGEST_ALIGNMENT. */
20269 if (!TREE_PUBLIC (decl)
20270 || DECL_COMMON (decl)
20271 || (DECL_INITIAL (decl) == NULL_TREE
20272 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20273 || DECL_INITIAL (decl) == error_mark_node
20274 || (flag_zero_initialized_in_bss
20275 && initializer_zerop (DECL_INITIAL (decl))))
20276 return;
20278 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20279 name = (*targetm.strip_name_encoding) (name);
20280 set_decl_section_name (decl, name);
20283 /* Select section for constant in constant pool.
20285 On RS/6000, all constants are in the private read-only data area.
20286 However, if this is being placed in the TOC it must be output as a
20287 toc entry. */
20289 static section *
20290 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20291 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20293 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20294 return toc_section;
20295 else
20296 return read_only_private_data_section;
20299 /* Remove any trailing [DS] or the like from the symbol name. */
20301 static const char *
20302 rs6000_xcoff_strip_name_encoding (const char *name)
20304 size_t len;
20305 if (*name == '*')
20306 name++;
20307 len = strlen (name);
20308 if (name[len - 1] == ']')
20309 return ggc_alloc_string (name, len - 4);
20310 else
20311 return name;
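/* Examples: "foo[DS]" -> "foo" and "*bar[RW]" -> "bar" (the leading '*'
   and the four-character mapping-class suffix are dropped), while
   "baz" is returned unchanged.  */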
20314 /* Section attributes. AIX is always PIC. */
20316 static unsigned int
20317 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20319 unsigned int align;
20320 unsigned int flags = default_section_type_flags (decl, name, reloc);
20322 /* Align to at least UNIT size. */
20323 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20324 align = MIN_UNITS_PER_WORD;
20325 else
20326 /* Increase alignment of large objects if not already stricter. */
20327 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20328 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20329 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20331 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20334 /* Output at beginning of assembler file.
20336 Initialize the section names for the RS/6000 at this point.
20338 Specify filename, including full path, to assembler.
20340 We want to go into the TOC section so at least one .toc will be emitted.
20341 Also, in order to output proper .bs/.es pairs, we need at least one static
20342 [RW] section emitted.
20344 Finally, declare mcount when profiling to make the assembler happy. */
20346 static void
20347 rs6000_xcoff_file_start (void)
20349 rs6000_gen_section_name (&xcoff_bss_section_name,
20350 main_input_filename, ".bss_");
20351 rs6000_gen_section_name (&xcoff_private_data_section_name,
20352 main_input_filename, ".rw_");
20353 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20354 main_input_filename, ".rop_");
20355 rs6000_gen_section_name (&xcoff_read_only_section_name,
20356 main_input_filename, ".ro_");
20357 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20358 main_input_filename, ".tls_");
20359 rs6000_gen_section_name (&xcoff_tbss_section_name,
20360 main_input_filename, ".tbss_[UL]");
20362 fputs ("\t.file\t", asm_out_file);
20363 output_quoted_string (asm_out_file, main_input_filename);
20364 fputc ('\n', asm_out_file);
20365 if (write_symbols != NO_DEBUG)
20366 switch_to_section (private_data_section);
20367 switch_to_section (toc_section);
20368 switch_to_section (text_section);
20369 if (profile_flag)
20370 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20371 rs6000_file_start ();
20374 /* Output at end of assembler file.
20375 On the RS/6000, referencing data should automatically pull in text. */
20377 static void
20378 rs6000_xcoff_file_end (void)
20380 switch_to_section (text_section);
20381 fputs ("_section_.text:\n", asm_out_file);
20382 switch_to_section (data_section);
20383 fputs (TARGET_32BIT
20384 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20385 asm_out_file);
20388 struct declare_alias_data
20390 FILE *file;
20391 bool function_descriptor;
20394 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
20396 static bool
20397 rs6000_declare_alias (struct symtab_node *n, void *d)
20399 struct declare_alias_data *data = (struct declare_alias_data *)d;
20400 /* The main symbol is output specially, because the varasm machinery does
20401 part of the job for us - we need not declare .globl/.lglobl and such. */
20402 if (!n->alias || n->weakref)
20403 return false;
20405 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20406 return false;
20408 /* Prevent assemble_alias from trying to use the .set pseudo operation,
20409 which does not behave as the middle-end expects. */
20410 TREE_ASM_WRITTEN (n->decl) = true;
20412 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20413 char *buffer = (char *) alloca (strlen (name) + 2);
20414 char *p;
20415 int dollar_inside = 0;
20417 strcpy (buffer, name);
20418 p = strchr (buffer, '$');
20419 while (p) {
20420 *p = '_';
20421 dollar_inside++;
20422 p = strchr (p + 1, '$');
20424 if (TREE_PUBLIC (n->decl))
20426 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20428 if (dollar_inside) {
20429 if (data->function_descriptor)
20430 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20431 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20433 if (data->function_descriptor)
20435 fputs ("\t.globl .", data->file);
20436 RS6000_OUTPUT_BASENAME (data->file, buffer);
20437 putc ('\n', data->file);
20439 fputs ("\t.globl ", data->file);
20440 RS6000_OUTPUT_BASENAME (data->file, buffer);
20441 putc ('\n', data->file);
20443 #ifdef ASM_WEAKEN_DECL
20444 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20445 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20446 #endif
20448 else
20450 if (dollar_inside)
20452 if (data->function_descriptor)
20453 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20454 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20456 if (data->function_descriptor)
20458 fputs ("\t.lglobl .", data->file);
20459 RS6000_OUTPUT_BASENAME (data->file, buffer);
20460 putc ('\n', data->file);
20462 fputs ("\t.lglobl ", data->file);
20463 RS6000_OUTPUT_BASENAME (data->file, buffer);
20464 putc ('\n', data->file);
20466 if (data->function_descriptor)
20467 fputs (".", data->file);
20468 RS6000_OUTPUT_BASENAME (data->file, buffer);
20469 fputs (":\n", data->file);
20470 return false;
20474 #ifdef HAVE_GAS_HIDDEN
20475 /* Helper function to calculate visibility of a DECL
20476 and return the value as a const string. */
20478 static const char *
20479 rs6000_xcoff_visibility (tree decl)
20481 static const char * const visibility_types[] = {
20482 "", ",protected", ",hidden", ",internal"
20485 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20486 return visibility_types[vis];
20488 #endif
20491 /* This macro produces the initial definition of a function name.
20492 On the RS/6000, we need to place an extra '.' in the function name and
20493 output the function descriptor.
20494 Dollar signs are converted to underscores.
20496 The csect for the function will have already been created when
20497 text_section was selected. We do have to go back to that csect, however.
20499 The third and fourth parameters to the .function pseudo-op (16 and 044)
20500 are placeholders which no longer have any use.
20502 Because the AIX assembler's .set command has unexpected semantics, we output
20503 all aliases as alternative labels in front of the definition. */
20505 void
20506 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20508 char *buffer = (char *) alloca (strlen (name) + 1);
20509 char *p;
20510 int dollar_inside = 0;
20511 struct declare_alias_data data = {file, false};
20513 strcpy (buffer, name);
20514 p = strchr (buffer, '$');
20515 while (p) {
20516 *p = '_';
20517 dollar_inside++;
20518 p = strchr (p + 1, '$');
20520 if (TREE_PUBLIC (decl))
20522 if (!RS6000_WEAK || !DECL_WEAK (decl))
20524 if (dollar_inside) {
20525 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20526 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20528 fputs ("\t.globl .", file);
20529 RS6000_OUTPUT_BASENAME (file, buffer);
20530 #ifdef HAVE_GAS_HIDDEN
20531 fputs (rs6000_xcoff_visibility (decl), file);
20532 #endif
20533 putc ('\n', file);
20536 else
20538 if (dollar_inside) {
20539 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20540 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20542 fputs ("\t.lglobl .", file);
20543 RS6000_OUTPUT_BASENAME (file, buffer);
20544 putc ('\n', file);
20546 fputs ("\t.csect ", file);
20547 RS6000_OUTPUT_BASENAME (file, buffer);
20548 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20549 RS6000_OUTPUT_BASENAME (file, buffer);
20550 fputs (":\n", file);
20551 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20552 &data, true);
20553 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20554 RS6000_OUTPUT_BASENAME (file, buffer);
20555 fputs (", TOC[tc0], 0\n", file);
20556 in_section = NULL;
20557 switch_to_section (function_section (decl));
20558 putc ('.', file);
20559 RS6000_OUTPUT_BASENAME (file, buffer);
20560 fputs (":\n", file);
20561 data.function_descriptor = true;
20562 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20563 &data, true);
20564 if (!DECL_IGNORED_P (decl))
20566 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20567 xcoffout_declare_function (file, decl, buffer);
20568 else if (write_symbols == DWARF2_DEBUG)
20570 name = (*targetm.strip_name_encoding) (name);
20571 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20574 return;
20578 /* Output assembly language to globalize a symbol from a DECL,
20579 possibly with visibility. */
20581 void
20582 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20584 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20585 fputs (GLOBAL_ASM_OP, stream);
20586 RS6000_OUTPUT_BASENAME (stream, name);
20587 #ifdef HAVE_GAS_HIDDEN
20588 fputs (rs6000_xcoff_visibility (decl), stream);
20589 #endif
20590 putc ('\n', stream);
20593 /* Output assembly language to define a symbol as COMMON from a DECL,
20594 possibly with visibility. */
20596 void
20597 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20598 tree decl ATTRIBUTE_UNUSED,
20599 const char *name,
20600 unsigned HOST_WIDE_INT size,
20601 unsigned HOST_WIDE_INT align)
20603 unsigned HOST_WIDE_INT align2 = 2;
20605 if (align > 32)
20606 align2 = floor_log2 (align / BITS_PER_UNIT);
20607 else if (size > 4)
20608 align2 = 3;
20610 fputs (COMMON_ASM_OP, stream);
20611 RS6000_OUTPUT_BASENAME (stream, name);
20613 fprintf (stream,
20614 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20615 size, align2);
20617 #ifdef HAVE_GAS_HIDDEN
20618 if (decl != NULL)
20619 fputs (rs6000_xcoff_visibility (decl), stream);
20620 #endif
20621 putc ('\n', stream);
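/* Worked examples (ALIGN is in bits; the value emitted after SIZE is a
   log2 byte alignment):
     size 4,  align 32   -> ",4,2"   (word alignment)
     size 8,  align 32   -> ",8,3"   (objects larger than 4 bytes)
     size 16, align 128  -> ",16,4"  (floor_log2 (128 / 8))  */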
20624 /* This macro produces the initial definition of an object (variable) name.
20625 Because the AIX assembler's .set command has unexpected semantics, we output
20626 all aliases as alternative labels in front of the definition. */
20628 void
20629 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20631 struct declare_alias_data data = {file, false};
20632 RS6000_OUTPUT_BASENAME (file, name);
20633 fputs (":\n", file);
20634 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20635 &data, true);
20638 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
20640 void
20641 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20643 fputs (integer_asm_op (size, FALSE), file);
20644 assemble_name (file, label);
20645 fputs ("-$", file);
20648 /* Output a symbol offset relative to the dbase for the current object.
20649 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20650 signed offsets.
20652 __gcc_unwind_dbase is embedded in all executables/libraries through
20653 libgcc/config/rs6000/crtdbase.S. */
20655 void
20656 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20658 fputs (integer_asm_op (size, FALSE), file);
20659 assemble_name (file, label);
20660 fputs("-__gcc_unwind_dbase", file);
20663 #ifdef HAVE_AS_TLS
20664 static void
20665 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20667 rtx symbol;
20668 int flags;
20669 const char *symname;
20671 default_encode_section_info (decl, rtl, first);
20673 /* Careful not to prod global register variables. */
20674 if (!MEM_P (rtl))
20675 return;
20676 symbol = XEXP (rtl, 0);
20677 if (!SYMBOL_REF_P (symbol))
20678 return;
20680 flags = SYMBOL_REF_FLAGS (symbol);
20682 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20683 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20685 SYMBOL_REF_FLAGS (symbol) = flags;
20687 /* Append mapping class to extern decls. */
20688 symname = XSTR (symbol, 0);
20689 if (decl /* sync condition with assemble_external () */
20690 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20691 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20692 || TREE_CODE (decl) == FUNCTION_DECL)
20693 && symname[strlen (symname) - 1] != ']')
20695 char *newname = (char *) alloca (strlen (symname) + 5);
20696 strcpy (newname, symname);
20697 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20698 ? "[DS]" : "[UA]"));
20699 XSTR (symbol, 0) = ggc_strdup (newname);
20702 #endif /* HAVE_AS_TLS */
20703 #endif /* TARGET_XCOFF */
20705 void
20706 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20707 const char *name, const char *val)
20709 fputs ("\t.weak\t", stream);
20710 RS6000_OUTPUT_BASENAME (stream, name);
20711 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20712 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20714 if (TARGET_XCOFF)
20715 fputs ("[DS]", stream);
20716 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20717 if (TARGET_XCOFF)
20718 fputs (rs6000_xcoff_visibility (decl), stream);
20719 #endif
20720 fputs ("\n\t.weak\t.", stream);
20721 RS6000_OUTPUT_BASENAME (stream, name);
20723 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20724 if (TARGET_XCOFF)
20725 fputs (rs6000_xcoff_visibility (decl), stream);
20726 #endif
20727 fputc ('\n', stream);
20728 if (val)
20730 #ifdef ASM_OUTPUT_DEF
20731 ASM_OUTPUT_DEF (stream, name, val);
20732 #endif
20733 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20734 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20736 fputs ("\t.set\t.", stream);
20737 RS6000_OUTPUT_BASENAME (stream, name);
20738 fputs (",.", stream);
20739 RS6000_OUTPUT_BASENAME (stream, val);
20740 fputc ('\n', stream);
20746 /* Return true if INSN should not be copied. */
20748 static bool
20749 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20751 return recog_memoized (insn) >= 0
20752 && get_attr_cannot_copy (insn);
20755 /* Compute a (partial) cost for rtx X. Return true if the complete
20756 cost has been computed, and false if subexpressions should be
20757 scanned. In either case, *TOTAL contains the cost result. */
20759 static bool
20760 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20761 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20763 int code = GET_CODE (x);
20765 switch (code)
20767 /* On the RS/6000, if it is valid in the insn, it is free. */
20768 case CONST_INT:
20769 if (((outer_code == SET
20770 || outer_code == PLUS
20771 || outer_code == MINUS)
20772 && (satisfies_constraint_I (x)
20773 || satisfies_constraint_L (x)))
20774 || (outer_code == AND
20775 && (satisfies_constraint_K (x)
20776 || (mode == SImode
20777 ? satisfies_constraint_L (x)
20778 : satisfies_constraint_J (x))))
20779 || ((outer_code == IOR || outer_code == XOR)
20780 && (satisfies_constraint_K (x)
20781 || (mode == SImode
20782 ? satisfies_constraint_L (x)
20783 : satisfies_constraint_J (x))))
20784 || outer_code == ASHIFT
20785 || outer_code == ASHIFTRT
20786 || outer_code == LSHIFTRT
20787 || outer_code == ROTATE
20788 || outer_code == ROTATERT
20789 || outer_code == ZERO_EXTRACT
20790 || (outer_code == MULT
20791 && satisfies_constraint_I (x))
20792 || ((outer_code == DIV || outer_code == UDIV
20793 || outer_code == MOD || outer_code == UMOD)
20794 && exact_log2 (INTVAL (x)) >= 0)
20795 || (outer_code == COMPARE
20796 && (satisfies_constraint_I (x)
20797 || satisfies_constraint_K (x)))
20798 || ((outer_code == EQ || outer_code == NE)
20799 && (satisfies_constraint_I (x)
20800 || satisfies_constraint_K (x)
20801 || (mode == SImode
20802 ? satisfies_constraint_L (x)
20803 : satisfies_constraint_J (x))))
20804 || (outer_code == GTU
20805 && satisfies_constraint_I (x))
20806 || (outer_code == LTU
20807 && satisfies_constraint_P (x)))
20809 *total = 0;
20810 return true;
20812 else if ((outer_code == PLUS
20813 && reg_or_add_cint_operand (x, VOIDmode))
20814 || (outer_code == MINUS
20815 && reg_or_sub_cint_operand (x, VOIDmode))
20816 || ((outer_code == SET
20817 || outer_code == IOR
20818 || outer_code == XOR)
20819 && (INTVAL (x)
20820 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20822 *total = COSTS_N_INSNS (1);
20823 return true;
20825 /* FALLTHRU */
20827 case CONST_DOUBLE:
20828 case CONST_WIDE_INT:
20829 case CONST:
20830 case HIGH:
20831 case SYMBOL_REF:
20832 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20833 return true;
20835 case MEM:
20836 /* When optimizing for size, MEM should be slightly more expensive
20837 than generating the address, e.g., (plus (reg) (const)).
20838 L1 cache latency is about two instructions. */
20839 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20840 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20841 *total += COSTS_N_INSNS (100);
20842 return true;
20844 case LABEL_REF:
20845 *total = 0;
20846 return true;
20848 case PLUS:
20849 case MINUS:
20850 if (FLOAT_MODE_P (mode))
20851 *total = rs6000_cost->fp;
20852 else
20853 *total = COSTS_N_INSNS (1);
20854 return false;
20856 case MULT:
20857 if (CONST_INT_P (XEXP (x, 1))
20858 && satisfies_constraint_I (XEXP (x, 1)))
20860 if (INTVAL (XEXP (x, 1)) >= -256
20861 && INTVAL (XEXP (x, 1)) <= 255)
20862 *total = rs6000_cost->mulsi_const9;
20863 else
20864 *total = rs6000_cost->mulsi_const;
20866 else if (mode == SFmode)
20867 *total = rs6000_cost->fp;
20868 else if (FLOAT_MODE_P (mode))
20869 *total = rs6000_cost->dmul;
20870 else if (mode == DImode)
20871 *total = rs6000_cost->muldi;
20872 else
20873 *total = rs6000_cost->mulsi;
20874 return false;
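/* E.g. (mult (reg:SI x) (const_int 100)) is costed as mulsi_const9
   because 100 fits in 9 signed bits, while a multiply by 1000 falls
   back to mulsi_const; non-constant multiplies use mulsi/muldi.  */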
20876 case FMA:
20877 if (mode == SFmode)
20878 *total = rs6000_cost->fp;
20879 else
20880 *total = rs6000_cost->dmul;
20881 break;
20883 case DIV:
20884 case MOD:
20885 if (FLOAT_MODE_P (mode))
20887 *total = mode == DFmode ? rs6000_cost->ddiv
20888 : rs6000_cost->sdiv;
20889 return false;
20891 /* FALLTHRU */
20893 case UDIV:
20894 case UMOD:
20895 if (CONST_INT_P (XEXP (x, 1))
20896 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20898 if (code == DIV || code == MOD)
20899 /* Shift, addze */
20900 *total = COSTS_N_INSNS (2);
20901 else
20902 /* Shift */
20903 *total = COSTS_N_INSNS (1);
20905 else
20907 if (GET_MODE (XEXP (x, 1)) == DImode)
20908 *total = rs6000_cost->divdi;
20909 else
20910 *total = rs6000_cost->divsi;
20912 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20913 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20914 *total += COSTS_N_INSNS (2);
20915 return false;
20917 case CTZ:
20918 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20919 return false;
20921 case FFS:
20922 *total = COSTS_N_INSNS (4);
20923 return false;
20925 case POPCOUNT:
20926 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20927 return false;
20929 case PARITY:
20930 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20931 return false;
20933 case NOT:
20934 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20935 *total = 0;
20936 else
20937 *total = COSTS_N_INSNS (1);
20938 return false;
20940 case AND:
20941 if (CONST_INT_P (XEXP (x, 1)))
20943 rtx left = XEXP (x, 0);
20944 rtx_code left_code = GET_CODE (left);
20946 /* rotate-and-mask: 1 insn. */
20947 if ((left_code == ROTATE
20948 || left_code == ASHIFT
20949 || left_code == LSHIFTRT)
20950 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20952 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20953 if (!CONST_INT_P (XEXP (left, 1)))
20954 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20955 *total += COSTS_N_INSNS (1);
20956 return true;
20959 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20960 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20961 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20962 || (val & 0xffff) == val
20963 || (val & 0xffff0000) == val
20964 || ((val & 0xffff) == 0 && mode == SImode))
20966 *total = rtx_cost (left, mode, AND, 0, speed);
20967 *total += COSTS_N_INSNS (1);
20968 return true;
20971 /* 2 insns. */
20972 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20974 *total = rtx_cost (left, mode, AND, 0, speed);
20975 *total += COSTS_N_INSNS (2);
20976 return true;
20980 *total = COSTS_N_INSNS (1);
20981 return false;
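/* Illustrative examples for the AND costs above (a sketch, not part of
   the original file): on SImode, x & 0xff fits andi. and
   x & 0xffff0000 fits andis., each a single insn; a shifted mask such
   as (x << 16) & 0xffff0000 matches rs6000_is_valid_shift_mask and is
   a single rotate-and-mask insn; a constant that is none of these,
   e.g. the two separated bit runs in 0x00ff00ff, would go through the
   rs6000_is_valid_2insn_and check above and cost two insns.  */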
20983 case IOR:
20984 /* FIXME */
20985 *total = COSTS_N_INSNS (1);
20986 return true;
20988 case CLZ:
20989 case XOR:
20990 case ZERO_EXTRACT:
20991 *total = COSTS_N_INSNS (1);
20992 return false;
20994 case ASHIFT:
20995 /* The EXTSWSLI instruction combines a sign extend and a shift.  Don't
20996 count the sign extend and the shift separately within the insn. */
20997 if (TARGET_EXTSWSLI && mode == DImode
20998 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20999 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21001 *total = 0;
21002 return false;
21004 /* fall through */
21006 case ASHIFTRT:
21007 case LSHIFTRT:
21008 case ROTATE:
21009 case ROTATERT:
21010 /* Handle mul_highpart. */
21011 if (outer_code == TRUNCATE
21012 && GET_CODE (XEXP (x, 0)) == MULT)
21014 if (mode == DImode)
21015 *total = rs6000_cost->muldi;
21016 else
21017 *total = rs6000_cost->mulsi;
21018 return true;
21020 else if (outer_code == AND)
21021 *total = 0;
21022 else
21023 *total = COSTS_N_INSNS (1);
21024 return false;
21026 case SIGN_EXTEND:
21027 case ZERO_EXTEND:
21028 if (MEM_P (XEXP (x, 0)))
21029 *total = 0;
21030 else
21031 *total = COSTS_N_INSNS (1);
21032 return false;
21034 case COMPARE:
21035 case NEG:
21036 case ABS:
21037 if (!FLOAT_MODE_P (mode))
21039 *total = COSTS_N_INSNS (1);
21040 return false;
21042 /* FALLTHRU */
21044 case FLOAT:
21045 case UNSIGNED_FLOAT:
21046 case FIX:
21047 case UNSIGNED_FIX:
21048 case FLOAT_TRUNCATE:
21049 *total = rs6000_cost->fp;
21050 return false;
21052 case FLOAT_EXTEND:
21053 if (mode == DFmode)
21054 *total = rs6000_cost->sfdf_convert;
21055 else
21056 *total = rs6000_cost->fp;
21057 return false;
21059 case CALL:
21060 case IF_THEN_ELSE:
21061 if (!speed)
21063 *total = COSTS_N_INSNS (1);
21064 return true;
21066 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21068 *total = rs6000_cost->fp;
21069 return false;
21071 break;
21073 case NE:
21074 case EQ:
21075 case GTU:
21076 case LTU:
21077 /* Carry bit requires mode == Pmode.
21078 The NEG or PLUS is already counted, so only add one insn. */
21079 if (mode == Pmode
21080 && (outer_code == NEG || outer_code == PLUS))
21082 *total = COSTS_N_INSNS (1);
21083 return true;
21085 /* FALLTHRU */
21087 case GT:
21088 case LT:
21089 case UNORDERED:
21090 if (outer_code == SET)
21092 if (XEXP (x, 1) == const0_rtx)
21094 *total = COSTS_N_INSNS (2);
21095 return true;
21097 else
21099 *total = COSTS_N_INSNS (3);
21100 return false;
21103 /* CC COMPARE. */
21104 if (outer_code == COMPARE)
21106 *total = 0;
21107 return true;
21109 break;
21111 default:
21112 break;
21115 return false;
21118 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21120 static bool
21121 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21122 int opno, int *total, bool speed)
21124 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21126 fprintf (stderr,
21127 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21128 "opno = %d, total = %d, speed = %s, x:\n",
21129 ret ? "complete" : "scan inner",
21130 GET_MODE_NAME (mode),
21131 GET_RTX_NAME (outer_code),
21132 opno,
21133 *total,
21134 speed ? "true" : "false");
21136 debug_rtx (x);
21138 return ret;
21141 static int
21142 rs6000_insn_cost (rtx_insn *insn, bool speed)
21144 if (recog_memoized (insn) < 0)
21145 return 0;
21147 /* If we are optimizing for size, just use the length. */
21148 if (!speed)
21149 return get_attr_length (insn);
21151 /* Use the cost if provided. */
21152 int cost = get_attr_cost (insn);
21153 if (cost > 0)
21154 return cost;
21156 /* If the insn tells us how many insns there are, use that. Otherwise use
21157 the length/4. Adjust the insn length to remove the extra size that
21158 prefixed instructions take. */
21159 int n = get_attr_num_insns (insn);
21160 if (n == 0)
21162 int length = get_attr_length (insn);
21163 if (get_attr_prefixed (insn) == PREFIXED_YES)
21165 int adjust = 0;
21166 ADJUST_INSN_LENGTH (insn, adjust);
21167 length -= adjust;
21170 n = length / 4;
21173 enum attr_type type = get_attr_type (insn);
21175 switch (type)
21177 case TYPE_LOAD:
21178 case TYPE_FPLOAD:
21179 case TYPE_VECLOAD:
21180 cost = COSTS_N_INSNS (n + 1);
21181 break;
21183 case TYPE_MUL:
21184 switch (get_attr_size (insn))
21186 case SIZE_8:
21187 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21188 break;
21189 case SIZE_16:
21190 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21191 break;
21192 case SIZE_32:
21193 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21194 break;
21195 case SIZE_64:
21196 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21197 break;
21198 default:
21199 gcc_unreachable ();
21201 break;
21202 case TYPE_DIV:
21203 switch (get_attr_size (insn))
21205 case SIZE_32:
21206 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21207 break;
21208 case SIZE_64:
21209 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21210 break;
21211 default:
21212 gcc_unreachable ();
21214 break;
21216 case TYPE_FP:
21217 cost = n * rs6000_cost->fp;
21218 break;
21219 case TYPE_DMUL:
21220 cost = n * rs6000_cost->dmul;
21221 break;
21222 case TYPE_SDIV:
21223 cost = n * rs6000_cost->sdiv;
21224 break;
21225 case TYPE_DDIV:
21226 cost = n * rs6000_cost->ddiv;
21227 break;
21229 case TYPE_SYNC:
21230 case TYPE_LOAD_L:
21231 case TYPE_MFCR:
21232 case TYPE_MFCRF:
21233 cost = COSTS_N_INSNS (n + 2);
21234 break;
21236 default:
21237 cost = COSTS_N_INSNS (n);
21240 return cost;
21243 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21245 static int
21246 rs6000_debug_address_cost (rtx x, machine_mode mode,
21247 addr_space_t as, bool speed)
21249 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21251 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21252 ret, speed ? "true" : "false");
21253 debug_rtx (x);
21255 return ret;
21259 /* A C expression returning the cost of moving data from a register of class
21260 CLASS1 to one of CLASS2. */
21262 static int
21263 rs6000_register_move_cost (machine_mode mode,
21264 reg_class_t from, reg_class_t to)
21266 int ret;
21267 reg_class_t rclass;
21269 if (TARGET_DEBUG_COST)
21270 dbg_cost_ctrl++;
21272 /* If we have VSX, we can easily move between FPR or Altivec registers,
21273 otherwise we can only easily move within classes.
21274 Do this first so we give best-case answers for union classes
21275 containing both gprs and vsx regs. */
21276 HARD_REG_SET to_vsx, from_vsx;
21277 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21278 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21279 if (!hard_reg_set_empty_p (to_vsx)
21280 && !hard_reg_set_empty_p (from_vsx)
21281 && (TARGET_VSX
21282 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21284 int reg = FIRST_FPR_REGNO;
21285 if (TARGET_VSX
21286 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21287 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21288 reg = FIRST_ALTIVEC_REGNO;
21289 ret = 2 * hard_regno_nregs (reg, mode);
21292 /* Moves from/to GENERAL_REGS. */
21293 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21294 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21296 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21298 if (TARGET_DIRECT_MOVE)
21300 /* Keep the cost for direct moves above that for within
21301 a register class even if the actual processor cost is
21302 comparable. We do this because a direct move insn
21303 can't be a nop, whereas with ideal register
21304 allocation a move within the same class might turn
21305 out to be a nop. */
21306 if (rs6000_tune == PROCESSOR_POWER9
21307 || rs6000_tune == PROCESSOR_FUTURE)
21308 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21309 else
21310 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21311 /* SFmode requires a conversion when moving between gprs
21312 and vsx. */
21313 if (mode == SFmode)
21314 ret += 2;
21316 else
21317 ret = (rs6000_memory_move_cost (mode, rclass, false)
21318 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21321 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21322 shift. */
21323 else if (rclass == CR_REGS)
21324 ret = 4;
21326 /* For those processors that have slow LR/CTR moves, make them more
21327 expensive than memory in order to bias spills to memory.  */
21328 else if ((rs6000_tune == PROCESSOR_POWER6
21329 || rs6000_tune == PROCESSOR_POWER7
21330 || rs6000_tune == PROCESSOR_POWER8
21331 || rs6000_tune == PROCESSOR_POWER9)
21332 && reg_class_subset_p (rclass, SPECIAL_REGS))
21333 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21335 else
21336 /* A move will cost one instruction per GPR moved. */
21337 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21340 /* Everything else has to go through GENERAL_REGS. */
21341 else
21342 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21343 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21345 if (TARGET_DEBUG_COST)
21347 if (dbg_cost_ctrl == 1)
21348 fprintf (stderr,
21349 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21350 ret, GET_MODE_NAME (mode), reg_class_names[from],
21351 reg_class_names[to]);
21352 dbg_cost_ctrl--;
21355 return ret;
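/* Worked example of the costs above (a sketch, not part of the original
   file): moving a DImode value between a GPR and a VSX register with
   direct moves on power9 costs 3 * hard_regno_nregs = 3 on a 64-bit
   target, kept above the same-class move cost of 2 so that a direct
   move is never preferred over a move that might become a no-op; an
   SFmode direct move pays 2 more for the gpr<->vsx conversion,
   giving 5.  */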
21358 /* A C expression returning the cost of moving data of MODE from a register to
21359 or from memory. */
21361 static int
21362 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21363 bool in ATTRIBUTE_UNUSED)
21365 int ret;
21367 if (TARGET_DEBUG_COST)
21368 dbg_cost_ctrl++;
21370 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21371 ret = 4 * hard_regno_nregs (0, mode);
21372 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21373 || reg_classes_intersect_p (rclass, VSX_REGS)))
21374 ret = 4 * hard_regno_nregs (32, mode);
21375 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21376 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21377 else
21378 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21380 if (TARGET_DEBUG_COST)
21382 if (dbg_cost_ctrl == 1)
21383 fprintf (stderr,
21384 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21385 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21386 dbg_cost_ctrl--;
21389 return ret;
21392 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21394 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21395 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21396 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21397 move cost between GENERAL_REGS and VSX_REGS low.
21399 It might seem reasonable to use a union class. After all, if usage
21400 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21401 rather than memory. However, in cases where register pressure of
21402 both is high, like the cactus_adm spec test, allowing
21403 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21404 the first scheduling pass. This is partly due to an allocno of
21405 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21406 class, which gives too high a pressure for GENERAL_REGS and too low
21407 for VSX_REGS. So, force a choice of the subclass here.
21409 The best class is also the union if GENERAL_REGS and VSX_REGS have
21410 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21411 allocno class, since trying to narrow down the class by regno mode
21412 is prone to error. For example, SImode is allowed in VSX regs and
21413 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21414 it would be wrong to choose an allocno of GENERAL_REGS based on
21415 SImode. */
21417 static reg_class_t
21418 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21419 reg_class_t allocno_class,
21420 reg_class_t best_class)
21422 switch (allocno_class)
21424 case GEN_OR_VSX_REGS:
21425 /* best_class must be a subset of allocno_class. */
21426 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21427 || best_class == GEN_OR_FLOAT_REGS
21428 || best_class == VSX_REGS
21429 || best_class == ALTIVEC_REGS
21430 || best_class == FLOAT_REGS
21431 || best_class == GENERAL_REGS
21432 || best_class == BASE_REGS);
21433 /* Use best_class but choose wider classes when copying from the
21434 wider class to best_class is cheap. This mimics IRA choice
21435 of allocno class. */
21436 if (best_class == BASE_REGS)
21437 return GENERAL_REGS;
21438 if (TARGET_VSX
21439 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21440 return VSX_REGS;
21441 return best_class;
21443 default:
21444 break;
21447 return allocno_class;
21450 /* Returns a code for a target-specific builtin that implements
21451 reciprocal of the function, or NULL_TREE if not available. */
21453 static tree
21454 rs6000_builtin_reciprocal (tree fndecl)
21456 switch (DECL_MD_FUNCTION_CODE (fndecl))
21458 case VSX_BUILTIN_XVSQRTDP:
21459 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21460 return NULL_TREE;
21462 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21464 case VSX_BUILTIN_XVSQRTSP:
21465 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21466 return NULL_TREE;
21468 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21470 default:
21471 return NULL_TREE;
21475 /* Load up a constant. If the mode is a vector mode, splat the value across
21476 all of the vector elements. */
21478 static rtx
21479 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21481 rtx reg;
21483 if (mode == SFmode || mode == DFmode)
21485 rtx d = const_double_from_real_value (dconst, mode);
21486 reg = force_reg (mode, d);
21488 else if (mode == V4SFmode)
21490 rtx d = const_double_from_real_value (dconst, SFmode);
21491 rtvec v = gen_rtvec (4, d, d, d, d);
21492 reg = gen_reg_rtx (mode);
21493 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21495 else if (mode == V2DFmode)
21497 rtx d = const_double_from_real_value (dconst, DFmode);
21498 rtvec v = gen_rtvec (2, d, d);
21499 reg = gen_reg_rtx (mode);
21500 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21502 else
21503 gcc_unreachable ();
21505 return reg;
21508 /* Generate an FMA instruction. */
21510 static void
21511 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21513 machine_mode mode = GET_MODE (target);
21514 rtx dst;
21516 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21517 gcc_assert (dst != NULL);
21519 if (dst != target)
21520 emit_move_insn (target, dst);
21523 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21525 static void
21526 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21528 machine_mode mode = GET_MODE (dst);
21529 rtx r;
21531 /* This is a tad more complicated, since the fnma_optab is for
21532 a different expression: fma(-m1, m2, a), which is the same
21533 thing except in the case of signed zeros.
21535 Fortunately we know that if FMA is supported, FNMSUB is
21536 also supported in the ISA. Just expand it directly. */
21538 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21540 r = gen_rtx_NEG (mode, a);
21541 r = gen_rtx_FMA (mode, m1, m2, r);
21542 r = gen_rtx_NEG (mode, r);
21543 emit_insn (gen_rtx_SET (dst, r));
21546 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21547 add a reg_note saying that this was a division. Support both scalar and
21548 vector divide. Assumes no trapping math and finite arguments. */
21550 void
21551 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21553 machine_mode mode = GET_MODE (dst);
21554 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21555 int i;
21557 /* Low precision estimates guarantee 5 bits of accuracy. High
21558 precision estimates guarantee 14 bits of accuracy. SFmode
21559 requires 23 bits of accuracy. DFmode requires 52 bits of
21560 accuracy. Each pass at least doubles the accuracy, leading
21561 to the following. */
21562 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21563 if (mode == DFmode || mode == V2DFmode)
21564 passes++;
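/* Worked example of the pass count (a sketch, not part of the original
   file): with TARGET_RECIP_PRECISION the 14-bit estimate doubles to
   28 >= 23 bits in one pass for SFmode, and to 56 >= 52 bits in two
   passes for DFmode; without it the 5-bit estimate needs three passes
   for SFmode (5 -> 10 -> 20 -> 40 >= 23) and one more for DFmode
   (40 -> 80 >= 52).  */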
21566 enum insn_code code = optab_handler (smul_optab, mode);
21567 insn_gen_fn gen_mul = GEN_FCN (code);
21569 gcc_assert (code != CODE_FOR_nothing);
21571 one = rs6000_load_constant_and_splat (mode, dconst1);
21573 /* x0 = 1./d estimate */
21574 x0 = gen_reg_rtx (mode);
21575 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21576 UNSPEC_FRES)));
21578 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21579 if (passes > 1) {
21581 /* e0 = 1. - d * x0 */
21582 e0 = gen_reg_rtx (mode);
21583 rs6000_emit_nmsub (e0, d, x0, one);
21585 /* x1 = x0 + e0 * x0 */
21586 x1 = gen_reg_rtx (mode);
21587 rs6000_emit_madd (x1, e0, x0, x0);
21589 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21590 ++i, xprev = xnext, eprev = enext) {
21592 /* enext = eprev * eprev */
21593 enext = gen_reg_rtx (mode);
21594 emit_insn (gen_mul (enext, eprev, eprev));
21596 /* xnext = xprev + enext * xprev */
21597 xnext = gen_reg_rtx (mode);
21598 rs6000_emit_madd (xnext, enext, xprev, xprev);
21601 } else
21602 xprev = x0;
21604 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21606 /* u = n * xprev */
21607 u = gen_reg_rtx (mode);
21608 emit_insn (gen_mul (u, n, xprev));
21610 /* v = n - (d * u) */
21611 v = gen_reg_rtx (mode);
21612 rs6000_emit_nmsub (v, d, u, n);
21614 /* dst = (v * xprev) + u */
21615 rs6000_emit_madd (dst, v, xprev, u);
21617 if (note_p)
21618 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
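/* A scalar sketch of the sequence emitted above (illustrative, not part
   of the original file), for dst = n / d with x0 = fres (d):

      double x = x0;                  // estimate of 1/d
      double e = 1.0 - d * x;         // rs6000_emit_nmsub (e0, d, x0, one)
      x = x + e * x;                  // rs6000_emit_madd (x1, e0, x0, x0)
      for (int i = 0; i < passes - 2; i++)
        {
          e = e * e;
          x = x + e * x;
        }
      double u = n * x;               // u = n * xprev
      double v = n - d * u;           // rs6000_emit_nmsub (v, d, u, n)
      double q = v * x + u;           // rs6000_emit_madd (dst, v, xprev, u)

   The error term squares on each pass: if e = 1 - d*x, then after
   x' = x + e*x we get 1 - d*x' = 1 - (1 - e)*(1 + e) = e*e.  */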
21621 /* Goldschmidt's Algorithm for single/double-precision floating point
21622 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21624 void
21625 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21627 machine_mode mode = GET_MODE (src);
21628 rtx e = gen_reg_rtx (mode);
21629 rtx g = gen_reg_rtx (mode);
21630 rtx h = gen_reg_rtx (mode);
21632 /* Low precision estimates guarantee 5 bits of accuracy. High
21633 precision estimates guarantee 14 bits of accuracy. SFmode
21634 requires 23 bits of accuracy. DFmode requires 52 bits of
21635 accuracy. Each pass at least doubles the accuracy, leading
21636 to the following. */
21637 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21638 if (mode == DFmode || mode == V2DFmode)
21639 passes++;
21641 int i;
21642 rtx mhalf;
21643 enum insn_code code = optab_handler (smul_optab, mode);
21644 insn_gen_fn gen_mul = GEN_FCN (code);
21646 gcc_assert (code != CODE_FOR_nothing);
21648 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21650 /* e = rsqrt estimate */
21651 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21652 UNSPEC_RSQRT)));
21654 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21655 if (!recip)
21657 rtx zero = force_reg (mode, CONST0_RTX (mode));
21659 if (mode == SFmode)
21661 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21662 e, zero, mode, 0);
21663 if (target != e)
21664 emit_move_insn (e, target);
21666 else
21668 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21669 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21673 /* g = sqrt estimate. */
21674 emit_insn (gen_mul (g, e, src));
21675 /* h = 1/(2*sqrt) estimate. */
21676 emit_insn (gen_mul (h, e, mhalf));
21678 if (recip)
21680 if (passes == 1)
21682 rtx t = gen_reg_rtx (mode);
21683 rs6000_emit_nmsub (t, g, h, mhalf);
21684 /* Apply correction directly to 1/rsqrt estimate. */
21685 rs6000_emit_madd (dst, e, t, e);
21687 else
21689 for (i = 0; i < passes; i++)
21691 rtx t1 = gen_reg_rtx (mode);
21692 rtx g1 = gen_reg_rtx (mode);
21693 rtx h1 = gen_reg_rtx (mode);
21695 rs6000_emit_nmsub (t1, g, h, mhalf);
21696 rs6000_emit_madd (g1, g, t1, g);
21697 rs6000_emit_madd (h1, h, t1, h);
21699 g = g1;
21700 h = h1;
21702 /* Multiply by 2 for 1/rsqrt. */
21703 emit_insn (gen_add3_insn (dst, h, h));
21706 else
21708 rtx t = gen_reg_rtx (mode);
21709 rs6000_emit_nmsub (t, g, h, mhalf);
21710 rs6000_emit_madd (dst, g, t, g);
21713 return;
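/* A scalar sketch of the Goldschmidt step above (illustrative, not part
   of the original file).  Starting from the rsqrt estimate e:

      double g = e * src;            // ~ sqrt(src)
      double h = e * 0.5;            // ~ 1 / (2 * sqrt(src))
      double t = 0.5 - g * h;        // rs6000_emit_nmsub (t, g, h, mhalf)
      g = g + g * t;                 // rs6000_emit_madd: refined sqrt
      h = h + h * t;                 // rs6000_emit_madd: refined 1/(2*sqrt)

   Since g * h converges to 1/2, the correction t shrinks quadratically;
   the rsqrt loop repeats the step 'passes' times and returns h + h,
   while the sqrt path applies the correction once to g.  */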
21716 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21717 (Power7) targets. DST is the target, and SRC is the argument operand. */
21719 void
21720 rs6000_emit_popcount (rtx dst, rtx src)
21722 machine_mode mode = GET_MODE (dst);
21723 rtx tmp1, tmp2;
21725 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21726 if (TARGET_POPCNTD)
21728 if (mode == SImode)
21729 emit_insn (gen_popcntdsi2 (dst, src));
21730 else
21731 emit_insn (gen_popcntddi2 (dst, src));
21732 return;
21735 tmp1 = gen_reg_rtx (mode);
21737 if (mode == SImode)
21739 emit_insn (gen_popcntbsi2 (tmp1, src));
21740 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21741 NULL_RTX, 0);
21742 tmp2 = force_reg (SImode, tmp2);
21743 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21745 else
21747 emit_insn (gen_popcntbdi2 (tmp1, src));
21748 tmp2 = expand_mult (DImode, tmp1,
21749 GEN_INT ((HOST_WIDE_INT)
21750 0x01010101 << 32 | 0x01010101),
21751 NULL_RTX, 0);
21752 tmp2 = force_reg (DImode, tmp2);
21753 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
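/* Worked example of the multiply trick above (illustrative, not part of
   the original file): popcntb leaves a population count in each byte,
   and multiplying by 0x01010101 (0x0101010101010101 for DImode) adds
   every byte into the top byte:

      bytes (b3,b2,b1,b0) * 0x01010101 -> top byte = b3+b2+b1+b0

   which the final shift right by 24 (56 for DImode) extracts.  Each
   byte count is at most 8, so the sums never overflow a byte.  */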
21758 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21759 target, and SRC is the argument operand. */
21761 void
21762 rs6000_emit_parity (rtx dst, rtx src)
21764 machine_mode mode = GET_MODE (dst);
21765 rtx tmp;
21767 tmp = gen_reg_rtx (mode);
21769 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21770 if (TARGET_CMPB)
21772 if (mode == SImode)
21774 emit_insn (gen_popcntbsi2 (tmp, src));
21775 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21777 else
21779 emit_insn (gen_popcntbdi2 (tmp, src));
21780 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21782 return;
21785 if (mode == SImode)
21787 /* Is mult+shift >= shift+xor+shift+xor? */
21788 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21790 rtx tmp1, tmp2, tmp3, tmp4;
21792 tmp1 = gen_reg_rtx (SImode);
21793 emit_insn (gen_popcntbsi2 (tmp1, src));
21795 tmp2 = gen_reg_rtx (SImode);
21796 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21797 tmp3 = gen_reg_rtx (SImode);
21798 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21800 tmp4 = gen_reg_rtx (SImode);
21801 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21802 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21804 else
21805 rs6000_emit_popcount (tmp, src);
21806 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21808 else
21810 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21811 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21813 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21815 tmp1 = gen_reg_rtx (DImode);
21816 emit_insn (gen_popcntbdi2 (tmp1, src));
21818 tmp2 = gen_reg_rtx (DImode);
21819 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21820 tmp3 = gen_reg_rtx (DImode);
21821 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21823 tmp4 = gen_reg_rtx (DImode);
21824 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21825 tmp5 = gen_reg_rtx (DImode);
21826 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21828 tmp6 = gen_reg_rtx (DImode);
21829 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21830 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21832 else
21833 rs6000_emit_popcount (tmp, src);
21834 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
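/* Sketch of the shift/xor fallback above (illustrative, not part of the
   original file): (a + b) mod 2 == (a ^ b) mod 2, so XOR-folding the
   popcntb byte counts preserves the parity of the total:

      x ^= x >> 16;  x ^= x >> 8;                 // SImode
      x ^= x >> 32;  x ^= x >> 16;  x ^= x >> 8;  // DImode

   after which bit 0 holds the parity that the final AND extracts.  */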
21838 /* Expand an Altivec constant permutation for little endian mode.
21839 OP0 and OP1 are the input vectors and TARGET is the output vector.
21840 SEL specifies the constant permutation vector.
21842 There are two issues: First, the two input operands must be
21843 swapped so that together they form a double-wide array in LE
21844 order. Second, the vperm instruction has surprising behavior
21845 in LE mode: it interprets the elements of the source vectors
21846 in BE mode ("left to right") and interprets the elements of
21847 the destination vector in LE mode ("right to left"). To
21848 correct for this, we must subtract each element of the permute
21849 control vector from 31.
21851 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21852 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21853 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21854 serve as the permute control vector. Then, in BE mode,
21856 vperm 9,10,11,12
21858 places the desired result in vr9. However, in LE mode the
21859 vector contents will be
21861 vr10 = 00000003 00000002 00000001 00000000
21862 vr11 = 00000007 00000006 00000005 00000004
21864 The result of the vperm using the same permute control vector is
21866 vr9 = 05000000 07000000 01000000 03000000
21868 That is, the leftmost 4 bytes of vr10 are interpreted as the
21869 source for the rightmost 4 bytes of vr9, and so on.
21871 If we change the permute control vector to
21873 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21875 and issue
21877 vperm 9,11,10,12
21879 we get the desired
21881 vr9 = 00000006 00000004 00000002 00000000. */
21883 static void
21884 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21885 const vec_perm_indices &sel)
21887 unsigned int i;
21888 rtx perm[16];
21889 rtx constv, unspec;
21891 /* Unpack and adjust the constant selector. */
21892 for (i = 0; i < 16; ++i)
21894 unsigned int elt = 31 - (sel[i] & 31);
21895 perm[i] = GEN_INT (elt);
21898 /* Expand to a permute, swapping the inputs and using the
21899 adjusted selector. */
21900 if (!REG_P (op0))
21901 op0 = force_reg (V16QImode, op0);
21902 if (!REG_P (op1))
21903 op1 = force_reg (V16QImode, op1);
21905 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21906 constv = force_reg (V16QImode, constv);
21907 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21908 UNSPEC_VPERM);
21909 if (!REG_P (target))
21911 rtx tmp = gen_reg_rtx (V16QImode);
21912 emit_move_insn (tmp, unspec);
21913 unspec = tmp;
21916 emit_move_insn (target, unspec);
21919 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21920 permute control vector. But here it's not a constant, so we must
21921 generate a vector NAND or NOR to do the adjustment. */
21923 void
21924 altivec_expand_vec_perm_le (rtx operands[4])
21926 rtx notx, iorx, unspec;
21927 rtx target = operands[0];
21928 rtx op0 = operands[1];
21929 rtx op1 = operands[2];
21930 rtx sel = operands[3];
21931 rtx tmp = target;
21932 rtx norreg = gen_reg_rtx (V16QImode);
21933 machine_mode mode = GET_MODE (target);
21935 /* Get everything in regs so the pattern matches. */
21936 if (!REG_P (op0))
21937 op0 = force_reg (mode, op0);
21938 if (!REG_P (op1))
21939 op1 = force_reg (mode, op1);
21940 if (!REG_P (sel))
21941 sel = force_reg (V16QImode, sel);
21942 if (!REG_P (target))
21943 tmp = gen_reg_rtx (mode);
21945 if (TARGET_P9_VECTOR)
21947 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21948 UNSPEC_VPERMR);
21950 else
21952 /* Invert the selector with a VNAND if available, else a VNOR.
21953 The VNAND is preferred for future fusion opportunities. */
21954 notx = gen_rtx_NOT (V16QImode, sel);
21955 iorx = (TARGET_P8_VECTOR
21956 ? gen_rtx_IOR (V16QImode, notx, notx)
21957 : gen_rtx_AND (V16QImode, notx, notx));
21958 emit_insn (gen_rtx_SET (norreg, iorx));
21960 /* Permute with operands reversed and adjusted selector. */
21961 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21962 UNSPEC_VPERM);
21965 /* Copy into target, possibly by way of a register. */
21966 if (!REG_P (target))
21968 emit_move_insn (tmp, unspec);
21969 unspec = tmp;
21972 emit_move_insn (target, unspec);
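/* Why inverting the selector works (an illustrative note, not part of
   the original file): vperm only reads the low five bits of each
   selector byte, and 31 - x == ~x (mod 32), so a vector NOT (formed
   here as a VNAND or VNOR of sel with itself) computes the same
   per-byte adjustment that altivec_expand_vec_perm_const_le applies
   as 31 - elt.  */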
21975 /* Expand an Altivec constant permutation. Return true if we match
21976 an efficient implementation; false to fall back to VPERM.
21978 OP0 and OP1 are the input vectors and TARGET is the output vector.
21979 SEL specifies the constant permutation vector. */
21981 static bool
21982 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21983 const vec_perm_indices &sel)
21985 struct altivec_perm_insn {
21986 HOST_WIDE_INT mask;
21987 enum insn_code impl;
21988 unsigned char perm[16];
21990 static const struct altivec_perm_insn patterns[] = {
21991 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21992 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21993 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21994 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21995 { OPTION_MASK_ALTIVEC,
21996 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21997 : CODE_FOR_altivec_vmrglb_direct),
21998 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21999 { OPTION_MASK_ALTIVEC,
22000 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22001 : CODE_FOR_altivec_vmrglh_direct),
22002 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22003 { OPTION_MASK_ALTIVEC,
22004 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22005 : CODE_FOR_altivec_vmrglw_direct),
22006 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22007 { OPTION_MASK_ALTIVEC,
22008 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22009 : CODE_FOR_altivec_vmrghb_direct),
22010 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22011 { OPTION_MASK_ALTIVEC,
22012 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22013 : CODE_FOR_altivec_vmrghh_direct),
22014 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22015 { OPTION_MASK_ALTIVEC,
22016 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22017 : CODE_FOR_altivec_vmrghw_direct),
22018 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22019 { OPTION_MASK_P8_VECTOR,
22020 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22021 : CODE_FOR_p8_vmrgow_v4sf_direct),
22022 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22023 { OPTION_MASK_P8_VECTOR,
22024 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22025 : CODE_FOR_p8_vmrgew_v4sf_direct),
22026 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22029 unsigned int i, j, elt, which;
22030 unsigned char perm[16];
22031 rtx x;
22032 bool one_vec;
22034 /* Unpack the constant selector. */
22035 for (i = which = 0; i < 16; ++i)
22037 elt = sel[i] & 31;
22038 which |= (elt < 16 ? 1 : 2);
22039 perm[i] = elt;
22042 /* Simplify the constant selector based on operands. */
22043 switch (which)
22045 default:
22046 gcc_unreachable ();
22048 case 3:
22049 one_vec = false;
22050 if (!rtx_equal_p (op0, op1))
22051 break;
22052 /* FALLTHRU */
22054 case 2:
22055 for (i = 0; i < 16; ++i)
22056 perm[i] &= 15;
22057 op0 = op1;
22058 one_vec = true;
22059 break;
22061 case 1:
22062 op1 = op0;
22063 one_vec = true;
22064 break;
22067 /* Look for splat patterns. */
22068 if (one_vec)
22070 elt = perm[0];
22072 for (i = 0; i < 16; ++i)
22073 if (perm[i] != elt)
22074 break;
22075 if (i == 16)
22077 if (!BYTES_BIG_ENDIAN)
22078 elt = 15 - elt;
22079 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22080 return true;
22083 if (elt % 2 == 0)
22085 for (i = 0; i < 16; i += 2)
22086 if (perm[i] != elt || perm[i + 1] != elt + 1)
22087 break;
22088 if (i == 16)
22090 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22091 x = gen_reg_rtx (V8HImode);
22092 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22093 GEN_INT (field)));
22094 emit_move_insn (target, gen_lowpart (V16QImode, x));
22095 return true;
22099 if (elt % 4 == 0)
22101 for (i = 0; i < 16; i += 4)
22102 if (perm[i] != elt
22103 || perm[i + 1] != elt + 1
22104 || perm[i + 2] != elt + 2
22105 || perm[i + 3] != elt + 3)
22106 break;
22107 if (i == 16)
22109 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22110 x = gen_reg_rtx (V4SImode);
22111 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22112 GEN_INT (field)));
22113 emit_move_insn (target, gen_lowpart (V16QImode, x));
22114 return true;
22119 /* Look for merge and pack patterns. */
22120 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22122 bool swapped;
22124 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22125 continue;
22127 elt = patterns[j].perm[0];
22128 if (perm[0] == elt)
22129 swapped = false;
22130 else if (perm[0] == elt + 16)
22131 swapped = true;
22132 else
22133 continue;
22134 for (i = 1; i < 16; ++i)
22136 elt = patterns[j].perm[i];
22137 if (swapped)
22138 elt = (elt >= 16 ? elt - 16 : elt + 16);
22139 else if (one_vec && elt >= 16)
22140 elt -= 16;
22141 if (perm[i] != elt)
22142 break;
22144 if (i == 16)
22146 enum insn_code icode = patterns[j].impl;
22147 machine_mode omode = insn_data[icode].operand[0].mode;
22148 machine_mode imode = insn_data[icode].operand[1].mode;
22150 /* For little-endian, don't use vpkuwum and vpkuhum if the
22151 underlying vector type is not V4SI and V8HI, respectively.
22152 For example, using vpkuwum with a V8HI picks up the even
22153 halfwords (BE numbering) when the even halfwords (LE
22154 numbering) are what we need. */
22155 if (!BYTES_BIG_ENDIAN
22156 && icode == CODE_FOR_altivec_vpkuwum_direct
22157 && ((REG_P (op0)
22158 && GET_MODE (op0) != V4SImode)
22159 || (SUBREG_P (op0)
22160 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22161 continue;
22162 if (!BYTES_BIG_ENDIAN
22163 && icode == CODE_FOR_altivec_vpkuhum_direct
22164 && ((REG_P (op0)
22165 && GET_MODE (op0) != V8HImode)
22166 || (SUBREG_P (op0)
22167 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22168 continue;
22170 /* For little-endian, the two input operands must be swapped
22171 (or swapped back) to ensure proper right-to-left numbering
22172 from 0 to 2N-1. */
22173 if (swapped ^ !BYTES_BIG_ENDIAN)
22174 std::swap (op0, op1);
22175 if (imode != V16QImode)
22177 op0 = gen_lowpart (imode, op0);
22178 op1 = gen_lowpart (imode, op1);
22180 if (omode == V16QImode)
22181 x = target;
22182 else
22183 x = gen_reg_rtx (omode);
22184 emit_insn (GEN_FCN (icode) (x, op0, op1));
22185 if (omode != V16QImode)
22186 emit_move_insn (target, gen_lowpart (V16QImode, x));
22187 return true;
22191 if (!BYTES_BIG_ENDIAN)
22193 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22194 return true;
22197 return false;
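/* Illustrative selector examples for the matching above (not part of
   the original file): {5,5,5,...} splats byte 5 (BE numbering) via
   vspltb; {2,3,2,3,...} splats halfword 1 via vsplth;
   {0,16,1,17,...,7,23} matches the vmrghb entry of the pattern table;
   anything else falls back to a full vperm.  */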
22200 /* Expand a VSX Permute Doubleword constant permutation.
22201 Return true if we match an efficient implementation. */
22203 static bool
22204 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22205 unsigned char perm0, unsigned char perm1)
22207 rtx x;
22209 /* If both selectors come from the same operand, fold to single op. */
22210 if ((perm0 & 2) == (perm1 & 2))
22212 if (perm0 & 2)
22213 op0 = op1;
22214 else
22215 op1 = op0;
22217 /* If both operands are equal, fold to simpler permutation. */
22218 if (rtx_equal_p (op0, op1))
22220 perm0 = perm0 & 1;
22221 perm1 = (perm1 & 1) + 2;
22223 /* If the first selector comes from the second operand, swap. */
22224 else if (perm0 & 2)
22226 if (perm1 & 2)
22227 return false;
22228 perm0 -= 2;
22229 perm1 += 2;
22230 std::swap (op0, op1);
22232 /* If the second selector does not come from the second operand, fail. */
22233 else if ((perm1 & 2) == 0)
22234 return false;
22236 /* Success! */
22237 if (target != NULL)
22239 machine_mode vmode, dmode;
22240 rtvec v;
22242 vmode = GET_MODE (target);
22243 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22244 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22245 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22246 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22247 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22248 emit_insn (gen_rtx_SET (target, x));
22250 return true;
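/* Example (illustrative, not part of the original file): for V2DF,
   perm0 = 0 and perm1 = 3 select element 0 of op0 and element 1 of op1
   from the four-element concat, a single xxpermdi; perm0 = 2 with
   perm1 = 1 is normalized by swapping op0 and op1 first, so that the
   first selector again indexes the first operand.  */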
22253 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22255 static bool
22256 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22257 rtx op1, const vec_perm_indices &sel)
22259 bool testing_p = !target;
22261 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22262 if (TARGET_ALTIVEC && testing_p)
22263 return true;
22265 /* Check for ps_merge* or xxpermdi insns. */
22266 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22268 if (testing_p)
22270 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22271 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22273 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22274 return true;
22277 if (TARGET_ALTIVEC)
22279 /* Force the target-independent code to lower to V16QImode. */
22280 if (vmode != V16QImode)
22281 return false;
22282 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22283 return true;
22286 return false;
22289 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22290 OP0 and OP1 are the input vectors and TARGET is the output vector.
22291 PERM specifies the constant permutation vector. */
22293 static void
22294 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22295 machine_mode vmode, const vec_perm_builder &perm)
22297 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22298 if (x != target)
22299 emit_move_insn (target, x);
22302 /* Expand an extract even operation. */
22304 void
22305 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22307 machine_mode vmode = GET_MODE (target);
22308 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22309 vec_perm_builder perm (nelt, nelt, 1);
22311 for (i = 0; i < nelt; i++)
22312 perm.quick_push (i * 2);
22314 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22317 /* Expand a vector interleave operation. */
22319 void
22320 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22322 machine_mode vmode = GET_MODE (target);
22323 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22324 vec_perm_builder perm (nelt, nelt, 1);
22326 high = (highp ? 0 : nelt / 2);
22327 for (i = 0; i < nelt / 2; i++)
22329 perm.quick_push (i + high);
22330 perm.quick_push (i + nelt + high);
22333 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
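/* Worked selectors (illustrative, not part of the original file): for
   V4SI (nelt = 4), rs6000_expand_extract_even builds {0, 2, 4, 6},
   while rs6000_expand_interleave builds {0, 4, 1, 5} when HIGHP and
   {2, 6, 3, 7} otherwise.  */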
22336 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22337 void
22338 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22340 HOST_WIDE_INT hwi_scale (scale);
22341 REAL_VALUE_TYPE r_pow;
22342 rtvec v = rtvec_alloc (2);
22343 rtx elt;
22344 rtx scale_vec = gen_reg_rtx (V2DFmode);
22345 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22346 elt = const_double_from_real_value (r_pow, DFmode);
22347 RTVEC_ELT (v, 0) = elt;
22348 RTVEC_ELT (v, 1) = elt;
22349 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22350 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
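/* Example (illustrative, not part of the original file): scale = 3
   computes 2^3 via real_powi, splats {8.0, 8.0} into scale_vec, and
   emits a single V2DF multiply, so tgt = src * 8.0 in each lane.  */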
22353 /* Return an RTX representing where to find the function value of a
22354 function returning MODE. */
22355 static rtx
22356 rs6000_complex_function_value (machine_mode mode)
22358 unsigned int regno;
22359 rtx r1, r2;
22360 machine_mode inner = GET_MODE_INNER (mode);
22361 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22363 if (TARGET_FLOAT128_TYPE
22364 && (mode == KCmode
22365 || (mode == TCmode && TARGET_IEEEQUAD)))
22366 regno = ALTIVEC_ARG_RETURN;
22368 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22369 regno = FP_ARG_RETURN;
22371 else
22373 regno = GP_ARG_RETURN;
22375 /* 32-bit is OK since it'll go in r3/r4. */
22376 if (TARGET_32BIT && inner_bytes >= 4)
22377 return gen_rtx_REG (mode, regno);
22380 if (inner_bytes >= 8)
22381 return gen_rtx_REG (mode, regno);
22383 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22384 const0_rtx);
22385 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22386 GEN_INT (inner_bytes));
22387 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22390 /* Return an rtx describing a return value of MODE as a PARALLEL
22391 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22392 stride REG_STRIDE. */
22394 static rtx
22395 rs6000_parallel_return (machine_mode mode,
22396 int n_elts, machine_mode elt_mode,
22397 unsigned int regno, unsigned int reg_stride)
22399 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22401 int i;
22402 for (i = 0; i < n_elts; i++)
22404 rtx r = gen_rtx_REG (elt_mode, regno);
22405 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22406 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22407 regno += reg_stride;
22410 return par;
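/* Example (illustrative, not part of the original file): splitting a
   DImode return for -m32 -mpowerpc64 calls this with n_elts = 2,
   elt_mode = SImode, regno = GP_ARG_RETURN and stride 1, producing

      (parallel [(expr_list (reg:SI 3) (const_int 0))
                 (expr_list (reg:SI 4) (const_int 4))])  */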
22413 /* Target hook for TARGET_FUNCTION_VALUE.
22415 An integer value is in r3 and a floating-point value is in fp1,
22416 unless -msoft-float. */
22418 static rtx
22419 rs6000_function_value (const_tree valtype,
22420 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22421 bool outgoing ATTRIBUTE_UNUSED)
22423 machine_mode mode;
22424 unsigned int regno;
22425 machine_mode elt_mode;
22426 int n_elts;
22428 /* Special handling for structs in darwin64. */
22429 if (TARGET_MACHO
22430 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22432 CUMULATIVE_ARGS valcum;
22433 rtx valret;
22435 valcum.words = 0;
22436 valcum.fregno = FP_ARG_MIN_REG;
22437 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22438 /* Do a trial code generation as if this were going to be passed as
22439 an argument; if any part goes in memory, we return NULL. */
22440 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22441 if (valret)
22442 return valret;
22443 /* Otherwise fall through to standard ABI rules. */
22446 mode = TYPE_MODE (valtype);
22448 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22449 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22451 int first_reg, n_regs;
22453 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22455 /* _Decimal128 must use even/odd register pairs. */
22456 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22457 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22459 else
22461 first_reg = ALTIVEC_ARG_RETURN;
22462 n_regs = 1;
22465 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22468 /* Some return value types need to be split for the -mpowerpc64, 32-bit ABI. */
22469 if (TARGET_32BIT && TARGET_POWERPC64)
22470 switch (mode)
22472 default:
22473 break;
22474 case E_DImode:
22475 case E_SCmode:
22476 case E_DCmode:
22477 case E_TCmode:
22478 int count = GET_MODE_SIZE (mode) / 4;
22479 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22482 if ((INTEGRAL_TYPE_P (valtype)
22483 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22484 || POINTER_TYPE_P (valtype))
22485 mode = TARGET_32BIT ? SImode : DImode;
22487 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22488 /* _Decimal128 must use an even/odd register pair. */
22489 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22490 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22491 && !FLOAT128_VECTOR_P (mode))
22492 regno = FP_ARG_RETURN;
22493 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22494 && targetm.calls.split_complex_arg)
22495 return rs6000_complex_function_value (mode);
22496 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22497 return register is used in both cases, and we won't see V2DImode/V2DFmode
22498 for pure altivec, combine the two cases. */
22499 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22500 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22501 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22502 regno = ALTIVEC_ARG_RETURN;
22503 else
22504 regno = GP_ARG_RETURN;
22506 return gen_rtx_REG (mode, regno);
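/* Example (illustrative, not part of the original file): under the
   ELFv2 ABI a homogeneous aggregate of three doubles is discovered as
   elt_mode = DFmode, n_elts = 3, and returned via rs6000_parallel_return
   in FP_ARG_RETURN and the two following FPRs, at offsets 0, 8
   and 16.  */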
22509 /* Define how to find the value returned by a library function
22510 assuming the value has mode MODE. */
22511 rtx
22512 rs6000_libcall_value (machine_mode mode)
22514 unsigned int regno;
22516 /* A long long return value needs to be split for the -mpowerpc64, 32-bit ABI. */
22517 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22518 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22520 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22521 /* _Decimal128 must use an even/odd register pair. */
22522 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22523 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22524 regno = FP_ARG_RETURN;
22525 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22526 return register is used in both cases, and we won't see V2DImode/V2DFmode
22527 for pure altivec, combine the two cases. */
22528 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22529 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22530 regno = ALTIVEC_ARG_RETURN;
22531 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22532 return rs6000_complex_function_value (mode);
22533 else
22534 regno = GP_ARG_RETURN;
22536 return gen_rtx_REG (mode, regno);
22539 /* Compute register pressure classes. We implement the target hook to avoid
22540 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22541 lead to incorrect estimates of the number of available registers and therefore
22542 increased register pressure/spill. */
22543 static int
22544 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22546 int n;
22548 n = 0;
22549 pressure_classes[n++] = GENERAL_REGS;
22550 if (TARGET_VSX)
22551 pressure_classes[n++] = VSX_REGS;
22552 else
22554 if (TARGET_ALTIVEC)
22555 pressure_classes[n++] = ALTIVEC_REGS;
22556 if (TARGET_HARD_FLOAT)
22557 pressure_classes[n++] = FLOAT_REGS;
22559 pressure_classes[n++] = CR_REGS;
22560 pressure_classes[n++] = SPECIAL_REGS;
22562 return n;
22565 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22566 Frame pointer elimination is automatically handled.
22568 For the RS/6000, if frame pointer elimination is being done, we would like
22569 to convert ap into fp, not sp.
22571 We need r30 if -mminimal-toc was specified, and there are constant pool
22572 references. */
22574 static bool
22575 rs6000_can_eliminate (const int from, const int to)
22577 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22578 ? ! frame_pointer_needed
22579 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22580 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22581 || constant_pool_empty_p ()
22582 : true);
22585 /* Define the offset between two registers, FROM to be eliminated and its
22586 replacement TO, at the start of a routine. */
22587 HOST_WIDE_INT
22588 rs6000_initial_elimination_offset (int from, int to)
22590 rs6000_stack_t *info = rs6000_stack_info ();
22591 HOST_WIDE_INT offset;
22593 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22594 offset = info->push_p ? 0 : -info->total_size;
22595 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22597 offset = info->push_p ? 0 : -info->total_size;
22598 if (FRAME_GROWS_DOWNWARD)
22599 offset += info->fixed_size + info->vars_size + info->parm_size;
22601 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22602 offset = FRAME_GROWS_DOWNWARD
22603 ? info->fixed_size + info->vars_size + info->parm_size
22604 : 0;
22605 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22606 offset = info->total_size;
22607 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22608 offset = info->push_p ? info->total_size : 0;
22609 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22610 offset = 0;
22611 else
22612 gcc_unreachable ();
22614 return offset;
22617 /* Fill in sizes of registers used by unwinder. */
22619 static void
22620 rs6000_init_dwarf_reg_sizes_extra (tree address)
22622 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22624 int i;
22625 machine_mode mode = TYPE_MODE (char_type_node);
22626 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22627 rtx mem = gen_rtx_MEM (BLKmode, addr);
22628 rtx value = gen_int_mode (16, mode);
22630 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22631 The unwinder still needs to know the size of Altivec registers. */
22633 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22635 int column = DWARF_REG_TO_UNWIND_COLUMN
22636 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22637 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22639 emit_move_insn (adjust_address (mem, mode, offset), value);
22644 /* Map internal gcc register numbers to debug format register numbers.
22645 FORMAT specifies the type of debug register number to use:
22646 0 -- debug information, except for frame-related sections
22647 1 -- DWARF .debug_frame section
22648 2 -- DWARF .eh_frame section */
22650 unsigned int
22651 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22653 /* On some platforms, we use the standard DWARF register
22654 numbering for .debug_info and .debug_frame. */
22655 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22657 #ifdef RS6000_USE_DWARF_NUMBERING
22658 if (regno <= 31)
22659 return regno;
22660 if (FP_REGNO_P (regno))
22661 return regno - FIRST_FPR_REGNO + 32;
22662 if (ALTIVEC_REGNO_P (regno))
22663 return regno - FIRST_ALTIVEC_REGNO + 1124;
22664 if (regno == LR_REGNO)
22665 return 108;
22666 if (regno == CTR_REGNO)
22667 return 109;
22668 if (regno == CA_REGNO)
22669 return 101; /* XER */
22670 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22671 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22672 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22673 to the DWARF reg for CR. */
22674 if (format == 1 && regno == CR2_REGNO)
22675 return 64;
22676 if (CR_REGNO_P (regno))
22677 return regno - CR0_REGNO + 86;
22678 if (regno == VRSAVE_REGNO)
22679 return 356;
22680 if (regno == VSCR_REGNO)
22681 return 67;
22683 /* These do not make much sense. */
22684 if (regno == FRAME_POINTER_REGNUM)
22685 return 111;
22686 if (regno == ARG_POINTER_REGNUM)
22687 return 67;
22688 if (regno == 64)
22689 return 100;
22691 gcc_unreachable ();
22692 #endif
22695 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22696 information, and also for .eh_frame. */
22697 /* Translate the regnos to their numbers in GCC 7 (and before). */
22698 if (regno <= 31)
22699 return regno;
22700 if (FP_REGNO_P (regno))
22701 return regno - FIRST_FPR_REGNO + 32;
22702 if (ALTIVEC_REGNO_P (regno))
22703 return regno - FIRST_ALTIVEC_REGNO + 77;
22704 if (regno == LR_REGNO)
22705 return 65;
22706 if (regno == CTR_REGNO)
22707 return 66;
22708 if (regno == CA_REGNO)
22709 return 76; /* XER */
22710 if (CR_REGNO_P (regno))
22711 return regno - CR0_REGNO + 68;
22712 if (regno == VRSAVE_REGNO)
22713 return 109;
22714 if (regno == VSCR_REGNO)
22715 return 110;
22717 if (regno == FRAME_POINTER_REGNUM)
22718 return 111;
22719 if (regno == ARG_POINTER_REGNUM)
22720 return 67;
22721 if (regno == 64)
22722 return 64;
22724 gcc_unreachable ();
22727 /* target hook eh_return_filter_mode */
22728 static scalar_int_mode
22729 rs6000_eh_return_filter_mode (void)
22731 return TARGET_32BIT ? SImode : word_mode;
22734 /* Target hook for translate_mode_attribute. */
22735 static machine_mode
22736 rs6000_translate_mode_attribute (machine_mode mode)
22738 if ((FLOAT128_IEEE_P (mode)
22739 && ieee128_float_type_node == long_double_type_node)
22740 || (FLOAT128_IBM_P (mode)
22741 && ibm128_float_type_node == long_double_type_node))
22742 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22743 return mode;
22746 /* Target hook for scalar_mode_supported_p. */
22747 static bool
22748 rs6000_scalar_mode_supported_p (scalar_mode mode)
22750 /* -m32 does not support TImode. This is the default, from
22751 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22752 same ABI as for -m32. But default_scalar_mode_supported_p allows
22753 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22754 for -mpowerpc64. */
22755 if (TARGET_32BIT && mode == TImode)
22756 return false;
22758 if (DECIMAL_FLOAT_MODE_P (mode))
22759 return default_decimal_float_supported_p ();
22760 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22761 return true;
22762 else
22763 return default_scalar_mode_supported_p (mode);
22766 /* Target hook for vector_mode_supported_p. */
22767 static bool
22768 rs6000_vector_mode_supported_p (machine_mode mode)
22770 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22771 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22772 double-double. */
22773 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22774 return true;
22776 else
22777 return false;
22780 /* Target hook for floatn_mode. */
22781 static opt_scalar_float_mode
22782 rs6000_floatn_mode (int n, bool extended)
22784 if (extended)
22786 switch (n)
22788 case 32:
22789 return DFmode;
22791 case 64:
22792 if (TARGET_FLOAT128_TYPE)
22793 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22794 else
22795 return opt_scalar_float_mode ();
22797 case 128:
22798 return opt_scalar_float_mode ();
22800 default:
22801 /* Those are the only valid _FloatNx types. */
22802 gcc_unreachable ();
22805 else
22807 switch (n)
22809 case 32:
22810 return SFmode;
22812 case 64:
22813 return DFmode;
22815 case 128:
22816 if (TARGET_FLOAT128_TYPE)
22817 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22818 else
22819 return opt_scalar_float_mode ();
22821 default:
22822 return opt_scalar_float_mode ();
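/* Examples (illustrative, not part of the original file): with
   -mfloat128, _Float128 maps to KFmode unless long double is already
   IEEE 128-bit, in which case TFmode is used; _Float64x maps to the
   same 128-bit mode; _Float32x is DFmode; and _Float128x is rejected
   (the extended case 128 above returns no mode).  */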
22828 /* Target hook for c_mode_for_suffix. */
22829 static machine_mode
22830 rs6000_c_mode_for_suffix (char suffix)
22832 if (TARGET_FLOAT128_TYPE)
22834 if (suffix == 'q' || suffix == 'Q')
22835 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22837 /* At the moment, we are not defining a suffix for IBM extended double.
22838 If/when the default for -mabi=ieeelongdouble is changed, and we want
22839 to support __ibm128 constants in legacy library code, we may need to
22840 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22841 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22842 __float80 constants. */
22845 return VOIDmode;
22848 /* Target hook for invalid_arg_for_unprototyped_fn. */
22849 static const char *
22850 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22852 return (!rs6000_darwin64_abi
22853 && typelist == 0
22854 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22855 && (funcdecl == NULL_TREE
22856 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22857 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22858 ? N_("AltiVec argument passed to unprototyped function")
22859 : NULL;
22862 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22863 setup by using __stack_chk_fail_local hidden function instead of
22864 calling __stack_chk_fail directly. Otherwise it is better to call
22865 __stack_chk_fail directly. */
22867 static tree ATTRIBUTE_UNUSED
22868 rs6000_stack_protect_fail (void)
22870 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22871 ? default_hidden_stack_protect_fail ()
22872 : default_external_stack_protect_fail ();
22875 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22877 #if TARGET_ELF
22878 static unsigned HOST_WIDE_INT
22879 rs6000_asan_shadow_offset (void)
22881 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22883 #endif
22885 /* Mask options that we want to support inside of attribute((target)) and
22886 #pragma GCC target operations. Note, we do not include things like
22887 64/32-bit, endianness, hard/soft floating point, etc. that would have
22888 different calling sequences. */
22890 struct rs6000_opt_mask {
22891 const char *name; /* option name */
22892 HOST_WIDE_INT mask; /* mask to set */
22893 bool invert; /* invert sense of mask */
22894 bool valid_target; /* option is a target option */
22897 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22899 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22900 { "cmpb", OPTION_MASK_CMPB, false, true },
22901 { "crypto", OPTION_MASK_CRYPTO, false, true },
22902 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22903 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22904 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22905 false, true },
22906 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22907 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22908 { "fprnd", OPTION_MASK_FPRND, false, true },
22909 { "future", OPTION_MASK_FUTURE, false, true },
22910 { "hard-dfp", OPTION_MASK_DFP, false, true },
22911 { "htm", OPTION_MASK_HTM, false, true },
22912 { "isel", OPTION_MASK_ISEL, false, true },
22913 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22914 { "mfpgpr", 0, false, true },
22915 { "modulo", OPTION_MASK_MODULO, false, true },
22916 { "mulhw", OPTION_MASK_MULHW, false, true },
22917 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22918 { "pcrel", OPTION_MASK_PCREL, false, true },
22919 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22920 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22921 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22922 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22923 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22924 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22925 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22926 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22927 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22928 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22929 { "prefixed", OPTION_MASK_PREFIXED, false, true },
22930 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22931 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22932 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22933 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22934 { "string", 0, false, true },
22935 { "update", OPTION_MASK_NO_UPDATE, true , true },
22936 { "vsx", OPTION_MASK_VSX, false, true },
22937 #ifdef OPTION_MASK_64BIT
22938 #if TARGET_AIX_OS
22939 { "aix64", OPTION_MASK_64BIT, false, false },
22940 { "aix32", OPTION_MASK_64BIT, true, false },
22941 #else
22942 { "64", OPTION_MASK_64BIT, false, false },
22943 { "32", OPTION_MASK_64BIT, true, false },
22944 #endif
22945 #endif
22946 #ifdef OPTION_MASK_EABI
22947 { "eabi", OPTION_MASK_EABI, false, false },
22948 #endif
22949 #ifdef OPTION_MASK_LITTLE_ENDIAN
22950 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22951 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22952 #endif
22953 #ifdef OPTION_MASK_RELOCATABLE
22954 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22955 #endif
22956 #ifdef OPTION_MASK_STRICT_ALIGN
22957 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22958 #endif
22959 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22960 { "string", 0, false, false },
22963 /* Builtin mask mapping for printing the flags. */
22964 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22966 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22967 { "vsx", RS6000_BTM_VSX, false, false },
22968 { "fre", RS6000_BTM_FRE, false, false },
22969 { "fres", RS6000_BTM_FRES, false, false },
22970 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22971 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22972 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22973 { "cell", RS6000_BTM_CELL, false, false },
22974 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22975 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22976 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22977 { "crypto", RS6000_BTM_CRYPTO, false, false },
22978 { "htm", RS6000_BTM_HTM, false, false },
22979 { "hard-dfp", RS6000_BTM_DFP, false, false },
22980 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22981 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22982 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22983 { "float128", RS6000_BTM_FLOAT128, false, false },
22984 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22987 /* Option variables that we want to support inside attribute((target)) and
22988 #pragma GCC target operations. */
22990 struct rs6000_opt_var {
22991 const char *name; /* option name */
22992 size_t global_offset; /* offset of the option in global_options. */
22993 size_t target_offset; /* offset of the option in target options. */
22996 static struct rs6000_opt_var const rs6000_opt_vars[] =
22998 { "friz",
22999 offsetof (struct gcc_options, x_TARGET_FRIZ),
23000 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
23001 { "avoid-indexed-addresses",
23002 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
23003 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
23004 { "longcall",
23005 offsetof (struct gcc_options, x_rs6000_default_long_calls),
23006 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
23007 { "optimize-swaps",
23008 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
23009 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
23010 { "allow-movmisalign",
23011 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
23012 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
23013 { "sched-groups",
23014 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
23015 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
23016 { "always-hint",
23017 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
23018 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
23019 { "align-branch-targets",
23020 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
23021 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
23022 { "sched-prolog",
23023 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23024 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23025 { "sched-epilog",
23026 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
23027 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
23028 { "speculate-indirect-jumps",
23029 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
23030 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
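/* These variables accept the same "no-" prefix (an illustrative example,
   not code from this file):

       __attribute__((__target__("no-longcall,avoid-indexed-addresses")))
       void g (void);
*/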
23033 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
23034 parsing. Return true if there were no errors. */
23036 static bool
23037 rs6000_inner_target_options (tree args, bool attr_p)
23039 bool ret = true;
23041 if (args == NULL_TREE)
23044 else if (TREE_CODE (args) == STRING_CST)
23046 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23047 char *q;
23049 while ((q = strtok (p, ",")) != NULL)
23051 bool error_p = false;
23052 bool not_valid_p = false;
23053 const char *cpu_opt = NULL;
23055 p = NULL;
23056 if (strncmp (q, "cpu=", 4) == 0)
23058 int cpu_index = rs6000_cpu_name_lookup (q+4);
23059 if (cpu_index >= 0)
23060 rs6000_cpu_index = cpu_index;
23061 else
23063 error_p = true;
23064 cpu_opt = q+4;
23067 else if (strncmp (q, "tune=", 5) == 0)
23069 int tune_index = rs6000_cpu_name_lookup (q+5);
23070 if (tune_index >= 0)
23071 rs6000_tune_index = tune_index;
23072 else
23074 error_p = true;
23075 cpu_opt = q+5;
23078 else
23080 size_t i;
23081 bool invert = false;
23082 char *r = q;
23084 error_p = true;
23085 if (strncmp (r, "no-", 3) == 0)
23087 invert = true;
23088 r += 3;
23091 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23092 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23094 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23096 if (!rs6000_opt_masks[i].valid_target)
23097 not_valid_p = true;
23098 else
23100 error_p = false;
23101 rs6000_isa_flags_explicit |= mask;
23103 /* VSX needs altivec, so -mvsx automagically sets
23104 altivec and disables -mavoid-indexed-addresses. */
23105 if (!invert)
23107 if (mask == OPTION_MASK_VSX)
23109 mask |= OPTION_MASK_ALTIVEC;
23110 TARGET_AVOID_XFORM = 0;
23114 if (rs6000_opt_masks[i].invert)
23115 invert = !invert;
23117 if (invert)
23118 rs6000_isa_flags &= ~mask;
23119 else
23120 rs6000_isa_flags |= mask;
23122 break;
23125 if (error_p && !not_valid_p)
23127 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23128 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23130 size_t j = rs6000_opt_vars[i].global_offset;
23131 *((int *) ((char *)&global_options + j)) = !invert;
23132 error_p = false;
23133 not_valid_p = false;
23134 break;
23139 if (error_p)
23141 const char *eprefix, *esuffix;
23143 ret = false;
23144 if (attr_p)
23146 eprefix = "__attribute__((__target__(";
23147 esuffix = ")))";
23149 else
23151 eprefix = "#pragma GCC target ";
23152 esuffix = "";
23155 if (cpu_opt)
23156 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23157 q, esuffix);
23158 else if (not_valid_p)
23159 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23160 else
23161 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23166 else if (TREE_CODE (args) == TREE_LIST)
23170 tree value = TREE_VALUE (args);
23171 if (value)
23173 bool ret2 = rs6000_inner_target_options (value, attr_p);
23174 if (!ret2)
23175 ret = false;
23177 args = TREE_CHAIN (args);
23179 while (args != NULL_TREE);
23182 else
23184 error ("attribute %<target%> argument not a string");
23185 return false;
23188 return ret;
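/* For instance (illustrative): parsing "cpu=power9,no-vsx" above sets
   rs6000_cpu_index from the CPU name table, clears OPTION_MASK_VSX in
   rs6000_isa_flags, and records the VSX bit in rs6000_isa_flags_explicit,
   while an unknown name such as "no-frobnicate" (hypothetical) falls
   through to the error reporting at the end of the loop.  */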
23191 /* Print out the target options as a list for -mdebug=target. */
23193 static void
23194 rs6000_debug_target_options (tree args, const char *prefix)
23196 if (args == NULL_TREE)
23197 fprintf (stderr, "%s<NULL>", prefix);
23199 else if (TREE_CODE (args) == STRING_CST)
23201 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23202 char *q;
23204 while ((q = strtok (p, ",")) != NULL)
23206 p = NULL;
23207 fprintf (stderr, "%s\"%s\"", prefix, q);
23208 prefix = ", ";
23212 else if (TREE_CODE (args) == TREE_LIST)
23216 tree value = TREE_VALUE (args);
23217 if (value)
23219 rs6000_debug_target_options (value, prefix);
23220 prefix = ", ";
23222 args = TREE_CHAIN (args);
23224 while (args != NULL_TREE);
23227 else
23228 gcc_unreachable ();
23230 return;
23234 /* Hook to validate attribute((target("..."))). */
23236 static bool
23237 rs6000_valid_attribute_p (tree fndecl,
23238 tree ARG_UNUSED (name),
23239 tree args,
23240 int flags)
23242 struct cl_target_option cur_target;
23243 bool ret;
23244 tree old_optimize;
23245 tree new_target, new_optimize;
23246 tree func_optimize;
23248 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23250 if (TARGET_DEBUG_TARGET)
23252 tree tname = DECL_NAME (fndecl);
23253 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23254 if (tname)
23255 fprintf (stderr, "function: %.*s\n",
23256 (int) IDENTIFIER_LENGTH (tname),
23257 IDENTIFIER_POINTER (tname));
23258 else
23259 fprintf (stderr, "function: unknown\n");
23261 fprintf (stderr, "args:");
23262 rs6000_debug_target_options (args, " ");
23263 fprintf (stderr, "\n");
23265 if (flags)
23266 fprintf (stderr, "flags: 0x%x\n", flags);
23268 fprintf (stderr, "--------------------\n");
23271 /* attribute((target("default"))) does nothing, beyond
23272 affecting multi-versioning. */
23273 if (TREE_VALUE (args)
23274 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23275 && TREE_CHAIN (args) == NULL_TREE
23276 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23277 return true;
23279 old_optimize = build_optimization_node (&global_options);
23280 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23282 /* If the function changed the optimization levels as well as setting target
23283 options, start with the optimizations specified. */
23284 if (func_optimize && func_optimize != old_optimize)
23285 cl_optimization_restore (&global_options,
23286 TREE_OPTIMIZATION (func_optimize));
23288 /* The target attributes may also change some optimization flags, so update
23289 the optimization options if necessary. */
23290 cl_target_option_save (&cur_target, &global_options);
23291 rs6000_cpu_index = rs6000_tune_index = -1;
23292 ret = rs6000_inner_target_options (args, true);
23294 /* Set up any additional state. */
23295 if (ret)
23297 ret = rs6000_option_override_internal (false);
23298 new_target = build_target_option_node (&global_options);
23300 else
23301 new_target = NULL;
23303 new_optimize = build_optimization_node (&global_options);
23305 if (!new_target)
23306 ret = false;
23308 else if (fndecl)
23310 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23312 if (old_optimize != new_optimize)
23313 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23316 cl_target_option_restore (&global_options, &cur_target);
23318 if (old_optimize != new_optimize)
23319 cl_optimization_restore (&global_options,
23320 TREE_OPTIMIZATION (old_optimize));
23322 return ret;
23326 /* Hook to validate the current #pragma GCC target and set the state, and
23327 update the macros based on what was changed. If ARGS is NULL, then
23328 POP_TARGET is used to reset the options. */
23330 bool
23331 rs6000_pragma_target_parse (tree args, tree pop_target)
23333 tree prev_tree = build_target_option_node (&global_options);
23334 tree cur_tree;
23335 struct cl_target_option *prev_opt, *cur_opt;
23336 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23337 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23339 if (TARGET_DEBUG_TARGET)
23341 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23342 fprintf (stderr, "args:");
23343 rs6000_debug_target_options (args, " ");
23344 fprintf (stderr, "\n");
23346 if (pop_target)
23348 fprintf (stderr, "pop_target:\n");
23349 debug_tree (pop_target);
23351 else
23352 fprintf (stderr, "pop_target: <NULL>\n");
23354 fprintf (stderr, "--------------------\n");
23357 if (! args)
23359 cur_tree = ((pop_target)
23360 ? pop_target
23361 : target_option_default_node);
23362 cl_target_option_restore (&global_options,
23363 TREE_TARGET_OPTION (cur_tree));
23365 else
23367 rs6000_cpu_index = rs6000_tune_index = -1;
23368 if (!rs6000_inner_target_options (args, false)
23369 || !rs6000_option_override_internal (false)
23370 || (cur_tree = build_target_option_node (&global_options))
23371 == NULL_TREE)
23373 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23374 fprintf (stderr, "invalid pragma\n");
23376 return false;
23380 target_option_current_node = cur_tree;
23381 rs6000_activate_target_options (target_option_current_node);
23383 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23384 change the macros that are defined. */
23385 if (rs6000_target_modify_macros_ptr)
23387 prev_opt = TREE_TARGET_OPTION (prev_tree);
23388 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23389 prev_flags = prev_opt->x_rs6000_isa_flags;
23391 cur_opt = TREE_TARGET_OPTION (cur_tree);
23392 cur_flags = cur_opt->x_rs6000_isa_flags;
23393 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23395 diff_bumask = (prev_bumask ^ cur_bumask);
23396 diff_flags = (prev_flags ^ cur_flags);
23398 if ((diff_flags != 0) || (diff_bumask != 0))
23400 /* Delete old macros. */
23401 rs6000_target_modify_macros_ptr (false,
23402 prev_flags & diff_flags,
23403 prev_bumask & diff_bumask);
23405 /* Define new macros. */
23406 rs6000_target_modify_macros_ptr (true,
23407 cur_flags & diff_flags,
23408 cur_bumask & diff_bumask);
23412 return true;
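/* Typical use from C code (an illustrative sketch):

       #pragma GCC push_options
       #pragma GCC target ("power9-vector")
       void hot_loop (void);
       #pragma GCC pop_options

   The pop reaches this function with ARGS == NULL and POP_TARGET set to
   the options node saved by the push.  */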
23416 /* Remember the last target of rs6000_set_current_function. */
23417 static GTY(()) tree rs6000_previous_fndecl;
23419 /* Restore target's globals from NEW_TREE and invalidate the
23420 rs6000_previous_fndecl cache. */
23422 void
23423 rs6000_activate_target_options (tree new_tree)
23425 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23426 if (TREE_TARGET_GLOBALS (new_tree))
23427 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23428 else if (new_tree == target_option_default_node)
23429 restore_target_globals (&default_target_globals);
23430 else
23431 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23432 rs6000_previous_fndecl = NULL_TREE;
23435 /* Establish appropriate back-end context for processing the function
23436 FNDECL. The argument might be NULL to indicate processing at top
23437 level, outside of any function scope. */
23438 static void
23439 rs6000_set_current_function (tree fndecl)
23441 if (TARGET_DEBUG_TARGET)
23443 fprintf (stderr, "\n==================== rs6000_set_current_function");
23445 if (fndecl)
23446 fprintf (stderr, ", fndecl %s (%p)",
23447 (DECL_NAME (fndecl)
23448 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23449 : "<unknown>"), (void *)fndecl);
23451 if (rs6000_previous_fndecl)
23452 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23454 fprintf (stderr, "\n");
23457 /* Only change the context if the function changes. This hook is called
23458 several times in the course of compiling a function, and we don't want to
23459 slow things down too much or call target_reinit when it isn't safe. */
23460 if (fndecl == rs6000_previous_fndecl)
23461 return;
23463 tree old_tree;
23464 if (rs6000_previous_fndecl == NULL_TREE)
23465 old_tree = target_option_current_node;
23466 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23467 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23468 else
23469 old_tree = target_option_default_node;
23471 tree new_tree;
23472 if (fndecl == NULL_TREE)
23474 if (old_tree != target_option_current_node)
23475 new_tree = target_option_current_node;
23476 else
23477 new_tree = NULL_TREE;
23479 else
23481 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23482 if (new_tree == NULL_TREE)
23483 new_tree = target_option_default_node;
23486 if (TARGET_DEBUG_TARGET)
23488 if (new_tree)
23490 fprintf (stderr, "\nnew fndecl target specific options:\n");
23491 debug_tree (new_tree);
23494 if (old_tree)
23496 fprintf (stderr, "\nold fndecl target specific options:\n");
23497 debug_tree (old_tree);
23500 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23501 fprintf (stderr, "--------------------\n");
23504 if (new_tree && old_tree != new_tree)
23505 rs6000_activate_target_options (new_tree);
23507 if (fndecl)
23508 rs6000_previous_fndecl = fndecl;
23512 /* Save the current options */
23514 static void
23515 rs6000_function_specific_save (struct cl_target_option *ptr,
23516 struct gcc_options *opts)
23518 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23519 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23522 /* Restore the current options */
23524 static void
23525 rs6000_function_specific_restore (struct gcc_options *opts,
23526 struct cl_target_option *ptr)
23529 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23530 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23531 (void) rs6000_option_override_internal (false);
23534 /* Print the current options */
23536 static void
23537 rs6000_function_specific_print (FILE *file, int indent,
23538 struct cl_target_option *ptr)
23540 rs6000_print_isa_options (file, indent, "Isa options set",
23541 ptr->x_rs6000_isa_flags);
23543 rs6000_print_isa_options (file, indent, "Isa options explicit",
23544 ptr->x_rs6000_isa_flags_explicit);
23547 /* Helper function to print the current isa or misc options on a line. */
23549 static void
23550 rs6000_print_options_internal (FILE *file,
23551 int indent,
23552 const char *string,
23553 HOST_WIDE_INT flags,
23554 const char *prefix,
23555 const struct rs6000_opt_mask *opts,
23556 size_t num_elements)
23558 size_t i;
23559 size_t start_column = 0;
23560 size_t cur_column;
23561 size_t max_column = 120;
23562 size_t prefix_len = strlen (prefix);
23563 size_t comma_len = 0;
23564 const char *comma = "";
23566 if (indent)
23567 start_column += fprintf (file, "%*s", indent, "");
23569 if (!flags)
23571 fprintf (file, DEBUG_FMT_S, string, "<none>");
23572 return;
23575 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23577 /* Print the various mask options. */
23578 cur_column = start_column;
23579 for (i = 0; i < num_elements; i++)
23581 bool invert = opts[i].invert;
23582 const char *name = opts[i].name;
23583 const char *no_str = "";
23584 HOST_WIDE_INT mask = opts[i].mask;
23585 size_t len = comma_len + prefix_len + strlen (name);
23587 if (!invert)
23589 if ((flags & mask) == 0)
23591 no_str = "no-";
23592 len += strlen ("no-");
23595 flags &= ~mask;
23598 else
23600 if ((flags & mask) != 0)
23602 no_str = "no-";
23603 len += strlen ("no-");
23606 flags |= mask;
23609 cur_column += len;
23610 if (cur_column > max_column)
23612 fprintf (file, ", \\\n%*s", (int)start_column, "");
23613 cur_column = start_column + len;
23614 comma = "";
23617 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23618 comma = ", ";
23619 comma_len = strlen (", ");
23622 fputs ("\n", file);
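/* Sample -mdebug=target output produced by this helper (illustrative and
   abbreviated; the exact flag values vary):

       Isa options set: 0x...  -maltivec, -mvsx, -mno-htm, ...

   Lines longer than MAX_COLUMN are continued with a trailing backslash
   and re-indented to START_COLUMN.  */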
23625 /* Helper function to print the current isa options on a line. */
23627 static void
23628 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23629 HOST_WIDE_INT flags)
23631 rs6000_print_options_internal (file, indent, string, flags, "-m",
23632 &rs6000_opt_masks[0],
23633 ARRAY_SIZE (rs6000_opt_masks));
23636 static void
23637 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23638 HOST_WIDE_INT flags)
23640 rs6000_print_options_internal (file, indent, string, flags, "",
23641 &rs6000_builtin_mask_names[0],
23642 ARRAY_SIZE (rs6000_builtin_mask_names));
23645 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23646 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23647 -mupper-regs-df, etc.).
23649 If the user used -mno-power8-vector, we need to turn off all of the implicit
23650 ISA 2.07 and 3.0 options that relate to the vector unit.
23652 If the user used -mno-power9-vector, we need to turn off all of the implicit
23653 ISA 3.0 options that relate to the vector unit.
23655 This function does not handle explicit options such as the user specifying
23656 -mdirect-move. These are handled in rs6000_option_override_internal, and
23657 the appropriate error is given if needed.
23659 We return a mask of all of the implicit options that should not be enabled
23660 by default. */
23662 static HOST_WIDE_INT
23663 rs6000_disable_incompatible_switches (void)
23665 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23666 size_t i, j;
23668 static const struct {
23669 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23670 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23671 const char *const name; /* name of the switch. */
23672 } flags[] = {
23673 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23674 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23675 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23676 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23677 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
23680 for (i = 0; i < ARRAY_SIZE (flags); i++)
23682 HOST_WIDE_INT no_flag = flags[i].no_flag;
23684 if ((rs6000_isa_flags & no_flag) == 0
23685 && (rs6000_isa_flags_explicit & no_flag) != 0)
23687 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23688 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23689 & rs6000_isa_flags
23690 & dep_flags);
23692 if (set_flags)
23694 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23695 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23697 set_flags &= ~rs6000_opt_masks[j].mask;
23698 error ("%<-mno-%s%> turns off %<-m%s%>",
23699 flags[i].name,
23700 rs6000_opt_masks[j].name);
23703 gcc_assert (!set_flags);
23706 rs6000_isa_flags &= ~dep_flags;
23707 ignore_masks |= no_flag | dep_flags;
23711 return ignore_masks;
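/* For example (illustrative): "-mpower9-vector -mno-vsx" on the command
   line reaches the error above as

       error: '-mno-vsx' turns off '-mpower9-vector'

   whereas plain "-mno-vsx" just silently masks the dependent implicit
   options.  */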
23715 /* Helper function for printing the function name when debugging. */
23717 static const char *
23718 get_decl_name (tree fn)
23720 tree name;
23722 if (!fn)
23723 return "<null>";
23725 name = DECL_NAME (fn);
23726 if (!name)
23727 return "<no-name>";
23729 return IDENTIFIER_POINTER (name);
23732 /* Return the clone id of the target we are compiling code for in a target
23733 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23734 the priority list for the target clones (ordered from lowest to
23735 highest). */
23737 static int
23738 rs6000_clone_priority (tree fndecl)
23740 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23741 HOST_WIDE_INT isa_masks;
23742 int ret = CLONE_DEFAULT;
23743 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23744 const char *attrs_str = NULL;
23746 attrs = TREE_VALUE (TREE_VALUE (attrs));
23747 attrs_str = TREE_STRING_POINTER (attrs);
23749 /* Return priority zero for default function. Return the ISA needed for the
23750 function if it is not the default. */
23751 if (strcmp (attrs_str, "default") != 0)
23753 if (fn_opts == NULL_TREE)
23754 fn_opts = target_option_default_node;
23756 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23757 isa_masks = rs6000_isa_flags;
23758 else
23759 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23761 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23762 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23763 break;
23766 if (TARGET_DEBUG_TARGET)
23767 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
23768 get_decl_name (fndecl), ret);
23770 return ret;
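/* Clone priorities come from declarations such as (hypothetical user
   code, for illustration):

       __attribute__((target_clones("cpu=power9,default")))
       long dot (const long *a, const long *b, long n);

   The "default" clone gets CLONE_DEFAULT (0); the power9 clone gets the
   higher index assigned to it in rs6000_clone_map.  */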
23773 /* This compares the priority of target features in function DECL1 and DECL2.
23774 It returns a positive value if DECL1 is higher priority, a negative value
23775 if DECL2 is higher priority, and 0 if they are the same. Note, priorities are
23776 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23778 static int
23779 rs6000_compare_version_priority (tree decl1, tree decl2)
23781 int priority1 = rs6000_clone_priority (decl1);
23782 int priority2 = rs6000_clone_priority (decl2);
23783 int ret = priority1 - priority2;
23785 if (TARGET_DEBUG_TARGET)
23786 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23787 get_decl_name (decl1), get_decl_name (decl2), ret);
23789 return ret;
23792 /* Make a dispatcher declaration for the multi-versioned function DECL.
23793 Calls to DECL function will be replaced with calls to the dispatcher
23794 by the front-end. Returns the decl of the dispatcher function. */
23796 static tree
23797 rs6000_get_function_versions_dispatcher (void *decl)
23799 tree fn = (tree) decl;
23800 struct cgraph_node *node = NULL;
23801 struct cgraph_node *default_node = NULL;
23802 struct cgraph_function_version_info *node_v = NULL;
23803 struct cgraph_function_version_info *first_v = NULL;
23805 tree dispatch_decl = NULL;
23807 struct cgraph_function_version_info *default_version_info = NULL;
23808 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23810 if (TARGET_DEBUG_TARGET)
23811 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23812 get_decl_name (fn));
23814 node = cgraph_node::get (fn);
23815 gcc_assert (node != NULL);
23817 node_v = node->function_version ();
23818 gcc_assert (node_v != NULL);
23820 if (node_v->dispatcher_resolver != NULL)
23821 return node_v->dispatcher_resolver;
23823 /* Find the default version and make it the first node. */
23824 first_v = node_v;
23825 /* Go to the beginning of the chain. */
23826 while (first_v->prev != NULL)
23827 first_v = first_v->prev;
23829 default_version_info = first_v;
23830 while (default_version_info != NULL)
23832 const tree decl2 = default_version_info->this_node->decl;
23833 if (is_function_default_version (decl2))
23834 break;
23835 default_version_info = default_version_info->next;
23838 /* If there is no default node, just return NULL. */
23839 if (default_version_info == NULL)
23840 return NULL;
23842 /* Make default info the first node. */
23843 if (first_v != default_version_info)
23845 default_version_info->prev->next = default_version_info->next;
23846 if (default_version_info->next)
23847 default_version_info->next->prev = default_version_info->prev;
23848 first_v->prev = default_version_info;
23849 default_version_info->next = first_v;
23850 default_version_info->prev = NULL;
23853 default_node = default_version_info->this_node;
23855 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23856 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23857 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23858 "exports hardware capability bits");
23859 #else
23861 if (targetm.has_ifunc_p ())
23863 struct cgraph_function_version_info *it_v = NULL;
23864 struct cgraph_node *dispatcher_node = NULL;
23865 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23867 /* Right now, the dispatching is done via ifunc. */
23868 dispatch_decl = make_dispatcher_decl (default_node->decl);
23870 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23871 gcc_assert (dispatcher_node != NULL);
23872 dispatcher_node->dispatcher_function = 1;
23873 dispatcher_version_info
23874 = dispatcher_node->insert_new_function_version ();
23875 dispatcher_version_info->next = default_version_info;
23876 dispatcher_node->definition = 1;
23878 /* Set the dispatcher for all the versions. */
23879 it_v = default_version_info;
23880 while (it_v != NULL)
23882 it_v->dispatcher_resolver = dispatch_decl;
23883 it_v = it_v->next;
23886 else
23888 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23889 "multiversioning needs ifunc which is not supported "
23890 "on this target");
23892 #endif
23894 return dispatch_decl;
23897 /* Make the resolver function decl to dispatch the versions of a multi-
23898 versioned function, DEFAULT_DECL. Create an empty basic block in the
23899 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23900 function. */
23902 static tree
23903 make_resolver_func (const tree default_decl,
23904 const tree dispatch_decl,
23905 basic_block *empty_bb)
23907 /* Make the resolver function static. The resolver function returns
23908 void *. */
23909 tree decl_name = clone_function_name (default_decl, "resolver");
23910 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23911 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23912 tree decl = build_fn_decl (resolver_name, type);
23913 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23915 DECL_NAME (decl) = decl_name;
23916 TREE_USED (decl) = 1;
23917 DECL_ARTIFICIAL (decl) = 1;
23918 DECL_IGNORED_P (decl) = 0;
23919 TREE_PUBLIC (decl) = 0;
23920 DECL_UNINLINABLE (decl) = 1;
23922 /* Resolver is not external, body is generated. */
23923 DECL_EXTERNAL (decl) = 0;
23924 DECL_EXTERNAL (dispatch_decl) = 0;
23926 DECL_CONTEXT (decl) = NULL_TREE;
23927 DECL_INITIAL (decl) = make_node (BLOCK);
23928 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23930 if (DECL_COMDAT_GROUP (default_decl)
23931 || TREE_PUBLIC (default_decl))
23933 /* In this case, each translation unit with a call to this
23934 versioned function will put out a resolver. Ensure it
23935 is comdat to keep just one copy. */
23936 DECL_COMDAT (decl) = 1;
23937 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
23939 else
23940 TREE_PUBLIC (dispatch_decl) = 0;
23942 /* Build result decl and add to function_decl. */
23943 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23944 DECL_CONTEXT (t) = decl;
23945 DECL_ARTIFICIAL (t) = 1;
23946 DECL_IGNORED_P (t) = 1;
23947 DECL_RESULT (decl) = t;
23949 gimplify_function_tree (decl);
23950 push_cfun (DECL_STRUCT_FUNCTION (decl));
23951 *empty_bb = init_lowered_empty_function (decl, false,
23952 profile_count::uninitialized ());
23954 cgraph_node::add_new_function (decl, true);
23955 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23957 pop_cfun ();
23959 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23960 DECL_ATTRIBUTES (dispatch_decl)
23961 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23963 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23965 return decl;
23968 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23969 return a pointer to VERSION_DECL if we are running on a machine that
23970 supports the index CLONE_ISA hardware architecture bits. This function will
23971 be called during version dispatch to decide which function version to
23972 execute. It returns the basic block at the end, to which more conditions
23973 can be added. */
23975 static basic_block
23976 add_condition_to_bb (tree function_decl, tree version_decl,
23977 int clone_isa, basic_block new_bb)
23979 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23981 gcc_assert (new_bb != NULL);
23982 gimple_seq gseq = bb_seq (new_bb);
23985 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23986 build_fold_addr_expr (version_decl));
23987 tree result_var = create_tmp_var (ptr_type_node);
23988 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23989 gimple *return_stmt = gimple_build_return (result_var);
23991 if (clone_isa == CLONE_DEFAULT)
23993 gimple_seq_add_stmt (&gseq, convert_stmt);
23994 gimple_seq_add_stmt (&gseq, return_stmt);
23995 set_bb_seq (new_bb, gseq);
23996 gimple_set_bb (convert_stmt, new_bb);
23997 gimple_set_bb (return_stmt, new_bb);
23998 pop_cfun ();
23999 return new_bb;
24002 tree bool_zero = build_int_cst (bool_int_type_node, 0);
24003 tree cond_var = create_tmp_var (bool_int_type_node);
24004 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
24005 const char *arg_str = rs6000_clone_map[clone_isa].name;
24006 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
24007 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
24008 gimple_call_set_lhs (call_cond_stmt, cond_var);
24010 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
24011 gimple_set_bb (call_cond_stmt, new_bb);
24012 gimple_seq_add_stmt (&gseq, call_cond_stmt);
24014 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
24015 NULL_TREE, NULL_TREE);
24016 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
24017 gimple_set_bb (if_else_stmt, new_bb);
24018 gimple_seq_add_stmt (&gseq, if_else_stmt);
24020 gimple_seq_add_stmt (&gseq, convert_stmt);
24021 gimple_seq_add_stmt (&gseq, return_stmt);
24022 set_bb_seq (new_bb, gseq);
24024 basic_block bb1 = new_bb;
24025 edge e12 = split_block (bb1, if_else_stmt);
24026 basic_block bb2 = e12->dest;
24027 e12->flags &= ~EDGE_FALLTHRU;
24028 e12->flags |= EDGE_TRUE_VALUE;
24030 edge e23 = split_block (bb2, return_stmt);
24031 gimple_set_bb (convert_stmt, bb2);
24032 gimple_set_bb (return_stmt, bb2);
24034 basic_block bb3 = e23->dest;
24035 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
24037 remove_edge (e23);
24038 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
24040 pop_cfun ();
24041 return bb3;
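/* The resolver assembled from these blocks behaves roughly like the
   following C (a sketch; the clone names are hypothetical and the
   predicate string comes from rs6000_clone_map):

       static void *
       dot_resolver (void)
       {
         if (__builtin_cpu_supports ("arch_3_00"))
           return (void *) dot_power9;
         return (void *) dot_default;
       }
*/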
24044 /* This function generates the dispatch function for multi-versioned functions.
24045 DISPATCH_DECL is the function which will contain the dispatch logic.
24046 FNDECLS are the function choices for dispatch, and is a tree chain.
24047 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
24048 code is generated. */
24050 static int
24051 dispatch_function_versions (tree dispatch_decl,
24052 void *fndecls_p,
24053 basic_block *empty_bb)
24055 int ix;
24056 tree ele;
24057 vec<tree> *fndecls;
24058 tree clones[CLONE_MAX];
24060 if (TARGET_DEBUG_TARGET)
24061 fputs ("dispatch_function_versions, top\n", stderr);
24063 gcc_assert (dispatch_decl != NULL
24064 && fndecls_p != NULL
24065 && empty_bb != NULL);
24067 /* fndecls_p is actually a vector. */
24068 fndecls = static_cast<vec<tree> *> (fndecls_p);
24070 /* At least one more version other than the default. */
24071 gcc_assert (fndecls->length () >= 2);
24073 /* The first version in the vector is the default decl. */
24074 memset ((void *) clones, '\0', sizeof (clones));
24075 clones[CLONE_DEFAULT] = (*fndecls)[0];
24077 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
24078 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
24079 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
24080 recent glibc. If we ever need to call __builtin_cpu_init, we would need
24081 to insert the code here to do the call. */
24083 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
24085 int priority = rs6000_clone_priority (ele);
24086 if (!clones[priority])
24087 clones[priority] = ele;
24090 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
24091 if (clones[ix])
24093 if (TARGET_DEBUG_TARGET)
24094 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24095 ix, get_decl_name (clones[ix]));
24097 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24098 *empty_bb);
24101 return 0;
24104 /* Generate the dispatching code body to dispatch multi-versioned function
24105 DECL. The target hook is called to process the "target" attributes and
24106 provide the code to dispatch the right function at run-time. NODE points
24107 to the dispatcher decl whose body will be created. */
24109 static tree
24110 rs6000_generate_version_dispatcher_body (void *node_p)
24112 tree resolver;
24113 basic_block empty_bb;
24114 struct cgraph_node *node = (cgraph_node *) node_p;
24115 struct cgraph_function_version_info *ninfo = node->function_version ();
24117 if (ninfo->dispatcher_resolver)
24118 return ninfo->dispatcher_resolver;
24120 /* node is going to be an alias, so remove the finalized bit. */
24121 node->definition = false;
24123 /* The first version in the chain corresponds to the default version. */
24124 ninfo->dispatcher_resolver = resolver
24125 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24127 if (TARGET_DEBUG_TARGET)
24128 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24129 get_decl_name (resolver));
24131 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24132 auto_vec<tree, 2> fn_ver_vec;
24134 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24135 vinfo;
24136 vinfo = vinfo->next)
24138 struct cgraph_node *version = vinfo->this_node;
24139 /* Check for virtual functions here again, as by this time it should
24140 have been determined if this function needs a vtable index or
24141 not. This happens for methods in derived classes that override
24142 virtual methods in base classes but are not explicitly marked as
24143 virtual. */
24144 if (DECL_VINDEX (version->decl))
24145 sorry ("Virtual function multiversioning not supported");
24147 fn_ver_vec.safe_push (version->decl);
24150 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24151 cgraph_edge::rebuild_edges ();
24152 pop_cfun ();
24153 return resolver;
24157 /* Hook to determine if one function can safely inline another. */
24159 static bool
24160 rs6000_can_inline_p (tree caller, tree callee)
24162 bool ret = false;
24163 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24164 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24166 /* If the callee has no option attributes, then it is ok to inline. */
24167 if (!callee_tree)
24168 ret = true;
24170 else
24172 HOST_WIDE_INT caller_isa;
24173 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24174 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24175 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24177 /* If the caller has option attributes, then use them.
24178 Otherwise, use the command line options. */
24179 if (caller_tree)
24180 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24181 else
24182 caller_isa = rs6000_isa_flags;
24184 /* The callee's options must be a subset of the caller's options, i.e.
24185 a vsx function may inline an altivec function, but a no-vsx function
24186 must not inline a vsx function. However, for those options that the
24187 callee has explicitly enabled or disabled, then we must enforce that
24188 the callee's and caller's options match exactly; see PR70010. */
24189 if (((caller_isa & callee_isa) == callee_isa)
24190 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24191 ret = true;
24194 if (TARGET_DEBUG_TARGET)
24195 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24196 get_decl_name (caller), get_decl_name (callee),
24197 (ret ? "can" : "cannot"));
24199 return ret;
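/* Illustrative examples of the subset rule (hypothetical user code):

       __attribute__((target("altivec"))) int av (void) { return 1; }
       __attribute__((target("vsx")))     int vx (void) { return av (); }
       __attribute__((target("no-vsx")))  int nv (void) { return vx (); }

   Inlining av into vx is allowed, since -mvsx implies Altivec; but vx
   cannot be inlined into nv, because the callee needs VSX, which the
   caller has explicitly disabled.  */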
24202 /* Allocate a stack temp and fix up the address so it meets the particular
24203 memory requirements (either offsettable or REG+REG addressing). */
24205 rtx
24206 rs6000_allocate_stack_temp (machine_mode mode,
24207 bool offsettable_p,
24208 bool reg_reg_p)
24210 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24211 rtx addr = XEXP (stack, 0);
24212 int strict_p = reload_completed;
24214 if (!legitimate_indirect_address_p (addr, strict_p))
24216 if (offsettable_p
24217 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24218 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24220 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24221 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24224 return stack;
24227 /* Given a memory reference, if it is not a reg or reg+reg addressing,
24228 convert to such a form to deal with memory reference instructions
24229 like STFIWX and LDBRX that only take reg+reg addressing. */
24231 rtx
24232 rs6000_force_indexed_or_indirect_mem (rtx x)
24234 machine_mode mode = GET_MODE (x);
24236 gcc_assert (MEM_P (x));
24237 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24239 rtx addr = XEXP (x, 0);
24240 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24242 rtx reg = XEXP (addr, 0);
24243 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24244 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24245 gcc_assert (REG_P (reg));
24246 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24247 addr = reg;
24249 else if (GET_CODE (addr) == PRE_MODIFY)
24251 rtx reg = XEXP (addr, 0);
24252 rtx expr = XEXP (addr, 1);
24253 gcc_assert (REG_P (reg));
24254 gcc_assert (GET_CODE (expr) == PLUS);
24255 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24256 addr = reg;
24259 if (GET_CODE (addr) == PLUS)
24261 rtx op0 = XEXP (addr, 0);
24262 rtx op1 = XEXP (addr, 1);
24263 op0 = force_reg (Pmode, op0);
24264 op1 = force_reg (Pmode, op1);
24265 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24267 else
24268 x = replace_equiv_address (x, force_reg (Pmode, addr));
24271 return x;
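/* A sketch of the rewrite performed above: an offset address such as

       (mem:DI (plus:DI (reg:DI r9) (const_int 16)))

   is turned into reg+reg form,

       (mem:DI (plus:DI (reg:DI rA) (reg:DI rB)))

   after both addends are forced into registers, which is the only form
   instructions like STFIWX and LDBRX accept.  */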
24274 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24276 On the RS/6000, all integer constants are acceptable, most won't be valid
24277 for particular insns, though. Only easy FP constants are acceptable. */
24279 static bool
24280 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24282 if (TARGET_ELF && tls_referenced_p (x))
24283 return false;
24285 if (CONST_DOUBLE_P (x))
24286 return easy_fp_constant (x, mode);
24288 if (GET_CODE (x) == CONST_VECTOR)
24289 return easy_vector_constant (x, mode);
24291 return true;
24295 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24297 static bool
24298 chain_already_loaded (rtx_insn *last)
24300 for (; last != NULL; last = PREV_INSN (last))
24302 if (NONJUMP_INSN_P (last))
24304 rtx patt = PATTERN (last);
24306 if (GET_CODE (patt) == SET)
24308 rtx lhs = XEXP (patt, 0);
24310 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24311 return true;
24315 return false;
24318 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24320 void
24321 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24323 rtx func = func_desc;
24324 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24325 rtx toc_load = NULL_RTX;
24326 rtx toc_restore = NULL_RTX;
24327 rtx func_addr;
24328 rtx abi_reg = NULL_RTX;
24329 rtx call[5];
24330 int n_call;
24331 rtx insn;
24332 bool is_pltseq_longcall;
24334 if (global_tlsarg)
24335 tlsarg = global_tlsarg;
24337 /* Handle longcall attributes. */
24338 is_pltseq_longcall = false;
24339 if ((INTVAL (cookie) & CALL_LONG) != 0
24340 && GET_CODE (func_desc) == SYMBOL_REF)
24342 func = rs6000_longcall_ref (func_desc, tlsarg);
24343 if (TARGET_PLTSEQ)
24344 is_pltseq_longcall = true;
24347 /* Handle indirect calls. */
24348 if (!SYMBOL_REF_P (func)
24349 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24351 if (!rs6000_pcrel_p (cfun))
24353 /* Save the TOC into its reserved slot before the call,
24354 and prepare to restore it after the call. */
24355 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24356 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24357 gen_rtvec (1, stack_toc_offset),
24358 UNSPEC_TOCSLOT);
24359 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24361 /* Can we optimize saving the TOC in the prologue or
24362 do we need to do it at every call? */
24363 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24364 cfun->machine->save_toc_in_prologue = true;
24365 else
24367 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24368 rtx stack_toc_mem = gen_frame_mem (Pmode,
24369 gen_rtx_PLUS (Pmode, stack_ptr,
24370 stack_toc_offset));
24371 MEM_VOLATILE_P (stack_toc_mem) = 1;
24372 if (is_pltseq_longcall)
24374 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24375 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24376 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24378 else
24379 emit_move_insn (stack_toc_mem, toc_reg);
24383 if (DEFAULT_ABI == ABI_ELFv2)
24385 /* A function pointer in the ELFv2 ABI is just a plain address, but
24386 the ABI requires it to be loaded into r12 before the call. */
24387 func_addr = gen_rtx_REG (Pmode, 12);
24388 if (!rtx_equal_p (func_addr, func))
24389 emit_move_insn (func_addr, func);
24390 abi_reg = func_addr;
24391 /* Indirect calls via CTR are strongly preferred over indirect
24392 calls via LR, so move the address there. Needed to mark
24393 this insn for linker plt sequence editing too. */
24394 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24395 if (is_pltseq_longcall)
24397 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24398 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24399 emit_insn (gen_rtx_SET (func_addr, mark_func));
24400 v = gen_rtvec (2, func_addr, func_desc);
24401 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24403 else
24404 emit_move_insn (func_addr, abi_reg);
24406 else
24408 /* A function pointer under AIX is a pointer to a data area whose
24409 first word contains the actual address of the function, whose
24410 second word contains a pointer to its TOC, and whose third word
24411 contains a value to place in the static chain register (r11).
24412 Note that if we load the static chain, our "trampoline" need
24413 not have any executable code. */
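/* An illustrative layout of such an AIX function descriptor (this struct
   is hypothetical; GCC manipulates the words directly below):

       struct aix_fn_desc
       {
         void *entry;         // word 0: code address of the function
         void *toc;           // word 1: the function's TOC pointer
         void *static_chain;  // word 2: value to load into r11
       };
*/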
24415 /* Load up address of the actual function. */
24416 func = force_reg (Pmode, func);
24417 func_addr = gen_reg_rtx (Pmode);
24418 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24420 /* Indirect calls via CTR are strongly preferred over indirect
24421 calls via LR, so move the address there. */
24422 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24423 emit_move_insn (ctr_reg, func_addr);
24424 func_addr = ctr_reg;
24426 /* Prepare to load the TOC of the called function. Note that the
24427 TOC load must happen immediately before the actual call so
24428 that unwinding the TOC registers works correctly. See the
24429 comment in frob_update_context. */
24430 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24431 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24432 gen_rtx_PLUS (Pmode, func,
24433 func_toc_offset));
24434 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24436 /* If we have a static chain, load it up. But, if the call was
24437 originally direct, the 3rd word has not been written since no
24438 trampoline has been built, so we ought not to load it, lest we
24439 override a static chain value. */
24440 if (!(GET_CODE (func_desc) == SYMBOL_REF
24441 && SYMBOL_REF_FUNCTION_P (func_desc))
24442 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24443 && !chain_already_loaded (get_current_sequence ()->next->last))
24445 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24446 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24447 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24448 gen_rtx_PLUS (Pmode, func,
24449 func_sc_offset));
24450 emit_move_insn (sc_reg, func_sc_mem);
24451 abi_reg = sc_reg;
24455 else
24457 /* No TOC register needed for calls from PC-relative callers. */
24458 if (!rs6000_pcrel_p (cfun))
24459 /* Direct calls use the TOC: for local calls, the callee will
24460 assume the TOC register is set; for non-local calls, the
24461 PLT stub needs the TOC register. */
24462 abi_reg = toc_reg;
24463 func_addr = func;
24466 /* Create the call. */
24467 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24468 if (value != NULL_RTX)
24469 call[0] = gen_rtx_SET (value, call[0]);
24470 call[1] = gen_rtx_USE (VOIDmode, cookie);
24471 n_call = 2;
24473 if (toc_load)
24474 call[n_call++] = toc_load;
24475 if (toc_restore)
24476 call[n_call++] = toc_restore;
24478 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24480 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24481 insn = emit_call_insn (insn);
24483 /* Mention all registers defined by the ABI to hold information
24484 as uses in CALL_INSN_FUNCTION_USAGE. */
24485 if (abi_reg)
24486 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24489 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24491 void
24492 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24494 rtx call[2];
24495 rtx insn;
24497 gcc_assert (INTVAL (cookie) == 0);
24499 if (global_tlsarg)
24500 tlsarg = global_tlsarg;
24502 /* Create the call. */
24503 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24504 if (value != NULL_RTX)
24505 call[0] = gen_rtx_SET (value, call[0]);
24507 call[1] = simple_return_rtx;
24509 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24510 insn = emit_call_insn (insn);
24512 /* Note use of the TOC register. */
24513 if (!rs6000_pcrel_p (cfun))
24514 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24515 gen_rtx_REG (Pmode, TOC_REGNUM));
24518 /* Expand code to perform a call under the SYSV4 ABI. */
24520 void
24521 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24523 rtx func = func_desc;
24524 rtx func_addr;
24525 rtx call[4];
24526 rtx insn;
24527 rtx abi_reg = NULL_RTX;
24528 int n;
24530 if (global_tlsarg)
24531 tlsarg = global_tlsarg;
24533 /* Handle longcall attributes. */
24534 if ((INTVAL (cookie) & CALL_LONG) != 0
24535 && GET_CODE (func_desc) == SYMBOL_REF)
24537 func = rs6000_longcall_ref (func_desc, tlsarg);
24538 /* If the longcall was implemented as an inline PLT call using
24539 PLT unspecs then func will be REG:r11. If not, func will be
24540 a pseudo reg. The inline PLT call sequence supports lazy
24541 linking (and longcalls to functions in dlopen'd libraries).
24542 The other style of longcalls don't. The lazy linking entry
24543 to the dynamic symbol resolver requires r11 be the function
24544 address (as it is for linker generated PLT stubs). Ensure
24545 r11 stays valid to the bctrl by marking r11 used by the call. */
24546 if (TARGET_PLTSEQ)
24547 abi_reg = func;
24550 /* Handle indirect calls. */
24551 if (GET_CODE (func) != SYMBOL_REF)
24553 func = force_reg (Pmode, func);
24555 /* Indirect calls via CTR are strongly preferred over indirect
24556 calls via LR, so move the address there. That can't be left
24557 to reload because we want to mark every instruction in an
24558 inline PLT call sequence with a reloc, enabling the linker to
24559 edit the sequence back to a direct call when that makes sense. */
24560 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24561 if (abi_reg)
24563 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24564 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24565 emit_insn (gen_rtx_SET (func_addr, mark_func));
24566 v = gen_rtvec (2, func_addr, func_desc);
24567 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24569 else
24570 emit_move_insn (func_addr, func);
24572 else
24573 func_addr = func;
24575 /* Create the call. */
24576 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24577 if (value != NULL_RTX)
24578 call[0] = gen_rtx_SET (value, call[0]);
24580 call[1] = gen_rtx_USE (VOIDmode, cookie);
24581 n = 2;
24582 if (TARGET_SECURE_PLT
24583 && flag_pic
24584 && GET_CODE (func_addr) == SYMBOL_REF
24585 && !SYMBOL_REF_LOCAL_P (func_addr))
24586 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24588 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24590 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24591 insn = emit_call_insn (insn);
24592 if (abi_reg)
24593 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24596 /* Expand code to perform a sibling call under the SysV4 ABI. */
24598 void
24599 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24601 rtx func = func_desc;
24602 rtx func_addr;
24603 rtx call[3];
24604 rtx insn;
24605 rtx abi_reg = NULL_RTX;
24607 if (global_tlsarg)
24608 tlsarg = global_tlsarg;
24610 /* Handle longcall attributes. */
24611 if ((INTVAL (cookie) & CALL_LONG) != 0
24612 && GET_CODE (func_desc) == SYMBOL_REF)
24614 func = rs6000_longcall_ref (func_desc, tlsarg);
24615 /* If the longcall was implemented as an inline PLT call using
24616 PLT unspecs then func will be REG:r11. If not, func will be
24617 a pseudo reg. The inline PLT call sequence supports lazy
24618 linking (and longcalls to functions in dlopen'd libraries).
24619 The other style of longcalls don't. The lazy linking entry
24620 to the dynamic symbol resolver requires r11 be the function
24621 address (as it is for linker generated PLT stubs). Ensure
24622 r11 stays valid to the bctr by marking r11 used by the call. */
24623 if (TARGET_PLTSEQ)
24624 abi_reg = func;
24627 /* Handle indirect calls. */
24628 if (GET_CODE (func) != SYMBOL_REF)
24630 func = force_reg (Pmode, func);
24632 /* Indirect sibcalls must go via CTR. That can't be left to
24633 reload because we want to mark every instruction in an inline
24634 PLT call sequence with a reloc, enabling the linker to edit
24635 the sequence back to a direct call when that makes sense. */
24636 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24637 if (abi_reg)
24639 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24640 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24641 emit_insn (gen_rtx_SET (func_addr, mark_func));
24642 v = gen_rtvec (2, func_addr, func_desc);
24643 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24645 else
24646 emit_move_insn (func_addr, func);
24648 else
24649 func_addr = func;
24651 /* Create the call. */
24652 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24653 if (value != NULL_RTX)
24654 call[0] = gen_rtx_SET (value, call[0]);
24656 call[1] = gen_rtx_USE (VOIDmode, cookie);
24657 call[2] = simple_return_rtx;
24659 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24660 insn = emit_call_insn (insn);
24661 if (abi_reg)
24662 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24665 #if TARGET_MACHO
24667 /* Expand code to perform a call under the Darwin ABI.
24668 Modulo handling of mlongcall, this is much the same as sysv.
24669 If/when the longcall optimisation is removed, we could drop this
24670 code and use the sysv case (taking care to avoid the tls stuff).
24672 We can use this for sibcalls too, if needed. */
24674 void
24675 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24676 rtx cookie, bool sibcall)
24678 rtx func = func_desc;
24679 rtx func_addr;
24680 rtx call[3];
24681 rtx insn;
24682 int cookie_val = INTVAL (cookie);
24683 bool make_island = false;
24685 /* Handle longcall attributes; there are two cases for Darwin:
24686 1) Newer linkers are capable of synthesising any branch islands needed.
24687 2) We need a helper branch island synthesised by the compiler.
24688 The second case has mostly been retired and we don't use it for m64.
24689 In fact, it is only an optimisation; we could just indirect as sysv
24690 does, but we keep it this way for backwards compatibility for now.
24691 If we're going to use this, then we need to keep the CALL_LONG bit set,
24692 so that we can pick up the special insn form later. */
24693 if ((cookie_val & CALL_LONG) != 0
24694 && GET_CODE (func_desc) == SYMBOL_REF)
24696 /* FIXME: the longcall opt should not hang off this flag, it is most
24697 likely incorrect for kernel-mode code-generation. */
24698 if (darwin_symbol_stubs && TARGET_32BIT)
24699 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24700 else
24702 /* The linker is capable of doing this, but the user explicitly
24703 asked for -mlongcall, so we'll do the 'normal' version. */
24704 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24705 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24709 /* Handle indirect calls. */
24710 if (GET_CODE (func) != SYMBOL_REF)
24712 func = force_reg (Pmode, func);
24714 /* Indirect calls via CTR are strongly preferred over indirect
24715 calls via LR, and are required for indirect sibcalls, so move
24716 the address there. */
24717 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24718 emit_move_insn (func_addr, func);
24720 else
24721 func_addr = func;
24723 /* Create the call. */
24724 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24725 if (value != NULL_RTX)
24726 call[0] = gen_rtx_SET (value, call[0]);
24728 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24730 if (sibcall)
24731 call[2] = simple_return_rtx;
24732 else
24733 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24735 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24736 insn = emit_call_insn (insn);
24737 /* Now that we have the debug info in the insn, we can set up the branch island
24738 if we're using one. */
24739 if (make_island)
24741 tree funname = get_identifier (XSTR (func_desc, 0));
24743 if (no_previous_def (funname))
24745 rtx label_rtx = gen_label_rtx ();
24746 char *label_buf, temp_buf[256];
24747 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24748 CODE_LABEL_NUMBER (label_rtx));
24749 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24750 tree labelname = get_identifier (label_buf);
24751 add_compiler_branch_island (labelname, funname,
24752 insn_line ((const rtx_insn*)insn));
24756 #endif
24758 void
24759 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24760 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24762 #if TARGET_MACHO
24763 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24764 #else
24765 gcc_unreachable ();
24766 #endif
24770 void
24771 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24772 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24774 #if TARGET_MACHO
24775 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24776 #else
24777 gcc_unreachable ();
24778 #endif
24781 /* Return whether we should generate PC-relative code for FNDECL. */
24782 bool
24783 rs6000_fndecl_pcrel_p (const_tree fndecl)
24785 if (DEFAULT_ABI != ABI_ELFv2)
24786 return false;
24788 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24790 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24791 && TARGET_CMODEL == CMODEL_MEDIUM);
24794 /* Return whether we should generate PC-relative code for *FN. */
24795 bool
24796 rs6000_pcrel_p (struct function *fn)
24798 if (DEFAULT_ABI != ABI_ELFv2)
24799 return false;
24801 /* Optimize usual case. */
24802 if (fn == cfun)
24803 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24804 && TARGET_CMODEL == CMODEL_MEDIUM);
24806 return rs6000_fndecl_pcrel_p (fn->decl);
24810 /* Given an address (ADDR), a mode (MODE), and the format of the
24811 non-prefixed address (NON_PREFIXED_FORMAT), return the instruction format
24812 for the address. */
24814 enum insn_form
24815 address_to_insn_form (rtx addr,
24816 machine_mode mode,
24817 enum non_prefixed_form non_prefixed_format)
24819 /* Single register is easy. */
24820 if (REG_P (addr) || SUBREG_P (addr))
24821 return INSN_FORM_BASE_REG;
24823 /* If the non-prefixed instruction format doesn't support offset addressing,
24824 make sure only indexed addressing is allowed.
24826 We special case SDmode so that the register allocator does not try to move
24827 SDmode through GPR registers, but instead uses the 32-bit integer load and
24828 store instructions for the floating point registers. */
24829 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24831 if (GET_CODE (addr) != PLUS)
24832 return INSN_FORM_BAD;
24834 rtx op0 = XEXP (addr, 0);
24835 rtx op1 = XEXP (addr, 1);
24836 if (!REG_P (op0) && !SUBREG_P (op0))
24837 return INSN_FORM_BAD;
24839 if (!REG_P (op1) && !SUBREG_P (op1))
24840 return INSN_FORM_BAD;
24842 return INSN_FORM_X;
24845 /* Deal with update forms. */
24846 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24847 return INSN_FORM_UPDATE;
24849 /* Handle PC-relative symbols and labels. Check for both local and
24850 external symbols. Assume labels are always local. TLS symbols
24851 are not PC-relative for rs6000. */
24852 if (TARGET_PCREL)
24854 if (LABEL_REF_P (addr))
24855 return INSN_FORM_PCREL_LOCAL;
24857 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
24859 if (!SYMBOL_REF_LOCAL_P (addr))
24860 return INSN_FORM_PCREL_EXTERNAL;
24861 else
24862 return INSN_FORM_PCREL_LOCAL;
24866 if (GET_CODE (addr) == CONST)
24867 addr = XEXP (addr, 0);
24869 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24870 if (GET_CODE (addr) == LO_SUM)
24871 return INSN_FORM_LO_SUM;
24873 /* Everything below must be an offset address of some form. */
24874 if (GET_CODE (addr) != PLUS)
24875 return INSN_FORM_BAD;
24877 rtx op0 = XEXP (addr, 0);
24878 rtx op1 = XEXP (addr, 1);
24880 /* Check for indexed addresses. */
24881 if (REG_P (op1) || SUBREG_P (op1))
24883 if (REG_P (op0) || SUBREG_P (op0))
24884 return INSN_FORM_X;
24886 return INSN_FORM_BAD;
24889 if (!CONST_INT_P (op1))
24890 return INSN_FORM_BAD;
24892 HOST_WIDE_INT offset = INTVAL (op1);
24893 if (!SIGNED_INTEGER_34BIT_P (offset))
24894 return INSN_FORM_BAD;
24896 /* Check for local and external PC-relative addresses. Labels are always
24897 local. TLS symbols are not PC-relative for rs6000. */
24898 if (TARGET_PCREL)
24900 if (LABEL_REF_P (op0))
24901 return INSN_FORM_PCREL_LOCAL;
24903 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
24905 if (!SYMBOL_REF_LOCAL_P (op0))
24906 return INSN_FORM_PCREL_EXTERNAL;
24907 else
24908 return INSN_FORM_PCREL_LOCAL;
24912 /* If it isn't PC-relative, the address must use a base register. */
24913 if (!REG_P (op0) && !SUBREG_P (op0))
24914 return INSN_FORM_BAD;
24916 /* Large offsets must be prefixed. */
24917 if (!SIGNED_INTEGER_16BIT_P (offset))
24919 if (TARGET_PREFIXED)
24920 return INSN_FORM_PREFIXED_NUMERIC;
24922 return INSN_FORM_BAD;
24925 /* We have a 16-bit offset, see what default instruction format to use. */
24926 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24928 unsigned size = GET_MODE_SIZE (mode);
24930 /* On 64-bit systems, assume 64-bit integers need to use DS form
24931 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24932 (for LXV and STXV). TImode is problematical in that its normal usage
24933 is expected to be GPRs where it wants a DS instruction format, but if
24934 it goes into the vector registers, it wants a DQ instruction
24935 format. */
24936 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24937 non_prefixed_format = NON_PREFIXED_DS;
24939 else if (TARGET_VSX && size >= 16
24940 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24941 non_prefixed_format = NON_PREFIXED_DQ;
24943 else
24944 non_prefixed_format = NON_PREFIXED_D;
24947 /* Classify the D/DS/DQ-form addresses. */
24948 switch (non_prefixed_format)
24950 /* Instruction format D, all 16 bits are valid. */
24951 case NON_PREFIXED_D:
24952 return INSN_FORM_D;
24954 /* Instruction format DS, bottom 2 bits must be 0. */
24955 case NON_PREFIXED_DS:
24956 if ((offset & 3) == 0)
24957 return INSN_FORM_DS;
24959 else if (TARGET_PREFIXED)
24960 return INSN_FORM_PREFIXED_NUMERIC;
24962 else
24963 return INSN_FORM_BAD;
24965 /* Instruction format DQ, bottom 4 bits must be 0. */
24966 case NON_PREFIXED_DQ:
24967 if ((offset & 15) == 0)
24968 return INSN_FORM_DQ;
24970 else if (TARGET_PREFIXED)
24971 return INSN_FORM_PREFIXED_NUMERIC;
24973 else
24974 return INSN_FORM_BAD;
24976 default:
24977 break;
24980 return INSN_FORM_BAD;
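/* Editorial sketch, not part of this file: the classification above reduces
   to signed-range and alignment checks on the offset.  The standalone
   helpers below (all names invented for illustration) show the same rules
   in plain C.  */

#include <stdbool.h>
#include <stdint.h>

/* Roughly mirrors SIGNED_INTEGER_16BIT_P / SIGNED_INTEGER_34BIT_P.  */
static bool
fits_signed (int64_t x, int bits)
{
  int64_t lim = (int64_t) 1 << (bits - 1);
  return x >= -lim && x < lim;
}

/* Classify a D/DS/DQ offset: D takes any 16-bit signed offset, DS needs
   the low 2 bits clear (ALIGN_MASK == 3), DQ the low 4 bits clear
   (ALIGN_MASK == 15).  Offsets that fail the 16-bit/alignment test but
   still fit in 34 bits need the prefixed form; larger ones are invalid.  */
static const char *
classify_offset (int64_t offset, int align_mask, bool have_prefixed)
{
  if (fits_signed (offset, 16) && (offset & align_mask) == 0)
    return align_mask == 0 ? "D" : align_mask == 3 ? "DS" : "DQ";
  if (fits_signed (offset, 34) && have_prefixed)
    return "prefixed";
  return "bad";
}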
24983 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24984 instruction format (D/DS/DQ) used for offset memory. */
24986 static enum non_prefixed_form
24987 reg_to_non_prefixed (rtx reg, machine_mode mode)
24989 /* If it isn't a register, use the defaults. */
24990 if (!REG_P (reg) && !SUBREG_P (reg))
24991 return NON_PREFIXED_DEFAULT;
24993 unsigned int r = reg_or_subregno (reg);
24995 /* If we have a pseudo, use the default instruction format. */
24996 if (!HARD_REGISTER_NUM_P (r))
24997 return NON_PREFIXED_DEFAULT;
24999 unsigned size = GET_MODE_SIZE (mode);
25001 /* FPR registers use D-form for scalars, and DQ-form for vectors, IEEE
25002 128-bit floating point, and 128-bit integers. Before power9, only indexed
25003 addressing was available for vectors. */
25004 if (FP_REGNO_P (r))
25006 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25007 return NON_PREFIXED_D;
25009 else if (size < 8)
25010 return NON_PREFIXED_X;
25012 else if (TARGET_VSX && size >= 16
25013 && (VECTOR_MODE_P (mode)
25014 || FLOAT128_VECTOR_P (mode)
25015 || mode == TImode || mode == CTImode))
25016 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25018 else
25019 return NON_PREFIXED_DEFAULT;
25022 /* Altivec registers use DS-form for scalars, and DQ-form for vectors, IEEE
25023 128-bit floating point, and 128-bit integers. Before power9, only indexed
25024 addressing was available. */
25025 else if (ALTIVEC_REGNO_P (r))
25027 if (!TARGET_P9_VECTOR)
25028 return NON_PREFIXED_X;
25030 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25031 return NON_PREFIXED_DS;
25033 else if (size < 8)
25034 return NON_PREFIXED_X;
25036 else if (TARGET_VSX && size >= 16
25037 && (VECTOR_MODE_P (mode)
25038 || FLOAT128_VECTOR_P (mode)
25039 || mode == TImode || mode == CTImode))
25040 return NON_PREFIXED_DQ;
25042 else
25043 return NON_PREFIXED_DEFAULT;
25046 /* GPR registers use DS-form for 64-bit items on 64-bit systems, and D-form
25047 otherwise. Assume that any other register, such as LR, CRs, etc., will go
25048 through the GPR registers for memory operations. */
25049 else if (TARGET_POWERPC64 && size >= 8)
25050 return NON_PREFIXED_DS;
25052 return NON_PREFIXED_D;
25056 /* Whether a load instruction is a prefixed instruction. This is called from
25057 the prefixed attribute processing. */
25059 bool
25060 prefixed_load_p (rtx_insn *insn)
25062 /* Validate the insn to make sure it is a normal load insn. */
25063 extract_insn_cached (insn);
25064 if (recog_data.n_operands < 2)
25065 return false;
25067 rtx reg = recog_data.operand[0];
25068 rtx mem = recog_data.operand[1];
25070 if (!REG_P (reg) && !SUBREG_P (reg))
25071 return false;
25073 if (!MEM_P (mem))
25074 return false;
25076 /* Prefixed load instructions do not support update or indexed forms. */
25077 if (get_attr_indexed (insn) == INDEXED_YES
25078 || get_attr_update (insn) == UPDATE_YES)
25079 return false;
25081 /* LWA uses the DS format instead of the D format that LWZ uses. */
25082 enum non_prefixed_form non_prefixed;
25083 machine_mode reg_mode = GET_MODE (reg);
25084 machine_mode mem_mode = GET_MODE (mem);
25086 if (mem_mode == SImode && reg_mode == DImode
25087 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25088 non_prefixed = NON_PREFIXED_DS;
25090 else
25091 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25093 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25096 /* Whether a store instruction is a prefixed instruction. This is called from
25097 the prefixed attribute processing. */
25099 bool
25100 prefixed_store_p (rtx_insn *insn)
25102 /* Validate the insn to make sure it is a normal store insn. */
25103 extract_insn_cached (insn);
25104 if (recog_data.n_operands < 2)
25105 return false;
25107 rtx mem = recog_data.operand[0];
25108 rtx reg = recog_data.operand[1];
25110 if (!REG_P (reg) && !SUBREG_P (reg))
25111 return false;
25113 if (!MEM_P (mem))
25114 return false;
25116 /* Prefixed store instructions do not support update or indexed forms. */
25117 if (get_attr_indexed (insn) == INDEXED_YES
25118 || get_attr_update (insn) == UPDATE_YES)
25119 return false;
25121 machine_mode mem_mode = GET_MODE (mem);
25122 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25123 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25126 /* Whether a load immediate or add instruction is a prefixed instruction. This
25127 is called from the prefixed attribute processing. */
25129 bool
25130 prefixed_paddi_p (rtx_insn *insn)
25132 rtx set = single_set (insn);
25133 if (!set)
25134 return false;
25136 rtx dest = SET_DEST (set);
25137 rtx src = SET_SRC (set);
25139 if (!REG_P (dest) && !SUBREG_P (dest))
25140 return false;
25142 /* Is this a load immediate that can't be done with a simple ADDI or
25143 ADDIS? */
25144 if (CONST_INT_P (src))
25145 return (satisfies_constraint_eI (src)
25146 && !satisfies_constraint_I (src)
25147 && !satisfies_constraint_L (src));
25149 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25150 ADDIS? */
25151 if (GET_CODE (src) == PLUS)
25153 rtx op1 = XEXP (src, 1);
25155 return (CONST_INT_P (op1)
25156 && satisfies_constraint_eI (op1)
25157 && !satisfies_constraint_I (op1)
25158 && !satisfies_constraint_L (op1));
25161 /* If not, is it a load of a PC-relative address? */
25162 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25163 return false;
25165 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25166 return false;
25168 enum insn_form iform = address_to_insn_form (src, Pmode,
25169 NON_PREFIXED_DEFAULT);
25171 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
25174 /* Whether the next instruction needs a 'p' prefix issued before the
25175 instruction is printed out. */
25176 static bool next_insn_prefixed_p;
25178 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25179 outputting the assembler code. On the PowerPC, we remember if the current
25180 insn is a prefixed insn where we need to emit a 'p' before the insn.
25182 In addition, if the insn is part of a PC-relative reference to an external
25183 label optimization, this is also recorded. */
25184 void
25185 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25187 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25188 return;
25191 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25192 We use it to emit a 'p' for prefixed insns that is set in
25193 FINAL_PRESCAN_INSN. */
25194 void
25195 rs6000_asm_output_opcode (FILE *stream)
25197 if (next_insn_prefixed_p)
25198 fprintf (stream, "p");
25200 return;
25203 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25204 should be adjusted to reflect any required changes. This macro is used when
25205 there is some systematic length adjustment required that would be difficult
25206 to express in the length attribute.
25208 On the PowerPC, we use this to adjust the length of an instruction if one or
25209 more prefixed instructions are generated, using the attribute
25210 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25211 hardware requires that a prefixed instruction does not cross a 64-byte
25212 boundary. This means the compiler has to assume the length of the first
25213 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25214 already set for the non-prefixed instruction, we just need to update for the
25215 difference. */
25217 int
25218 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25220 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
25222 rtx pattern = PATTERN (insn);
25223 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25224 && get_attr_prefixed (insn) == PREFIXED_YES)
25226 int num_prefixed = get_attr_max_prefixed_insns (insn);
25227 length += 4 * (num_prefixed + 1);
25231 return length;
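/* Editorial sketch, not part of this file: the adjustment above is plain
   byte arithmetic.  LENGTH already counts 4 bytes for every insn; each
   prefixed insn needs 4 more bytes (8 total), and the first one may need
   another 4 bytes of padding so it does not cross a 64-byte boundary,
   giving 4 * num_prefixed + 4 == 4 * (num_prefixed + 1) extra bytes.  */

static int
adjusted_length (int base_length, int num_prefixed)
{
  return base_length + 4 * (num_prefixed + 1);
}

/* A single prefixed load counted as one 4-byte insn becomes the worst-case
   12 bytes mentioned in the comment above: adjusted_length (4, 1) == 12.  */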
25235 #ifdef HAVE_GAS_HIDDEN
25236 # define USE_HIDDEN_LINKONCE 1
25237 #else
25238 # define USE_HIDDEN_LINKONCE 0
25239 #endif
25241 /* Fills in the label name that should be used for a 476 link stack thunk. */
25243 void
25244 get_ppc476_thunk_name (char name[32])
25246 gcc_assert (TARGET_LINK_STACK);
25248 if (USE_HIDDEN_LINKONCE)
25249 sprintf (name, "__ppc476.get_thunk");
25250 else
25251 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25254 /* This function emits the simple thunk routine that is used to preserve
25255 the link stack on the 476 cpu. */
25257 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25258 static void
25259 rs6000_code_end (void)
25261 char name[32];
25262 tree decl;
25264 if (!TARGET_LINK_STACK)
25265 return;
25267 get_ppc476_thunk_name (name);
25269 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25270 build_function_type_list (void_type_node, NULL_TREE));
25271 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25272 NULL_TREE, void_type_node);
25273 TREE_PUBLIC (decl) = 1;
25274 TREE_STATIC (decl) = 1;
25276 #if RS6000_WEAK
25277 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25279 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25280 targetm.asm_out.unique_section (decl, 0);
25281 switch_to_section (get_named_section (decl, NULL, 0));
25282 DECL_WEAK (decl) = 1;
25283 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25284 targetm.asm_out.globalize_label (asm_out_file, name);
25285 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25286 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25288 else
25289 #endif
25291 switch_to_section (text_section);
25292 ASM_OUTPUT_LABEL (asm_out_file, name);
25295 DECL_INITIAL (decl) = make_node (BLOCK);
25296 current_function_decl = decl;
25297 allocate_struct_function (decl, false);
25298 init_function_start (decl);
25299 first_function_block_is_cold = false;
25300 /* Make sure unwind info is emitted for the thunk if needed. */
25301 final_start_function (emit_barrier (), asm_out_file, 1);
25303 fputs ("\tblr\n", asm_out_file);
25305 final_end_function ();
25306 init_insn_lengths ();
25307 free_after_compilation (cfun);
25308 set_cfun (NULL);
25309 current_function_decl = NULL;
25312 /* Add r30 to hard reg set if the prologue sets it up and it is not
25313 pic_offset_table_rtx. */
25315 static void
25316 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25318 if (!TARGET_SINGLE_PIC_BASE
25319 && TARGET_TOC
25320 && TARGET_MINIMAL_TOC
25321 && !constant_pool_empty_p ())
25322 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25323 if (cfun->machine->split_stack_argp_used)
25324 add_to_hard_reg_set (&set->set, Pmode, 12);
25326 /* Make sure the hard reg set doesn't include r2, which was possibly added
25327 via PIC_OFFSET_TABLE_REGNUM. */
25328 if (TARGET_TOC)
25329 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25333 /* Helper function for rs6000_split_logical to emit a logical instruction after
25334 splitting the operation to single GPR registers.
25336 DEST is the destination register.
25337 OP1 and OP2 are the input source registers.
25338 CODE is the base operation (AND, IOR, XOR, NOT).
25339 MODE is the machine mode.
25340 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25341 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25342 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25344 static void
25345 rs6000_split_logical_inner (rtx dest,
25346 rtx op1,
25347 rtx op2,
25348 enum rtx_code code,
25349 machine_mode mode,
25350 bool complement_final_p,
25351 bool complement_op1_p,
25352 bool complement_op2_p)
25354 rtx bool_rtx;
25356 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25357 if (op2 && CONST_INT_P (op2)
25358 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25359 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25361 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25362 HOST_WIDE_INT value = INTVAL (op2) & mask;
25364 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25365 if (code == AND)
25367 if (value == 0)
25369 emit_insn (gen_rtx_SET (dest, const0_rtx));
25370 return;
25373 else if (value == mask)
25375 if (!rtx_equal_p (dest, op1))
25376 emit_insn (gen_rtx_SET (dest, op1));
25377 return;
25381 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25382 into separate ORI/ORIS or XORI/XORIS instructions. */
25383 else if (code == IOR || code == XOR)
25385 if (value == 0)
25387 if (!rtx_equal_p (dest, op1))
25388 emit_insn (gen_rtx_SET (dest, op1));
25389 return;
25394 if (code == AND && mode == SImode
25395 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25397 emit_insn (gen_andsi3 (dest, op1, op2));
25398 return;
25401 if (complement_op1_p)
25402 op1 = gen_rtx_NOT (mode, op1);
25404 if (complement_op2_p)
25405 op2 = gen_rtx_NOT (mode, op2);
25407 /* For canonical RTL, if only one arm is inverted, it is the first. */
25408 if (!complement_op1_p && complement_op2_p)
25409 std::swap (op1, op2);
25411 bool_rtx = ((code == NOT)
25412 ? gen_rtx_NOT (mode, op1)
25413 : gen_rtx_fmt_ee (code, mode, op1, op2));
25415 if (complement_final_p)
25416 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25418 emit_insn (gen_rtx_SET (dest, bool_rtx));
25421 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25422 operations are split immediately during RTL generation to allow for more
25423 optimizations of the AND/IOR/XOR.
25425 OPERANDS is an array containing the destination and two input operands.
25426 CODE is the base operation (AND, IOR, XOR, NOT).
25427 MODE is the machine mode.
25428 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25429 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25430 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25434 static void
25435 rs6000_split_logical_di (rtx operands[3],
25436 enum rtx_code code,
25437 bool complement_final_p,
25438 bool complement_op1_p,
25439 bool complement_op2_p)
25441 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25442 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25443 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25444 enum hi_lo { hi = 0, lo = 1 };
25445 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25446 size_t i;
25448 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25449 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25450 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25451 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25453 if (code == NOT)
25454 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25455 else
25457 if (!CONST_INT_P (operands[2]))
25459 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25460 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25462 else
25464 HOST_WIDE_INT value = INTVAL (operands[2]);
25465 HOST_WIDE_INT value_hi_lo[2];
25467 gcc_assert (!complement_final_p);
25468 gcc_assert (!complement_op1_p);
25469 gcc_assert (!complement_op2_p);
25471 value_hi_lo[hi] = value >> 32;
25472 value_hi_lo[lo] = value & lower_32bits;
25474 for (i = 0; i < 2; i++)
25476 HOST_WIDE_INT sub_value = value_hi_lo[i];
25478 if (sub_value & sign_bit)
25479 sub_value |= upper_32bits;
25481 op2_hi_lo[i] = GEN_INT (sub_value);
25483 /* If this is an AND instruction, check to see if we need to load
25484 the value in a register. */
25485 if (code == AND && sub_value != -1 && sub_value != 0
25486 && !and_operand (op2_hi_lo[i], SImode))
25487 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25492 for (i = 0; i < 2; i++)
25494 /* Split large IOR/XOR operations. */
25495 if ((code == IOR || code == XOR)
25496 && CONST_INT_P (op2_hi_lo[i])
25497 && !complement_final_p
25498 && !complement_op1_p
25499 && !complement_op2_p
25500 && !logical_const_operand (op2_hi_lo[i], SImode))
25502 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25503 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25504 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25505 rtx tmp = gen_reg_rtx (SImode);
25507 /* Make sure the constant is sign extended. */
25508 if ((hi_16bits & sign_bit) != 0)
25509 hi_16bits |= upper_32bits;
25511 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25512 code, SImode, false, false, false);
25514 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25515 code, SImode, false, false, false);
25517 else
25518 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25519 code, SImode, complement_final_p,
25520 complement_op1_p, complement_op2_p);
25523 return;
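/* Editorial sketch, not part of this file: the constant handling above in
   plain C.  A DImode constant is split into two SImode halves that must be
   re-sign-extended, and a 32-bit IOR/XOR constant that is not a single
   ORI/ORIS (or XORI/XORIS) immediate is split into high and low 16-bit
   pieces, with the high piece sign-extended the same way.  */

#include <stdint.h>

/* Split VALUE into sign-extended high and low 32-bit halves.  */
static void
split_di_constant (int64_t value, int64_t *hi, int64_t *lo)
{
  *hi = value >> 32;                      /* arithmetic shift extends */
  *lo = (int32_t) (value & 0xffffffff);   /* re-extend the low half */
}

/* Split a 32-bit logical constant into the ORIS piece (high 16 bits,
   sign-extended) and the ORI piece (low 16 bits, zero-extended).  */
static void
split_si_logical (int64_t value, int64_t *hi16, int64_t *lo16)
{
  *hi16 = (int32_t) (value & 0xffff0000);
  *lo16 = value & 0x0000ffff;
}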
25526 /* Split the insns that make up boolean operations operating on multiple GPR
25527 registers. The boolean MD patterns ensure that the inputs either are
25528 exactly the same as the output registers, or there is no overlap.
25530 OPERANDS is an array containing the destination and two input operands.
25531 CODE is the base operation (AND, IOR, XOR, NOT).
25532 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25533 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25534 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25536 void
25537 rs6000_split_logical (rtx operands[3],
25538 enum rtx_code code,
25539 bool complement_final_p,
25540 bool complement_op1_p,
25541 bool complement_op2_p)
25543 machine_mode mode = GET_MODE (operands[0]);
25544 machine_mode sub_mode;
25545 rtx op0, op1, op2;
25546 int sub_size, regno0, regno1, nregs, i;
25548 /* If this is DImode, use the specialized version that can run before
25549 register allocation. */
25550 if (mode == DImode && !TARGET_POWERPC64)
25552 rs6000_split_logical_di (operands, code, complement_final_p,
25553 complement_op1_p, complement_op2_p);
25554 return;
25557 op0 = operands[0];
25558 op1 = operands[1];
25559 op2 = (code == NOT) ? NULL_RTX : operands[2];
25560 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25561 sub_size = GET_MODE_SIZE (sub_mode);
25562 regno0 = REGNO (op0);
25563 regno1 = REGNO (op1);
25565 gcc_assert (reload_completed);
25566 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25567 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25569 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25570 gcc_assert (nregs > 1);
25572 if (op2 && REG_P (op2))
25573 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25575 for (i = 0; i < nregs; i++)
25577 int offset = i * sub_size;
25578 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25579 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25580 rtx sub_op2 = ((code == NOT)
25581 ? NULL_RTX
25582 : simplify_subreg (sub_mode, op2, mode, offset));
25584 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25585 complement_final_p, complement_op1_p,
25586 complement_op2_p);
25589 return;
25593 /* Return true if the peephole2 can combine a load involving a combination of
25594 an addis instruction and a load with an offset that can be fused together on
25595 a power8. */
25597 bool
25598 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25599 rtx addis_value, /* addis value. */
25600 rtx target, /* target register that is loaded. */
25601 rtx mem) /* bottom part of the memory addr. */
25603 rtx addr;
25604 rtx base_reg;
25606 /* Validate arguments. */
25607 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25608 return false;
25610 if (!base_reg_operand (target, GET_MODE (target)))
25611 return false;
25613 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25614 return false;
25616 /* Allow sign/zero extension. */
25617 if (GET_CODE (mem) == ZERO_EXTEND
25618 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25619 mem = XEXP (mem, 0);
25621 if (!MEM_P (mem))
25622 return false;
25624 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25625 return false;
25627 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25628 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25629 return false;
25631 /* Validate that the register used to load the high value is either the
25632 register being loaded, or we can safely replace its use.
25634 This function is only called from the peephole2 pass and we assume that
25635 there are 2 instructions in the peephole (addis and load), so we want to
25636 check if the target register was not used in the memory address and the
25637 register to hold the addis result is dead after the peephole. */
25638 if (REGNO (addis_reg) != REGNO (target))
25640 if (reg_mentioned_p (target, mem))
25641 return false;
25643 if (!peep2_reg_dead_p (2, addis_reg))
25644 return false;
25646 /* If the target register being loaded is the stack pointer, we must
25647 avoid loading any other value into it, even temporarily. */
25648 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25649 return false;
25652 base_reg = XEXP (addr, 0);
25653 return REGNO (addis_reg) == REGNO (base_reg);
25656 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25657 sequence. We adjust the addis register to use the target register. If the
25658 load sign extends, we adjust the code to do the zero-extending load, and an
25659 explicit sign extension later, since the fusion only covers zero-extending
25660 loads.
25662 The operands are:
25663 operands[0] register set with addis (to be replaced with target)
25664 operands[1] value set via addis
25665 operands[2] target register being loaded
25666 operands[3] D-form memory reference using operands[0]. */
25668 void
25669 expand_fusion_gpr_load (rtx *operands)
25671 rtx addis_value = operands[1];
25672 rtx target = operands[2];
25673 rtx orig_mem = operands[3];
25674 rtx new_addr, new_mem, orig_addr, offset;
25675 enum rtx_code plus_or_lo_sum;
25676 machine_mode target_mode = GET_MODE (target);
25677 machine_mode extend_mode = target_mode;
25678 machine_mode ptr_mode = Pmode;
25679 enum rtx_code extend = UNKNOWN;
25681 if (GET_CODE (orig_mem) == ZERO_EXTEND
25682 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25684 extend = GET_CODE (orig_mem);
25685 orig_mem = XEXP (orig_mem, 0);
25686 target_mode = GET_MODE (orig_mem);
25689 gcc_assert (MEM_P (orig_mem));
25691 orig_addr = XEXP (orig_mem, 0);
25692 plus_or_lo_sum = GET_CODE (orig_addr);
25693 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25695 offset = XEXP (orig_addr, 1);
25696 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25697 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25699 if (extend != UNKNOWN)
25700 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25702 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25703 UNSPEC_FUSION_GPR);
25704 emit_insn (gen_rtx_SET (target, new_mem));
25706 if (extend == SIGN_EXTEND)
25708 int sub_off = ((BYTES_BIG_ENDIAN)
25709 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25710 : 0);
25711 rtx sign_reg
25712 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25714 emit_insn (gen_rtx_SET (target,
25715 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25718 return;
25721 /* Emit the addis instruction that will be part of a fused instruction
25722 sequence. */
25724 void
25725 emit_fusion_addis (rtx target, rtx addis_value)
25727 rtx fuse_ops[10];
25728 const char *addis_str = NULL;
25730 /* Emit the addis instruction. */
25731 fuse_ops[0] = target;
25732 if (satisfies_constraint_L (addis_value))
25734 fuse_ops[1] = addis_value;
25735 addis_str = "lis %0,%v1";
25738 else if (GET_CODE (addis_value) == PLUS)
25740 rtx op0 = XEXP (addis_value, 0);
25741 rtx op1 = XEXP (addis_value, 1);
25743 if (REG_P (op0) && CONST_INT_P (op1)
25744 && satisfies_constraint_L (op1))
25746 fuse_ops[1] = op0;
25747 fuse_ops[2] = op1;
25748 addis_str = "addis %0,%1,%v2";
25752 else if (GET_CODE (addis_value) == HIGH)
25754 rtx value = XEXP (addis_value, 0);
25755 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25757 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25758 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25759 if (TARGET_ELF)
25760 addis_str = "addis %0,%2,%1@toc@ha";
25762 else if (TARGET_XCOFF)
25763 addis_str = "addis %0,%1@u(%2)";
25765 else
25766 gcc_unreachable ();
25769 else if (GET_CODE (value) == PLUS)
25771 rtx op0 = XEXP (value, 0);
25772 rtx op1 = XEXP (value, 1);
25774 if (GET_CODE (op0) == UNSPEC
25775 && XINT (op0, 1) == UNSPEC_TOCREL
25776 && CONST_INT_P (op1))
25778 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25779 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25780 fuse_ops[3] = op1;
25781 if (TARGET_ELF)
25782 addis_str = "addis %0,%2,%1+%3@toc@ha";
25784 else if (TARGET_XCOFF)
25785 addis_str = "addis %0,%1+%3@u(%2)";
25787 else
25788 gcc_unreachable ();
25792 else if (satisfies_constraint_L (value))
25794 fuse_ops[1] = value;
25795 addis_str = "lis %0,%v1";
25798 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25800 fuse_ops[1] = value;
25801 addis_str = "lis %0,%1@ha";
25805 if (!addis_str)
25806 fatal_insn ("Could not generate addis value for fusion", addis_value);
25808 output_asm_insn (addis_str, fuse_ops);
25811 /* Emit a D-form load or store instruction that is the second instruction
25812 of a fusion sequence. */
25814 static void
25815 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25817 rtx fuse_ops[10];
25818 char insn_template[80];
25820 fuse_ops[0] = load_reg;
25821 fuse_ops[1] = addis_reg;
25823 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25825 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25826 fuse_ops[2] = offset;
25827 output_asm_insn (insn_template, fuse_ops);
25830 else if (GET_CODE (offset) == UNSPEC
25831 && XINT (offset, 1) == UNSPEC_TOCREL)
25833 if (TARGET_ELF)
25834 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25836 else if (TARGET_XCOFF)
25837 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25839 else
25840 gcc_unreachable ();
25842 fuse_ops[2] = XVECEXP (offset, 0, 0);
25843 output_asm_insn (insn_template, fuse_ops);
25846 else if (GET_CODE (offset) == PLUS
25847 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25848 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25849 && CONST_INT_P (XEXP (offset, 1)))
25851 rtx tocrel_unspec = XEXP (offset, 0);
25852 if (TARGET_ELF)
25853 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25855 else if (TARGET_XCOFF)
25856 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25858 else
25859 gcc_unreachable ();
25861 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25862 fuse_ops[3] = XEXP (offset, 1);
25863 output_asm_insn (insn_template, fuse_ops);
25866 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25868 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25870 fuse_ops[2] = offset;
25871 output_asm_insn (insn_template, fuse_ops);
25874 else
25875 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25877 return;
25880 /* Given an address, convert it into the addis and load offset parts. Addresses
25881 created during the peephole2 process look like:
25882 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25883 (unspec [(...)] UNSPEC_TOCREL)) */
25885 static void
25886 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25888 rtx hi, lo;
25890 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25892 hi = XEXP (addr, 0);
25893 lo = XEXP (addr, 1);
25895 else
25896 gcc_unreachable ();
25898 *p_hi = hi;
25899 *p_lo = lo;
25902 /* Return a string to fuse an addis instruction with a GPR load into the
25903 same register that the addis instruction set up. The address used is
25904 the logical address that was formed during peephole2:
25905 (lo_sum (high) (low-part))
25907 The code is complicated, so we call output_asm_insn directly, and just
25908 return "". */
25910 const char *
25911 emit_fusion_gpr_load (rtx target, rtx mem)
25913 rtx addis_value;
25914 rtx addr;
25915 rtx load_offset;
25916 const char *load_str = NULL;
25917 machine_mode mode;
25919 if (GET_CODE (mem) == ZERO_EXTEND)
25920 mem = XEXP (mem, 0);
25922 gcc_assert (REG_P (target) && MEM_P (mem));
25924 addr = XEXP (mem, 0);
25925 fusion_split_address (addr, &addis_value, &load_offset);
25927 /* Now emit the load instruction to the same register. */
25928 mode = GET_MODE (mem);
25929 switch (mode)
25931 case E_QImode:
25932 load_str = "lbz";
25933 break;
25935 case E_HImode:
25936 load_str = "lhz";
25937 break;
25939 case E_SImode:
25940 case E_SFmode:
25941 load_str = "lwz";
25942 break;
25944 case E_DImode:
25945 case E_DFmode:
25946 gcc_assert (TARGET_POWERPC64);
25947 load_str = "ld";
25948 break;
25950 default:
25951 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25954 /* Emit the addis instruction. */
25955 emit_fusion_addis (target, addis_value);
25957 /* Emit the D-form load instruction. */
25958 emit_fusion_load (target, target, load_offset, load_str);
25960 return "";
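/* Editorial sketch, not part of this file: the high/low split behind the
   fused addis + D-form load.  Because the D-form displacement is
   sign-extended, the high part must round up when bit 15 of the offset is
   set; the standard identity is ha(x) = (x + 0x8000) >> 16.  */

#include <assert.h>
#include <stdint.h>

static void
split_ha_l (int32_t x, int32_t *ha, int32_t *lo)
{
  *ha = (int32_t) (((int64_t) x + 0x8000) >> 16);  /* addis immediate */
  *lo = (int16_t) x;                               /* load displacement */
  assert ((((int64_t) *ha) << 16) + *lo == x);
}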
25964 #ifdef RS6000_GLIBC_ATOMIC_FENV
25965 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25966 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25967 #endif
25969 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25971 static void
25972 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25974 if (!TARGET_HARD_FLOAT)
25976 #ifdef RS6000_GLIBC_ATOMIC_FENV
25977 if (atomic_hold_decl == NULL_TREE)
25979 atomic_hold_decl
25980 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25981 get_identifier ("__atomic_feholdexcept"),
25982 build_function_type_list (void_type_node,
25983 double_ptr_type_node,
25984 NULL_TREE));
25985 TREE_PUBLIC (atomic_hold_decl) = 1;
25986 DECL_EXTERNAL (atomic_hold_decl) = 1;
25989 if (atomic_clear_decl == NULL_TREE)
25991 atomic_clear_decl
25992 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25993 get_identifier ("__atomic_feclearexcept"),
25994 build_function_type_list (void_type_node,
25995 NULL_TREE));
25996 TREE_PUBLIC (atomic_clear_decl) = 1;
25997 DECL_EXTERNAL (atomic_clear_decl) = 1;
26000 tree const_double = build_qualified_type (double_type_node,
26001 TYPE_QUAL_CONST);
26002 tree const_double_ptr = build_pointer_type (const_double);
26003 if (atomic_update_decl == NULL_TREE)
26005 atomic_update_decl
26006 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26007 get_identifier ("__atomic_feupdateenv"),
26008 build_function_type_list (void_type_node,
26009 const_double_ptr,
26010 NULL_TREE));
26011 TREE_PUBLIC (atomic_update_decl) = 1;
26012 DECL_EXTERNAL (atomic_update_decl) = 1;
26015 tree fenv_var = create_tmp_var_raw (double_type_node);
26016 TREE_ADDRESSABLE (fenv_var) = 1;
26017 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26018 build4 (TARGET_EXPR, double_type_node, fenv_var,
26019 void_node, NULL_TREE, NULL_TREE));
26021 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26022 *clear = build_call_expr (atomic_clear_decl, 0);
26023 *update = build_call_expr (atomic_update_decl, 1,
26024 fold_convert (const_double_ptr, fenv_addr));
26025 #endif
26026 return;
26029 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26030 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26031 tree call_mffs = build_call_expr (mffs, 0);
26033 /* Generates the equivalent of feholdexcept (&fenv_var)
26035 *fenv_var = __builtin_mffs ();
26036 double fenv_hold;
26037 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26038 __builtin_mtfsf (0xff, fenv_hold); */
26040 /* Mask to clear everything except for the rounding modes and non-IEEE
26041 arithmetic flag. */
26042 const unsigned HOST_WIDE_INT hold_exception_mask
26043 = HOST_WIDE_INT_C (0xffffffff00000007);
26045 tree fenv_var = create_tmp_var_raw (double_type_node);
26047 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
26048 NULL_TREE, NULL_TREE);
26050 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26051 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26052 build_int_cst (uint64_type_node,
26053 hold_exception_mask));
26055 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26056 fenv_llu_and);
26058 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26059 build_int_cst (unsigned_type_node, 0xff),
26060 fenv_hold_mtfsf);
26062 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26064 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26066 double fenv_clear = __builtin_mffs ();
26067 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26068 __builtin_mtfsf (0xff, fenv_clear); */
26070 /* Mask that zeroes the entire FPSCR image held in the lower 32 bits,
26071 clearing all of the exception bits. */
26072 const unsigned HOST_WIDE_INT clear_exception_mask
26073 = HOST_WIDE_INT_C (0xffffffff00000000);
26075 tree fenv_clear = create_tmp_var_raw (double_type_node);
26077 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
26078 call_mffs, NULL_TREE, NULL_TREE);
26080 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26081 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26082 fenv_clean_llu,
26083 build_int_cst (uint64_type_node,
26084 clear_exception_mask));
26086 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26087 fenv_clear_llu_and);
26089 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26090 build_int_cst (unsigned_type_node, 0xff),
26091 fenv_clear_mtfsf);
26093 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26095 /* Generates the equivalent of feupdateenv (&fenv_var)
26097 double old_fenv = __builtin_mffs ();
26098 double fenv_update;
26099 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
26100 (*(uint64_t*)fenv_var & 0x1ff80fff);
26101 __builtin_mtfsf (0xff, fenv_update); */
26103 const unsigned HOST_WIDE_INT update_exception_mask
26104 = HOST_WIDE_INT_C (0xffffffff1fffff00);
26105 const unsigned HOST_WIDE_INT new_exception_mask
26106 = HOST_WIDE_INT_C (0x1ff80fff);
26108 tree old_fenv = create_tmp_var_raw (double_type_node);
26109 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
26110 call_mffs, NULL_TREE, NULL_TREE);
26112 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26113 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26114 build_int_cst (uint64_type_node,
26115 update_exception_mask));
26117 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26118 build_int_cst (uint64_type_node,
26119 new_exception_mask));
26121 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26122 old_llu_and, new_llu_and);
26124 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26125 new_llu_mask);
26127 tree update_mtfsf = build_call_expr (mtfsf, 2,
26128 build_int_cst (unsigned_type_node, 0xff),
26129 fenv_update_mtfsf);
26131 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
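/* Editorial sketch, not part of this file: the three operations built
   above, written directly on a 64-bit image of the mffs result.  HOLD
   keeps only the rounding-mode and non-IEEE bits, CLEAR zeroes the whole
   FPSCR word, and UPDATE merges the saved exception state back into the
   current environment.  */

#include <stdint.h>

static uint64_t
fenv_hold_bits (uint64_t fpscr)
{
  return fpscr & 0xffffffff00000007ULL;
}

static uint64_t
fenv_clear_bits (uint64_t fpscr)
{
  return fpscr & 0xffffffff00000000ULL;
}

static uint64_t
fenv_update_bits (uint64_t current, uint64_t saved)
{
  return (current & 0xffffffff1fffff00ULL) | (saved & 0x1ff80fffULL);
}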
26134 void
26135 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26137 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26139 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26140 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26142 /* The destination layout of the vmrgew instruction is:
26143 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26144 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26145 vmrgew instruction will be correct. */
26146 if (BYTES_BIG_ENDIAN)
26148 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26149 GEN_INT (0)));
26150 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26151 GEN_INT (3)));
26153 else
26155 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26156 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26159 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26160 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26162 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26163 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26165 if (BYTES_BIG_ENDIAN)
26166 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26167 else
26168 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
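/* Editorial sketch, not part of this file: the shuffle above modelled on
   plain arrays.  xxpermdi builds the pairs {src1[0], src2[0]} and
   {src1[1], src2[1]}, xvcvdpsp converts each pair to single precision in
   the even word lanes, and vmrgew interleaves those lanes, so the final
   vector is { src1[0], src1[1], src2[0], src2[1] } as floats.  */

static void
float2_double_scalar (const double src1[2], const double src2[2],
                      float dst[4])
{
  double tmp0[2] = { src1[0], src2[0] };  /* xxpermdi, first elements */
  double tmp1[2] = { src1[1], src2[1] };  /* xxpermdi, second elements */

  for (int i = 0; i < 2; i++)
    {
      dst[2 * i]     = (float) tmp0[i];   /* even lanes, from rtx_tmp2 */
      dst[2 * i + 1] = (float) tmp1[i];   /* odd lanes, from rtx_tmp3 */
    }
}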
26171 void
26172 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26174 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26176 rtx_tmp0 = gen_reg_rtx (V2DImode);
26177 rtx_tmp1 = gen_reg_rtx (V2DImode);
26179 /* The destination layout of the vmrgew instruction is:
26180 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26181 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26182 vmrgew instruction will be correct. */
26183 if (BYTES_BIG_ENDIAN)
26185 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26186 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26188 else
26190 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26191 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26194 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26195 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26197 if (signed_convert)
26199 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26200 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26202 else
26204 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26205 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26208 if (BYTES_BIG_ENDIAN)
26209 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26210 else
26211 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26214 void
26215 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26216 rtx src2)
26218 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26220 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26221 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26223 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26224 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26226 rtx_tmp2 = gen_reg_rtx (V4SImode);
26227 rtx_tmp3 = gen_reg_rtx (V4SImode);
26229 if (signed_convert)
26231 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26232 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26234 else
26236 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26237 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26240 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26243 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26245 static bool
26246 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26247 optimization_type opt_type)
26249 switch (op)
26251 case rsqrt_optab:
26252 return (opt_type == OPTIMIZE_FOR_SPEED
26253 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26255 default:
26256 return true;
26260 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26262 static HOST_WIDE_INT
26263 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26265 if (TREE_CODE (exp) == STRING_CST
26266 && (STRICT_ALIGNMENT || !optimize_size))
26267 return MAX (align, BITS_PER_WORD);
26268 return align;
26271 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26273 static HOST_WIDE_INT
26274 rs6000_starting_frame_offset (void)
26276 if (FRAME_GROWS_DOWNWARD)
26277 return 0;
26278 return RS6000_STARTING_FRAME_OFFSET;
26282 /* Create an alias for a mangled name where we have changed the mangling (in
26283 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26284 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26286 #if TARGET_ELF && RS6000_WEAK
26287 static void
26288 rs6000_globalize_decl_name (FILE * stream, tree decl)
26290 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26292 targetm.asm_out.globalize_label (stream, name);
26294 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26296 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26297 const char *old_name;
26299 ieee128_mangling_gcc_8_1 = true;
26300 lang_hooks.set_decl_assembler_name (decl);
26301 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26302 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26303 ieee128_mangling_gcc_8_1 = false;
26305 if (strcmp (name, old_name) != 0)
26307 fprintf (stream, "\t.weak %s\n", old_name);
26308 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26312 #endif
26315 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26316 function names from <foo>l to <foo>f128 if the default long double type is
26317 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26318 include file switches the names on systems that support long double as IEEE
26319 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26320 In the future, glibc will export names like __ieee128_sinf128 and we can
26321 switch to using those instead of using sinf128, which pollutes the user's
26322 namespace.
26324 This will switch the names for Fortran math functions as well (Fortran
26325 does not use math.h). However, Fortran needs other changes to the compiler and
26326 library before you can switch the real*16 type at compile time.
26328 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26329 only do this transformation if the __float128 type is enabled. This
26330 prevents us from doing the transformation on older 32-bit parts that might
26331 have enabled using IEEE 128-bit floating point as the default long double
26332 type. */
26334 static tree
26335 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26337 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26338 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26340 size_t len = IDENTIFIER_LENGTH (id);
26341 const char *name = IDENTIFIER_POINTER (id);
26343 if (name[len - 1] == 'l')
26345 bool uses_ieee128_p = false;
26346 tree type = TREE_TYPE (decl);
26347 machine_mode ret_mode = TYPE_MODE (type);
26349 /* See if the function returns an IEEE 128-bit floating point type or
26350 complex type. */
26351 if (ret_mode == TFmode || ret_mode == TCmode)
26352 uses_ieee128_p = true;
26353 else
26355 function_args_iterator args_iter;
26356 tree arg;
26358 /* See if the function passes an IEEE 128-bit floating point type
26359 or complex type. */
26360 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26362 machine_mode arg_mode = TYPE_MODE (arg);
26363 if (arg_mode == TFmode || arg_mode == TCmode)
26365 uses_ieee128_p = true;
26366 break;
26371 /* If we passed or returned an IEEE 128-bit floating point type,
26372 change the name. */
26373 if (uses_ieee128_p)
26375 char *name2 = (char *) alloca (len + 4);
26376 memcpy (name2, name, len - 1);
26377 strcpy (name2 + len - 1, "f128");
26378 id = get_identifier (name2);
26383 return id;
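/* Editorial sketch, not part of this file: the rename above is simple
   string surgery, dropping the trailing 'l' and appending "f128" so that
   e.g. __builtin_sinl resolves to sinf128 when long double is IEEE
   128-bit.  The helper name below is invented for illustration.  */

#include <stdlib.h>
#include <string.h>

/* Return a freshly malloc'd "<foo>f128" for a "<foo>l" NAME; the caller
   must check for NULL and free the result.  */
static char *
f128_name (const char *name)
{
  size_t len = strlen (name);        /* NAME is known to end in 'l' */
  char *name2 = malloc (len + 4);    /* (len - 1) + "f128" + NUL */
  if (name2)
    {
      memcpy (name2, name, len - 1);
      strcpy (name2 + len - 1, "f128");
    }
  return name2;
}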
26386 /* Predict whether the given loop in gimple will be transformed in the RTL
26387 doloop_optimize pass. */
26389 static bool
26390 rs6000_predict_doloop_p (struct loop *loop)
26392 gcc_assert (loop);
26394 /* On rs6000, targetm.can_use_doloop_p is actually
26395 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26396 if (loop->inner != NULL)
26398 if (dump_file && (dump_flags & TDF_DETAILS))
26399 fprintf (dump_file, "Predict doloop failure due to"
26400 " loop nesting.\n");
26401 return false;
26404 return true;
26407 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
26409 static bool
26410 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
26412 gcc_assert (MEM_P (mem));
26414 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
26415 AND-type addresses, so don't allow MEMs with those address types to be
26416 substituted as an equivalent expression. See PR93974 for details. */
26417 if (GET_CODE (XEXP (mem, 0)) == AND)
26418 return true;
26420 return false;
26423 struct gcc_target targetm = TARGET_INITIALIZER;
26425 #include "gt-rs6000.h"