[RS6000] rs6000_rtx_costs for PLUS/MINUS constant
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
blob 375fff59928a4b7b2ccb23636e4fd15ff8c10393
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* On PowerPC, we have a limited number of target clones that we care about,
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,		/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
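/* Illustrative note (editorial, hedged): the HWCAP names above are what the
   function multi-versioning resolver tests at load time.  User code such as

       __attribute__ ((target_clones ("default,cpu=power9")))
       int f (int x) { return x * 3; }

   gets one clone per requested ISA, and the generated ifunc resolver picks
   the power9 clone when __builtin_cpu_supports ("arch_3_00") is true.  */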
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Masks of the valid addressing modes, based on register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
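/* Illustrative note (editorial, hedged): a DQ-form access such as
   "lxv vs34,16(r9)" encodes its displacement in a DQ field, so the offset
   must be a multiple of 16.  An offset like 8 cannot be encoded directly
   and would instead be materialized in a register and accessed with an
   indexed (X-form) instruction such as lxvx.  */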
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
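/* Illustrative usage (editorial, hedged): this predicate is intended as the
   guard of define_bypass entries in the pipeline descriptions, along the
   lines of

       (define_bypass 4 "producer-insn" "consumer-insn"
		      "rs6000_store_data_bypass_p")

   where "producer-insn" and "consumer-insn" are placeholder reservation
   names.  The reduced bypass latency then applies only when the consumer
   stores the produced value as data rather than using it as an address.  */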
/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  16,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
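/* Illustrative note (editorial, hedged): rs6000_cost is pointed at one of
   the tables above during option processing (based on -mtune), and the rtx
   cost hooks then read fields from it -- e.g. a SImode MULT is costed from
   rs6000_cost->mulsi, with mulsi_const or mulsi_const9 used instead when
   one operand is a suitable constant.  */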
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1713 #undef TARGET_HARD_REGNO_NREGS
1714 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1715 #undef TARGET_HARD_REGNO_MODE_OK
1716 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1718 #undef TARGET_MODES_TIEABLE_P
1719 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1721 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1722 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1723 rs6000_hard_regno_call_part_clobbered
1725 #undef TARGET_SLOW_UNALIGNED_ACCESS
1726 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1728 #undef TARGET_CAN_CHANGE_MODE_CLASS
1729 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1731 #undef TARGET_CONSTANT_ALIGNMENT
1732 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1734 #undef TARGET_STARTING_FRAME_OFFSET
1735 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1737 #if TARGET_ELF && RS6000_WEAK
1738 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1739 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1740 #endif
1742 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1743 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1745 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1746 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1748 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1749 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1750 rs6000_cannot_substitute_mem_equiv_p
1752 #undef TARGET_INVALID_CONVERSION
1753 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1756 /* Processor table. */
1757 struct rs6000_ptt
1759 const char *const name; /* Canonical processor name. */
1760 const enum processor_type processor; /* Processor type enum value. */
1761 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1764 static struct rs6000_ptt const processor_target_table[] =
1766 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1767 #include "rs6000-cpus.def"
1768 #undef RS6000_CPU
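/* Illustrative sketch (not part of this file): a minimal standalone
   version of the X-macro pattern used above.  Each RS6000_CPU line in
   rs6000-cpus.def expands to one brace-enclosed initializer, so the
   table stays in lockstep with the .def file.  */
#if 0
#define EXAMPLE_CPU_LIST X ("power8") X ("power9") /* stand-in for the .def file */
struct example_entry { const char *name; };
#define X(NAME) { NAME },
static const struct example_entry example_table[] = { EXAMPLE_CPU_LIST };
#undef X
#endif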
1771 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1772 name is invalid. */
1774 static int
1775 rs6000_cpu_name_lookup (const char *name)
1777 size_t i;
1779 if (name != NULL)
1781 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1782 if (! strcmp (name, processor_target_table[i].name))
1783 return (int)i;
1786 return -1;
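/* Illustrative sketch (not part of this file): typical use of the
   lookup above; a negative result flags an unknown -mcpu=/-mtune=
   name that the caller then diagnoses.  */
#if 0
static bool
example_valid_cpu_name_p (const char *name)
{
  return rs6000_cpu_name_lookup (name) >= 0;
}
#endif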
1790 /* Return number of consecutive hard regs needed starting at reg REGNO
1791 to hold something of mode MODE.
1792 This is ordinarily the length in words of a value of mode MODE
1793 but can be less for certain modes in special long registers.
1795 POWER and PowerPC GPRs hold 32 bits worth;
1796 PowerPC64 GPRs and floating point registers hold 64 bits worth. */
1798 static int
1799 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1801 unsigned HOST_WIDE_INT reg_size;
1803 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1804 128-bit floating point that can go in vector registers, which has VSX
1805 memory addressing. */
1806 if (FP_REGNO_P (regno))
1807 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1808 ? UNITS_PER_VSX_WORD
1809 : UNITS_PER_FP_WORD);
1811 else if (ALTIVEC_REGNO_P (regno))
1812 reg_size = UNITS_PER_ALTIVEC_WORD;
1814 else
1815 reg_size = UNITS_PER_WORD;
1817 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
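/* Worked example (not part of this file): the expression above is a
   round-up division.  A 16-byte vector held in 8-byte FPRs needs
   (16 + 8 - 1) / 8 == 2 consecutive registers, while the same vector
   in a 16-byte VSX register needs (16 + 16 - 1) / 16 == 1.  */
#if 0
static int
example_round_up_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}
#endif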
1820 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1821 MODE. */
1822 static int
1823 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1825 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1827 if (COMPLEX_MODE_P (mode))
1828 mode = GET_MODE_INNER (mode);
1830 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1831 registers. We need to allow OImode to have the same registers as POImode,
1832 even though we do not enable the move pattern for OImode. */
1833 if (mode == POImode || mode == OImode)
1834 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1836 /* MMA accumulator modes need FPR registers divisible by 4. We need to allow
1837 XImode to have the same registers as PXImode, even though we do not enable
1838 the move pattern for XImode. */
1839 if (mode == PXImode || mode == XImode)
1840 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1842 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1843 register combinations, and use PTImode where we need to deal with quad
1844 word memory operations. Don't allow quad words in the argument or frame
1845 pointer registers, just registers 0..31. */
1846 if (mode == PTImode)
1847 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1848 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1849 && ((regno & 1) == 0));
1851 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1852 implementations. Don't allow an item to be split between a FP register
1853 and an Altivec register. Allow TImode in all VSX registers if the user
1854 asked for it. */
1855 if (TARGET_VSX && VSX_REGNO_P (regno)
1856 && (VECTOR_MEM_VSX_P (mode)
1857 || VECTOR_ALIGNMENT_P (mode)
1858 || reg_addr[mode].scalar_in_vmx_p
1859 || mode == TImode
1860 || (TARGET_VADDUQM && mode == V1TImode)))
1862 if (FP_REGNO_P (regno))
1863 return FP_REGNO_P (last_regno);
1865 if (ALTIVEC_REGNO_P (regno))
1867 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1868 return 0;
1870 return ALTIVEC_REGNO_P (last_regno);
1874 /* The GPRs can hold any mode, but values bigger than one register
1875 cannot go past R31. */
1876 if (INT_REGNO_P (regno))
1877 return INT_REGNO_P (last_regno);
1879 /* The float registers (except for VSX vector modes) can only hold floating
1880 modes and DImode. */
1881 if (FP_REGNO_P (regno))
1883 if (VECTOR_ALIGNMENT_P (mode))
1884 return false;
1886 if (SCALAR_FLOAT_MODE_P (mode)
1887 && (mode != TDmode || (regno % 2) == 0)
1888 && FP_REGNO_P (last_regno))
1889 return 1;
1891 if (GET_MODE_CLASS (mode) == MODE_INT)
1893 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1894 return 1;
1896 if (TARGET_P8_VECTOR && (mode == SImode))
1897 return 1;
1899 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1900 return 1;
1903 return 0;
1906 /* The CR registers can only hold CC modes. */
1907 if (CR_REGNO_P (regno))
1908 return GET_MODE_CLASS (mode) == MODE_CC;
1910 if (CA_REGNO_P (regno))
1911 return mode == Pmode || mode == SImode;
1913 /* AltiVec modes go only in AltiVec registers. */
1914 if (ALTIVEC_REGNO_P (regno))
1915 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1916 || mode == V1TImode);
1918 /* We cannot put non-VSX TImode or PTImode anywhere except the general registers,
1919 and it must be able to fit within the register set. */
1921 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1924 /* Implement TARGET_HARD_REGNO_NREGS. */
1926 static unsigned int
1927 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1929 return rs6000_hard_regno_nregs[mode][regno];
1932 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1934 static bool
1935 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1937 return rs6000_hard_regno_mode_ok_p[mode][regno];
1940 /* Implement TARGET_MODES_TIEABLE_P.
1942 PTImode cannot tie with other modes because PTImode is restricted to even
1943 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1944 57744).
1946 Similarly, don't allow POImode (vector pair, restricted to even VSX
1947 registers) or PXImode (vector quad, restricted to FPR registers divisible
1948 by 4) to tie with other modes.
1950 Altivec/VSX vector tests were moved ahead of the scalar float mode tests, so that IEEE
1951 128-bit floating point on VSX systems ties with other vectors. */
1953 static bool
1954 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1956 if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode
1957 || mode2 == PTImode || mode2 == POImode || mode2 == PXImode)
1958 return mode1 == mode2;
1960 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1961 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1962 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1963 return false;
1965 if (SCALAR_FLOAT_MODE_P (mode1))
1966 return SCALAR_FLOAT_MODE_P (mode2);
1967 if (SCALAR_FLOAT_MODE_P (mode2))
1968 return false;
1970 if (GET_MODE_CLASS (mode1) == MODE_CC)
1971 return GET_MODE_CLASS (mode2) == MODE_CC;
1972 if (GET_MODE_CLASS (mode2) == MODE_CC)
1973 return false;
1975 return true;
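/* Illustrative sketch (not part of this file): consequences of the
   rules above.  Scalar float modes tie with each other and vector
   modes tie with each other, but the two groups never mix, and the
   restricted PTImode/POImode/PXImode tie only with themselves.  */
#if 0
static void
example_tieable_checks (void)
{
  gcc_assert (rs6000_modes_tieable_p (SFmode, DFmode));   /* both scalar float */
  gcc_assert (!rs6000_modes_tieable_p (PTImode, TImode)); /* PTImode is restricted */
}
#endif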
1978 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1980 static bool
1981 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1982 machine_mode mode)
1984 if (TARGET_32BIT
1985 && TARGET_POWERPC64
1986 && GET_MODE_SIZE (mode) > 4
1987 && INT_REGNO_P (regno))
1988 return true;
1990 if (TARGET_VSX
1991 && FP_REGNO_P (regno)
1992 && GET_MODE_SIZE (mode) > 8
1993 && !FLOAT128_2REG_P (mode))
1994 return true;
1996 return false;
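/* Worked example (not part of this file): with -m32 -mpowerpc64 the
   32-bit ABI only preserves the low 32 bits of each GPR across calls,
   so a DImode value (8 bytes > 4) in a GPR is partially clobbered.
   Likewise with VSX only the low 64 bits (the FPR half) of a vector
   register are preserved, so a 16-byte mode living there is partially
   clobbered as well, unless it is FLOAT128_2REG_P and therefore really
   occupies two 8-byte FPRs.  */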
1999 /* Print interesting facts about registers. */
2000 static void
2001 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2003 int r, m;
2005 for (r = first_regno; r <= last_regno; ++r)
2007 const char *comma = "";
2008 int len;
2010 if (first_regno == last_regno)
2011 fprintf (stderr, "%s:\t", reg_name);
2012 else
2013 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2015 len = 8;
2016 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2017 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2019 if (len > 70)
2021 fprintf (stderr, ",\n\t");
2022 len = 8;
2023 comma = "";
2026 if (rs6000_hard_regno_nregs[m][r] > 1)
2027 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2028 rs6000_hard_regno_nregs[m][r]);
2029 else
2030 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2032 comma = ", ";
2035 if (call_used_or_fixed_reg_p (r))
2037 if (len > 70)
2039 fprintf (stderr, ",\n\t");
2040 len = 8;
2041 comma = "";
2044 len += fprintf (stderr, "%s%s", comma, "call-used");
2045 comma = ", ";
2048 if (fixed_regs[r])
2050 if (len > 70)
2052 fprintf (stderr, ",\n\t");
2053 len = 8;
2054 comma = "";
2057 len += fprintf (stderr, "%s%s", comma, "fixed");
2058 comma = ", ";
2061 if (len > 70)
2063 fprintf (stderr, ",\n\t");
2064 comma = "";
2067 len += fprintf (stderr, "%sreg-class = %s", comma,
2068 reg_class_names[(int)rs6000_regno_regclass[r]]);
2069 comma = ", ";
2071 if (len > 70)
2073 fprintf (stderr, ",\n\t");
2074 comma = "";
2077 fprintf (stderr, "%sregno = %d\n", comma, r);
2081 static const char *
2082 rs6000_debug_vector_unit (enum rs6000_vector v)
2084 const char *ret;
2086 switch (v)
2088 case VECTOR_NONE: ret = "none"; break;
2089 case VECTOR_ALTIVEC: ret = "altivec"; break;
2090 case VECTOR_VSX: ret = "vsx"; break;
2091 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2092 default: ret = "unknown"; break;
2095 return ret;
2098 /* Inner function printing just the address mask for a particular reload
2099 register class. */
2100 DEBUG_FUNCTION char *
2101 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2103 static char ret[8];
2104 char *p = ret;
2106 if ((mask & RELOAD_REG_VALID) != 0)
2107 *p++ = 'v';
2108 else if (keep_spaces)
2109 *p++ = ' ';
2111 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2112 *p++ = 'm';
2113 else if (keep_spaces)
2114 *p++ = ' ';
2116 if ((mask & RELOAD_REG_INDEXED) != 0)
2117 *p++ = 'i';
2118 else if (keep_spaces)
2119 *p++ = ' ';
2121 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2122 *p++ = 'O';
2123 else if ((mask & RELOAD_REG_OFFSET) != 0)
2124 *p++ = 'o';
2125 else if (keep_spaces)
2126 *p++ = ' ';
2128 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2129 *p++ = '+';
2130 else if (keep_spaces)
2131 *p++ = ' ';
2133 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2134 *p++ = '+';
2135 else if (keep_spaces)
2136 *p++ = ' ';
2138 if ((mask & RELOAD_REG_AND_M16) != 0)
2139 *p++ = '&';
2140 else if (keep_spaces)
2141 *p++ = ' ';
2143 *p = '\0';
2145 return ret;
2148 /* Print the address masks in a human readable fashion. */
2149 DEBUG_FUNCTION void
2150 rs6000_debug_print_mode (ssize_t m)
2152 ssize_t rc;
2153 int spaces = 0;
2155 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2156 for (rc = 0; rc < N_RELOAD_REG; rc++)
2157 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2158 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2160 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2161 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2163 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2164 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2165 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2166 spaces = 0;
2168 else
2169 spaces += strlen (" Reload=sl");
2171 if (reg_addr[m].scalar_in_vmx_p)
2173 fprintf (stderr, "%*s Upper=y", spaces, "");
2174 spaces = 0;
2176 else
2177 spaces += strlen (" Upper=y");
2179 if (rs6000_vector_unit[m] != VECTOR_NONE
2180 || rs6000_vector_mem[m] != VECTOR_NONE)
2182 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2183 spaces, "",
2184 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2185 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2188 fputs ("\n", stderr);
2191 #define DEBUG_FMT_ID "%-32s= "
2192 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2193 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2194 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
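/* Illustrative sketch (not part of this file): the macros above rely
   on adjacent string-literal concatenation, so DEBUG_FMT_D is the
   single format string "%-32s= %d\n".  */
#if 0
static void
example_debug_fmt (void)
{
  /* Prints "tls_size" left-justified in a 32-column field, then "= 32".  */
  fprintf (stderr, DEBUG_FMT_D, "tls_size", 32);
}
#endif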
2196 /* Print various interesting information with -mdebug=reg. */
2197 static void
2198 rs6000_debug_reg_global (void)
2200 static const char *const tf[2] = { "false", "true" };
2201 const char *nl = (const char *)0;
2202 int m;
2203 size_t m1, m2, v;
2204 char costly_num[20];
2205 char nop_num[20];
2206 char flags_buffer[40];
2207 const char *costly_str;
2208 const char *nop_str;
2209 const char *trace_str;
2210 const char *abi_str;
2211 const char *cmodel_str;
2212 struct cl_target_option cl_opts;
2214 /* Modes we want tieable information on. */
2215 static const machine_mode print_tieable_modes[] = {
2216 QImode,
2217 HImode,
2218 SImode,
2219 DImode,
2220 TImode,
2221 PTImode,
2222 SFmode,
2223 DFmode,
2224 TFmode,
2225 IFmode,
2226 KFmode,
2227 SDmode,
2228 DDmode,
2229 TDmode,
2230 V2SImode,
2231 V2SFmode,
2232 V16QImode,
2233 V8HImode,
2234 V4SImode,
2235 V2DImode,
2236 V1TImode,
2237 V32QImode,
2238 V16HImode,
2239 V8SImode,
2240 V4DImode,
2241 V2TImode,
2242 V4SFmode,
2243 V2DFmode,
2244 V8SFmode,
2245 V4DFmode,
2246 OImode,
2247 XImode,
2248 POImode,
2249 PXImode,
2250 CCmode,
2251 CCUNSmode,
2252 CCEQmode,
2253 CCFPmode,
2256 /* Virtual regs we are interested in. */
2257 static const struct {
2258 int regno; /* register number. */
2259 const char *name; /* register name. */
2260 } virtual_regs[] = {
2261 { STACK_POINTER_REGNUM, "stack pointer:" },
2262 { TOC_REGNUM, "toc: " },
2263 { STATIC_CHAIN_REGNUM, "static chain: " },
2264 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2265 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2266 { ARG_POINTER_REGNUM, "arg pointer: " },
2267 { FRAME_POINTER_REGNUM, "frame pointer:" },
2268 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2269 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2270 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2271 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2272 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2273 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2274 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2275 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2276 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2279 fputs ("\nHard register information:\n", stderr);
2280 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2281 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2282 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2283 LAST_ALTIVEC_REGNO,
2284 "vs");
2285 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2286 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2287 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2288 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2289 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2290 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2292 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2293 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2294 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2296 fprintf (stderr,
2297 "\n"
2298 "d reg_class = %s\n"
2299 "f reg_class = %s\n"
2300 "v reg_class = %s\n"
2301 "wa reg_class = %s\n"
2302 "we reg_class = %s\n"
2303 "wr reg_class = %s\n"
2304 "wx reg_class = %s\n"
2305 "wA reg_class = %s\n"
2306 "\n",
2307 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2308 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2309 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2311 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2312 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2313 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2314 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2316 nl = "\n";
2317 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2318 rs6000_debug_print_mode (m);
2320 fputs ("\n", stderr);
2322 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2324 machine_mode mode1 = print_tieable_modes[m1];
2325 bool first_time = true;
2327 nl = (const char *)0;
2328 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2330 machine_mode mode2 = print_tieable_modes[m2];
2331 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2333 if (first_time)
2335 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2336 nl = "\n";
2337 first_time = false;
2340 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2344 if (!first_time)
2345 fputs ("\n", stderr);
2348 if (nl)
2349 fputs (nl, stderr);
2351 if (rs6000_recip_control)
2353 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2355 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2356 if (rs6000_recip_bits[m])
2358 fprintf (stderr,
2359 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2360 GET_MODE_NAME (m),
2361 (RS6000_RECIP_AUTO_RE_P (m)
2362 ? "auto"
2363 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2364 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2365 ? "auto"
2366 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2369 fputs ("\n", stderr);
2372 if (rs6000_cpu_index >= 0)
2374 const char *name = processor_target_table[rs6000_cpu_index].name;
2375 HOST_WIDE_INT flags
2376 = processor_target_table[rs6000_cpu_index].target_enable;
2378 sprintf (flags_buffer, "-mcpu=%s flags", name);
2379 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2381 else
2382 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2384 if (rs6000_tune_index >= 0)
2386 const char *name = processor_target_table[rs6000_tune_index].name;
2387 HOST_WIDE_INT flags
2388 = processor_target_table[rs6000_tune_index].target_enable;
2390 sprintf (flags_buffer, "-mtune=%s flags", name);
2391 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2393 else
2394 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2396 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2397 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2398 rs6000_isa_flags);
2400 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2401 rs6000_isa_flags_explicit);
2403 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2404 rs6000_builtin_mask);
2406 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2408 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2409 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2411 switch (rs6000_sched_costly_dep)
2413 case max_dep_latency:
2414 costly_str = "max_dep_latency";
2415 break;
2417 case no_dep_costly:
2418 costly_str = "no_dep_costly";
2419 break;
2421 case all_deps_costly:
2422 costly_str = "all_deps_costly";
2423 break;
2425 case true_store_to_load_dep_costly:
2426 costly_str = "true_store_to_load_dep_costly";
2427 break;
2429 case store_to_load_dep_costly:
2430 costly_str = "store_to_load_dep_costly";
2431 break;
2433 default:
2434 costly_str = costly_num;
2435 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2436 break;
2439 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2441 switch (rs6000_sched_insert_nops)
2443 case sched_finish_regroup_exact:
2444 nop_str = "sched_finish_regroup_exact";
2445 break;
2447 case sched_finish_pad_groups:
2448 nop_str = "sched_finish_pad_groups";
2449 break;
2451 case sched_finish_none:
2452 nop_str = "sched_finish_none";
2453 break;
2455 default:
2456 nop_str = nop_num;
2457 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2458 break;
2461 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2463 switch (rs6000_sdata)
2465 default:
2466 case SDATA_NONE:
2467 break;
2469 case SDATA_DATA:
2470 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2471 break;
2473 case SDATA_SYSV:
2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2475 break;
2477 case SDATA_EABI:
2478 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2479 break;
2483 switch (rs6000_traceback)
2485 case traceback_default: trace_str = "default"; break;
2486 case traceback_none: trace_str = "none"; break;
2487 case traceback_part: trace_str = "part"; break;
2488 case traceback_full: trace_str = "full"; break;
2489 default: trace_str = "unknown"; break;
2492 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2494 switch (rs6000_current_cmodel)
2496 case CMODEL_SMALL: cmodel_str = "small"; break;
2497 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2498 case CMODEL_LARGE: cmodel_str = "large"; break;
2499 default: cmodel_str = "unknown"; break;
2502 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2504 switch (rs6000_current_abi)
2506 case ABI_NONE: abi_str = "none"; break;
2507 case ABI_AIX: abi_str = "aix"; break;
2508 case ABI_ELFv2: abi_str = "ELFv2"; break;
2509 case ABI_V4: abi_str = "V4"; break;
2510 case ABI_DARWIN: abi_str = "darwin"; break;
2511 default: abi_str = "unknown"; break;
2514 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2516 if (rs6000_altivec_abi)
2517 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2519 if (rs6000_darwin64_abi)
2520 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2522 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2523 (TARGET_SOFT_FLOAT ? "true" : "false"));
2525 if (TARGET_LINK_STACK)
2526 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2528 if (TARGET_P8_FUSION)
2530 char options[80];
2532 strcpy (options, "power8");
2533 if (TARGET_P8_FUSION_SIGN)
2534 strcat (options, ", sign");
2536 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2539 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2540 TARGET_SECURE_PLT ? "secure" : "bss");
2541 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2542 aix_struct_return ? "aix" : "sysv");
2543 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2544 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2545 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2546 tf[!!rs6000_align_branch_targets]);
2547 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2548 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2549 rs6000_long_double_type_size);
2550 if (rs6000_long_double_type_size > 64)
2552 fprintf (stderr, DEBUG_FMT_S, "long double type",
2553 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2554 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2555 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2557 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2558 (int)rs6000_sched_restricted_insns_priority);
2559 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2560 (int)END_BUILTINS);
2561 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2562 (int)RS6000_BUILTIN_COUNT);
2564 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2565 (int)TARGET_FLOAT128_ENABLE_TYPE);
2567 if (TARGET_VSX)
2568 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2569 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2571 if (TARGET_DIRECT_MOVE_128)
2572 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2573 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2577 /* Update the addr mask bits in reg_addr to help secondary reload and the
2578 legitimate address support (rs6000_legitimate_address_p) figure out the
2579 appropriate addressing to use. */
2581 static void
2582 rs6000_setup_reg_addr_masks (void)
2584 ssize_t rc, reg, m, nregs;
2585 addr_mask_type any_addr_mask, addr_mask;
2587 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2589 machine_mode m2 = (machine_mode) m;
2590 bool complex_p = false;
2591 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2592 size_t msize;
2594 if (COMPLEX_MODE_P (m2))
2596 complex_p = true;
2597 m2 = GET_MODE_INNER (m2);
2600 msize = GET_MODE_SIZE (m2);
2602 /* SDmode is special in that we want to access it only via REG+REG
2603 addressing on power7 and above, since we want to use the LFIWZX and
2604 STFIWZX instructions to load it. */
2605 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2607 any_addr_mask = 0;
2608 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2610 addr_mask = 0;
2611 reg = reload_reg_map[rc].reg;
2613 /* Can mode values go in the GPR/FPR/Altivec registers? */
2614 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2616 bool small_int_vsx_p = (small_int_p
2617 && (rc == RELOAD_REG_FPR
2618 || rc == RELOAD_REG_VMX));
2620 nregs = rs6000_hard_regno_nregs[m][reg];
2621 addr_mask |= RELOAD_REG_VALID;
2623 /* Indicate if the mode takes more than 1 physical register. If
2624 it takes a single register, indicate it can do REG+REG
2625 addressing. Small integers in VSX registers can only do
2626 REG+REG addressing. */
2627 if (small_int_vsx_p)
2628 addr_mask |= RELOAD_REG_INDEXED;
2629 else if (nregs > 1 || m == BLKmode || complex_p)
2630 addr_mask |= RELOAD_REG_MULTIPLE;
2631 else
2632 addr_mask |= RELOAD_REG_INDEXED;
2634 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2635 addressing. If we allow scalars into Altivec registers,
2636 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2638 For VSX systems, we don't allow update addressing for
2639 DFmode/SFmode if those registers can go in both the
2640 traditional floating point registers and Altivec registers.
2641 The load/store instructions for the Altivec registers do not
2642 have update forms. If we allowed update addressing, it seems
2643 to break IV-OPT code using floating point if the index type is
2644 int instead of long (PR target/81550 and target/84042). */
2646 if (TARGET_UPDATE
2647 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2648 && msize <= 8
2649 && !VECTOR_MODE_P (m2)
2650 && !VECTOR_ALIGNMENT_P (m2)
2651 && !complex_p
2652 && (m != E_DFmode || !TARGET_VSX)
2653 && (m != E_SFmode || !TARGET_P8_VECTOR)
2654 && !small_int_vsx_p)
2656 addr_mask |= RELOAD_REG_PRE_INCDEC;
2658 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2659 we don't allow PRE_MODIFY for some multi-register
2660 operations. */
2661 switch (m)
2663 default:
2664 addr_mask |= RELOAD_REG_PRE_MODIFY;
2665 break;
2667 case E_DImode:
2668 if (TARGET_POWERPC64)
2669 addr_mask |= RELOAD_REG_PRE_MODIFY;
2670 break;
2672 case E_DFmode:
2673 case E_DDmode:
2674 if (TARGET_HARD_FLOAT)
2675 addr_mask |= RELOAD_REG_PRE_MODIFY;
2676 break;
2681 /* GPR and FPR registers can do REG+OFFSET addressing, except
2682 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2683 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2684 if ((addr_mask != 0) && !indexed_only_p
2685 && msize <= 8
2686 && (rc == RELOAD_REG_GPR
2687 || ((msize == 8 || m2 == SFmode)
2688 && (rc == RELOAD_REG_FPR
2689 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2690 addr_mask |= RELOAD_REG_OFFSET;
2692 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2693 instructions are enabled. The offset for 128-bit VSX registers is
2694 only 12 bits. While GPRs can handle the full offset range, VSX
2695 registers can only handle the restricted range. */
2696 else if ((addr_mask != 0) && !indexed_only_p
2697 && msize == 16 && TARGET_P9_VECTOR
2698 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2699 || (m2 == TImode && TARGET_VSX)))
2701 addr_mask |= RELOAD_REG_OFFSET;
2702 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2703 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2706 /* Vector pairs can do both indexed and offset loads if the
2707 instructions are enabled; otherwise they can only do offset loads,
2708 since the access will be broken into two vector moves. Vector quads can
2709 only do offset loads. */
2710 else if ((addr_mask != 0) && TARGET_MMA
2711 && (m2 == POImode || m2 == PXImode))
2713 addr_mask |= RELOAD_REG_OFFSET;
2714 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2716 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2717 if (m2 == POImode)
2718 addr_mask |= RELOAD_REG_INDEXED;
2722 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2723 addressing on 128-bit types. */
2724 if (rc == RELOAD_REG_VMX && msize == 16
2725 && (addr_mask & RELOAD_REG_VALID) != 0)
2726 addr_mask |= RELOAD_REG_AND_M16;
2728 reg_addr[m].addr_mask[rc] = addr_mask;
2729 any_addr_mask |= addr_mask;
2732 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
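/* Illustrative sketch (not part of this file): once the masks are set
   up, callers answer addressing questions with simple bit tests, for
   example "does any reload register class support D-form (REG+OFFSET)
   addressing for this mode?".  */
#if 0
static bool
example_mode_supports_offset_p (machine_mode mode)
{
  return (reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_OFFSET) != 0;
}
#endif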
2737 /* Initialize the various global tables that are based on register size. */
2738 static void
2739 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2741 ssize_t r, m, c;
2742 int align64;
2743 int align32;
2745 /* Precalculate REGNO_REG_CLASS. */
2746 rs6000_regno_regclass[0] = GENERAL_REGS;
2747 for (r = 1; r < 32; ++r)
2748 rs6000_regno_regclass[r] = BASE_REGS;
2750 for (r = 32; r < 64; ++r)
2751 rs6000_regno_regclass[r] = FLOAT_REGS;
2753 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2754 rs6000_regno_regclass[r] = NO_REGS;
2756 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2757 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2759 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2760 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2761 rs6000_regno_regclass[r] = CR_REGS;
2763 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2764 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2765 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2766 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2767 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2768 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2769 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2771 /* Precalculate the mapping from register class to reload register type. We don't
2772 need all of the register classes that are combinations of different
2773 classes, just the simple ones that have constraint letters. */
2774 for (c = 0; c < N_REG_CLASSES; c++)
2775 reg_class_to_reg_type[c] = NO_REG_TYPE;
2777 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2779 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2780 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2781 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2782 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2783 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2784 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2785 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2786 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2788 if (TARGET_VSX)
2790 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2791 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2793 else
2795 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2796 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2799 /* Precalculate the valid memory formats as well as the vector information;
2800 this must be set up before the rs6000_hard_regno_nregs_internal calls
2801 below. */
2802 gcc_assert ((int)VECTOR_NONE == 0);
2803 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2804 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2806 gcc_assert ((int)CODE_FOR_nothing == 0);
2807 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2809 gcc_assert ((int)NO_REGS == 0);
2810 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2812 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2813 controls whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2814 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2816 align64 = 64;
2817 align32 = 32;
2819 else
2821 align64 = 128;
2822 align32 = 128;
2825 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2826 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2827 if (TARGET_FLOAT128_TYPE)
2829 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2830 rs6000_vector_align[KFmode] = 128;
2832 if (FLOAT128_IEEE_P (TFmode))
2834 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2835 rs6000_vector_align[TFmode] = 128;
2839 /* V2DF mode, VSX only. */
2840 if (TARGET_VSX)
2842 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2843 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2844 rs6000_vector_align[V2DFmode] = align64;
2847 /* V4SF mode, either VSX or Altivec. */
2848 if (TARGET_VSX)
2850 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2851 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2852 rs6000_vector_align[V4SFmode] = align32;
2854 else if (TARGET_ALTIVEC)
2856 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2857 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2858 rs6000_vector_align[V4SFmode] = align32;
2861 /* V16QImode, V8HImode, V4SImode are Altivec only, but may use VSX loads
2862 and stores. */
2863 if (TARGET_ALTIVEC)
2865 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2866 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2867 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2868 rs6000_vector_align[V4SImode] = align32;
2869 rs6000_vector_align[V8HImode] = align32;
2870 rs6000_vector_align[V16QImode] = align32;
2872 if (TARGET_VSX)
2874 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2875 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2876 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2878 else
2880 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2881 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2882 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2886 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2887 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2888 if (TARGET_VSX)
2890 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2891 rs6000_vector_unit[V2DImode]
2892 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2893 rs6000_vector_align[V2DImode] = align64;
2895 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2896 rs6000_vector_unit[V1TImode]
2897 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2898 rs6000_vector_align[V1TImode] = 128;
2901 /* DFmode, see if we want to use the VSX unit. Memory is handled
2902 differently, so don't set rs6000_vector_mem. */
2903 if (TARGET_VSX)
2905 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2906 rs6000_vector_align[DFmode] = 64;
2909 /* SFmode, see if we want to use the VSX unit. */
2910 if (TARGET_P8_VECTOR)
2912 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2913 rs6000_vector_align[SFmode] = 32;
2916 /* Allow TImode in VSX register and set the VSX memory macros. */
2917 if (TARGET_VSX)
2919 rs6000_vector_mem[TImode] = VECTOR_VSX;
2920 rs6000_vector_align[TImode] = align64;
2923 /* Add support for vector pairs and vector quad registers. */
2924 if (TARGET_MMA)
2926 rs6000_vector_unit[POImode] = VECTOR_NONE;
2927 rs6000_vector_mem[POImode] = VECTOR_VSX;
2928 rs6000_vector_align[POImode] = 256;
2930 rs6000_vector_unit[PXImode] = VECTOR_NONE;
2931 rs6000_vector_mem[PXImode] = VECTOR_VSX;
2932 rs6000_vector_align[PXImode] = 512;
2935 /* Register class constraints for the constraints that depend on compile
2936 switches. When the VSX code was added, different constraints were added
2937 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2938 of the VSX registers are used. The register classes for scalar floating
2939 point types are set based on whether we allow that type into the upper
2940 (Altivec) registers. GCC has register classes to target the Altivec
2941 registers for load/store operations, to select using a VSX memory
2942 operation instead of the traditional floating point operation. The
2943 constraints are:
2945 d - Register class to use with traditional DFmode instructions.
2946 f - Register class to use with traditional SFmode instructions.
2947 v - Altivec register.
2948 wa - Any VSX register.
2949 wc - Reserved to represent individual CR bits (used in LLVM).
2950 wn - always NO_REGS.
2951 wr - GPR if 64-bit mode is permitted.
2952 wx - Float register if we can do 32-bit int stores. */
2954 if (TARGET_HARD_FLOAT)
2956 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2957 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2960 if (TARGET_VSX)
2961 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2963 /* Add conditional constraints based on various options, to allow us to
2964 collapse multiple insn patterns. */
2965 if (TARGET_ALTIVEC)
2966 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2968 if (TARGET_POWERPC64)
2970 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2971 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2974 if (TARGET_STFIWX)
2975 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2977 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2978 if (TARGET_DIRECT_MOVE_128)
2979 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2981 /* Set up the reload helper and direct move functions. */
2982 if (TARGET_VSX || TARGET_ALTIVEC)
2984 if (TARGET_64BIT)
2986 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2987 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2988 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2989 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2990 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2991 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2992 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2993 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2994 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2995 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2996 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2997 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2998 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2999 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3000 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3001 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3002 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3003 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3004 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3005 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3007 if (FLOAT128_VECTOR_P (KFmode))
3009 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3010 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3013 if (FLOAT128_VECTOR_P (TFmode))
3015 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3016 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3019 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3020 available. */
3021 if (TARGET_NO_SDMODE_STACK)
3023 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3024 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3027 if (TARGET_VSX)
3029 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3030 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3033 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3035 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3036 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3037 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3038 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3039 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3040 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3041 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3042 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3043 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3045 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3046 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3047 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3048 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3049 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3050 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3051 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3052 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3053 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3055 if (FLOAT128_VECTOR_P (KFmode))
3057 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3058 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3061 if (FLOAT128_VECTOR_P (TFmode))
3063 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3064 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3067 if (TARGET_MMA)
3069 reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store;
3070 reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load;
3071 reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store;
3072 reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load;
3076 else
3078 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3079 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3080 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3081 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3082 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3083 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3084 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3085 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3086 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3087 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3088 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3089 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3090 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3091 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3092 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3093 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3094 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3095 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3096 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3097 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3099 if (FLOAT128_VECTOR_P (KFmode))
3101 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3102 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3105 if (FLOAT128_IEEE_P (TFmode))
3107 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3108 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3111 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3112 available. */
3113 if (TARGET_NO_SDMODE_STACK)
3115 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3116 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3119 if (TARGET_VSX)
3121 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3122 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3125 if (TARGET_DIRECT_MOVE)
3127 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3128 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3129 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3133 reg_addr[DFmode].scalar_in_vmx_p = true;
3134 reg_addr[DImode].scalar_in_vmx_p = true;
3136 if (TARGET_P8_VECTOR)
3138 reg_addr[SFmode].scalar_in_vmx_p = true;
3139 reg_addr[SImode].scalar_in_vmx_p = true;
3141 if (TARGET_P9_VECTOR)
3143 reg_addr[HImode].scalar_in_vmx_p = true;
3144 reg_addr[QImode].scalar_in_vmx_p = true;
3149 /* Precalculate HARD_REGNO_NREGS. */
3150 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3151 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3152 rs6000_hard_regno_nregs[m][r]
3153 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3155 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3156 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3157 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3158 rs6000_hard_regno_mode_ok_p[m][r]
3159 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3161 /* Precalculate CLASS_MAX_NREGS sizes. */
3162 for (c = 0; c < LIM_REG_CLASSES; ++c)
3164 int reg_size;
3166 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3167 reg_size = UNITS_PER_VSX_WORD;
3169 else if (c == ALTIVEC_REGS)
3170 reg_size = UNITS_PER_ALTIVEC_WORD;
3172 else if (c == FLOAT_REGS)
3173 reg_size = UNITS_PER_FP_WORD;
3175 else
3176 reg_size = UNITS_PER_WORD;
3178 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3180 machine_mode m2 = (machine_mode)m;
3181 int reg_size2 = reg_size;
3183 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3184 in VSX. */
3185 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3186 reg_size2 = UNITS_PER_FP_WORD;
3188 rs6000_class_max_nregs[m][c]
3189 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
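/* Worked example (not part of this file): with VSX, IBM 128-bit
   floating point modes are FLOAT128_2REG_P, so reg_size2 drops back to
   UNITS_PER_FP_WORD (8) and the entry becomes (16 + 8 - 1) / 8 == 2
   registers, whereas an ordinary 16-byte vector mode in a VSX class
   gets (16 + 16 - 1) / 16 == 1.  */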
3193 /* Calculate which modes to automatically generate code to use the
3194 reciprocal divide and square root instructions. In the future, possibly
3195 automatically generate the instructions even if the user did not specify
3196 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3197 not accurate enough. */
3198 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3199 if (TARGET_FRES)
3200 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3201 if (TARGET_FRE)
3202 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3203 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3204 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3205 if (VECTOR_UNIT_VSX_P (V2DFmode))
3206 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3208 if (TARGET_FRSQRTES)
3209 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3210 if (TARGET_FRSQRTE)
3211 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3212 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3213 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3214 if (VECTOR_UNIT_VSX_P (V2DFmode))
3215 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3217 if (rs6000_recip_control)
3219 if (!flag_finite_math_only)
3220 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3221 "-ffast-math");
3222 if (flag_trapping_math)
3223 warning (0, "%qs requires %qs or %qs", "-mrecip",
3224 "-fno-trapping-math", "-ffast-math");
3225 if (!flag_reciprocal_math)
3226 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3227 "-ffast-math");
3228 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3230 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3231 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3232 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3234 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3235 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3236 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3238 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3239 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3240 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3242 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3243 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3244 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3247 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3248 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3251 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3252 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3254 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3255 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3256 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3258 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3259 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3260 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3264 /* Update the addr mask bits in reg_addr to help secondary reload and the
3265 legitimate address support (rs6000_legitimate_address_p) figure out the
3266 appropriate addressing to use. */
3267 rs6000_setup_reg_addr_masks ();
3269 if (global_init_p || TARGET_DEBUG_TARGET)
3271 if (TARGET_DEBUG_REG)
3272 rs6000_debug_reg_global ();
3274 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3275 fprintf (stderr,
3276 "SImode variable mult cost = %d\n"
3277 "SImode constant mult cost = %d\n"
3278 "SImode short constant mult cost = %d\n"
3279 "DImode multiplication cost = %d\n"
3280 "SImode division cost = %d\n"
3281 "DImode division cost = %d\n"
3282 "Simple fp operation cost = %d\n"
3283 "DFmode multiplication cost = %d\n"
3284 "SFmode division cost = %d\n"
3285 "DFmode division cost = %d\n"
3286 "cache line size = %d\n"
3287 "l1 cache size = %d\n"
3288 "l2 cache size = %d\n"
3289 "simultaneous prefetches = %d\n"
3290 "\n",
3291 rs6000_cost->mulsi,
3292 rs6000_cost->mulsi_const,
3293 rs6000_cost->mulsi_const9,
3294 rs6000_cost->muldi,
3295 rs6000_cost->divsi,
3296 rs6000_cost->divdi,
3297 rs6000_cost->fp,
3298 rs6000_cost->dmul,
3299 rs6000_cost->sdiv,
3300 rs6000_cost->ddiv,
3301 rs6000_cost->cache_line_size,
3302 rs6000_cost->l1_cache_size,
3303 rs6000_cost->l2_cache_size,
3304 rs6000_cost->simultaneous_prefetches);
3308 #if TARGET_MACHO
3309 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3311 static void
3312 darwin_rs6000_override_options (void)
3314 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3315 off. */
3316 rs6000_altivec_abi = 1;
3317 TARGET_ALTIVEC_VRSAVE = 1;
3318 rs6000_current_abi = ABI_DARWIN;
3320 if (DEFAULT_ABI == ABI_DARWIN
3321 && TARGET_64BIT)
3322 darwin_one_byte_bool = 1;
3324 if (TARGET_64BIT && ! TARGET_POWERPC64)
3326 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3327 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3330 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3331 optimisation, and will not work with the most generic case (where the
3332 symbol is undefined external, but there is no symbol stub). */
3333 if (TARGET_64BIT)
3334 rs6000_default_long_calls = 0;
3336 /* ld_classic is (so far) still used for kernel (static) code, and supports
3337 the JBSR longcall / branch islands. */
3338 if (flag_mkernel)
3340 rs6000_default_long_calls = 1;
3342 /* Allow a kext author to do -mkernel -mhard-float. */
3343 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3344 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3347 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3348 Altivec. */
3349 if (!flag_mkernel && !flag_apple_kext
3350 && TARGET_64BIT
3351 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3352 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3354 /* Unless the user (not the configurer) has explicitly overridden
3355 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3356 G4 unless targeting the kernel. */
3357 if (!flag_mkernel
3358 && !flag_apple_kext
3359 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3360 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3361 && ! global_options_set.x_rs6000_cpu_index)
3363 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3366 #endif
3368 /* If not otherwise specified by a target, make 'long double' equivalent to
3369 'double'. */
3371 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3372 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3373 #endif
3375 /* Return the builtin mask of the various options in use that could affect which
3376 builtins are enabled. In the past we used target_flags, but we've run out of
3377 bits, and some options are no longer in target_flags. */
3379 HOST_WIDE_INT
3380 rs6000_builtin_mask_calculate (void)
3382 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3383 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3384 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3385 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3386 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3387 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3388 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3389 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3390 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3391 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3392 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3393 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3394 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3395 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3396 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3397 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3398 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3399 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3400 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3401 | ((TARGET_LONG_DOUBLE_128
3402 && TARGET_HARD_FLOAT
3403 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3404 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3405 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
3406 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0)
3407 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0));
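/* Illustrative sketch (hypothetical, not part of this file): a builtin
   whose mask requires several features is available only when all of its
   bits are present in the computed mask, e.g.:

     HOST_WIDE_INT fnmask = RS6000_BTM_VSX | RS6000_BTM_64BIT;
     bool available = (fnmask & rs6000_builtin_mask) == fnmask;

   fnmask and available are names invented for this example.  */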
3410 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3411 to clobber the XER[CA] bit because clobbering that bit without telling
3412 the compiler worked just fine with versions of GCC before GCC 5, and
3413 breaking a lot of older code in ways that are hard to track down is
3414 not such a great idea. */
3416 static rtx_insn *
3417 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3418 vec<const char *> &/*constraints*/,
3419 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3421 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3422 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3423 return NULL;
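/* For illustration (hypothetical user code): an asm statement such as

     asm ("addic %0,%1,1" : "=r" (out) : "r" (in));

   modifies XER[CA] without declaring it; the implicit clobber added
   above keeps such pre-GCC-5 code working.  */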
3426 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3427 but is called when the optimize level is changed via an attribute or
3428 pragma or when it is reset at the end of the code affected by the
3429 attribute or pragma. It is not called at the beginning of compilation
3430 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3431 actions then, you should have TARGET_OPTION_OVERRIDE call
3432 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3434 static void
3435 rs6000_override_options_after_change (void)
3437 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3438 turns -frename-registers on. */
3439 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
3440 || (global_options_set.x_flag_unroll_all_loops
3441 && flag_unroll_all_loops))
3443 if (!global_options_set.x_unroll_only_small_loops)
3444 unroll_only_small_loops = 0;
3445 if (!global_options_set.x_flag_rename_registers)
3446 flag_rename_registers = 1;
3447 if (!global_options_set.x_flag_cunroll_grow_size)
3448 flag_cunroll_grow_size = 1;
3450 else if (!global_options_set.x_flag_cunroll_grow_size)
3451 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
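/* For illustration: given the logic above, "gcc -O2 -funroll-loops" also
   turns off -munroll-only-small-loops and turns on -frename-registers
   and flag_cunroll_grow_size, unless any of those was set explicitly.  */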
3454 /* Override command line options.
3456 Combine build-specific configuration information with options
3457 specified on the command line to set various state variables which
3458 influence code generation, optimization, and expansion of built-in
3459 functions. Assure that command-line configuration preferences are
3460 compatible with each other and with the build configuration; issue
3461 warnings while adjusting configuration or error messages while
3462 rejecting configuration.
3464 Upon entry to this function:
3466 This function is called once at the beginning of
3467 compilation, and then again at the start and end of compiling
3468 each section of code that has a different configuration, as
3469 indicated, for example, by adding the
3471 __attribute__((__target__("cpu=power9")))
3473 qualifier to a function definition or, for example, by bracketing
3474 code between
3476 #pragma GCC target("altivec")
3480 #pragma GCC reset_options
3482 directives. Parameter global_init_p is true for the initial
3483 invocation, which initializes global variables, and false for all
3484 subsequent invocations.
3487 Various global state information is assumed to be valid. This
3488 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3489 default CPU specified at build configure time, TARGET_DEFAULT,
3490 representing the default set of option flags for the default
3491 target, and global_options_set.x_rs6000_isa_flags, representing
3492 which options were requested on the command line.
3494 Upon return from this function:
3496 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3497 was set by name on the command line. Additionally, if certain
3498 attributes are automatically enabled or disabled by this function
3499 in order to assure compatibility between options and
3500 configuration, the flags associated with those attributes are
3501 also set. By setting these "explicit bits", we avoid the risk
3502 that other code might accidentally overwrite these particular
3503 attributes with "default values".
3505 The various bits of rs6000_isa_flags are set to indicate the
3506 target options that have been selected for the most current
3507 compilation efforts. This has the effect of also turning on the
3508 associated TARGET_XXX values since these are macros which are
3509 generally defined to test the corresponding bit of the
3510 rs6000_isa_flags variable.
3512 The variable rs6000_builtin_mask is set to represent the target
3513 options for the most current compilation efforts, consistent with
3514 the current contents of rs6000_isa_flags. This variable controls
3515 expansion of built-in functions.
3517 Various other global variables and fields of global structures
3518 (over 50 in all) are initialized to reflect the desired options
3519 for the most current compilation efforts. */
3521 static bool
3522 rs6000_option_override_internal (bool global_init_p)
3524 bool ret = true;
3526 HOST_WIDE_INT set_masks;
3527 HOST_WIDE_INT ignore_masks;
3528 int cpu_index = -1;
3529 int tune_index;
3530 struct cl_target_option *main_target_opt
3531 = ((global_init_p || target_option_default_node == NULL)
3532 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3534 /* Print defaults. */
3535 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3536 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3538 /* Remember the explicit arguments. */
3539 if (global_init_p)
3540 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3542 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3543 library functions, so warn about it. The flag may be useful for
3544 performance studies from time to time though, so don't disable it
3545 entirely. */
3546 if (global_options_set.x_rs6000_alignment_flags
3547 && rs6000_alignment_flags == MASK_ALIGN_POWER
3548 && DEFAULT_ABI == ABI_DARWIN
3549 && TARGET_64BIT)
3550 warning (0, "%qs is not supported for 64-bit Darwin;"
3551 " it is incompatible with the installed C and C++ libraries",
3552 "-malign-power");
3554 /* Numerous experiments show that IRA-based loop pressure
3555 calculation works better for RTL loop invariant motion on targets
3556 with enough (>= 32) registers. It is an expensive optimization,
3557 so it is enabled only when optimizing for peak performance. */
3558 if (optimize >= 3 && global_init_p
3559 && !global_options_set.x_flag_ira_loop_pressure)
3560 flag_ira_loop_pressure = 1;
3562 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3563 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3564 options were already specified. */
3565 if (flag_sanitize & SANITIZE_USER_ADDRESS
3566 && !global_options_set.x_flag_asynchronous_unwind_tables)
3567 flag_asynchronous_unwind_tables = 1;
3569 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3570 loop unroller is active. It is only checked during unrolling, so
3571 we can just set it on by default. */
3572 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3573 flag_variable_expansion_in_unroller = 1;
3575 /* Set the pointer size. */
3576 if (TARGET_64BIT)
3578 rs6000_pmode = DImode;
3579 rs6000_pointer_size = 64;
3581 else
3583 rs6000_pmode = SImode;
3584 rs6000_pointer_size = 32;
3587 /* Some OSs don't support saving the high part of 64-bit registers on context
3588 switch. Other OSs don't support saving Altivec registers. On those OSs,
3589 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3590 if the user wants either, the user must explicitly specify them and we
3591 won't interfere with the user's specification. */
3593 set_masks = POWERPC_MASKS;
3594 #ifdef OS_MISSING_POWERPC64
3595 if (OS_MISSING_POWERPC64)
3596 set_masks &= ~OPTION_MASK_POWERPC64;
3597 #endif
3598 #ifdef OS_MISSING_ALTIVEC
3599 if (OS_MISSING_ALTIVEC)
3600 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3601 | OTHER_VSX_VECTOR_MASKS);
3602 #endif
3604 /* Don't override by the processor default if given explicitly. */
3605 set_masks &= ~rs6000_isa_flags_explicit;
3607 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3608 the cpu in a target attribute or pragma, but did not specify a tuning
3609 option, use the cpu for the tuning option rather than the option specified
3610 with -mtune on the command line. Process a '--with-cpu' configuration
3611 request as an implicit --cpu. */
3612 if (rs6000_cpu_index >= 0)
3613 cpu_index = rs6000_cpu_index;
3614 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3615 cpu_index = main_target_opt->x_rs6000_cpu_index;
3616 else if (OPTION_TARGET_CPU_DEFAULT)
3617 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3619 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3620 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3621 with those from the cpu, except for options that were explicitly set. If
3622 we don't have a cpu, do not override the target bits set in
3623 TARGET_DEFAULT. */
3624 if (cpu_index >= 0)
3626 rs6000_cpu_index = cpu_index;
3627 rs6000_isa_flags &= ~set_masks;
3628 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3629 & set_masks);
3631 else
3633 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3634 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3635 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3636 to using rs6000_isa_flags, we need to do the initialization here.
3638 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3639 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3640 HOST_WIDE_INT flags;
3641 if (TARGET_DEFAULT)
3642 flags = TARGET_DEFAULT;
3643 else
3645 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3646 const char *default_cpu = (!TARGET_POWERPC64
3647 ? "powerpc"
3648 : (BYTES_BIG_ENDIAN
3649 ? "powerpc64"
3650 : "powerpc64le"));
3651 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3652 flags = processor_target_table[default_cpu_index].target_enable;
3654 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3657 if (rs6000_tune_index >= 0)
3658 tune_index = rs6000_tune_index;
3659 else if (cpu_index >= 0)
3660 rs6000_tune_index = tune_index = cpu_index;
3661 else
3663 size_t i;
3664 enum processor_type tune_proc
3665 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3667 tune_index = -1;
3668 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3669 if (processor_target_table[i].processor == tune_proc)
3671 tune_index = i;
3672 break;
3676 if (cpu_index >= 0)
3677 rs6000_cpu = processor_target_table[cpu_index].processor;
3678 else
3679 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3681 gcc_assert (tune_index >= 0);
3682 rs6000_tune = processor_target_table[tune_index].processor;
3684 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3685 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3686 || rs6000_cpu == PROCESSOR_PPCE5500)
3688 if (TARGET_ALTIVEC)
3689 error ("AltiVec not supported in this target");
3692 /* If we are optimizing big endian systems for space, use the load/store
3693 multiple instructions. */
3694 if (BYTES_BIG_ENDIAN && optimize_size)
3695 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3697 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3698 because the hardware doesn't support the instructions used in little
3699 endian mode, and causes an alignment trap. The 750 does not cause an
3700 alignment trap (except when the target is unaligned). */
3702 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3704 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3705 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3706 warning (0, "%qs is not supported on little endian systems",
3707 "-mmultiple");
3710 /* If little-endian, default to -mstrict-align on older processors.
3711 Testing for htm matches power8 and later. */
3712 if (!BYTES_BIG_ENDIAN
3713 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3714 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3716 if (!rs6000_fold_gimple)
3717 fprintf (stderr,
3718 "gimple folding of rs6000 builtins has been disabled.\n");
3720 /* Add some warnings for VSX. */
3721 if (TARGET_VSX)
3723 const char *msg = NULL;
3724 if (!TARGET_HARD_FLOAT)
3726 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3727 msg = N_("%<-mvsx%> requires hardware floating point");
3728 else
3730 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3731 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3734 else if (TARGET_AVOID_XFORM > 0)
3735 msg = N_("%<-mvsx%> needs indexed addressing");
3736 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3737 & OPTION_MASK_ALTIVEC))
3739 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3740 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3741 else
3742 msg = N_("%<-mno-altivec%> disables vsx");
3745 if (msg)
3747 warning (0, msg);
3748 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3749 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3753 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3754 the -mcpu setting to enable options that conflict. */
3755 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3756 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3757 | OPTION_MASK_ALTIVEC
3758 | OPTION_MASK_VSX)) != 0)
3759 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3760 | OPTION_MASK_DIRECT_MOVE)
3761 & ~rs6000_isa_flags_explicit);
3763 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3764 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3766 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3767 off all of the options that depend on those flags. */
3768 ignore_masks = rs6000_disable_incompatible_switches ();
3770 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3771 unless the user explicitly used the -mno-<option> to disable the code. */
3772 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3773 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3774 else if (TARGET_P9_MINMAX)
3776 if (cpu_index >= 0)
3778 if (cpu_index == PROCESSOR_POWER9)
3780 /* legacy behavior: allow -mcpu=power9 with certain
3781 capabilities explicitly disabled. */
3782 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3784 else
3785 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3786 "for <xxx> less than power9", "-mcpu");
3788 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3789 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3790 & rs6000_isa_flags_explicit))
3791 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3792 were explicitly cleared. */
3793 error ("%qs incompatible with explicitly disabled options",
3794 "-mpower9-minmax");
3795 else
3796 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3798 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3799 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3800 else if (TARGET_VSX)
3801 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3802 else if (TARGET_POPCNTD)
3803 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3804 else if (TARGET_DFP)
3805 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3806 else if (TARGET_CMPB)
3807 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3808 else if (TARGET_FPRND)
3809 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3810 else if (TARGET_POPCNTB)
3811 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3812 else if (TARGET_ALTIVEC)
3813 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3815 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3817 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3818 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3819 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3822 if (!TARGET_FPRND && TARGET_VSX)
3824 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3825 /* TARGET_VSX = 1 implies Power 7 and newer */
3826 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3827 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3830 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3832 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3833 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3834 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3837 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3839 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3840 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3841 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3844 if (TARGET_P8_VECTOR && !TARGET_VSX)
3846 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3847 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3848 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3849 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3851 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3852 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3853 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3855 else
3857 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3858 not explicit. */
3859 rs6000_isa_flags |= OPTION_MASK_VSX;
3860 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3864 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3866 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3867 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3868 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3871 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3872 silently turn off quad memory mode. */
3873 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3875 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3876 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3878 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3879 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3881 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3882 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3885 /* Non-atomic quad memory load/store are disabled for little endian, since
3886 the words are reversed, but atomic operations can still be done by
3887 swapping the words. */
3888 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3890 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3891 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3892 "mode"));
3894 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3897 /* Assume that if the user asked for normal quad memory instructions, they
3898 want the atomic versions as well, unless they explicitly told us not to
3899 use quad-word atomic instructions. */
3900 if (TARGET_QUAD_MEMORY
3901 && !TARGET_QUAD_MEMORY_ATOMIC
3902 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3903 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3905 /* If we can shrink-wrap the TOC register save separately, then use
3906 -msave-toc-indirect unless explicitly disabled. */
3907 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3908 && flag_shrink_wrap_separate
3909 && optimize_function_for_speed_p (cfun))
3910 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3912 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3913 generating power8 instructions. Power9 does not optimize power8 fusion
3914 cases. */
3915 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3917 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3918 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3919 else
3920 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3923 /* Setting additional fusion flags turns on base fusion. */
3924 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3926 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3928 if (TARGET_P8_FUSION_SIGN)
3929 error ("%qs requires %qs", "-mpower8-fusion-sign",
3930 "-mpower8-fusion");
3932 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3934 else
3935 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3938 /* Power8 does not fuse sign extended loads with the addis. If we are
3939 optimizing at high levels for speed, convert a sign extended load into a
3940 zero extending load, and an explicit sign extension. */
3941 if (TARGET_P8_FUSION
3942 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3943 && optimize_function_for_speed_p (cfun)
3944 && optimize >= 3)
3945 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3947 /* ISA 3.0 vector instructions include ISA 2.07. */
3948 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3950 /* We prefer to not mention undocumented options in
3951 error messages. However, if users have managed to select
3952 power9-vector without selecting power8-vector, they
3953 already know about undocumented flags. */
3954 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
3955 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3956 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3957 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3959 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3960 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3961 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3963 else
3965 /* OPTION_MASK_P9_VECTOR is explicit and
3966 OPTION_MASK_P8_VECTOR is not explicit. */
3967 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3968 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3972 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
3973 support. If we only have ISA 2.06 support, and the user did not specify
3974 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3975 but we don't enable the full vectorization support. */
3976 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3977 TARGET_ALLOW_MOVMISALIGN = 1;
3979 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3981 if (TARGET_ALLOW_MOVMISALIGN > 0
3982 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3983 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3985 TARGET_ALLOW_MOVMISALIGN = 0;
3988 /* Determine when unaligned vector accesses are permitted, and when
3989 they are preferred over masked Altivec loads. Note that if
3990 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3991 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3992 not true. */
3993 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3995 if (!TARGET_VSX)
3997 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3998 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4000 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4003 else if (!TARGET_ALLOW_MOVMISALIGN)
4005 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4006 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4007 "-mallow-movmisalign");
4009 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4013 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4015 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4016 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4017 else
4018 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4021 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_VECTOR_PAIR))
4023 /* When the POImode issues of PR96791 are resolved, we can
4024 once again enable use of vector pair for memcpy/memmove on
4025 P10 if we have TARGET_MMA. For now we make it disabled by
4026 default for all targets. */
4027 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_VECTOR_PAIR;
4030 /* Use long double size to select the appropriate long double. We use
4031 TYPE_PRECISION to differentiate the 3 different long double types. We map
4032 128 into the precision used for TFmode. */
4033 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4034 ? 64
4035 : FLOAT_PRECISION_TFmode);
4037 /* Set long double size before the IEEE 128-bit tests. */
4038 if (!global_options_set.x_rs6000_long_double_type_size)
4040 if (main_target_opt != NULL
4041 && (main_target_opt->x_rs6000_long_double_type_size
4042 != default_long_double_size))
4043 error ("target attribute or pragma changes %<long double%> size");
4044 else
4045 rs6000_long_double_type_size = default_long_double_size;
4047 else if (rs6000_long_double_type_size == 128)
4048 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4049 else if (global_options_set.x_rs6000_ieeequad)
4051 if (global_options.x_rs6000_ieeequad)
4052 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
4053 else
4054 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
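/* For illustration: "gcc -mlong-double-64 -mabi=ieeelongdouble" reaches
   the first error above, since either long double ABI option only makes
   sense with a 128-bit long double.  */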
4057 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4058 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4059 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4060 those systems will not pick up this default. Warn if the user changes the
4061 default unless -Wno-psabi. */
4062 if (!global_options_set.x_rs6000_ieeequad)
4063 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4065 else
4067 if (global_options.x_rs6000_ieeequad
4068 && (!TARGET_POPCNTD || !TARGET_VSX))
4069 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4071 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4073 static bool warned_change_long_double;
4074 if (!warned_change_long_double)
4076 warned_change_long_double = true;
4077 if (TARGET_IEEEQUAD)
4078 warning (OPT_Wpsabi, "Using IEEE extended precision "
4079 "%<long double%>");
4080 else
4081 warning (OPT_Wpsabi, "Using IBM extended precision "
4082 "%<long double%>");
4087 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4088 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4089 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4090 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4091 the keyword as well as the type. */
4092 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4094 /* IEEE 128-bit floating point requires VSX support. */
4095 if (TARGET_FLOAT128_KEYWORD)
4097 if (!TARGET_VSX)
4099 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4100 error ("%qs requires VSX support", "-mfloat128");
4102 TARGET_FLOAT128_TYPE = 0;
4103 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4104 | OPTION_MASK_FLOAT128_HW);
4106 else if (!TARGET_FLOAT128_TYPE)
4108 TARGET_FLOAT128_TYPE = 1;
4109 warning (0, "The %<-mfloat128%> option may not be fully supported");
4113 /* Enable the __float128 keyword under Linux by default. */
4114 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4115 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4116 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4118 /* If we are supporting the float128 type and have full ISA 3.0 support,
4119 enable -mfloat128-hardware by default. However, don't enable the
4120 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4121 because sometimes the compiler wants to put things in an integer
4122 container, and if we don't have __int128 support, it is impossible. */
4123 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4124 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4125 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4126 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4128 if (TARGET_FLOAT128_HW
4129 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4131 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4132 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4134 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4137 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4139 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4140 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4142 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
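/* For illustration (hypothetical user code): once the flags above are
   settled, code such as

     __float128 square (__float128 x) { return x * x; }

   compiles under -mfloat128, and uses the ISA 3.0 hardware instructions
   when -mfloat128-hardware is in effect.  */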
4145 /* Enable -mprefixed by default on power10 systems. */
4146 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4147 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4149 /* -mprefixed requires -mcpu=power10 (or later). */
4150 else if (TARGET_PREFIXED && !TARGET_POWER10)
4152 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4153 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4155 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4158 /* -mpcrel requires prefixed load/store addressing. */
4159 if (TARGET_PCREL && !TARGET_PREFIXED)
4161 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4162 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4164 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4167 /* Print the options after updating the defaults. */
4168 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4169 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4171 /* E500mc does "better" if we inline more aggressively. Respect the
4172 user's opinion, though. */
4173 if (rs6000_block_move_inline_limit == 0
4174 && (rs6000_tune == PROCESSOR_PPCE500MC
4175 || rs6000_tune == PROCESSOR_PPCE500MC64
4176 || rs6000_tune == PROCESSOR_PPCE5500
4177 || rs6000_tune == PROCESSOR_PPCE6500))
4178 rs6000_block_move_inline_limit = 128;
4180 /* store_one_arg depends on expand_block_move to handle at least the
4181 size of reg_parm_stack_space. */
4182 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4183 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4185 if (global_init_p)
4187 /* If the appropriate debug option is enabled, replace the target hooks
4188 with debug versions that call the real version and then print
4189 debugging information. */
4190 if (TARGET_DEBUG_COST)
4192 targetm.rtx_costs = rs6000_debug_rtx_costs;
4193 targetm.address_cost = rs6000_debug_address_cost;
4194 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4197 if (TARGET_DEBUG_ADDR)
4199 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4200 targetm.legitimize_address = rs6000_debug_legitimize_address;
4201 rs6000_secondary_reload_class_ptr
4202 = rs6000_debug_secondary_reload_class;
4203 targetm.secondary_memory_needed
4204 = rs6000_debug_secondary_memory_needed;
4205 targetm.can_change_mode_class
4206 = rs6000_debug_can_change_mode_class;
4207 rs6000_preferred_reload_class_ptr
4208 = rs6000_debug_preferred_reload_class;
4209 rs6000_mode_dependent_address_ptr
4210 = rs6000_debug_mode_dependent_address;
4213 if (rs6000_veclibabi_name)
4215 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4216 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4217 else
4219 error ("unknown vectorization library ABI type (%qs) for "
4220 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4221 ret = false;
4226 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4227 target attribute or pragma which automatically enables both options,
4228 unless the altivec ABI was set. This is set by default for 64-bit, but
4229 not for 32-bit. */
4230 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4232 TARGET_FLOAT128_TYPE = 0;
4233 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4234 | OPTION_MASK_FLOAT128_KEYWORD)
4235 & ~rs6000_isa_flags_explicit);
4238 /* Enable Altivec ABI for AIX -maltivec. */
4239 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4241 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4242 error ("target attribute or pragma changes AltiVec ABI");
4243 else
4244 rs6000_altivec_abi = 1;
4247 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4248 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4249 be explicitly overridden in either case. */
4250 if (TARGET_ELF)
4252 if (!global_options_set.x_rs6000_altivec_abi
4253 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4255 if (main_target_opt != NULL &&
4256 !main_target_opt->x_rs6000_altivec_abi)
4257 error ("target attribute or pragma changes AltiVec ABI");
4258 else
4259 rs6000_altivec_abi = 1;
4263 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4264 So far, the only darwin64 targets are also MACH-O. */
4265 if (TARGET_MACHO
4266 && DEFAULT_ABI == ABI_DARWIN
4267 && TARGET_64BIT)
4269 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4270 error ("target attribute or pragma changes darwin64 ABI");
4271 else
4273 rs6000_darwin64_abi = 1;
4274 /* Default to natural alignment, for better performance. */
4275 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4279 /* Place FP constants in the constant pool instead of the TOC
4280 if section anchors are enabled. */
4281 if (flag_section_anchors
4282 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4283 TARGET_NO_FP_IN_TOC = 1;
4285 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4286 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4288 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4289 SUBTARGET_OVERRIDE_OPTIONS;
4290 #endif
4291 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4292 SUBSUBTARGET_OVERRIDE_OPTIONS;
4293 #endif
4294 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4295 SUB3TARGET_OVERRIDE_OPTIONS;
4296 #endif
4298 /* If the ABI has support for PC-relative relocations, enable it by default.
4299 This test depends on the sub-target tests above setting the code model to
4300 medium for ELF v2 systems. */
4301 if (PCREL_SUPPORTED_BY_OS
4302 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4303 rs6000_isa_flags |= OPTION_MASK_PCREL;
4305 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4306 after the subtarget override options are done. */
4307 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4309 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4310 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4312 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4315 /* Enable -mmma by default on power10 systems. */
4316 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4317 rs6000_isa_flags |= OPTION_MASK_MMA;
4319 /* Turn off vector pair/mma options on non-power10 systems. */
4320 else if (!TARGET_POWER10 && TARGET_MMA)
4322 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4323 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4325 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4328 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4329 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4331 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4332 && rs6000_tune != PROCESSOR_POWER5
4333 && rs6000_tune != PROCESSOR_POWER6
4334 && rs6000_tune != PROCESSOR_POWER7
4335 && rs6000_tune != PROCESSOR_POWER8
4336 && rs6000_tune != PROCESSOR_POWER9
4337 && rs6000_tune != PROCESSOR_POWER10
4338 && rs6000_tune != PROCESSOR_PPCA2
4339 && rs6000_tune != PROCESSOR_CELL
4340 && rs6000_tune != PROCESSOR_PPC476);
4341 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4342 || rs6000_tune == PROCESSOR_POWER5
4343 || rs6000_tune == PROCESSOR_POWER7
4344 || rs6000_tune == PROCESSOR_POWER8);
4345 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4346 || rs6000_tune == PROCESSOR_POWER5
4347 || rs6000_tune == PROCESSOR_POWER6
4348 || rs6000_tune == PROCESSOR_POWER7
4349 || rs6000_tune == PROCESSOR_POWER8
4350 || rs6000_tune == PROCESSOR_POWER9
4351 || rs6000_tune == PROCESSOR_POWER10
4352 || rs6000_tune == PROCESSOR_PPCE500MC
4353 || rs6000_tune == PROCESSOR_PPCE500MC64
4354 || rs6000_tune == PROCESSOR_PPCE5500
4355 || rs6000_tune == PROCESSOR_PPCE6500);
4357 /* Allow debug switches to override the above settings. These are set to -1
4358 in rs6000.opt to indicate the user hasn't directly set the switch. */
4359 if (TARGET_ALWAYS_HINT >= 0)
4360 rs6000_always_hint = TARGET_ALWAYS_HINT;
4362 if (TARGET_SCHED_GROUPS >= 0)
4363 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4365 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4366 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4368 rs6000_sched_restricted_insns_priority
4369 = (rs6000_sched_groups ? 1 : 0);
4371 /* Handle -msched-costly-dep option. */
4372 rs6000_sched_costly_dep
4373 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4375 if (rs6000_sched_costly_dep_str)
4377 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4378 rs6000_sched_costly_dep = no_dep_costly;
4379 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4380 rs6000_sched_costly_dep = all_deps_costly;
4381 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4382 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4383 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4384 rs6000_sched_costly_dep = store_to_load_dep_costly;
4385 else
4386 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4387 atoi (rs6000_sched_costly_dep_str));
4390 /* Handle -minsert-sched-nops option. */
4391 rs6000_sched_insert_nops
4392 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4394 if (rs6000_sched_insert_nops_str)
4396 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4397 rs6000_sched_insert_nops = sched_finish_none;
4398 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4399 rs6000_sched_insert_nops = sched_finish_pad_groups;
4400 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4401 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4402 else
4403 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4404 atoi (rs6000_sched_insert_nops_str));
4407 /* Handle the stack protector. */
4408 if (!global_options_set.x_rs6000_stack_protector_guard)
4409 #ifdef TARGET_THREAD_SSP_OFFSET
4410 rs6000_stack_protector_guard = SSP_TLS;
4411 #else
4412 rs6000_stack_protector_guard = SSP_GLOBAL;
4413 #endif
4415 #ifdef TARGET_THREAD_SSP_OFFSET
4416 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4417 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4418 #endif
4420 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4422 char *endp;
4423 const char *str = rs6000_stack_protector_guard_offset_str;
4425 errno = 0;
4426 long offset = strtol (str, &endp, 0);
4427 if (!*str || *endp || errno)
4428 error ("%qs is not a valid number in %qs", str,
4429 "-mstack-protector-guard-offset=");
4431 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4432 || (TARGET_64BIT && (offset & 3)))
4433 error ("%qs is not a valid offset in %qs", str,
4434 "-mstack-protector-guard-offset=");
4436 rs6000_stack_protector_guard_offset = offset;
4439 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4441 const char *str = rs6000_stack_protector_guard_reg_str;
4442 int reg = decode_reg_name (str);
4444 if (!IN_RANGE (reg, 1, 31))
4445 error ("%qs is not a valid base register in %qs", str,
4446 "-mstack-protector-guard-reg=");
4448 rs6000_stack_protector_guard_reg = reg;
4451 if (rs6000_stack_protector_guard == SSP_TLS
4452 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4453 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
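/* For illustration (the values are hypothetical): an explicit TLS guard
   selection looks like "-mstack-protector-guard=tls
   -mstack-protector-guard-reg=r13 -mstack-protector-guard-offset=0x28";
   the offset must fit in a signed 16-bit displacement and, for -m64, be
   a multiple of 4.  */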
4455 if (global_init_p)
4457 #ifdef TARGET_REGNAMES
4458 /* If the user desires alternate register names, copy in the
4459 alternate names now. */
4460 if (TARGET_REGNAMES)
4461 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4462 #endif
4464 /* Set aix_struct_return last, after the ABI is determined.
4465 If -maix-struct-return or -msvr4-struct-return was explicitly
4466 used, don't override with the ABI default. */
4467 if (!global_options_set.x_aix_struct_return)
4468 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4470 #if 0
4471 /* IBM XL compiler defaults to unsigned bitfields. */
4472 if (TARGET_XL_COMPAT)
4473 flag_signed_bitfields = 0;
4474 #endif
4476 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4477 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4479 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4481 /* We can only guarantee the availability of DI pseudo-ops when
4482 assembling for 64-bit targets. */
4483 if (!TARGET_64BIT)
4485 targetm.asm_out.aligned_op.di = NULL;
4486 targetm.asm_out.unaligned_op.di = NULL;
4490 /* Set branch target alignment, if not optimizing for size. */
4491 if (!optimize_size)
4493 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4494 8-byte aligned to avoid misprediction by the branch predictor. */
4495 if (rs6000_tune == PROCESSOR_TITAN
4496 || rs6000_tune == PROCESSOR_CELL)
4498 if (flag_align_functions && !str_align_functions)
4499 str_align_functions = "8";
4500 if (flag_align_jumps && !str_align_jumps)
4501 str_align_jumps = "8";
4502 if (flag_align_loops && !str_align_loops)
4503 str_align_loops = "8";
4505 if (rs6000_align_branch_targets)
4507 if (flag_align_functions && !str_align_functions)
4508 str_align_functions = "16";
4509 if (flag_align_jumps && !str_align_jumps)
4510 str_align_jumps = "16";
4511 if (flag_align_loops && !str_align_loops)
4513 can_override_loop_align = 1;
4514 str_align_loops = "16";
4519 /* Arrange to save and restore machine status around nested functions. */
4520 init_machine_status = rs6000_init_machine_status;
4522 /* We should always be splitting complex arguments, but we can't break
4523 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4524 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4525 targetm.calls.split_complex_arg = NULL;
4527 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4528 if (DEFAULT_ABI == ABI_AIX)
4529 targetm.calls.custom_function_descriptors = 0;
4532 /* Initialize rs6000_cost with the appropriate target costs. */
4533 if (optimize_size)
4534 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4535 else
4536 switch (rs6000_tune)
4538 case PROCESSOR_RS64A:
4539 rs6000_cost = &rs64a_cost;
4540 break;
4542 case PROCESSOR_MPCCORE:
4543 rs6000_cost = &mpccore_cost;
4544 break;
4546 case PROCESSOR_PPC403:
4547 rs6000_cost = &ppc403_cost;
4548 break;
4550 case PROCESSOR_PPC405:
4551 rs6000_cost = &ppc405_cost;
4552 break;
4554 case PROCESSOR_PPC440:
4555 rs6000_cost = &ppc440_cost;
4556 break;
4558 case PROCESSOR_PPC476:
4559 rs6000_cost = &ppc476_cost;
4560 break;
4562 case PROCESSOR_PPC601:
4563 rs6000_cost = &ppc601_cost;
4564 break;
4566 case PROCESSOR_PPC603:
4567 rs6000_cost = &ppc603_cost;
4568 break;
4570 case PROCESSOR_PPC604:
4571 rs6000_cost = &ppc604_cost;
4572 break;
4574 case PROCESSOR_PPC604e:
4575 rs6000_cost = &ppc604e_cost;
4576 break;
4578 case PROCESSOR_PPC620:
4579 rs6000_cost = &ppc620_cost;
4580 break;
4582 case PROCESSOR_PPC630:
4583 rs6000_cost = &ppc630_cost;
4584 break;
4586 case PROCESSOR_CELL:
4587 rs6000_cost = &ppccell_cost;
4588 break;
4590 case PROCESSOR_PPC750:
4591 case PROCESSOR_PPC7400:
4592 rs6000_cost = &ppc750_cost;
4593 break;
4595 case PROCESSOR_PPC7450:
4596 rs6000_cost = &ppc7450_cost;
4597 break;
4599 case PROCESSOR_PPC8540:
4600 case PROCESSOR_PPC8548:
4601 rs6000_cost = &ppc8540_cost;
4602 break;
4604 case PROCESSOR_PPCE300C2:
4605 case PROCESSOR_PPCE300C3:
4606 rs6000_cost = &ppce300c2c3_cost;
4607 break;
4609 case PROCESSOR_PPCE500MC:
4610 rs6000_cost = &ppce500mc_cost;
4611 break;
4613 case PROCESSOR_PPCE500MC64:
4614 rs6000_cost = &ppce500mc64_cost;
4615 break;
4617 case PROCESSOR_PPCE5500:
4618 rs6000_cost = &ppce5500_cost;
4619 break;
4621 case PROCESSOR_PPCE6500:
4622 rs6000_cost = &ppce6500_cost;
4623 break;
4625 case PROCESSOR_TITAN:
4626 rs6000_cost = &titan_cost;
4627 break;
4629 case PROCESSOR_POWER4:
4630 case PROCESSOR_POWER5:
4631 rs6000_cost = &power4_cost;
4632 break;
4634 case PROCESSOR_POWER6:
4635 rs6000_cost = &power6_cost;
4636 break;
4638 case PROCESSOR_POWER7:
4639 rs6000_cost = &power7_cost;
4640 break;
4642 case PROCESSOR_POWER8:
4643 rs6000_cost = &power8_cost;
4644 break;
4646 case PROCESSOR_POWER9:
4647 case PROCESSOR_POWER10:
4648 rs6000_cost = &power9_cost;
4649 break;
4651 case PROCESSOR_PPCA2:
4652 rs6000_cost = &ppca2_cost;
4653 break;
4655 default:
4656 gcc_unreachable ();
4659 if (global_init_p)
4661 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4662 param_simultaneous_prefetches,
4663 rs6000_cost->simultaneous_prefetches);
4664 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4665 param_l1_cache_size,
4666 rs6000_cost->l1_cache_size);
4667 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4668 param_l1_cache_line_size,
4669 rs6000_cost->cache_line_size);
4670 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4671 param_l2_cache_size,
4672 rs6000_cost->l2_cache_size);
4674 /* Increase loop peeling limits based on performance analysis. */
4675 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4676 param_max_peeled_insns, 400);
4677 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4678 param_max_completely_peeled_insns, 400);
4680 /* Temporarily disable it for now since lxvl/stxvl on the default
4681 supported hardware Power9 has unexpected performance behaviors. */
4682 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4683 param_vect_partial_vector_usage, 0);
4685 /* Use the 'model' -fsched-pressure algorithm by default. */
4686 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4687 param_sched_pressure_algorithm,
4688 SCHED_PRESSURE_MODEL);
4690 /* If using typedef char *va_list, signal that
4691 __builtin_va_start (&ap, 0) can be optimized to
4692 ap = __builtin_next_arg (0). */
4693 if (DEFAULT_ABI != ABI_V4)
4694 targetm.expand_builtin_va_start = NULL;
4697 rs6000_override_options_after_change ();
4699 /* If not explicitly specified via option, decide whether to generate indexed
4700 load/store instructions. A value of -1 indicates that the
4701 initial value of this variable has not been overwritten. During
4702 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4703 if (TARGET_AVOID_XFORM == -1)
4704 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4705 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4706 need indexed accesses and the type used is the scalar type of the element
4707 being loaded or stored. */
4708 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4709 && !TARGET_ALTIVEC);
4711 /* Set the -mrecip options. */
4712 if (rs6000_recip_name)
4714 char *p = ASTRDUP (rs6000_recip_name);
4715 char *q;
4716 unsigned int mask, i;
4717 bool invert;
4719 while ((q = strtok (p, ",")) != NULL)
4721 p = NULL;
4722 if (*q == '!')
4724 invert = true;
4725 q++;
4727 else
4728 invert = false;
4730 if (!strcmp (q, "default"))
4731 mask = ((TARGET_RECIP_PRECISION)
4732 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4733 else
4735 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4736 if (!strcmp (q, recip_options[i].string))
4738 mask = recip_options[i].mask;
4739 break;
4742 if (i == ARRAY_SIZE (recip_options))
4744 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4745 invert = false;
4746 mask = 0;
4747 ret = false;
4751 if (invert)
4752 rs6000_recip_control &= ~mask;
4753 else
4754 rs6000_recip_control |= mask;
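/* For illustration: "-mrecip=rsqrtd,!divf" enables the reciprocal square
   root estimate for double while disabling the reciprocal divide estimate
   for float, and "-mrecip=default" selects the high- or low-precision set
   depending on TARGET_RECIP_PRECISION.  */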
4758 /* Set the builtin mask of the various options that can affect which
4759 builtins are available. In the past we used target_flags, but we've run
4760 out of bits, and some options are no longer in target_flags. */
4761 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4762 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4763 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4764 rs6000_builtin_mask);
4766 /* Initialize all of the registers. */
4767 rs6000_init_hard_regno_mode_ok (global_init_p);
4769 /* Save the initial options in case the user uses function-specific options. */
4770 if (global_init_p)
4771 target_option_default_node = target_option_current_node
4772 = build_target_option_node (&global_options, &global_options_set);
4774 /* If not explicitly specified via option, decide whether to generate the
4775 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4776 if (TARGET_LINK_STACK == -1)
4777 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4779 /* Deprecate use of -mno-speculate-indirect-jumps. */
4780 if (!rs6000_speculate_indirect_jumps)
4781 warning (0, "%qs is deprecated and not recommended in any circumstances",
4782 "-mno-speculate-indirect-jumps");
4784 return ret;
4787 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4788 define the target cpu type. */
4790 static void
4791 rs6000_option_override (void)
4793 (void) rs6000_option_override_internal (true);
4797 /* Implement targetm.vectorize.builtin_mask_for_load. */
4798 static tree
4799 rs6000_builtin_mask_for_load (void)
4801 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4802 if ((TARGET_ALTIVEC && !TARGET_VSX)
4803 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4804 return altivec_builtin_mask_for_load;
4805 else
4806 return 0;
4809 /* Implement LOOP_ALIGN. */
4810 align_flags
4811 rs6000_loop_align (rtx label)
4813 basic_block bb;
4814 int ninsns;
4816 /* Don't override loop alignment if -falign-loops was specified. */
4817 if (!can_override_loop_align)
4818 return align_loops;
4820 bb = BLOCK_FOR_INSN (label);
4821 ninsns = num_loop_insns(bb->loop_father);
4823 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4824 if (ninsns > 4 && ninsns <= 8
4825 && (rs6000_tune == PROCESSOR_POWER4
4826 || rs6000_tune == PROCESSOR_POWER5
4827 || rs6000_tune == PROCESSOR_POWER6
4828 || rs6000_tune == PROCESSOR_POWER7
4829 || rs6000_tune == PROCESSOR_POWER8))
4830 return align_flags (5);
4831 else
4832 return align_loops;
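/* For illustration: align_flags (5) requests 2**5 = 32-byte alignment,
   i.e. one icache sector on the processors listed above.  */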
4835 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4836 after applying N iterations. This routine does not determine
4837 how many iterations are required to reach the desired alignment. */
4839 static bool
4840 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4842 if (is_packed)
4843 return false;
4845 if (TARGET_32BIT)
4847 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4848 return true;
4850 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4851 return true;
4853 return false;
4855 else
4857 if (TARGET_MACHO)
4858 return false;
4860 /* Assume that all other types are naturally aligned. CHECKME! */
4861 return true;
4865 /* Return true if the vector misalignment factor is supported by the
4866 target. */
4867 static bool
4868 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4869 const_tree type,
4870 int misalignment,
4871 bool is_packed)
4873 if (TARGET_VSX)
4875 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4876 return true;
4878 /* Return false if the movmisalign pattern is not supported for this mode. */
4879 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4880 return false;
4882 if (misalignment == -1)
4884 /* Misalignment factor is unknown at compile time but we know
4885 it's word aligned. */
4886 if (rs6000_vector_alignment_reachable (type, is_packed))
4888 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4890 if (element_size == 64 || element_size == 32)
4891 return true;
4894 return false;
4897 /* VSX supports word-aligned vectors. */
4898 if (misalignment % 4 == 0)
4899 return true;
4901 return false;
4904 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4905 static int
4906 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4907 tree vectype, int misalign)
4909 unsigned elements;
4910 tree elem_type;
4912 switch (type_of_cost)
4914 case scalar_stmt:
4915 case scalar_store:
4916 case vector_stmt:
4917 case vector_store:
4918 case vec_to_scalar:
4919 case scalar_to_vec:
4920 case cond_branch_not_taken:
4921 return 1;
4922 case scalar_load:
4923 case vector_load:
4924 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4925 return 2;
4927 case vec_perm:
4928 /* Power7 has only one permute unit, make it a bit expensive. */
4929 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4930 return 3;
4931 else
4932 return 1;
4934 case vec_promote_demote:
4935 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4936 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4937 return 4;
4938 else
4939 return 1;
4941 case cond_branch_taken:
4942 return 3;
4944 case unaligned_load:
4945 case vector_gather_load:
4946 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4947 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4948 return 2;
4950 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4952 elements = TYPE_VECTOR_SUBPARTS (vectype);
4953 if (elements == 2)
4954 /* Double word aligned. */
4955 return 4;
4957 if (elements == 4)
4959 switch (misalign)
4961 case 8:
4962 /* Double word aligned. */
4963 return 4;
4965 case -1:
4966 /* Unknown misalignment. */
4967 case 4:
4968 case 12:
4969 /* Word aligned. */
4970 return 33;
4972 default:
4973 gcc_unreachable ();
4978 if (TARGET_ALTIVEC)
4979 /* Misaligned loads are not supported. */
4980 gcc_unreachable ();
4982 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4983 return 4;
4985 case unaligned_store:
4986 case vector_scatter_store:
4987 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4988 return 1;
4990 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4992 elements = TYPE_VECTOR_SUBPARTS (vectype);
4993 if (elements == 2)
4994 /* Double word aligned. */
4995 return 2;
4997 if (elements == 4)
4999 switch (misalign)
5001 case 8:
5002 /* Double word aligned. */
5003 return 2;
5005 case -1:
5006 /* Unknown misalignment. */
5007 case 4:
5008 case 12:
5009 /* Word aligned. */
5010 return 23;
5012 default:
5013 gcc_unreachable ();
5018 if (TARGET_ALTIVEC)
5019 /* Misaligned stores are not supported. */
5020 gcc_unreachable ();
5022 return 2;
5024 case vec_construct:
5025 /* This is a rough approximation assuming non-constant elements
5026 constructed into a vector via element insertion. FIXME:
5027 vec_construct is not granular enough for uniformly good
5028 decisions. If the initialization is a splat, this is
5029 cheaper than we estimate. Improve this someday. */
5030 elem_type = TREE_TYPE (vectype);
5031 /* 32-bit vectors loaded into registers are stored as double
5032 precision, so we need 2 permutes, 2 converts, and 1 merge
5033 to construct a vector of short floats from them. */
5034 if (SCALAR_FLOAT_TYPE_P (elem_type)
5035 && TYPE_PRECISION (elem_type) == 32)
5036 return 5;
5037 /* On POWER9, integer vector types are built up in GPRs and then
5038 use a direct move (2 cycles). For POWER8 this is even worse,
5039 as we need two direct moves and a merge, and the direct moves
5040 are five cycles. */
5041 else if (INTEGRAL_TYPE_P (elem_type))
5043 if (TARGET_P9_VECTOR)
5044 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5045 else
5046 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5048 else
5049 /* V2DFmode doesn't need a direct move. */
5050 return 2;
5052 default:
5053 gcc_unreachable ();
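/* Worked example (for illustration): a vec_construct of a V4SI vector
   from non-constant elements costs TYPE_VECTOR_SUBPARTS - 1 + 2
   = 4 - 1 + 2 = 5 on POWER9, versus 4 - 1 + 5 = 8 on POWER8, where the
   direct moves are slower.  */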
5057 /* Implement targetm.vectorize.preferred_simd_mode. */
5059 static machine_mode
5060 rs6000_preferred_simd_mode (scalar_mode mode)
5062 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5064 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5065 return vmode.require ();
5067 return word_mode;
5070 typedef struct _rs6000_cost_data
5072 struct loop *loop_info;
5073 unsigned cost[3];
5074 } rs6000_cost_data;
5076 /* Test for likely overcommitment of vector hardware resources. If a
5077 loop iteration is relatively large, and too large a percentage of
5078 instructions in the loop are vectorized, the cost model may not
5079 adequately reflect delays from unavailable vector resources.
5080 Penalize the loop body cost for this case. */
5082 static void
5083 rs6000_density_test (rs6000_cost_data *data)
5085 const int DENSITY_PCT_THRESHOLD = 85;
5086 const int DENSITY_SIZE_THRESHOLD = 70;
5087 const int DENSITY_PENALTY = 10;
5088 struct loop *loop = data->loop_info;
5089 basic_block *bbs = get_loop_body (loop);
5090 int nbbs = loop->num_nodes;
5091 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
5092 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5093 int i, density_pct;
5095 for (i = 0; i < nbbs; i++)
5097 basic_block bb = bbs[i];
5098 gimple_stmt_iterator gsi;
5100 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5102 gimple *stmt = gsi_stmt (gsi);
5103 if (is_gimple_debug (stmt))
5104 continue;
5106 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5108 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5109 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5110 not_vec_cost++;
5114 free (bbs);
5115 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5117 if (density_pct > DENSITY_PCT_THRESHOLD
5118 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5120 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5121 if (dump_enabled_p ())
5122 dump_printf_loc (MSG_NOTE, vect_location,
5123 "density %d%%, cost %d exceeds threshold, penalizing "
5124 "loop body cost by %d%%", density_pct,
5125 vec_cost + not_vec_cost, DENSITY_PENALTY);
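/* Worked example of the density heuristic above: with vec_cost == 90 and
   not_vec_cost == 10, density_pct is (90 * 100) / 100 == 90.  Since 90
   exceeds DENSITY_PCT_THRESHOLD (85) and the total size 100 exceeds
   DENSITY_SIZE_THRESHOLD (70), the body cost is scaled by
   (100 + DENSITY_PENALTY) / 100, i.e. 90 becomes 99.  */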
5129 /* Implement targetm.vectorize.init_cost. */
5131 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5132 instruction is needed by the vectorization. */
5133 static bool rs6000_vect_nonmem;
5135 static void *
5136 rs6000_init_cost (struct loop *loop_info)
5138 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5139 data->loop_info = loop_info;
5140 data->cost[vect_prologue] = 0;
5141 data->cost[vect_body] = 0;
5142 data->cost[vect_epilogue] = 0;
5143 rs6000_vect_nonmem = false;
5144 return data;
5147 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5148 For some statements, we would like to further fine-tune the cost on
5149 top of rs6000_builtin_vectorization_cost handling, which doesn't have any
5150 information on statement operation codes etc. One typical case here is
5151 COND_EXPR: it takes the same cost as a simple FXU instruction when evaluated
5152 for scalar cost, but it should be priced higher whether it is transformed to
5153 compare + branch or to compare + isel instructions. */
5155 static unsigned
5156 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5157 struct _stmt_vec_info *stmt_info)
5159 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5160 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5162 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5163 if (subcode == COND_EXPR)
5164 return 2;
5167 return 0;
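/* For example, a scalar statement like "x = a < b ? c : d;" arrives here
   as a GIMPLE_ASSIGN whose rhs code is COND_EXPR, so its scalar cost is
   bumped by 2 to reflect the eventual compare + branch or compare + isel
   sequence.  */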
5170 /* Implement targetm.vectorize.add_stmt_cost. */
5172 static unsigned
5173 rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
5174 enum vect_cost_for_stmt kind,
5175 struct _stmt_vec_info *stmt_info, tree vectype,
5176 int misalign, enum vect_cost_model_location where)
5178 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5179 unsigned retval = 0;
5181 if (flag_vect_cost_model)
5183 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5184 misalign);
5185 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5186 /* Statements in an inner loop relative to the loop being
5187 vectorized are weighted more heavily. The value here is
5188 arbitrary and could potentially be improved with analysis. */
5189 if (where == vect_body && stmt_info
5190 && stmt_in_inner_loop_p (vinfo, stmt_info))
5191 count *= 50; /* FIXME. */
5193 retval = (unsigned) (count * stmt_cost);
5194 cost_data->cost[where] += retval;
5196 /* Check whether we're doing something other than just a copy loop.
5197 Not all such loops may be profitably vectorized; see
5198 rs6000_finish_cost. */
5199 if ((kind == vec_to_scalar || kind == vec_perm
5200 || kind == vec_promote_demote || kind == vec_construct
5201 || kind == scalar_to_vec)
5202 || (where == vect_body && kind == vector_stmt))
5203 rs6000_vect_nonmem = true;
5206 return retval;
5209 /* For target-specific vectorization costs which can't be handled per stmt,
5210 we check the requisite conditions and adjust the vectorization cost
5211 accordingly if they are satisfied. One typical example is to model the
5212 shift cost for vectors with length by counting the number of required
5213 lengths under condition LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5215 static void
5216 rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
5218 struct loop *loop = data->loop_info;
5219 gcc_assert (loop);
5220 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
5222 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5224 rgroup_controls *rgc;
5225 unsigned int num_vectors_m1;
5226 unsigned int shift_cnt = 0;
5227 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5228 if (rgc->type)
5229 /* Each length needs one shift to fill into bits 0-7. */
5230 shift_cnt += num_vectors_m1 + 1;
5232 rs6000_add_stmt_cost (loop_vinfo, (void *) data, shift_cnt, scalar_stmt,
5233 NULL, NULL_TREE, 0, vect_body);
5237 /* Implement targetm.vectorize.finish_cost. */
5239 static void
5240 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5241 unsigned *body_cost, unsigned *epilogue_cost)
5243 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5245 if (cost_data->loop_info)
5247 rs6000_adjust_vect_cost_per_loop (cost_data);
5248 rs6000_density_test (cost_data);
5251 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5252 that require versioning for any reason. The vectorization is at
5253 best a wash inside the loop, and the versioning checks make
5254 profitability highly unlikely and potentially quite harmful. */
5255 if (cost_data->loop_info)
5257 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5258 if (!rs6000_vect_nonmem
5259 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5260 && LOOP_REQUIRES_VERSIONING (vec_info))
5261 cost_data->cost[vect_body] += 10000;
5264 *prologue_cost = cost_data->cost[vect_prologue];
5265 *body_cost = cost_data->cost[vect_body];
5266 *epilogue_cost = cost_data->cost[vect_epilogue];
5269 /* Implement targetm.vectorize.destroy_cost_data. */
5271 static void
5272 rs6000_destroy_cost_data (void *data)
5274 free (data);
5277 /* Implement targetm.loop_unroll_adjust. */
5279 static unsigned
5280 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5282 if (unroll_only_small_loops)
5284 /* TODO: These are hardcoded values right now. We probably should use
5285 a PARAM here. */
5286 if (loop->ninsns <= 6)
5287 return MIN (4, nunroll);
5288 if (loop->ninsns <= 10)
5289 return MIN (2, nunroll);
5291 return 0;
5294 return nunroll;
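/* Example behavior when unroll_only_small_loops is set: a 5-insn loop
   body is unrolled at most 4 times, an 8-insn body at most twice, and an
   11-insn body not at all, regardless of the NUNROLL value passed in.  */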
5297 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5298 library with vectorized intrinsics. */
5300 static tree
5301 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5302 tree type_in)
5304 char name[32];
5305 const char *suffix = NULL;
5306 tree fntype, new_fndecl, bdecl = NULL_TREE;
5307 int n_args = 1;
5308 const char *bname;
5309 machine_mode el_mode, in_mode;
5310 int n, in_n;
5312 /* Libmass is suitable for unsafe math only, as it does not correctly support
5313 parts of IEEE (such as denormals) with the required precision. Only support
5314 it if we have VSX to use the simd d2 or f4 functions.
5315 XXX: Add variable length support. */
5316 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5317 return NULL_TREE;
5319 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5320 n = TYPE_VECTOR_SUBPARTS (type_out);
5321 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5322 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5323 if (el_mode != in_mode
5324 || n != in_n)
5325 return NULL_TREE;
5327 switch (fn)
5329 CASE_CFN_ATAN2:
5330 CASE_CFN_HYPOT:
5331 CASE_CFN_POW:
5332 n_args = 2;
5333 gcc_fallthrough ();
5335 CASE_CFN_ACOS:
5336 CASE_CFN_ACOSH:
5337 CASE_CFN_ASIN:
5338 CASE_CFN_ASINH:
5339 CASE_CFN_ATAN:
5340 CASE_CFN_ATANH:
5341 CASE_CFN_CBRT:
5342 CASE_CFN_COS:
5343 CASE_CFN_COSH:
5344 CASE_CFN_ERF:
5345 CASE_CFN_ERFC:
5346 CASE_CFN_EXP2:
5347 CASE_CFN_EXP:
5348 CASE_CFN_EXPM1:
5349 CASE_CFN_LGAMMA:
5350 CASE_CFN_LOG10:
5351 CASE_CFN_LOG1P:
5352 CASE_CFN_LOG2:
5353 CASE_CFN_LOG:
5354 CASE_CFN_SIN:
5355 CASE_CFN_SINH:
5356 CASE_CFN_SQRT:
5357 CASE_CFN_TAN:
5358 CASE_CFN_TANH:
5359 if (el_mode == DFmode && n == 2)
5361 bdecl = mathfn_built_in (double_type_node, fn);
5362 suffix = "d2"; /* pow -> powd2 */
5364 else if (el_mode == SFmode && n == 4)
5366 bdecl = mathfn_built_in (float_type_node, fn);
5367 suffix = "4"; /* powf -> powf4 */
5369 else
5370 return NULL_TREE;
5371 if (!bdecl)
5372 return NULL_TREE;
5373 break;
5375 default:
5376 return NULL_TREE;
5379 gcc_assert (suffix != NULL);
5380 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5381 if (!bname)
5382 return NULL_TREE;
5384 strcpy (name, bname + strlen ("__builtin_"));
5385 strcat (name, suffix);
5387 if (n_args == 1)
5388 fntype = build_function_type_list (type_out, type_in, NULL);
5389 else if (n_args == 2)
5390 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5391 else
5392 gcc_unreachable ();
5394 /* Build a function declaration for the vectorized function. */
5395 new_fndecl = build_decl (BUILTINS_LOCATION,
5396 FUNCTION_DECL, get_identifier (name), fntype);
5397 TREE_PUBLIC (new_fndecl) = 1;
5398 DECL_EXTERNAL (new_fndecl) = 1;
5399 DECL_IS_NOVOPS (new_fndecl) = 1;
5400 TREE_READONLY (new_fndecl) = 1;
5402 return new_fndecl;
5405 /* Returns a function decl for a vectorized version of the builtin function
5406 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5407 if it is not available. */
5409 static tree
5410 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5411 tree type_in)
5413 machine_mode in_mode, out_mode;
5414 int in_n, out_n;
5416 if (TARGET_DEBUG_BUILTIN)
5417 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5418 combined_fn_name (combined_fn (fn)),
5419 GET_MODE_NAME (TYPE_MODE (type_out)),
5420 GET_MODE_NAME (TYPE_MODE (type_in)));
5422 if (TREE_CODE (type_out) != VECTOR_TYPE
5423 || TREE_CODE (type_in) != VECTOR_TYPE)
5424 return NULL_TREE;
5426 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5427 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5428 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5429 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5431 switch (fn)
5433 CASE_CFN_COPYSIGN:
5434 if (VECTOR_UNIT_VSX_P (V2DFmode)
5435 && out_mode == DFmode && out_n == 2
5436 && in_mode == DFmode && in_n == 2)
5437 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5438 if (VECTOR_UNIT_VSX_P (V4SFmode)
5439 && out_mode == SFmode && out_n == 4
5440 && in_mode == SFmode && in_n == 4)
5441 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5442 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5443 && out_mode == SFmode && out_n == 4
5444 && in_mode == SFmode && in_n == 4)
5445 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5446 break;
5447 CASE_CFN_CEIL:
5448 if (VECTOR_UNIT_VSX_P (V2DFmode)
5449 && out_mode == DFmode && out_n == 2
5450 && in_mode == DFmode && in_n == 2)
5451 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5452 if (VECTOR_UNIT_VSX_P (V4SFmode)
5453 && out_mode == SFmode && out_n == 4
5454 && in_mode == SFmode && in_n == 4)
5455 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5456 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5457 && out_mode == SFmode && out_n == 4
5458 && in_mode == SFmode && in_n == 4)
5459 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5460 break;
5461 CASE_CFN_FLOOR:
5462 if (VECTOR_UNIT_VSX_P (V2DFmode)
5463 && out_mode == DFmode && out_n == 2
5464 && in_mode == DFmode && in_n == 2)
5465 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5466 if (VECTOR_UNIT_VSX_P (V4SFmode)
5467 && out_mode == SFmode && out_n == 4
5468 && in_mode == SFmode && in_n == 4)
5469 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5470 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5471 && out_mode == SFmode && out_n == 4
5472 && in_mode == SFmode && in_n == 4)
5473 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5474 break;
5475 CASE_CFN_FMA:
5476 if (VECTOR_UNIT_VSX_P (V2DFmode)
5477 && out_mode == DFmode && out_n == 2
5478 && in_mode == DFmode && in_n == 2)
5479 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5480 if (VECTOR_UNIT_VSX_P (V4SFmode)
5481 && out_mode == SFmode && out_n == 4
5482 && in_mode == SFmode && in_n == 4)
5483 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5484 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5485 && out_mode == SFmode && out_n == 4
5486 && in_mode == SFmode && in_n == 4)
5487 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5488 break;
5489 CASE_CFN_TRUNC:
5490 if (VECTOR_UNIT_VSX_P (V2DFmode)
5491 && out_mode == DFmode && out_n == 2
5492 && in_mode == DFmode && in_n == 2)
5493 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5494 if (VECTOR_UNIT_VSX_P (V4SFmode)
5495 && out_mode == SFmode && out_n == 4
5496 && in_mode == SFmode && in_n == 4)
5497 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5498 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5499 && out_mode == SFmode && out_n == 4
5500 && in_mode == SFmode && in_n == 4)
5501 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5502 break;
5503 CASE_CFN_NEARBYINT:
5504 if (VECTOR_UNIT_VSX_P (V2DFmode)
5505 && flag_unsafe_math_optimizations
5506 && out_mode == DFmode && out_n == 2
5507 && in_mode == DFmode && in_n == 2)
5508 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5509 if (VECTOR_UNIT_VSX_P (V4SFmode)
5510 && flag_unsafe_math_optimizations
5511 && out_mode == SFmode && out_n == 4
5512 && in_mode == SFmode && in_n == 4)
5513 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5514 break;
5515 CASE_CFN_RINT:
5516 if (VECTOR_UNIT_VSX_P (V2DFmode)
5517 && !flag_trapping_math
5518 && out_mode == DFmode && out_n == 2
5519 && in_mode == DFmode && in_n == 2)
5520 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5521 if (VECTOR_UNIT_VSX_P (V4SFmode)
5522 && !flag_trapping_math
5523 && out_mode == SFmode && out_n == 4
5524 && in_mode == SFmode && in_n == 4)
5525 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5526 break;
5527 default:
5528 break;
5531 /* Generate calls to libmass if appropriate. */
5532 if (rs6000_veclib_handler)
5533 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5535 return NULL_TREE;
5538 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5540 static tree
5541 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5542 tree type_in)
5544 machine_mode in_mode, out_mode;
5545 int in_n, out_n;
5547 if (TARGET_DEBUG_BUILTIN)
5548 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5549 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5550 GET_MODE_NAME (TYPE_MODE (type_out)),
5551 GET_MODE_NAME (TYPE_MODE (type_in)));
5553 if (TREE_CODE (type_out) != VECTOR_TYPE
5554 || TREE_CODE (type_in) != VECTOR_TYPE)
5555 return NULL_TREE;
5557 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5558 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5559 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5560 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5562 enum rs6000_builtins fn
5563 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5564 switch (fn)
5566 case RS6000_BUILTIN_RSQRTF:
5567 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5568 && out_mode == SFmode && out_n == 4
5569 && in_mode == SFmode && in_n == 4)
5570 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5571 break;
5572 case RS6000_BUILTIN_RSQRT:
5573 if (VECTOR_UNIT_VSX_P (V2DFmode)
5574 && out_mode == DFmode && out_n == 2
5575 && in_mode == DFmode && in_n == 2)
5576 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5577 break;
5578 case RS6000_BUILTIN_RECIPF:
5579 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5580 && out_mode == SFmode && out_n == 4
5581 && in_mode == SFmode && in_n == 4)
5582 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5583 break;
5584 case RS6000_BUILTIN_RECIP:
5585 if (VECTOR_UNIT_VSX_P (V2DFmode)
5586 && out_mode == DFmode && out_n == 2
5587 && in_mode == DFmode && in_n == 2)
5588 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5589 break;
5590 default:
5591 break;
5593 return NULL_TREE;
5596 /* Default CPU string for rs6000*_file_start functions. */
5597 static const char *rs6000_default_cpu;
5599 #ifdef USING_ELFOS_H
5600 const char *rs6000_machine;
5602 const char *
5603 rs6000_machine_from_flags (void)
5605 HOST_WIDE_INT flags = rs6000_isa_flags;
5607 /* Disable the flags that should never influence the .machine selection. */
5608 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5610 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5611 return "power10";
5612 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5613 return "power9";
5614 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5615 return "power8";
5616 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5617 return "power7";
5618 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5619 return "power6";
5620 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5621 return "power5";
5622 if ((flags & ISA_2_1_MASKS) != 0)
5623 return "power4";
5624 if ((flags & OPTION_MASK_POWERPC64) != 0)
5625 return "ppc64";
5626 return "ppc";
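/* The cascade above selects the newest .machine value whose
   distinguishing ISA bits are set.  For example, if the only server ISA
   bit present is OPTION_MASK_VSX (an ISA 2.06 / power7 feature), the
   power10/power9/power8 tests all fail and "power7" is returned.  */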
5629 void
5630 emit_asm_machine (void)
5632 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5634 #endif
5636 /* Do anything needed at the start of the asm file. */
5638 static void
5639 rs6000_file_start (void)
5641 char buffer[80];
5642 const char *start = buffer;
5643 FILE *file = asm_out_file;
5645 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5647 default_file_start ();
5649 if (flag_verbose_asm)
5651 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5653 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5655 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5656 start = "";
5659 if (global_options_set.x_rs6000_cpu_index)
5661 fprintf (file, "%s -mcpu=%s", start,
5662 processor_target_table[rs6000_cpu_index].name);
5663 start = "";
5666 if (global_options_set.x_rs6000_tune_index)
5668 fprintf (file, "%s -mtune=%s", start,
5669 processor_target_table[rs6000_tune_index].name);
5670 start = "";
5673 if (PPC405_ERRATUM77)
5675 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5676 start = "";
5679 #ifdef USING_ELFOS_H
5680 switch (rs6000_sdata)
5682 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5683 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5684 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5685 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5688 if (rs6000_sdata && g_switch_value)
5690 fprintf (file, "%s -G %d", start,
5691 g_switch_value);
5692 start = "";
5694 #endif
5696 if (*start == '\0')
5697 putc ('\n', file);
5700 #ifdef USING_ELFOS_H
5701 rs6000_machine = rs6000_machine_from_flags ();
5702 emit_asm_machine ();
5703 #endif
5705 if (DEFAULT_ABI == ABI_ELFv2)
5706 fprintf (file, "\t.abiversion 2\n");
5710 /* Return nonzero if this function is known to have a null epilogue. */
5712 int
5713 direct_return (void)
5715 if (reload_completed)
5717 rs6000_stack_t *info = rs6000_stack_info ();
5719 if (info->first_gp_reg_save == 32
5720 && info->first_fp_reg_save == 64
5721 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5722 && ! info->lr_save_p
5723 && ! info->cr_save_p
5724 && info->vrsave_size == 0
5725 && ! info->push_p)
5726 return 1;
5729 return 0;
5732 /* Helper for num_insns_constant. Calculate number of instructions to
5733 load VALUE to a single gpr using combinations of addi, addis, ori,
5734 oris, sldi and rldimi instructions. */
5736 static int
5737 num_insns_constant_gpr (HOST_WIDE_INT value)
5739 /* signed constant loadable with addi */
5740 if (SIGNED_INTEGER_16BIT_P (value))
5741 return 1;
5743 /* constant loadable with addis */
5744 else if ((value & 0xffff) == 0
5745 && (value >> 31 == -1 || value >> 31 == 0))
5746 return 1;
5748 /* PADDI can support up to 34 bit signed integers. */
5749 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
5750 return 1;
5752 else if (TARGET_POWERPC64)
5754 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5755 HOST_WIDE_INT high = value >> 31;
5757 if (high == 0 || high == -1)
5758 return 2;
5760 high >>= 1;
5762 if (low == 0 || low == high)
5763 return num_insns_constant_gpr (high) + 1;
5764 else if (high == 0)
5765 return num_insns_constant_gpr (low) + 1;
5766 else
5767 return (num_insns_constant_gpr (high)
5768 + num_insns_constant_gpr (low) + 1);
5771 else
5772 return 2;
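/* Worked examples (illustrative): 0x7fff loads with a single addi/li;
   0x12340000 with a single addis/lis (low 16 bits zero, bit 31 clear);
   and, on a 64-bit target without prefixed insns, (HOST_WIDE_INT) 1 << 32
   splits into high == 1 and low == 0, so the cost is
   num_insns_constant_gpr (1) + 1 == 2 (li then sldi).  */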
5775 /* Helper for num_insns_constant. Allow constants formed by the
5776 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5777 and handle modes that require multiple gprs. */
5779 static int
5780 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5782 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5783 int total = 0;
5784 while (nregs-- > 0)
5786 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5787 int insns = num_insns_constant_gpr (low);
5788 if (insns > 2
5789 /* We won't get more than 2 from num_insns_constant_gpr
5790 except when TARGET_POWERPC64 and mode is DImode or
5791 wider, so the register mode must be DImode. */
5792 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5793 insns = 2;
5794 total += insns;
5795 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
5796 it all at once would be UB. */
5797 value >>= (BITS_PER_WORD - 1);
5798 value >>= 1;
5800 return total;
5803 /* Return the number of instructions it takes to form a constant in as
5804 many gprs as are needed for MODE. */
5806 int
5807 num_insns_constant (rtx op, machine_mode mode)
5809 HOST_WIDE_INT val;
5811 switch (GET_CODE (op))
5813 case CONST_INT:
5814 val = INTVAL (op);
5815 break;
5817 case CONST_WIDE_INT:
5819 int insns = 0;
5820 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5821 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5822 DImode);
5823 return insns;
5826 case CONST_DOUBLE:
5828 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5830 if (mode == SFmode || mode == SDmode)
5832 long l;
5834 if (mode == SDmode)
5835 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5836 else
5837 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5838 /* See the first define_split in rs6000.md handling a
5839 const_double_operand. */
5840 val = l;
5841 mode = SImode;
5843 else if (mode == DFmode || mode == DDmode)
5845 long l[2];
5847 if (mode == DDmode)
5848 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5849 else
5850 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5852 /* See the second (32-bit) and third (64-bit) define_split
5853 in rs6000.md handling a const_double_operand. */
5854 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5855 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5856 mode = DImode;
5858 else if (mode == TFmode || mode == TDmode
5859 || mode == KFmode || mode == IFmode)
5861 long l[4];
5862 int insns;
5864 if (mode == TDmode)
5865 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5866 else
5867 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5869 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5870 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5871 insns = num_insns_constant_multi (val, DImode);
5872 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5873 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5874 insns += num_insns_constant_multi (val, DImode);
5875 return insns;
5877 else
5878 gcc_unreachable ();
5880 break;
5882 default:
5883 gcc_unreachable ();
5886 return num_insns_constant_multi (val, mode);
5889 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5890 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5891 corresponding element of the vector, but for V4SFmode, the
5892 corresponding "float" is interpreted as an SImode integer. */
5894 HOST_WIDE_INT
5895 const_vector_elt_as_int (rtx op, unsigned int elt)
5897 rtx tmp;
5899 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5900 gcc_assert (GET_MODE (op) != V2DImode
5901 && GET_MODE (op) != V2DFmode);
5903 tmp = CONST_VECTOR_ELT (op, elt);
5904 if (GET_MODE (op) == V4SFmode)
5905 tmp = gen_lowpart (SImode, tmp);
5906 return INTVAL (tmp);
5909 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5910 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5911 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5912 all items are set to the same value and contain COPIES replicas of the
5913 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5914 operand and the others are set to the value of the operand's msb. */
5916 static bool
5917 vspltis_constant (rtx op, unsigned step, unsigned copies)
5919 machine_mode mode = GET_MODE (op);
5920 machine_mode inner = GET_MODE_INNER (mode);
5922 unsigned i;
5923 unsigned nunits;
5924 unsigned bitsize;
5925 unsigned mask;
5927 HOST_WIDE_INT val;
5928 HOST_WIDE_INT splat_val;
5929 HOST_WIDE_INT msb_val;
5931 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5932 return false;
5934 nunits = GET_MODE_NUNITS (mode);
5935 bitsize = GET_MODE_BITSIZE (inner);
5936 mask = GET_MODE_MASK (inner);
5938 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5939 splat_val = val;
5940 msb_val = val >= 0 ? 0 : -1;
5942 /* Construct the value to be splatted, if possible. If not, return 0. */
5943 for (i = 2; i <= copies; i *= 2)
5945 HOST_WIDE_INT small_val;
5946 bitsize /= 2;
5947 small_val = splat_val >> bitsize;
5948 mask >>= bitsize;
5949 if (splat_val != ((HOST_WIDE_INT)
5950 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5951 | (small_val & mask)))
5952 return false;
5953 splat_val = small_val;
5956 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5957 if (EASY_VECTOR_15 (splat_val))
5960 /* Also check if we can splat, and then add the result to itself. Do so if
5961 the value is positive, or if the splat instruction is using OP's mode;
5962 for splat_val < 0, the splat and the add should use the same mode. */
5963 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5964 && (splat_val >= 0 || (step == 1 && copies == 1)))
5967 /* Also check if we are loading up the most significant bit, which can be done by
5968 loading up -1 and shifting the value left by -1. */
5969 else if (EASY_VECTOR_MSB (splat_val, inner))
5972 else
5973 return false;
5975 /* Check if VAL is present in every STEP-th element, and the
5976 other elements are filled with its most significant bit. */
5977 for (i = 1; i < nunits; ++i)
5979 HOST_WIDE_INT desired_val;
5980 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5981 if ((i & (step - 1)) == 0)
5982 desired_val = val;
5983 else
5984 desired_val = msb_val;
5986 if (desired_val != const_vector_elt_as_int (op, elt))
5987 return false;
5990 return true;
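/* Example: the V8HImode constant with every element 0x0101 is accepted
   with step == 1, copies == 2; the folding loop above reduces the 16-bit
   splat value 0x0101 to the 8-bit value 0x01, so a single "vspltisb 1"
   materializes the vector.  */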
5993 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5994 instruction, filling in the bottom elements with 0 or -1.
5996 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5997 for the number of zeroes to shift in, or negative for the number of 0xff
5998 bytes to shift in.
6000 OP is a CONST_VECTOR. */
6002 int
6003 vspltis_shifted (rtx op)
6005 machine_mode mode = GET_MODE (op);
6006 machine_mode inner = GET_MODE_INNER (mode);
6008 unsigned i, j;
6009 unsigned nunits;
6010 unsigned mask;
6012 HOST_WIDE_INT val;
6014 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6015 return false;
6017 /* We need to create pseudo registers to do the shift, so don't recognize
6018 shift vector constants after reload. */
6019 if (!can_create_pseudo_p ())
6020 return false;
6022 nunits = GET_MODE_NUNITS (mode);
6023 mask = GET_MODE_MASK (inner);
6025 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6027 /* Check if the value can really be the operand of a vspltis[bhw]. */
6028 if (EASY_VECTOR_15 (val))
6031 /* Also check if we are loading up the most significant bit which can be done
6032 by loading up -1 and shifting the value left by -1. */
6033 else if (EASY_VECTOR_MSB (val, inner))
6036 else
6037 return 0;
6039 /* Check if VAL is present in every STEP-th element until we find elements
6040 that are 0 or all 1 bits. */
6041 for (i = 1; i < nunits; ++i)
6043 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6044 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6046 /* If the value isn't the splat value, check for the remaining elements
6047 being 0/-1. */
6048 if (val != elt_val)
6050 if (elt_val == 0)
6052 for (j = i+1; j < nunits; ++j)
6054 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6055 if (const_vector_elt_as_int (op, elt2) != 0)
6056 return 0;
6059 return (nunits - i) * GET_MODE_SIZE (inner);
6062 else if ((elt_val & mask) == mask)
6064 for (j = i+1; j < nunits; ++j)
6066 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6067 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6068 return 0;
6071 return -((nunits - i) * GET_MODE_SIZE (inner));
6074 else
6075 return 0;
6079 /* If all elements are equal, we don't need to do VSLDOI. */
6080 return 0;
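/* Example: on a big-endian target, the V4SImode constant { 5, 0, 0, 0 }
   splats 5 in the leading element and is zero afterwards, so the function
   returns (4 - 1) * 4 == 12: generate "vspltisw 5" and then use VSLDOI to
   shift twelve zero bytes into the bottom of the register.  */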
6084 /* Return true if OP is of the given MODE and can be synthesized
6085 with a vspltisb, vspltish or vspltisw. */
6087 bool
6088 easy_altivec_constant (rtx op, machine_mode mode)
6090 unsigned step, copies;
6092 if (mode == VOIDmode)
6093 mode = GET_MODE (op);
6094 else if (mode != GET_MODE (op))
6095 return false;
6097 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6098 constants. */
6099 if (mode == V2DFmode)
6100 return zero_constant (op, mode);
6102 else if (mode == V2DImode)
6104 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6105 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6106 return false;
6108 if (zero_constant (op, mode))
6109 return true;
6111 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6112 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6113 return true;
6115 return false;
6118 /* V1TImode is a special container for TImode. Ignore for now. */
6119 else if (mode == V1TImode)
6120 return false;
6122 /* Start with a vspltisw. */
6123 step = GET_MODE_NUNITS (mode) / 4;
6124 copies = 1;
6126 if (vspltis_constant (op, step, copies))
6127 return true;
6129 /* Then try with a vspltish. */
6130 if (step == 1)
6131 copies <<= 1;
6132 else
6133 step >>= 1;
6135 if (vspltis_constant (op, step, copies))
6136 return true;
6138 /* And finally a vspltisb. */
6139 if (step == 1)
6140 copies <<= 1;
6141 else
6142 step >>= 1;
6144 if (vspltis_constant (op, step, copies))
6145 return true;
6147 if (vspltis_shifted (op) != 0)
6148 return true;
6150 return false;
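/* The (step, copies) progression above tries the widest splat first:
   for V16QImode it is (4, 1), (2, 1), (1, 1) -- vspltisw, vspltish,
   vspltisb in turn -- while for V4SImode it is (1, 1), (1, 2), (1, 4),
   testing whether each 32-bit element is really a replicated 16-bit or
   8-bit immediate.  */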
6153 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6154 result is OP. Abort if it is not possible. */
6156 rtx
6157 gen_easy_altivec_constant (rtx op)
6159 machine_mode mode = GET_MODE (op);
6160 int nunits = GET_MODE_NUNITS (mode);
6161 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6162 unsigned step = nunits / 4;
6163 unsigned copies = 1;
6165 /* Start with a vspltisw. */
6166 if (vspltis_constant (op, step, copies))
6167 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6169 /* Then try with a vspltish. */
6170 if (step == 1)
6171 copies <<= 1;
6172 else
6173 step >>= 1;
6175 if (vspltis_constant (op, step, copies))
6176 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6178 /* And finally a vspltisb. */
6179 if (step == 1)
6180 copies <<= 1;
6181 else
6182 step >>= 1;
6184 if (vspltis_constant (op, step, copies))
6185 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6187 gcc_unreachable ();
6190 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6191 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6193 Return the number of instructions needed (1 or 2) via the address pointed
6194 to by NUM_INSNS_PTR.
6196 Return the constant that is being split via CONSTANT_PTR. */
6198 bool
6199 xxspltib_constant_p (rtx op,
6200 machine_mode mode,
6201 int *num_insns_ptr,
6202 int *constant_ptr)
6204 size_t nunits = GET_MODE_NUNITS (mode);
6205 size_t i;
6206 HOST_WIDE_INT value;
6207 rtx element;
6209 /* Set the returned values to out of bound values. */
6210 *num_insns_ptr = -1;
6211 *constant_ptr = 256;
6213 if (!TARGET_P9_VECTOR)
6214 return false;
6216 if (mode == VOIDmode)
6217 mode = GET_MODE (op);
6219 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6220 return false;
6222 /* Handle (vec_duplicate <constant>). */
6223 if (GET_CODE (op) == VEC_DUPLICATE)
6225 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6226 && mode != V2DImode)
6227 return false;
6229 element = XEXP (op, 0);
6230 if (!CONST_INT_P (element))
6231 return false;
6233 value = INTVAL (element);
6234 if (!IN_RANGE (value, -128, 127))
6235 return false;
6238 /* Handle (const_vector [...]). */
6239 else if (GET_CODE (op) == CONST_VECTOR)
6241 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6242 && mode != V2DImode)
6243 return false;
6245 element = CONST_VECTOR_ELT (op, 0);
6246 if (!CONST_INT_P (element))
6247 return false;
6249 value = INTVAL (element);
6250 if (!IN_RANGE (value, -128, 127))
6251 return false;
6253 for (i = 1; i < nunits; i++)
6255 element = CONST_VECTOR_ELT (op, i);
6256 if (!CONST_INT_P (element))
6257 return false;
6259 if (value != INTVAL (element))
6260 return false;
6264 /* Handle integer constants being loaded into the upper part of the VSX
6265 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6266 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6267 else if (CONST_INT_P (op))
6269 if (!SCALAR_INT_MODE_P (mode))
6270 return false;
6272 value = INTVAL (op);
6273 if (!IN_RANGE (value, -128, 127))
6274 return false;
6276 if (!IN_RANGE (value, -1, 0))
6278 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6279 return false;
6281 if (EASY_VECTOR_15 (value))
6282 return false;
6286 else
6287 return false;
6289 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6290 sign extend. Special case 0/-1 to allow getting any VSX register instead
6291 of an Altivec register. */
6292 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6293 && EASY_VECTOR_15 (value))
6294 return false;
6296 /* Return # of instructions and the constant byte for XXSPLTIB. */
6297 if (mode == V16QImode)
6298 *num_insns_ptr = 1;
6300 else if (IN_RANGE (value, -1, 0))
6301 *num_insns_ptr = 1;
6303 else
6304 *num_insns_ptr = 2;
6306 *constant_ptr = (int) value;
6307 return true;
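/* Examples: a V16QImode splat of 100 is a single XXSPLTIB; a V4SImode
   splat of 100 sets *NUM_INSNS_PTR to 2 (XXSPLTIB plus a VEXTSB2W sign
   extension); and a V4SImode splat of 12 is rejected above because
   EASY_VECTOR_15 says a single VSPLTISW handles it directly.  */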
6310 const char *
6311 output_vec_const_move (rtx *operands)
6313 int shift;
6314 machine_mode mode;
6315 rtx dest, vec;
6317 dest = operands[0];
6318 vec = operands[1];
6319 mode = GET_MODE (dest);
6321 if (TARGET_VSX)
6323 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6324 int xxspltib_value = 256;
6325 int num_insns = -1;
6327 if (zero_constant (vec, mode))
6329 if (TARGET_P9_VECTOR)
6330 return "xxspltib %x0,0";
6332 else if (dest_vmx_p)
6333 return "vspltisw %0,0";
6335 else
6336 return "xxlxor %x0,%x0,%x0";
6339 if (all_ones_constant (vec, mode))
6341 if (TARGET_P9_VECTOR)
6342 return "xxspltib %x0,255";
6344 else if (dest_vmx_p)
6345 return "vspltisw %0,-1";
6347 else if (TARGET_P8_VECTOR)
6348 return "xxlorc %x0,%x0,%x0";
6350 else
6351 gcc_unreachable ();
6354 if (TARGET_P9_VECTOR
6355 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6357 if (num_insns == 1)
6359 operands[2] = GEN_INT (xxspltib_value & 0xff);
6360 return "xxspltib %x0,%2";
6363 return "#";
6367 if (TARGET_ALTIVEC)
6369 rtx splat_vec;
6371 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6372 if (zero_constant (vec, mode))
6373 return "vspltisw %0,0";
6375 if (all_ones_constant (vec, mode))
6376 return "vspltisw %0,-1";
6378 /* Do we need to construct a value using VSLDOI? */
6379 shift = vspltis_shifted (vec);
6380 if (shift != 0)
6381 return "#";
6383 splat_vec = gen_easy_altivec_constant (vec);
6384 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6385 operands[1] = XEXP (splat_vec, 0);
6386 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6387 return "#";
6389 switch (GET_MODE (splat_vec))
6391 case E_V4SImode:
6392 return "vspltisw %0,%1";
6394 case E_V8HImode:
6395 return "vspltish %0,%1";
6397 case E_V16QImode:
6398 return "vspltisb %0,%1";
6400 default:
6401 gcc_unreachable ();
6405 gcc_unreachable ();
6408 /* Initialize vector TARGET to VALS. */
6410 void
6411 rs6000_expand_vector_init (rtx target, rtx vals)
6413 machine_mode mode = GET_MODE (target);
6414 machine_mode inner_mode = GET_MODE_INNER (mode);
6415 int n_elts = GET_MODE_NUNITS (mode);
6416 int n_var = 0, one_var = -1;
6417 bool all_same = true, all_const_zero = true;
6418 rtx x, mem;
6419 int i;
6421 for (i = 0; i < n_elts; ++i)
6423 x = XVECEXP (vals, 0, i);
6424 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6425 ++n_var, one_var = i;
6426 else if (x != CONST0_RTX (inner_mode))
6427 all_const_zero = false;
6429 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6430 all_same = false;
6433 if (n_var == 0)
6435 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6436 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6437 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6439 /* Zero register. */
6440 emit_move_insn (target, CONST0_RTX (mode));
6441 return;
6443 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6445 /* Splat immediate. */
6446 emit_insn (gen_rtx_SET (target, const_vec));
6447 return;
6449 else
6451 /* Load from constant pool. */
6452 emit_move_insn (target, const_vec);
6453 return;
6457 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6458 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6460 rtx op[2];
6461 size_t i;
6462 size_t num_elements = all_same ? 1 : 2;
6463 for (i = 0; i < num_elements; i++)
6465 op[i] = XVECEXP (vals, 0, i);
6466 /* Just in case there is a SUBREG with a smaller mode, do a
6467 conversion. */
6468 if (GET_MODE (op[i]) != inner_mode)
6470 rtx tmp = gen_reg_rtx (inner_mode);
6471 convert_move (tmp, op[i], 0);
6472 op[i] = tmp;
6474 /* Allow load with splat double word. */
6475 else if (MEM_P (op[i]))
6477 if (!all_same)
6478 op[i] = force_reg (inner_mode, op[i]);
6480 else if (!REG_P (op[i]))
6481 op[i] = force_reg (inner_mode, op[i]);
6484 if (all_same)
6486 if (mode == V2DFmode)
6487 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6488 else
6489 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6491 else
6493 if (mode == V2DFmode)
6494 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6495 else
6496 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6498 return;
6501 /* Special case initializing vector int if we are on 64-bit systems with
6502 direct move or we have the ISA 3.0 instructions. */
6503 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6504 && TARGET_DIRECT_MOVE_64BIT)
6506 if (all_same)
6508 rtx element0 = XVECEXP (vals, 0, 0);
6509 if (MEM_P (element0))
6510 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6511 else
6512 element0 = force_reg (SImode, element0);
6514 if (TARGET_P9_VECTOR)
6515 emit_insn (gen_vsx_splat_v4si (target, element0));
6516 else
6518 rtx tmp = gen_reg_rtx (DImode);
6519 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6520 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6522 return;
6524 else
6526 rtx elements[4];
6527 size_t i;
6529 for (i = 0; i < 4; i++)
6530 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6532 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6533 elements[2], elements[3]));
6534 return;
6538 /* With single precision floating point on VSX, note that internally single
6539 precision is actually represented as a double. Either make 2 V2DF
6540 vectors and convert these vectors to single precision, or do one
6541 conversion and splat the result to the other elements. */
6542 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6544 if (all_same)
6546 rtx element0 = XVECEXP (vals, 0, 0);
6548 if (TARGET_P9_VECTOR)
6550 if (MEM_P (element0))
6551 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6553 emit_insn (gen_vsx_splat_v4sf (target, element0));
6556 else
6558 rtx freg = gen_reg_rtx (V4SFmode);
6559 rtx sreg = force_reg (SFmode, element0);
6560 rtx cvt = (TARGET_XSCVDPSPN
6561 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6562 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6564 emit_insn (cvt);
6565 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6566 const0_rtx));
6569 else
6571 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6573 rtx tmp_sf[4];
6574 rtx tmp_si[4];
6575 rtx tmp_di[4];
6576 rtx mrg_di[4];
6577 for (i = 0; i < 4; i++)
6579 tmp_si[i] = gen_reg_rtx (SImode);
6580 tmp_di[i] = gen_reg_rtx (DImode);
6581 mrg_di[i] = gen_reg_rtx (DImode);
6582 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6583 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6584 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6587 if (!BYTES_BIG_ENDIAN)
6589 std::swap (tmp_di[0], tmp_di[1]);
6590 std::swap (tmp_di[2], tmp_di[3]);
6593 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6594 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6595 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6596 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6598 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6599 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6600 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6602 else
6604 rtx dbl_even = gen_reg_rtx (V2DFmode);
6605 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6606 rtx flt_even = gen_reg_rtx (V4SFmode);
6607 rtx flt_odd = gen_reg_rtx (V4SFmode);
6608 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6609 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6610 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6611 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6613 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6614 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6615 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6616 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6617 rs6000_expand_extract_even (target, flt_even, flt_odd);
6620 return;
6623 /* Special case initializing vector short/char that are splats if we are on
6624 64-bit systems with direct move. */
6625 if (all_same && TARGET_DIRECT_MOVE_64BIT
6626 && (mode == V16QImode || mode == V8HImode))
6628 rtx op0 = XVECEXP (vals, 0, 0);
6629 rtx di_tmp = gen_reg_rtx (DImode);
6631 if (!REG_P (op0))
6632 op0 = force_reg (GET_MODE_INNER (mode), op0);
6634 if (mode == V16QImode)
6636 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6637 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6638 return;
6641 if (mode == V8HImode)
6643 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6644 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6645 return;
6649 /* Store value to stack temp. Load vector element. Splat. However, splat
6650 of 64-bit items is not supported on Altivec. */
6651 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6653 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6654 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6655 XVECEXP (vals, 0, 0));
6656 x = gen_rtx_UNSPEC (VOIDmode,
6657 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6658 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6659 gen_rtvec (2,
6660 gen_rtx_SET (target, mem),
6661 x)));
6662 x = gen_rtx_VEC_SELECT (inner_mode, target,
6663 gen_rtx_PARALLEL (VOIDmode,
6664 gen_rtvec (1, const0_rtx)));
6665 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6666 return;
6669 /* One field is non-constant. Load constant then overwrite
6670 varying field. */
6671 if (n_var == 1)
6673 rtx copy = copy_rtx (vals);
6675 /* Load constant part of vector, substitute neighboring value for
6676 varying element. */
6677 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6678 rs6000_expand_vector_init (target, copy);
6680 /* Insert variable. */
6681 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6682 return;
6685 /* Construct the vector in memory one field at a time
6686 and load the whole vector. */
6687 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6688 for (i = 0; i < n_elts; i++)
6689 emit_move_insn (adjust_address_nv (mem, inner_mode,
6690 i * GET_MODE_SIZE (inner_mode)),
6691 XVECEXP (vals, 0, i));
6692 emit_move_insn (target, mem);
6695 /* Set field ELT of TARGET to VAL. */
6697 void
6698 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6700 machine_mode mode = GET_MODE (target);
6701 machine_mode inner_mode = GET_MODE_INNER (mode);
6702 rtx reg = gen_reg_rtx (mode);
6703 rtx mask, mem, x;
6704 int width = GET_MODE_SIZE (inner_mode);
6705 int i;
6707 val = force_reg (GET_MODE (val), val);
6709 if (VECTOR_MEM_VSX_P (mode))
6711 rtx insn = NULL_RTX;
6712 rtx elt_rtx = GEN_INT (elt);
6714 if (mode == V2DFmode)
6715 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6717 else if (mode == V2DImode)
6718 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6720 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6722 if (mode == V4SImode)
6723 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6724 else if (mode == V8HImode)
6725 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6726 else if (mode == V16QImode)
6727 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6728 else if (mode == V4SFmode)
6729 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6732 if (insn)
6734 emit_insn (insn);
6735 return;
6739 /* Simplify setting single element vectors like V1TImode. */
6740 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6742 emit_move_insn (target, gen_lowpart (mode, val));
6743 return;
6746 /* Load single variable value. */
6747 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6748 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6749 x = gen_rtx_UNSPEC (VOIDmode,
6750 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6751 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6752 gen_rtvec (2,
6753 gen_rtx_SET (reg, mem),
6754 x)));
6756 /* Linear sequence. */
6757 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6758 for (i = 0; i < 16; ++i)
6759 XVECEXP (mask, 0, i) = GEN_INT (i);
6761 /* Set permute mask to insert element into target. */
6762 for (i = 0; i < width; ++i)
6763 XVECEXP (mask, 0, elt*width + i)
6764 = GEN_INT (i + 0x10);
6765 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6767 if (BYTES_BIG_ENDIAN)
6768 x = gen_rtx_UNSPEC (mode,
6769 gen_rtvec (3, target, reg,
6770 force_reg (V16QImode, x)),
6771 UNSPEC_VPERM);
6772 else
6774 if (TARGET_P9_VECTOR)
6775 x = gen_rtx_UNSPEC (mode,
6776 gen_rtvec (3, reg, target,
6777 force_reg (V16QImode, x)),
6778 UNSPEC_VPERMR);
6779 else
6781 /* Invert selector. We prefer to generate VNAND on P8 so
6782 that future fusion opportunities can kick in, but must
6783 generate VNOR elsewhere. */
6784 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6785 rtx iorx = (TARGET_P8_VECTOR
6786 ? gen_rtx_IOR (V16QImode, notx, notx)
6787 : gen_rtx_AND (V16QImode, notx, notx));
6788 rtx tmp = gen_reg_rtx (V16QImode);
6789 emit_insn (gen_rtx_SET (tmp, iorx));
6791 /* Permute with operands reversed and adjusted selector. */
6792 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6793 UNSPEC_VPERM);
6797 emit_insn (gen_rtx_SET (target, x));
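/* Illustration of the permute mask built above: inserting element 2 of a
   V4SImode vector (width == 4) yields the byte selector
   { 0, 1, 2, 3, 4, 5, 6, 7, 0x10, 0x11, 0x12, 0x13, 12, 13, 14, 15 },
   so on the big-endian path bytes 8-11 of the result come from the
   register holding VAL and all other bytes from the original TARGET.  */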
6800 /* Extract field ELT from VEC into TARGET. */
6802 void
6803 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6805 machine_mode mode = GET_MODE (vec);
6806 machine_mode inner_mode = GET_MODE_INNER (mode);
6807 rtx mem;
6809 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6811 switch (mode)
6813 default:
6814 break;
6815 case E_V1TImode:
6816 emit_move_insn (target, gen_lowpart (TImode, vec));
6817 break;
6818 case E_V2DFmode:
6819 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6820 return;
6821 case E_V2DImode:
6822 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6823 return;
6824 case E_V4SFmode:
6825 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6826 return;
6827 case E_V16QImode:
6828 if (TARGET_DIRECT_MOVE_64BIT)
6830 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6831 return;
6833 else
6834 break;
6835 case E_V8HImode:
6836 if (TARGET_DIRECT_MOVE_64BIT)
6838 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6839 return;
6841 else
6842 break;
6843 case E_V4SImode:
6844 if (TARGET_DIRECT_MOVE_64BIT)
6846 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6847 return;
6849 break;
6852 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6853 && TARGET_DIRECT_MOVE_64BIT)
6855 if (GET_MODE (elt) != DImode)
6857 rtx tmp = gen_reg_rtx (DImode);
6858 convert_move (tmp, elt, 0);
6859 elt = tmp;
6861 else if (!REG_P (elt))
6862 elt = force_reg (DImode, elt);
6864 switch (mode)
6866 case E_V1TImode:
6867 emit_move_insn (target, gen_lowpart (TImode, vec));
6868 return;
6870 case E_V2DFmode:
6871 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6872 return;
6874 case E_V2DImode:
6875 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6876 return;
6878 case E_V4SFmode:
6879 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6880 return;
6882 case E_V4SImode:
6883 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6884 return;
6886 case E_V8HImode:
6887 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6888 return;
6890 case E_V16QImode:
6891 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6892 return;
6894 default:
6895 gcc_unreachable ();
6899 /* Allocate mode-sized buffer. */
6900 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6902 emit_move_insn (mem, vec);
6903 if (CONST_INT_P (elt))
6905 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6907 /* Add offset to field within buffer matching vector element. */
6908 mem = adjust_address_nv (mem, inner_mode,
6909 modulo_elt * GET_MODE_SIZE (inner_mode));
6910 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6912 else
6914 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6915 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6916 rtx new_addr = gen_reg_rtx (Pmode);
6918 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6919 if (ele_size > 1)
6920 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6921 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6922 new_addr = change_address (mem, inner_mode, new_addr);
6923 emit_move_insn (target, new_addr);
6927 /* Return the offset within a memory object (MEM) of a vector type to a given
6928 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
6929 the element is constant, we return a constant integer.
6931 Otherwise, we use a base register temporary to calculate the offset after
6932 masking it to fit within the bounds of the vector and scaling it. The
6933 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6934 built-in function. */
6936 static rtx
6937 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6939 if (CONST_INT_P (element))
6940 return GEN_INT (INTVAL (element) * scalar_size);
6942 /* All insns should use the 'Q' constraint (address is a single register) if
6943 the element number is not a constant. */
6944 gcc_assert (satisfies_constraint_Q (mem));
6946 /* Mask the element to make sure the element number is between 0 and the
6947 maximum number of elements - 1 so that we don't generate an address
6948 outside the vector. */
6949 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6950 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6951 emit_insn (gen_rtx_SET (base_tmp, and_op));
6953 /* Shift the element to get the byte offset from the element number. */
6954 int shift = exact_log2 (scalar_size);
6955 gcc_assert (shift >= 0);
6957 if (shift > 0)
6959 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6960 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6963 return base_tmp;
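/* Example: for a variable ELEMENT indexing a V4SImode vector in memory
   (SCALAR_SIZE == 4), the code above emits an AND of the element number
   with 3 and then a left shift by 2, clamping the index to 0-3 and
   scaling it to a byte offset of 0, 4, 8 or 12 in BASE_TMP.  */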
6966 /* Helper function to update PC-relative addresses when we are adjusting a memory
6967 address (ADDR) to a vector to point to a scalar field within the vector with
6968 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
6969 use the base register temporary (BASE_TMP) to form the address. */
6971 static rtx
6972 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6974 rtx new_addr = NULL;
6976 gcc_assert (CONST_INT_P (element_offset));
6978 if (GET_CODE (addr) == CONST)
6979 addr = XEXP (addr, 0);
6981 if (GET_CODE (addr) == PLUS)
6983 rtx op0 = XEXP (addr, 0);
6984 rtx op1 = XEXP (addr, 1);
6986 if (CONST_INT_P (op1))
6988 HOST_WIDE_INT offset
6989 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6991 if (offset == 0)
6992 new_addr = op0;
6994 else
6996 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6997 new_addr = gen_rtx_CONST (Pmode, plus);
7001 else
7003 emit_move_insn (base_tmp, addr);
7004 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7008 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7010 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7011 new_addr = gen_rtx_CONST (Pmode, plus);
7014 else
7015 gcc_unreachable ();
7017 return new_addr;
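/* Example: if ADDR is (const (plus (symbol_ref "x") (const_int 16))) and
   ELEMENT_OFFSET is 8, the constants fold and the result is
   (const (plus (symbol_ref "x") (const_int 24))); a bare SYMBOL_REF or
   LABEL_REF simply gains the offset inside a CONST wrapper.  */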
7020 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7021 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7022 temporary (BASE_TMP) to fix up the address. Return the new memory address
7023 that is valid for reads or writes to a given register (SCALAR_REG).
7025 This function is expected to be called after reload is completed when we are
7026 splitting insns. The temporary BASE_TMP might be set multiple times with
7027 this code. */
7029 rtx
7030 rs6000_adjust_vec_address (rtx scalar_reg,
7031 rtx mem,
7032 rtx element,
7033 rtx base_tmp,
7034 machine_mode scalar_mode)
7036 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7037 rtx addr = XEXP (mem, 0);
7038 rtx new_addr;
7040 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7041 gcc_assert (!reg_mentioned_p (base_tmp, element));
7043 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7044 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7046 /* Calculate what we need to add to the address to get the element
7047 address. */
7048 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7050 /* Create the new address pointing to the element within the vector. If we
7051 are adding 0, we don't have to change the address. */
7052 if (element_offset == const0_rtx)
7053 new_addr = addr;
7055 /* A simple indirect address can be converted into a reg + offset
7056 address. */
7057 else if (REG_P (addr) || SUBREG_P (addr))
7058 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7060 /* For references to local static variables, fold a constant offset into the
7061 address. */
7062 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7063 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7065 /* Optimize D-FORM addresses with a constant offset and a constant element, to
7066 include the element offset in the address directly. */
7067 else if (GET_CODE (addr) == PLUS)
7069 rtx op0 = XEXP (addr, 0);
7070 rtx op1 = XEXP (addr, 1);
7072 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7073 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7075 /* op0 should never be r0, because r0+offset is not valid. But it
7076 doesn't hurt to make sure it is not r0. */
7077 gcc_assert (reg_or_subregno (op0) != 0);
7079 /* D-FORM address with constant element number. */
7080 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7081 rtx offset_rtx = GEN_INT (offset);
7082 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7084 else
7086 /* If we don't have a D-FORM address with a constant element number,
7087 add the two elements in the current address. Then add the offset.
7089 Previously, we tried to add the offset to OP1 and change the
7090 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7091 complicated because we had to verify that op1 was not GPR0 and we
7092 had a constant element offset (due to the way ADDI is defined).
7093 By doing the add of OP0 and OP1 first, and then adding in the
7094 offset, it has the benefit that if D-FORM instructions are
7095 allowed, the offset is part of the memory access to the vector
7096 element. */
7097 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7098 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7102 else
7104 emit_move_insn (base_tmp, addr);
7105 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7108 /* If the address isn't valid, move the address into the temporary base
7109 register. Some reasons it could not be valid include:
7111 The address offset overflowed the 16 or 34 bit offset size;
7112 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7113 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7114 Only X-FORM loads can be done, and the address is D-FORM. */
7116 enum insn_form iform
7117 = address_to_insn_form (new_addr, scalar_mode,
7118 reg_to_non_prefixed (scalar_reg, scalar_mode));
7120 if (iform == INSN_FORM_BAD)
7122 emit_move_insn (base_tmp, new_addr);
7123 new_addr = base_tmp;
7126 return change_address (mem, scalar_mode, new_addr);
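/* Worked example: for a V4SImode vector in memory at (r9 + 16), a
   constant ELEMENT of 2 and SCALAR_MODE == SImode, element_offset is 8
   and the D-FORM case above folds the constants, producing an SImode
   memory reference at (r9 + 24) without using BASE_TMP at all.  */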
7129 /* Split a variable vec_extract operation into the component instructions. */
7131 void
7132 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7133 rtx tmp_altivec)
7135 machine_mode mode = GET_MODE (src);
7136 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7137 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7138 int byte_shift = exact_log2 (scalar_size);
7140 gcc_assert (byte_shift >= 0);
7142 /* If we are given a memory address, optimize to load just the element. We
7143 don't have to adjust the vector element number on little endian
7144 systems. */
7145 if (MEM_P (src))
7147 emit_move_insn (dest,
7148 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7149 scalar_mode));
7150 return;
7153 else if (REG_P (src) || SUBREG_P (src))
7155 int num_elements = GET_MODE_NUNITS (mode);
7156 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7157 int bit_shift = 7 - exact_log2 (num_elements);
7158 rtx element2;
7159 unsigned int dest_regno = reg_or_subregno (dest);
7160 unsigned int src_regno = reg_or_subregno (src);
7161 unsigned int element_regno = reg_or_subregno (element);
7163 gcc_assert (REG_P (tmp_gpr));
7165 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7166 a general purpose register. */
7167 if (TARGET_P9_VECTOR
7168 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7169 && INT_REGNO_P (dest_regno)
7170 && ALTIVEC_REGNO_P (src_regno)
7171 && INT_REGNO_P (element_regno))
7173 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7174 rtx element_si = gen_rtx_REG (SImode, element_regno);
7176 if (mode == V16QImode)
7177 emit_insn (BYTES_BIG_ENDIAN
7178 ? gen_vextublx (dest_si, element_si, src)
7179 : gen_vextubrx (dest_si, element_si, src));
7181 else if (mode == V8HImode)
7183 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7184 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7185 emit_insn (BYTES_BIG_ENDIAN
7186 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7187 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7191 else
7193 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7194 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7195 emit_insn (BYTES_BIG_ENDIAN
7196 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7197 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7200 return;
7204 gcc_assert (REG_P (tmp_altivec));
7206 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7207 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7208 will shift the element into the upper position (adding 3 to convert a
7209 byte shift into a bit shift). */
7210 if (scalar_size == 8)
7212 if (!BYTES_BIG_ENDIAN)
7214 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7215 element2 = tmp_gpr;
7217 else
7218 element2 = element;
7220 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7221 bit. */
7222 emit_insn (gen_rtx_SET (tmp_gpr,
7223 gen_rtx_AND (DImode,
7224 gen_rtx_ASHIFT (DImode,
7225 element2,
7226 GEN_INT (6)),
7227 GEN_INT (64))));
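/* ELEMENT2 is 0 or 1 here, so the shift left by 6 yields 0 or 64: the
   byte offset of the element (0 or 8) times 8 bits, with the AND
   masking out any stray high bits.  */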
7229 else
7231 if (!BYTES_BIG_ENDIAN)
7233 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7235 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7236 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7237 element2 = tmp_gpr;
7239 else
7240 element2 = element;
7242 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
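/* BIT_SHIFT turns the element number into a bit offset: e.g. for
   V4SImode it is 7 - log2(4) = 5, so element N becomes N*32, the bit
   position of the element within the vector.  */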
7245 /* Get the value into the lower byte of the Altivec register where VSLO
7246 expects it. */
7247 if (TARGET_P9_VECTOR)
7248 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7249 else if (can_create_pseudo_p ())
7250 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7251 else
7253 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7254 emit_move_insn (tmp_di, tmp_gpr);
7255 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7258 /* Do the VSLO to get the value into the final location. */
7259 switch (mode)
7261 case E_V2DFmode:
7262 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7263 return;
7265 case E_V2DImode:
7266 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7267 return;
7269 case E_V4SFmode:
7271 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7272 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7273 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7274 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7275 tmp_altivec));
7277 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7278 return;
7281 case E_V4SImode:
7282 case E_V8HImode:
7283 case E_V16QImode:
7285 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7286 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7287 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7288 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7289 tmp_altivec));
7290 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
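/* The element is now in the high bits of the doubleword; e.g. for
   V8HImode we shift right by 64 - 16 = 48 to move it down to the
   bottom of the GPR.  */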
7291 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7292 GEN_INT (64 - bits_in_element)));
7293 return;
7296 default:
7297 gcc_unreachable ();
7300 return;
7302 else
7303 gcc_unreachable ();
7306 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7307 selects whether the alignment is ABI-mandated, optional, or
7308 both ABI-mandated and optional. */
7310 unsigned int
7311 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7313 if (how != align_opt)
7315 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7316 align = 128;
7319 if (how != align_abi)
7321 if (TREE_CODE (type) == ARRAY_TYPE
7322 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7324 if (align < BITS_PER_WORD)
7325 align = BITS_PER_WORD;
7329 return align;
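/* E.g. vector types are raised to 128 bits whenever ABI alignment is
   requested, while the BITS_PER_WORD minimum for arrays of QImode is an
   optimization only, applied just when HOW includes optional
   alignment.  */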
7332 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7333 instructions simply ignore the low bits; VSX memory instructions
7334 are aligned to 4 or 8 bytes. */
7336 static bool
7337 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7339 return (STRICT_ALIGNMENT
7340 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7341 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7342 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
7343 && (int) align < VECTOR_ALIGN (mode)))));
7346 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7348 bool
7349 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7351 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7353 if (computed != 128)
7355 static bool warned;
7356 if (!warned && warn_psabi)
7358 warned = true;
7359 inform (input_location,
7360 "the layout of aggregates containing vectors with"
7361 " %d-byte alignment has changed in GCC 5",
7362 computed / BITS_PER_UNIT);
7365 /* In current GCC there is no special case. */
7366 return false;
7369 return false;
7372 /* AIX increases natural record alignment to doubleword if the first
7373 field is an FP double while the FP fields remain word aligned. */
7375 unsigned int
7376 rs6000_special_round_type_align (tree type, unsigned int computed,
7377 unsigned int specified)
7379 unsigned int align = MAX (computed, specified);
7380 tree field = TYPE_FIELDS (type);
7382 /* Skip all non-field decls.  */
7383 while (field != NULL
7384 && (TREE_CODE (field) != FIELD_DECL
7385 || DECL_FIELD_ABI_IGNORED (field)))
7386 field = DECL_CHAIN (field);
7388 if (field != NULL && field != type)
7390 type = TREE_TYPE (field);
7391 while (TREE_CODE (type) == ARRAY_TYPE)
7392 type = TREE_TYPE (type);
7394 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7395 align = MAX (align, 64);
7398 return align;
7401 /* Darwin increases record alignment to the natural alignment of
7402 the first field. */
7404 unsigned int
7405 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7406 unsigned int specified)
7408 unsigned int align = MAX (computed, specified);
7410 if (TYPE_PACKED (type))
7411 return align;
7413 /* Find the first field, looking down into aggregates. */
7414 do {
7415 tree field = TYPE_FIELDS (type);
7417 /* Skip all non-field decls.  */
7417 while (field != NULL
7418 && (TREE_CODE (field) != FIELD_DECL
7419 || DECL_FIELD_ABI_IGNORED (field)))
7420 field = DECL_CHAIN (field);
7421 if (! field)
7422 break;
7423 /* A packed field does not contribute any extra alignment. */
7424 if (DECL_PACKED (field))
7425 return align;
7426 type = TREE_TYPE (field);
7427 while (TREE_CODE (type) == ARRAY_TYPE)
7428 type = TREE_TYPE (type);
7429 } while (AGGREGATE_TYPE_P (type));
7431 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7432 align = MAX (align, TYPE_ALIGN (type));
7434 return align;
7437 /* Return 1 for an operand in small memory on V.4/eabi. */
7439 int
7440 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7441 machine_mode mode ATTRIBUTE_UNUSED)
7443 #if TARGET_ELF
7444 rtx sym_ref;
7446 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7447 return 0;
7449 if (DEFAULT_ABI != ABI_V4)
7450 return 0;
7452 if (SYMBOL_REF_P (op))
7453 sym_ref = op;
7455 else if (GET_CODE (op) != CONST
7456 || GET_CODE (XEXP (op, 0)) != PLUS
7457 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7458 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7459 return 0;
7461 else
7463 rtx sum = XEXP (op, 0);
7464 HOST_WIDE_INT summand;
7466 /* We have to be careful here, because it is the referenced address
7467 that must be 32k from _SDA_BASE_, not just the symbol. */
7468 summand = INTVAL (XEXP (sum, 1));
7469 if (summand < 0 || summand > g_switch_value)
7470 return 0;
7472 sym_ref = XEXP (sum, 0);
7475 return SYMBOL_REF_SMALL_P (sym_ref);
7476 #else
7477 return 0;
7478 #endif
7481 /* Return true if either operand is a general purpose register. */
7483 bool
7484 gpr_or_gpr_p (rtx op0, rtx op1)
7486 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7487 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7490 /* Return true if this is a move direct operation between GPR registers and
7491 floating point/VSX registers. */
7493 bool
7494 direct_move_p (rtx op0, rtx op1)
7496 if (!REG_P (op0) || !REG_P (op1))
7497 return false;
7499 if (!TARGET_DIRECT_MOVE)
7500 return false;
7502 int regno0 = REGNO (op0);
7503 int regno1 = REGNO (op1);
7504 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7505 return false;
7507 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7508 return true;
7510 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7511 return true;
7513 return false;
7516 /* Return true if ADDR is an acceptable address for a quad memory
7517 operation of mode MODE: either LQ/STQ for general purpose registers,
7518 or LXV/STXV for vector registers under ISA 3.0. STRICT selects the
7519 strict form of base register checking used during and after
7520 register allocation. */
7522 bool
7523 quad_address_p (rtx addr, machine_mode mode, bool strict)
7525 rtx op0, op1;
7527 if (GET_MODE_SIZE (mode) < 16)
7528 return false;
7530 if (legitimate_indirect_address_p (addr, strict))
7531 return true;
7533 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7534 return false;
7536 /* Is this a valid prefixed address? If the bottom four bits of the offset
7537 are non-zero, we could use a prefixed instruction (which does not have the
7538 DQ-form constraint that the traditional instruction had) instead of
7539 forcing the unaligned offset to a GPR. */
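/* E.g. r9+20 fails the DQ-form test below (low four bits 0b0100) but is
   fine as a single prefixed load or store, so accept it here.  */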
7540 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7541 return true;
7543 if (GET_CODE (addr) != PLUS)
7544 return false;
7546 op0 = XEXP (addr, 0);
7547 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7548 return false;
7550 op1 = XEXP (addr, 1);
7551 if (!CONST_INT_P (op1))
7552 return false;
7554 return quad_address_offset_p (INTVAL (op1));
7557 /* Return true if this is a load or store quad operation. This function does
7558 not handle the atomic quad memory instructions. */
7560 bool
7561 quad_load_store_p (rtx op0, rtx op1)
7563 bool ret;
7565 if (!TARGET_QUAD_MEMORY)
7566 ret = false;
7568 else if (REG_P (op0) && MEM_P (op1))
7569 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7570 && quad_memory_operand (op1, GET_MODE (op1))
7571 && !reg_overlap_mentioned_p (op0, op1));
7573 else if (MEM_P (op0) && REG_P (op1))
7574 ret = (quad_memory_operand (op0, GET_MODE (op0))
7575 && quad_int_reg_operand (op1, GET_MODE (op1)));
7577 else
7578 ret = false;
7580 if (TARGET_DEBUG_ADDR)
7582 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7583 ret ? "true" : "false");
7584 debug_rtx (gen_rtx_SET (op0, op1));
7587 return ret;
7590 /* Given an address, return a constant offset term if one exists. */
7592 static rtx
7593 address_offset (rtx op)
7595 if (GET_CODE (op) == PRE_INC
7596 || GET_CODE (op) == PRE_DEC)
7597 op = XEXP (op, 0);
7598 else if (GET_CODE (op) == PRE_MODIFY
7599 || GET_CODE (op) == LO_SUM)
7600 op = XEXP (op, 1);
7602 if (GET_CODE (op) == CONST)
7603 op = XEXP (op, 0);
7605 if (GET_CODE (op) == PLUS)
7606 op = XEXP (op, 1);
7608 if (CONST_INT_P (op))
7609 return op;
7611 return NULL_RTX;
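/* E.g. address_offset ((plus r9 (const_int 8))) returns (const_int 8),
   and for (lo_sum rX (const (plus sym 12))) it returns (const_int 12);
   a plain register address yields NULL_RTX.  */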
7614 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7615 the mode. If we can't find (or don't know) the alignment of the symbol
7616 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7617 should be pessimistic]. Offsets are validated in the same way as for
7618 reg + offset. */
7619 static bool
7620 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7622 /* We should not get here with this. */
7623 gcc_checking_assert (! mode_supports_dq_form (mode));
7625 if (GET_CODE (x) == CONST)
7626 x = XEXP (x, 0);
7628 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7629 x = XVECEXP (x, 0, 0);
7631 rtx sym = NULL_RTX;
7632 unsigned HOST_WIDE_INT offset = 0;
7634 if (GET_CODE (x) == PLUS)
7636 sym = XEXP (x, 0);
7637 if (! SYMBOL_REF_P (sym))
7638 return false;
7639 if (!CONST_INT_P (XEXP (x, 1)))
7640 return false;
7641 offset = INTVAL (XEXP (x, 1));
7643 else if (SYMBOL_REF_P (x))
7644 sym = x;
7645 else if (CONST_INT_P (x))
7646 offset = INTVAL (x);
7647 else if (GET_CODE (x) == LABEL_REF)
7648 offset = 0; // We assume code labels are Pmode aligned
7649 else
7650 return false; // not sure what we have here.
7652 /* If we don't know the alignment of the thing to which the symbol refers,
7653 we assume optimistically it is "enough".
7654 ??? maybe we should be pessimistic instead. */
7655 unsigned align = 0;
7657 if (sym)
7659 tree decl = SYMBOL_REF_DECL (sym);
7660 #if TARGET_MACHO
7661 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7662 /* The decl in an indirection symbol is the original one, which might
7663 be less aligned than the indirection. Our indirections are always
7664 pointer-aligned. */
7666 else
7667 #endif
7668 if (decl && DECL_ALIGN (decl))
7669 align = DECL_ALIGN_UNIT (decl);
7672 unsigned int extra = 0;
7673 switch (mode)
7675 case E_DFmode:
7676 case E_DDmode:
7677 case E_DImode:
7678 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7679 addressing. */
7680 if (VECTOR_MEM_VSX_P (mode))
7681 return false;
7683 if (!TARGET_POWERPC64)
7684 extra = 4;
7685 else if ((offset & 3) || (align & 3))
7686 return false;
7687 break;
7689 case E_TFmode:
7690 case E_IFmode:
7691 case E_KFmode:
7692 case E_TDmode:
7693 case E_TImode:
7694 case E_PTImode:
7695 extra = 8;
7696 if (!TARGET_POWERPC64)
7697 extra = 12;
7698 else if ((offset & 3) || (align & 3))
7699 return false;
7700 break;
7702 default:
7703 break;
7706 /* We only care if the access(es) would cause a change to the high part. */
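/* The XOR/subtract idiom below sign-extends the low 16 bits of the
   offset: e.g. 0x17fff becomes 0x7fff while 0x18000 becomes -0x8000,
   i.e. exactly what the low half of a HIGH/LO_SUM pair can encode.  */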
7707 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7708 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7711 /* Return true if the MEM operand is a memory operand suitable for use
7712 with a (full width, possibly multiple) gpr load/store. On
7713 powerpc64 this means the offset must be divisible by 4.
7714 Implements 'Y' constraint.
7716 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7717 a constraint function we know the operand has satisfied a suitable
7718 memory predicate.
7720 Offsetting a lo_sum should not be allowed, except where we know by
7721 alignment that a 32k boundary is not crossed. Note that by
7722 "offsetting" here we mean a further offset to access parts of the
7723 MEM. It's fine to have a lo_sum where the inner address is offset
7724 from a sym, since the same sym+offset will appear in the high part
7725 of the address calculation. */
7727 bool
7728 mem_operand_gpr (rtx op, machine_mode mode)
7730 unsigned HOST_WIDE_INT offset;
7731 int extra;
7732 rtx addr = XEXP (op, 0);
7734 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7735 if (TARGET_UPDATE
7736 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7737 && mode_supports_pre_incdec_p (mode)
7738 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7739 return true;
7741 /* Allow prefixed instructions if supported. If the bottom two bits of the
7742 offset are non-zero, we could use a prefixed instruction (which does not
7743 have the DS-form constraint that the traditional instruction had) instead
7744 of forcing the unaligned offset to a GPR. */
7745 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7746 return true;
7748 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7749 really OK. Doing this early avoids teaching all the other machinery
7750 about them. */
7751 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7752 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7754 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7755 if (!rs6000_offsettable_memref_p (op, mode, false))
7756 return false;
7758 op = address_offset (addr);
7759 if (op == NULL_RTX)
7760 return true;
7762 offset = INTVAL (op);
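/* On powerpc64 a full-word load such as ld is DS-form: the encoding
   drops the low two bits of the displacement, so e.g. r9+6 must be
   rejected here and handled as an indexed ldx instead.  */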
7763 if (TARGET_POWERPC64 && (offset & 3) != 0)
7764 return false;
7766 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7767 if (extra < 0)
7768 extra = 0;
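/* EXTRA covers the later words of a multi-word access: e.g. a DFmode
   load into 32-bit GPRs at offset 32764 also touches 32768, which no
   longer fits the signed 16-bit displacement, so both ends are checked
   below.  */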
7770 if (GET_CODE (addr) == LO_SUM)
7771 /* For lo_sum addresses, we must allow any offset except one that
7772 causes a wrap, so test only the low 16 bits. */
7773 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7775 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7778 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7779 enforce an offset divisible by 4 even for 32-bit. */
7781 bool
7782 mem_operand_ds_form (rtx op, machine_mode mode)
7784 unsigned HOST_WIDE_INT offset;
7785 int extra;
7786 rtx addr = XEXP (op, 0);
7788 /* Allow prefixed instructions if supported. If the bottom two bits of the
7789 offset are non-zero, we could use a prefixed instruction (which does not
7790 have the DS-form constraint that the traditional instruction had) instead
7791 of forcing the unaligned offset to a GPR. */
7792 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7793 return true;
7795 if (!offsettable_address_p (false, mode, addr))
7796 return false;
7798 op = address_offset (addr);
7799 if (op == NULL_RTX)
7800 return true;
7802 offset = INTVAL (op);
7803 if ((offset & 3) != 0)
7804 return false;
7806 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7807 if (extra < 0)
7808 extra = 0;
7810 if (GET_CODE (addr) == LO_SUM)
7811 /* For lo_sum addresses, we must allow any offset except one that
7812 causes a wrap, so test only the low 16 bits. */
7813 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7815 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7818 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7820 static bool
7821 reg_offset_addressing_ok_p (machine_mode mode)
7823 switch (mode)
7825 case E_V16QImode:
7826 case E_V8HImode:
7827 case E_V4SFmode:
7828 case E_V4SImode:
7829 case E_V2DFmode:
7830 case E_V2DImode:
7831 case E_V1TImode:
7832 case E_TImode:
7833 case E_TFmode:
7834 case E_KFmode:
7835 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7836 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7837 a vector mode, if we want to use the VSX registers to move it around,
7838 we need to restrict ourselves to reg+reg addressing. Similarly for
7839 IEEE 128-bit floating point that is passed in a single vector
7840 register. */
7841 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7842 return mode_supports_dq_form (mode);
7843 break;
7845 /* The vector pair/quad types support offset addressing if the
7846 underlying vectors support offset addressing. */
7847 case E_POImode:
7848 case E_PXImode:
7849 return TARGET_MMA;
7851 case E_SDmode:
7852 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7853 addressing for the LFIWZX and STFIWX instructions. */
7854 if (TARGET_NO_SDMODE_STACK)
7855 return false;
7856 break;
7858 default:
7859 break;
7862 return true;
7865 static bool
7866 virtual_stack_registers_memory_p (rtx op)
7868 int regnum;
7870 if (REG_P (op))
7871 regnum = REGNO (op);
7873 else if (GET_CODE (op) == PLUS
7874 && REG_P (XEXP (op, 0))
7875 && CONST_INT_P (XEXP (op, 1)))
7876 regnum = REGNO (XEXP (op, 0));
7878 else
7879 return false;
7881 return (regnum >= FIRST_VIRTUAL_REGISTER
7882 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7885 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7886 is known to not straddle a 32k boundary. This function is used
7887 to determine whether -mcmodel=medium code can use TOC pointer
7888 relative addressing for OP. This means the alignment of the TOC
7889 pointer must also be taken into account, and unfortunately that is
7890 only 8 bytes. */
7892 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7893 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7894 #endif
7896 static bool
7897 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7898 machine_mode mode)
7900 tree decl;
7901 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7903 if (!SYMBOL_REF_P (op))
7904 return false;
7906 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7907 SYMBOL_REF. */
7908 if (mode_supports_dq_form (mode))
7909 return false;
7911 dsize = GET_MODE_SIZE (mode);
7912 decl = SYMBOL_REF_DECL (op);
7913 if (!decl)
7915 if (dsize == 0)
7916 return false;
7918 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7919 replacing memory addresses with an anchor plus offset. We
7920 could find the decl by rummaging around in the block->objects
7921 VEC for the given offset but that seems like too much work. */
7922 dalign = BITS_PER_UNIT;
7923 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7924 && SYMBOL_REF_ANCHOR_P (op)
7925 && SYMBOL_REF_BLOCK (op) != NULL)
7927 struct object_block *block = SYMBOL_REF_BLOCK (op);
7929 dalign = block->alignment;
7930 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7932 else if (CONSTANT_POOL_ADDRESS_P (op))
7934 /* It would be nice to have get_pool_align(). */
7935 machine_mode cmode = get_pool_mode (op);
7937 dalign = GET_MODE_ALIGNMENT (cmode);
7940 else if (DECL_P (decl))
7942 dalign = DECL_ALIGN (decl);
7944 if (dsize == 0)
7946 /* Allow BLKmode when the entire object is known to not
7947 cross a 32k boundary. */
7948 if (!DECL_SIZE_UNIT (decl))
7949 return false;
7951 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7952 return false;
7954 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7955 if (dsize > 32768)
7956 return false;
7958 dalign /= BITS_PER_UNIT;
7959 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7960 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7961 return dalign >= dsize;
7964 else
7965 gcc_unreachable ();
7967 /* Find how many bits of the alignment we know for this access. */
7968 dalign /= BITS_PER_UNIT;
7969 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7970 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7971 mask = dalign - 1;
7972 lsb = offset & -offset;
7973 mask &= lsb - 1;
7974 dalign = mask + 1;
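/* E.g. with dalign = 8 and offset = 20, lsb = 4 and the known alignment
   drops to 4: a 4-byte access at that offset is provably safe, while an
   8-byte access might straddle a 32k boundary.  */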
7976 return dalign >= dsize;
7979 static bool
7980 constant_pool_expr_p (rtx op)
7982 rtx base, offset;
7984 split_const (op, &base, &offset);
7985 return (SYMBOL_REF_P (base)
7986 && CONSTANT_POOL_ADDRESS_P (base)
7987 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7990 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7991 use that as the register to put the HIGH value into if register allocation
7992 is already done. */
7994 rtx
7995 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7997 rtx tocrel, tocreg, hi;
7999 gcc_assert (TARGET_TOC);
8001 if (TARGET_DEBUG_ADDR)
8003 if (SYMBOL_REF_P (symbol))
8004 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8005 XSTR (symbol, 0));
8006 else
8008 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8009 GET_RTX_NAME (GET_CODE (symbol)));
8010 debug_rtx (symbol);
8014 if (!can_create_pseudo_p ())
8015 df_set_regs_ever_live (TOC_REGISTER, true);
8017 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8018 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8019 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8020 return tocrel;
8022 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8023 if (largetoc_reg != NULL)
8025 emit_move_insn (largetoc_reg, hi);
8026 hi = largetoc_reg;
8028 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
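/* For -mcmodel=medium/large the result is (lo_sum (high (unspec [sym, r2]
   UNSPEC_TOCREL)) (unspec ... UNSPEC_TOCREL)), i.e. an addis off the TOC
   pointer followed by a @toc@l displacement; for -mcmodel=small the bare
   UNSPEC_TOCREL becomes a single TOC-relative displacement.  */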
8031 /* These are only used to pass through from print_operand/print_operand_address
8032 to rs6000_output_addr_const_extra over the intervening function
8033 output_addr_const, which is not target code. */
8034 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8036 /* Return true if OP is a toc pointer relative address (the output
8037 of create_TOC_reference). If STRICT, do not match non-split
8038 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8039 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8040 TOCREL_OFFSET_RET respectively. */
8042 bool
8043 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8044 const_rtx *tocrel_offset_ret)
8046 if (!TARGET_TOC)
8047 return false;
8049 if (TARGET_CMODEL != CMODEL_SMALL)
8051 /* When strict, ensure we have everything tidy. */
8052 if (strict
8053 && !(GET_CODE (op) == LO_SUM
8054 && REG_P (XEXP (op, 0))
8055 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8056 return false;
8058 /* When not strict, allow non-split TOC addresses and also allow
8059 (lo_sum (high ..)) TOC addresses created during reload. */
8060 if (GET_CODE (op) == LO_SUM)
8061 op = XEXP (op, 1);
8064 const_rtx tocrel_base = op;
8065 const_rtx tocrel_offset = const0_rtx;
8067 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8069 tocrel_base = XEXP (op, 0);
8070 tocrel_offset = XEXP (op, 1);
8073 if (tocrel_base_ret)
8074 *tocrel_base_ret = tocrel_base;
8075 if (tocrel_offset_ret)
8076 *tocrel_offset_ret = tocrel_offset;
8078 return (GET_CODE (tocrel_base) == UNSPEC
8079 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8080 && REG_P (XVECEXP (tocrel_base, 0, 1))
8081 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8084 /* Return true if X is a constant pool address, and also for cmodel=medium
8085 if X is a toc-relative address known to be offsettable within MODE. */
8087 bool
8088 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8089 bool strict)
8091 const_rtx tocrel_base, tocrel_offset;
8092 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8093 && (TARGET_CMODEL != CMODEL_MEDIUM
8094 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8095 || mode == QImode
8096 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8097 INTVAL (tocrel_offset), mode)));
8100 static bool
8101 legitimate_small_data_p (machine_mode mode, rtx x)
8103 return (DEFAULT_ABI == ABI_V4
8104 && !flag_pic && !TARGET_TOC
8105 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8106 && small_data_operand (x, mode));
8109 bool
8110 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8111 bool strict, bool worst_case)
8113 unsigned HOST_WIDE_INT offset;
8114 unsigned int extra;
8116 if (GET_CODE (x) != PLUS)
8117 return false;
8118 if (!REG_P (XEXP (x, 0)))
8119 return false;
8120 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8121 return false;
8122 if (mode_supports_dq_form (mode))
8123 return quad_address_p (x, mode, strict);
8124 if (!reg_offset_addressing_ok_p (mode))
8125 return virtual_stack_registers_memory_p (x);
8126 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8127 return true;
8128 if (!CONST_INT_P (XEXP (x, 1)))
8129 return false;
8131 offset = INTVAL (XEXP (x, 1));
8132 extra = 0;
8133 switch (mode)
8135 case E_DFmode:
8136 case E_DDmode:
8137 case E_DImode:
8138 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8139 addressing. */
8140 if (VECTOR_MEM_VSX_P (mode))
8141 return false;
8143 if (!worst_case)
8144 break;
8145 if (!TARGET_POWERPC64)
8146 extra = 4;
8147 else if (offset & 3)
8148 return false;
8149 break;
8151 case E_TFmode:
8152 case E_IFmode:
8153 case E_KFmode:
8154 case E_TDmode:
8155 case E_TImode:
8156 case E_PTImode:
8157 extra = 8;
8158 if (!worst_case)
8159 break;
8160 if (!TARGET_POWERPC64)
8161 extra = 12;
8162 else if (offset & 3)
8163 return false;
8164 break;
8166 default:
8167 break;
8170 if (TARGET_PREFIXED)
8171 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8172 else
8173 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8176 bool
8177 legitimate_indexed_address_p (rtx x, int strict)
8179 rtx op0, op1;
8181 if (GET_CODE (x) != PLUS)
8182 return false;
8184 op0 = XEXP (x, 0);
8185 op1 = XEXP (x, 1);
8187 return (REG_P (op0) && REG_P (op1)
8188 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8189 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8190 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8191 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8194 bool
8195 avoiding_indexed_address_p (machine_mode mode)
8197 unsigned int msize = GET_MODE_SIZE (mode);
8199 /* Avoid indexed addressing for modes that have non-indexed load/store
8200 instruction forms. On power10, vector pairs have an indexed
8201 form, but vector quads don't. */
8202 if (msize > 16)
8203 return msize != 32;
8205 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8208 bool
8209 legitimate_indirect_address_p (rtx x, int strict)
8211 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
8214 bool
8215 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8217 if (!TARGET_MACHO || !flag_pic
8218 || mode != SImode || !MEM_P (x))
8219 return false;
8220 x = XEXP (x, 0);
8222 if (GET_CODE (x) != LO_SUM)
8223 return false;
8224 if (!REG_P (XEXP (x, 0)))
8225 return false;
8226 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8227 return false;
8228 x = XEXP (x, 1);
8230 return CONSTANT_P (x);
8233 static bool
8234 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8236 if (GET_CODE (x) != LO_SUM)
8237 return false;
8238 if (!REG_P (XEXP (x, 0)))
8239 return false;
8240 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8241 return false;
8242 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8243 if (mode_supports_dq_form (mode))
8244 return false;
8245 x = XEXP (x, 1);
8247 if (TARGET_ELF || TARGET_MACHO)
8249 bool large_toc_ok;
8251 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8252 return false;
8253 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8254 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8255 recognizes some LO_SUM addresses as valid although this
8256 function says opposite. In most cases, LRA through different
8257 transformations can generate correct code for address reloads.
8258 It cannot manage only some LO_SUM cases. So we need to add
8259 code here saying that some addresses are still valid. */
8260 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8261 && small_toc_ref (x, VOIDmode));
8262 if (TARGET_TOC && ! large_toc_ok)
8263 return false;
8264 if (GET_MODE_NUNITS (mode) != 1)
8265 return false;
8266 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8267 && !(/* ??? Assume floating point reg based on mode? */
8268 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8269 return false;
8271 return CONSTANT_P (x) || large_toc_ok;
8274 return false;
8278 /* Try machine-dependent ways of modifying an illegitimate address
8279 to be legitimate. If we find one, return the new, valid address.
8280 This is used from only one place: `memory_address' in explow.c.
8282 OLDX is the address as it was before break_out_memory_refs was
8283 called. In some cases it is useful to look at this to decide what
8284 needs to be done.
8286 It is always safe for this function to do nothing. It exists to
8287 recognize opportunities to optimize the output.
8289 On RS/6000, first check for the sum of a register with a constant
8290 integer that is out of range. If so, generate code to add the
8291 constant with the low-order 16 bits masked to the register and force
8292 this result into another register (this can be done with `cau').
8293 Then generate an address of REG+(CONST&0xffff), allowing for the
8294 possibility of bit 16 being a one.
8296 Then check for the sum of a register and something not constant, try to
8297 load the other things into a register and return the sum. */
8299 static rtx
8300 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8301 machine_mode mode)
8303 unsigned int extra;
8305 if (!reg_offset_addressing_ok_p (mode)
8306 || mode_supports_dq_form (mode))
8308 if (virtual_stack_registers_memory_p (x))
8309 return x;
8311 /* In theory we should not be seeing addresses of the form reg+0,
8312 but just in case it is generated, optimize it away. */
8313 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8314 return force_reg (Pmode, XEXP (x, 0));
8316 /* For TImode with load/store quad, restrict addresses to just a single
8317 pointer, so it works with both GPRs and VSX registers. */
8318 /* Make sure both operands are registers. */
8319 else if (GET_CODE (x) == PLUS
8320 && (mode != TImode || !TARGET_VSX))
8321 return gen_rtx_PLUS (Pmode,
8322 force_reg (Pmode, XEXP (x, 0)),
8323 force_reg (Pmode, XEXP (x, 1)));
8324 else
8325 return force_reg (Pmode, x);
8327 if (SYMBOL_REF_P (x))
8329 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8330 if (model != 0)
8331 return rs6000_legitimize_tls_address (x, model);
8334 extra = 0;
8335 switch (mode)
8337 case E_TFmode:
8338 case E_TDmode:
8339 case E_TImode:
8340 case E_PTImode:
8341 case E_IFmode:
8342 case E_KFmode:
8343 /* As in legitimate_offset_address_p we do not assume
8344 worst-case. The mode here is just a hint as to the registers
8345 used. A TImode is usually in gprs, but may actually be in
8346 fprs. Leave worst-case scenario for reload to handle via
8347 insn constraints. PTImode is only GPRs. */
8348 extra = 8;
8349 break;
8350 default:
8351 break;
8354 if (GET_CODE (x) == PLUS
8355 && REG_P (XEXP (x, 0))
8356 && CONST_INT_P (XEXP (x, 1))
8357 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8358 >= 0x10000 - extra))
8360 HOST_WIDE_INT high_int, low_int;
8361 rtx sum;
8362 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8363 if (low_int >= 0x8000 - extra)
8364 low_int = 0;
8365 high_int = INTVAL (XEXP (x, 1)) - low_int;
8366 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8367 GEN_INT (high_int)), 0);
8368 return plus_constant (Pmode, sum, low_int);
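/* E.g. for r3 + 0x18000: low_int = -0x8000 and high_int = 0x20000, so we
   emit an add of 0x20000 into a new register and return (plus reg
   -0x8000), each half now fitting in a signed 16-bit immediate.  */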
8370 else if (GET_CODE (x) == PLUS
8371 && REG_P (XEXP (x, 0))
8372 && !CONST_INT_P (XEXP (x, 1))
8373 && GET_MODE_NUNITS (mode) == 1
8374 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8375 || (/* ??? Assume floating point reg based on mode? */
8376 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8377 && !avoiding_indexed_address_p (mode))
8379 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8380 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8382 else if ((TARGET_ELF
8383 #if TARGET_MACHO
8384 || !MACHO_DYNAMIC_NO_PIC_P
8385 #endif
8387 && TARGET_32BIT
8388 && TARGET_NO_TOC_OR_PCREL
8389 && !flag_pic
8390 && !CONST_INT_P (x)
8391 && !CONST_WIDE_INT_P (x)
8392 && !CONST_DOUBLE_P (x)
8393 && CONSTANT_P (x)
8394 && GET_MODE_NUNITS (mode) == 1
8395 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8396 || (/* ??? Assume floating point reg based on mode? */
8397 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8399 rtx reg = gen_reg_rtx (Pmode);
8400 if (TARGET_ELF)
8401 emit_insn (gen_elf_high (reg, x));
8402 else
8403 emit_insn (gen_macho_high (Pmode, reg, x));
8404 return gen_rtx_LO_SUM (Pmode, reg, x);
8406 else if (TARGET_TOC
8407 && SYMBOL_REF_P (x)
8408 && constant_pool_expr_p (x)
8409 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8410 return create_TOC_reference (x, NULL_RTX);
8411 else
8412 return x;
8415 /* Debug version of rs6000_legitimize_address. */
8416 static rtx
8417 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8419 rtx ret;
8420 rtx_insn *insns;
8422 start_sequence ();
8423 ret = rs6000_legitimize_address (x, oldx, mode);
8424 insns = get_insns ();
8425 end_sequence ();
8427 if (ret != x)
8429 fprintf (stderr,
8430 "\nrs6000_legitimize_address: mode %s, old code %s, "
8431 "new code %s, modified\n",
8432 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8433 GET_RTX_NAME (GET_CODE (ret)));
8435 fprintf (stderr, "Original address:\n");
8436 debug_rtx (x);
8438 fprintf (stderr, "oldx:\n");
8439 debug_rtx (oldx);
8441 fprintf (stderr, "New address:\n");
8442 debug_rtx (ret);
8444 if (insns)
8446 fprintf (stderr, "Insns added:\n");
8447 debug_rtx_list (insns, 20);
8450 else
8452 fprintf (stderr,
8453 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8454 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8456 debug_rtx (x);
8459 if (insns)
8460 emit_insn (insns);
8462 return ret;
8465 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8466 We need to emit DTP-relative relocations. */
8468 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8469 static void
8470 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8472 switch (size)
8474 case 4:
8475 fputs ("\t.long\t", file);
8476 break;
8477 case 8:
8478 fputs (DOUBLE_INT_ASM_OP, file);
8479 break;
8480 default:
8481 gcc_unreachable ();
8483 output_addr_const (file, x);
8484 if (TARGET_ELF)
8485 fputs ("@dtprel+0x8000", file);
8486 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8488 switch (SYMBOL_REF_TLS_MODEL (x))
8490 case 0:
8491 break;
8492 case TLS_MODEL_LOCAL_EXEC:
8493 fputs ("@le", file);
8494 break;
8495 case TLS_MODEL_INITIAL_EXEC:
8496 fputs ("@ie", file);
8497 break;
8498 case TLS_MODEL_GLOBAL_DYNAMIC:
8499 case TLS_MODEL_LOCAL_DYNAMIC:
8500 fputs ("@m", file);
8501 break;
8502 default:
8503 gcc_unreachable ();
8508 /* Return true if X is a symbol that refers to real (rather than emulated)
8509 TLS. */
8511 static bool
8512 rs6000_real_tls_symbol_ref_p (rtx x)
8514 return (SYMBOL_REF_P (x)
8515 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8518 /* In the name of slightly smaller debug output, and to cater to
8519 general assembler lossage, recognize various UNSPEC sequences
8520 and turn them back into a direct symbol reference. */
8522 static rtx
8523 rs6000_delegitimize_address (rtx orig_x)
8525 rtx x, y, offset;
8527 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8528 orig_x = XVECEXP (orig_x, 0, 0);
8530 orig_x = delegitimize_mem_from_attrs (orig_x);
8532 x = orig_x;
8533 if (MEM_P (x))
8534 x = XEXP (x, 0);
8536 y = x;
8537 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8538 y = XEXP (y, 1);
8540 offset = NULL_RTX;
8541 if (GET_CODE (y) == PLUS
8542 && GET_MODE (y) == Pmode
8543 && CONST_INT_P (XEXP (y, 1)))
8545 offset = XEXP (y, 1);
8546 y = XEXP (y, 0);
8549 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8551 y = XVECEXP (y, 0, 0);
8553 #ifdef HAVE_AS_TLS
8554 /* Do not associate thread-local symbols with the original
8555 constant pool symbol. */
8556 if (TARGET_XCOFF
8557 && SYMBOL_REF_P (y)
8558 && CONSTANT_POOL_ADDRESS_P (y)
8559 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8560 return orig_x;
8561 #endif
8563 if (offset != NULL_RTX)
8564 y = gen_rtx_PLUS (Pmode, y, offset);
8565 if (!MEM_P (orig_x))
8566 return y;
8567 else
8568 return replace_equiv_address_nv (orig_x, y);
8571 if (TARGET_MACHO
8572 && GET_CODE (orig_x) == LO_SUM
8573 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8575 y = XEXP (XEXP (orig_x, 1), 0);
8576 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8577 return XVECEXP (y, 0, 0);
8580 return orig_x;
8583 /* Return true if X shouldn't be emitted into the debug info.
8584 The linker doesn't like .toc section references from
8585 .debug_* sections, so reject .toc section symbols. */
8587 static bool
8588 rs6000_const_not_ok_for_debug_p (rtx x)
8590 if (GET_CODE (x) == UNSPEC)
8591 return true;
8592 if (SYMBOL_REF_P (x)
8593 && CONSTANT_POOL_ADDRESS_P (x))
8595 rtx c = get_pool_constant (x);
8596 machine_mode cmode = get_pool_mode (x);
8597 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8598 return true;
8601 return false;
8604 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8606 static bool
8607 rs6000_legitimate_combined_insn (rtx_insn *insn)
8609 int icode = INSN_CODE (insn);
8611 /* Reject creating doloop insns. Combine should not be allowed
8612 to create these for a number of reasons:
8613 1) In a nested loop, if combine creates one of these in an
8614 outer loop and the register allocator happens to allocate ctr
8615 to the outer loop insn, then the inner loop can't use ctr.
8616 Inner loops ought to be more highly optimized.
8617 2) Combine often wants to create one of these from what was
8618 originally a three insn sequence, first combining the three
8619 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8620 allocated ctr, the splitter takes us back to the three insn
8621 sequence. It's better to stop combine at the two insn
8622 sequence.
8623 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8624 insns, the register allocator sometimes uses floating point
8625 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8626 jump insn and output reloads are not implemented for jumps,
8627 the ctrsi/ctrdi splitters need to handle all possible cases.
8628 That's a pain, and it gets to be seriously difficult when a
8629 splitter that runs after reload needs memory to transfer from
8630 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8631 for the difficult case. It's better to not create problems
8632 in the first place. */
8633 if (icode != CODE_FOR_nothing
8634 && (icode == CODE_FOR_bdz_si
8635 || icode == CODE_FOR_bdz_di
8636 || icode == CODE_FOR_bdnz_si
8637 || icode == CODE_FOR_bdnz_di
8638 || icode == CODE_FOR_bdztf_si
8639 || icode == CODE_FOR_bdztf_di
8640 || icode == CODE_FOR_bdnztf_si
8641 || icode == CODE_FOR_bdnztf_di))
8642 return false;
8644 return true;
8647 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8649 static GTY(()) rtx rs6000_tls_symbol;
8650 static rtx
8651 rs6000_tls_get_addr (void)
8653 if (!rs6000_tls_symbol)
8654 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8656 return rs6000_tls_symbol;
8659 /* Construct the SYMBOL_REF for TLS GOT references. */
8661 static GTY(()) rtx rs6000_got_symbol;
8662 static rtx
8663 rs6000_got_sym (void)
8665 if (!rs6000_got_symbol)
8667 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8668 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8669 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8672 return rs6000_got_symbol;
8675 /* AIX Thread-Local Address support. */
8677 static rtx
8678 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8680 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8681 const char *name;
8682 char *tlsname;
8684 name = XSTR (addr, 0);
8685 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8686 or the symbol will be in the TLS private data section. */
8687 if (name[strlen (name) - 1] != ']'
8688 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8689 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8691 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8692 strcpy (tlsname, name);
8693 strcat (tlsname,
8694 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8695 tlsaddr = copy_rtx (addr);
8696 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8698 else
8699 tlsaddr = addr;
8701 /* Place addr into TOC constant pool. */
8702 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8704 /* Output the TOC entry and create the MEM referencing the value. */
8705 if (constant_pool_expr_p (XEXP (sym, 0))
8706 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8708 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8709 mem = gen_const_mem (Pmode, tocref);
8710 set_mem_alias_set (mem, get_TOC_alias_set ());
8712 else
8713 return sym;
8715 /* Use global-dynamic for local-dynamic. */
8716 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8717 || model == TLS_MODEL_LOCAL_DYNAMIC)
8719 /* Create new TOC reference for @m symbol. */
8720 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8721 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8722 strcpy (tlsname, "*LCM");
8723 strcat (tlsname, name + 3);
8724 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8725 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8726 tocref = create_TOC_reference (modaddr, NULL_RTX);
8727 rtx modmem = gen_const_mem (Pmode, tocref);
8728 set_mem_alias_set (modmem, get_TOC_alias_set ());
8730 rtx modreg = gen_reg_rtx (Pmode);
8731 emit_insn (gen_rtx_SET (modreg, modmem));
8733 tmpreg = gen_reg_rtx (Pmode);
8734 emit_insn (gen_rtx_SET (tmpreg, mem));
8736 dest = gen_reg_rtx (Pmode);
8737 if (TARGET_32BIT)
8738 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8739 else
8740 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8741 return dest;
8743 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8744 else if (TARGET_32BIT)
8746 tlsreg = gen_reg_rtx (SImode);
8747 emit_insn (gen_tls_get_tpointer (tlsreg));
8749 else
8750 tlsreg = gen_rtx_REG (DImode, 13);
8752 /* Load the TOC value into temporary register. */
8753 tmpreg = gen_reg_rtx (Pmode);
8754 emit_insn (gen_rtx_SET (tmpreg, mem));
8755 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8756 gen_rtx_MINUS (Pmode, addr, tlsreg));
8758 /* Add TOC symbol value to TLS pointer. */
8759 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8761 return dest;
8764 /* Passes the tls arg value for global dynamic and local dynamic
8765 emit_library_call_value in rs6000_legitimize_tls_address to
8766 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
8767 marker relocs put on __tls_get_addr calls. */
8768 static rtx global_tlsarg;
8770 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8771 this (thread-local) address. */
8773 static rtx
8774 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8776 rtx dest, insn;
8778 if (TARGET_XCOFF)
8779 return rs6000_legitimize_tls_address_aix (addr, model);
8781 dest = gen_reg_rtx (Pmode);
8782 if (model == TLS_MODEL_LOCAL_EXEC
8783 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
8785 rtx tlsreg;
8787 if (TARGET_64BIT)
8789 tlsreg = gen_rtx_REG (Pmode, 13);
8790 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8792 else
8794 tlsreg = gen_rtx_REG (Pmode, 2);
8795 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8797 emit_insn (insn);
8799 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8801 rtx tlsreg, tmp;
8803 tmp = gen_reg_rtx (Pmode);
8804 if (TARGET_64BIT)
8806 tlsreg = gen_rtx_REG (Pmode, 13);
8807 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8809 else
8811 tlsreg = gen_rtx_REG (Pmode, 2);
8812 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8814 emit_insn (insn);
8815 if (TARGET_64BIT)
8816 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8817 else
8818 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8819 emit_insn (insn);
8821 else
8823 rtx got, tga, tmp1, tmp2;
8825 /* We currently use relocations like @got@tlsgd for tls, which
8826 means the linker will handle allocation of tls entries, placing
8827 them in the .got section. So use a pointer to the .got section,
8828 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8829 or to secondary GOT sections used by 32-bit -fPIC. */
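/* E.g. on 64-bit ELF a global-dynamic access typically expands to an
   addis/addi pair computing sym@got@tlsgd from r2, followed by
   bl __tls_get_addr(sym@tlsgd); the parenthesized marker reloc is what
   GLOBAL_TLSARG threads through to the call emitters.  */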
8830 if (rs6000_pcrel_p ())
8831 got = const0_rtx;
8832 else if (TARGET_64BIT)
8833 got = gen_rtx_REG (Pmode, 2);
8834 else
8836 if (flag_pic == 1)
8837 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8838 else
8840 rtx gsym = rs6000_got_sym ();
8841 got = gen_reg_rtx (Pmode);
8842 if (flag_pic == 0)
8843 rs6000_emit_move (got, gsym, Pmode);
8844 else
8846 rtx mem, lab;
8848 tmp1 = gen_reg_rtx (Pmode);
8849 tmp2 = gen_reg_rtx (Pmode);
8850 mem = gen_const_mem (Pmode, tmp1);
8851 lab = gen_label_rtx ();
8852 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8853 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8854 if (TARGET_LINK_STACK)
8855 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8856 emit_move_insn (tmp2, mem);
8857 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8858 set_unique_reg_note (last, REG_EQUAL, gsym);
8863 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8865 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8866 UNSPEC_TLSGD);
8867 tga = rs6000_tls_get_addr ();
8868 rtx argreg = gen_rtx_REG (Pmode, 3);
8869 emit_insn (gen_rtx_SET (argreg, arg));
8870 global_tlsarg = arg;
8871 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8872 global_tlsarg = NULL_RTX;
8874 /* Make a note so that the result of this call can be CSEd. */
8875 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8876 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8877 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8879 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8881 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8882 tga = rs6000_tls_get_addr ();
8883 tmp1 = gen_reg_rtx (Pmode);
8884 rtx argreg = gen_rtx_REG (Pmode, 3);
8885 emit_insn (gen_rtx_SET (argreg, arg));
8886 global_tlsarg = arg;
8887 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8888 global_tlsarg = NULL_RTX;
8890 /* Make a note so that the result of this call can be CSEd. */
8891 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8892 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8893 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8895 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
8897 if (TARGET_64BIT)
8898 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8899 else
8900 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8902 else if (rs6000_tls_size == 32)
8904 tmp2 = gen_reg_rtx (Pmode);
8905 if (TARGET_64BIT)
8906 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8907 else
8908 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8909 emit_insn (insn);
8910 if (TARGET_64BIT)
8911 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8912 else
8913 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8915 else
8917 tmp2 = gen_reg_rtx (Pmode);
8918 if (TARGET_64BIT)
8919 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8920 else
8921 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8922 emit_insn (insn);
8923 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8925 emit_insn (insn);
8927 else
8929 /* IE, or 64-bit offset LE. */
8930 tmp2 = gen_reg_rtx (Pmode);
8931 if (TARGET_64BIT)
8932 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8933 else
8934 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8935 emit_insn (insn);
8936 if (rs6000_pcrel_p ())
8938 if (TARGET_64BIT)
8939 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8940 else
8941 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8943 else if (TARGET_64BIT)
8944 insn = gen_tls_tls_64 (dest, tmp2, addr);
8945 else
8946 insn = gen_tls_tls_32 (dest, tmp2, addr);
8947 emit_insn (insn);
8951 return dest;
8954 /* Only create the global variable for the stack protect guard if we are using
8955 the global flavor of that guard. */
8956 static tree
8957 rs6000_init_stack_protect_guard (void)
8959 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8960 return default_stack_protect_guard ();
8962 return NULL_TREE;
8965 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8967 static bool
8968 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8970 if (GET_CODE (x) == HIGH
8971 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8972 return true;
8974 /* A TLS symbol in the TOC cannot contain a sum. */
8975 if (GET_CODE (x) == CONST
8976 && GET_CODE (XEXP (x, 0)) == PLUS
8977 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8978 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8979 return true;
8981 /* Do not place an ELF TLS symbol in the constant pool. */
8982 return TARGET_ELF && tls_referenced_p (x);
8985 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8986 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8987 can be addressed relative to the toc pointer. */
8989 static bool
8990 use_toc_relative_ref (rtx sym, machine_mode mode)
8992 return ((constant_pool_expr_p (sym)
8993 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8994 get_pool_mode (sym)))
8995 || (TARGET_CMODEL == CMODEL_MEDIUM
8996 && SYMBOL_REF_LOCAL_P (sym)
8997 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9000 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9001 that is a valid memory address for an instruction.
9002 The MODE argument is the machine mode for the MEM expression
9003 that wants to use this address.
9005 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9006 refers to a constant pool entry of an address (or the sum of it
9007 plus a constant), a short (16-bit signed) constant plus a register,
9008 the sum of two registers, or a register indirect, possibly with an
9009 auto-increment. For DFmode, DDmode and DImode with a constant plus
9010 register, we must ensure that both words are addressable or PowerPC64
9011 with offset word aligned.
9013 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9014 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9015 because adjacent memory cells are accessed by adding word-sized offsets
9016 during assembly output. */
9017 static bool
9018 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9020 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9021 bool quad_offset_p = mode_supports_dq_form (mode);
9023 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9024 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9025 && GET_CODE (x) == AND
9026 && CONST_INT_P (XEXP (x, 1))
9027 && INTVAL (XEXP (x, 1)) == -16)
9028 x = XEXP (x, 0);
9030 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9031 return 0;
9032 if (legitimate_indirect_address_p (x, reg_ok_strict))
9033 return 1;
9034 if (TARGET_UPDATE
9035 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9036 && mode_supports_pre_incdec_p (mode)
9037 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9038 return 1;
9040 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9041 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9042 return 1;
9044 /* Handle restricted vector d-form offsets in ISA 3.0. */
9045 if (quad_offset_p)
9047 if (quad_address_p (x, mode, reg_ok_strict))
9048 return 1;
9050 else if (virtual_stack_registers_memory_p (x))
9051 return 1;
9053 else if (reg_offset_p)
9055 if (legitimate_small_data_p (mode, x))
9056 return 1;
9057 if (legitimate_constant_pool_address_p (x, mode,
9058 reg_ok_strict || lra_in_progress))
9059 return 1;
9062 /* For TImode, if we have TImode in VSX registers, only allow register
9063 indirect addresses. This will allow the values to go in either GPRs
9064 or VSX registers without reloading. The vector types would tend to
9065 go into VSX registers, so we allow REG+REG, while TImode seems
9066 somewhat split, in that some uses are GPR based, and some VSX based. */
9067 /* FIXME: We could loosen this by changing the following to
9068 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9069 but currently we cannot allow REG+REG addressing for TImode. See
9070 PR72827 for complete details on how this ends up hoodwinking DSE. */
9071 if (mode == TImode && TARGET_VSX)
9072 return 0;
9073 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9074 if (! reg_ok_strict
9075 && reg_offset_p
9076 && GET_CODE (x) == PLUS
9077 && REG_P (XEXP (x, 0))
9078 && (XEXP (x, 0) == virtual_stack_vars_rtx
9079 || XEXP (x, 0) == arg_pointer_rtx)
9080 && CONST_INT_P (XEXP (x, 1)))
9081 return 1;
9082 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9083 return 1;
9084 if (!FLOAT128_2REG_P (mode)
9085 && (TARGET_HARD_FLOAT
9086 || TARGET_POWERPC64
9087 || (mode != DFmode && mode != DDmode))
9088 && (TARGET_POWERPC64 || mode != DImode)
9089 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9090 && mode != PTImode
9091 && !avoiding_indexed_address_p (mode)
9092 && legitimate_indexed_address_p (x, reg_ok_strict))
9093 return 1;
9094 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9095 && mode_supports_pre_modify_p (mode)
9096 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9097 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9098 reg_ok_strict, false)
9099 || (!avoiding_indexed_address_p (mode)
9100 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9101 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9103 /* There is no prefixed version of the load/store with update. */
9104 rtx addr = XEXP (x, 1);
9105 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9107 if (reg_offset_p && !quad_offset_p
9108 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9109 return 1;
9110 return 0;
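/* As a quick illustration (register numbers are arbitrary), forms
   accepted above include:
     (reg 9)                          register indirect
     (pre_inc (reg 9))                auto-increment, with -mupdate
     (plus (reg 9) (const_int 16))    reg + 16-bit signed offset
     (plus (reg 9) (reg 10))          indexed, when the mode allows it
   while something like (plus (reg 9) (const_int 0x48000)) is rejected
   unless prefixed (34-bit offset) addressing is available.  */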
9113 /* Debug version of rs6000_legitimate_address_p. */
9114 static bool
9115 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9116 bool reg_ok_strict)
9118 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9119 fprintf (stderr,
9120 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9121 "strict = %d, reload = %s, code = %s\n",
9122 ret ? "true" : "false",
9123 GET_MODE_NAME (mode),
9124 reg_ok_strict,
9125 (reload_completed ? "after" : "before"),
9126 GET_RTX_NAME (GET_CODE (x)));
9127 debug_rtx (x);
9129 return ret;
9132 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9134 static bool
9135 rs6000_mode_dependent_address_p (const_rtx addr,
9136 addr_space_t as ATTRIBUTE_UNUSED)
9138 return rs6000_mode_dependent_address_ptr (addr);
9141 /* Go to LABEL if ADDR (a legitimate address expression)
9142 has an effect that depends on the machine mode it is used for.
9144 On the RS/6000 this is true of all integral offsets (since AltiVec
9145 and VSX modes don't allow them) and of pre-increments and decrements.
9147 ??? Except that due to conceptual problems in offsettable_address_p
9148 we can't really report the problems of integral offsets. So leave
9149 this assuming that the adjustable offset must be valid for the
9150 sub-words of a TFmode operand, which is what we had before. */
9152 static bool
9153 rs6000_mode_dependent_address (const_rtx addr)
9155 switch (GET_CODE (addr))
9157 case PLUS:
9158 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9159 is considered a legitimate address before reload, so there
9160 are no offset restrictions in that case. Note that this
9161 condition is safe in strict mode because any address involving
9162 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9163 been rejected as illegitimate. */
9164 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9165 && XEXP (addr, 0) != arg_pointer_rtx
9166 && CONST_INT_P (XEXP (addr, 1)))
9168 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9169 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
9170 if (TARGET_PREFIXED)
9171 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
9172 else
9173 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
9175 break;
9177 case LO_SUM:
9178 /* Anything in the constant pool is sufficiently aligned that
9179 all bytes have the same high part address. */
9180 return !legitimate_constant_pool_address_p (addr, QImode, false);
9182 /* Auto-increment cases are now treated generically in recog.c. */
9183 case PRE_MODIFY:
9184 return TARGET_UPDATE;
9186 /* AND is only allowed in Altivec loads. */
9187 case AND:
9188 return true;
9190 default:
9191 break;
9194 return false;
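/* A sketch of the PLUS case above: on a 64-bit target EXTRA is 8, so
   (plus (reg) (const_int 32760)) is mode-dependent without prefixed
   addressing: 32760 is a valid 16-bit offset for the first word, but
   32760 + 8 no longer fits in the signed 16-bit range, so the last
   word of a 16-byte access could not be reached.  */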
9197 /* Debug version of rs6000_mode_dependent_address. */
9198 static bool
9199 rs6000_debug_mode_dependent_address (const_rtx addr)
9201 bool ret = rs6000_mode_dependent_address (addr);
9203 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9204 ret ? "true" : "false");
9205 debug_rtx (addr);
9207 return ret;
9210 /* Implement FIND_BASE_TERM. */
9213 rs6000_find_base_term (rtx op)
9215 rtx base;
9217 base = op;
9218 if (GET_CODE (base) == CONST)
9219 base = XEXP (base, 0);
9220 if (GET_CODE (base) == PLUS)
9221 base = XEXP (base, 0);
9222 if (GET_CODE (base) == UNSPEC)
9223 switch (XINT (base, 1))
9225 case UNSPEC_TOCREL:
9226 case UNSPEC_MACHOPIC_OFFSET:
9227 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9228 for aliasing purposes. */
9229 return XVECEXP (base, 0, 0);
9232 return op;
9235 /* More elaborate version of recog's offsettable_memref_p predicate
9236 that works around the ??? note of rs6000_mode_dependent_address.
9237 In particular it accepts
9239 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9241 in 32-bit mode, which the recog predicate rejects. */
9243 static bool
9244 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9246 bool worst_case;
9248 if (!MEM_P (op))
9249 return false;
9251 /* First mimic offsettable_memref_p. */
9252 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9253 return true;
9255 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9256 the latter predicate knows nothing about the mode of the memory
9257 reference and, therefore, assumes that it is the largest supported
9258 mode (TFmode). As a consequence, legitimate offsettable memory
9259 references are rejected. rs6000_legitimate_offset_address_p contains
9260 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9261 at least with a little bit of help here given that we know the
9262 actual registers used. */
9263 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9264 || GET_MODE_SIZE (reg_mode) == 4);
9265 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9266 strict, worst_case);
9269 /* Determine the reassociation width to be used in reassociate_bb.
9270 This takes into account how many parallel operations we
9271 can actually do of a given type, and also the latency.
9273 int add/sub 6/cycle
9274 mul 2/cycle
9275 vect add/sub/mul 2/cycle
9276 fp add/sub/mul 2/cycle
9277 dfp 1/cycle
9280 static int
9281 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9282 machine_mode mode)
9284 switch (rs6000_tune)
9286 case PROCESSOR_POWER8:
9287 case PROCESSOR_POWER9:
9288 case PROCESSOR_POWER10:
9289 if (DECIMAL_FLOAT_MODE_P (mode))
9290 return 1;
9291 if (VECTOR_MODE_P (mode))
9292 return 4;
9293 if (INTEGRAL_MODE_P (mode))
9294 return 1;
9295 if (FLOAT_MODE_P (mode))
9296 return 4;
9297 break;
9298 default:
9299 break;
9301 return 1;
9304 /* Change register usage conditional on target flags. */
9305 static void
9306 rs6000_conditional_register_usage (void)
9308 int i;
9310 if (TARGET_DEBUG_TARGET)
9311 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9313 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9314 if (TARGET_64BIT)
9315 fixed_regs[13] = call_used_regs[13] = 1;
9317 /* Conditionally disable FPRs. */
9318 if (TARGET_SOFT_FLOAT)
9319 for (i = 32; i < 64; i++)
9320 fixed_regs[i] = call_used_regs[i] = 1;
9322 /* The TOC register is not killed across calls in a way that is
9323 visible to the compiler. */
9324 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9325 call_used_regs[2] = 0;
9327 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9328 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9330 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9331 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9332 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9334 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9335 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9336 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9338 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9339 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9341 if (!TARGET_ALTIVEC && !TARGET_VSX)
9343 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9344 fixed_regs[i] = call_used_regs[i] = 1;
9345 call_used_regs[VRSAVE_REGNO] = 1;
9348 if (TARGET_ALTIVEC || TARGET_VSX)
9349 global_regs[VSCR_REGNO] = 1;
9351 if (TARGET_ALTIVEC_ABI)
9353 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9354 call_used_regs[i] = 1;
9356 /* AIX reserves VR20:31 in non-extended ABI mode. */
9357 if (TARGET_XCOFF)
9358 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9359 fixed_regs[i] = call_used_regs[i] = 1;
9364 /* Output insns to set DEST equal to the constant SOURCE as a series of
9365 lis, ori and shl instructions and return TRUE. */
9367 bool
9368 rs6000_emit_set_const (rtx dest, rtx source)
9370 machine_mode mode = GET_MODE (dest);
9371 rtx temp, set;
9372 rtx_insn *insn;
9373 HOST_WIDE_INT c;
9375 gcc_checking_assert (CONST_INT_P (source));
9376 c = INTVAL (source);
9377 switch (mode)
9379 case E_QImode:
9380 case E_HImode:
9381 emit_insn (gen_rtx_SET (dest, source));
9382 return true;
9384 case E_SImode:
9385 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9387 emit_insn (gen_rtx_SET (copy_rtx (temp),
9388 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9389 emit_insn (gen_rtx_SET (dest,
9390 gen_rtx_IOR (SImode, copy_rtx (temp),
9391 GEN_INT (c & 0xffff))));
9392 break;
9394 case E_DImode:
9395 if (!TARGET_POWERPC64)
9397 rtx hi, lo;
9399 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9400 DImode);
9401 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9402 DImode);
9403 emit_move_insn (hi, GEN_INT (c >> 32));
9404 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9405 emit_move_insn (lo, GEN_INT (c));
9407 else
9408 rs6000_emit_set_long_const (dest, c);
9409 break;
9411 default:
9412 gcc_unreachable ();
9415 insn = get_last_insn ();
9416 set = single_set (insn);
9417 if (! CONSTANT_P (SET_SRC (set)))
9418 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9420 return true;
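/* For example, for SImode C = 0x12345678 the two SETs above become
   (register numbers illustrative):
     lis 9,0x1234       # temp = 0x12340000
     ori 3,9,0x5678     # dest = temp | 0x5678
   QImode and HImode constants are always valid immediates and take
   the single-SET path.  */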
9423 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9424 Output insns to set DEST equal to the constant C as a series of
9425 lis, ori and shl instructions. */
9427 static void
9428 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9430 rtx temp;
9431 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9433 ud1 = c & 0xffff;
9434 c = c >> 16;
9435 ud2 = c & 0xffff;
9436 c = c >> 16;
9437 ud3 = c & 0xffff;
9438 c = c >> 16;
9439 ud4 = c & 0xffff;
9441 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9442 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9443 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9445 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9446 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9448 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9450 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9451 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9452 if (ud1 != 0)
9453 emit_move_insn (dest,
9454 gen_rtx_IOR (DImode, copy_rtx (temp),
9455 GEN_INT (ud1)));
9457 else if (ud3 == 0 && ud4 == 0)
9459 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9461 gcc_assert (ud2 & 0x8000);
9462 emit_move_insn (copy_rtx (temp),
9463 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9464 if (ud1 != 0)
9465 emit_move_insn (copy_rtx (temp),
9466 gen_rtx_IOR (DImode, copy_rtx (temp),
9467 GEN_INT (ud1)));
9468 emit_move_insn (dest,
9469 gen_rtx_ZERO_EXTEND (DImode,
9470 gen_lowpart (SImode,
9471 copy_rtx (temp))));
9473 else if (ud1 == ud3 && ud2 == ud4)
9475 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9476 HOST_WIDE_INT num = (ud2 << 16) | ud1;
9477 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
9478 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
9479 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
9480 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
9482 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9483 || (ud4 == 0 && ! (ud3 & 0x8000)))
9485 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9487 emit_move_insn (copy_rtx (temp),
9488 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9489 if (ud2 != 0)
9490 emit_move_insn (copy_rtx (temp),
9491 gen_rtx_IOR (DImode, copy_rtx (temp),
9492 GEN_INT (ud2)));
9493 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9494 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9495 GEN_INT (16)));
9496 if (ud1 != 0)
9497 emit_move_insn (dest,
9498 gen_rtx_IOR (DImode, copy_rtx (temp),
9499 GEN_INT (ud1)));
9501 else
9503 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9505 emit_move_insn (copy_rtx (temp),
9506 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9507 if (ud3 != 0)
9508 emit_move_insn (copy_rtx (temp),
9509 gen_rtx_IOR (DImode, copy_rtx (temp),
9510 GEN_INT (ud3)));
9512 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9513 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9514 GEN_INT (32)));
9515 if (ud2 != 0)
9516 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9517 gen_rtx_IOR (DImode, copy_rtx (temp),
9518 GEN_INT (ud2 << 16)));
9519 if (ud1 != 0)
9520 emit_move_insn (dest,
9521 gen_rtx_IOR (DImode, copy_rtx (temp),
9522 GEN_INT (ud1)));
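/* Worked example of the final case: C = 0x123456789abcdef0 splits
   into ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc, ud1 = 0xdef0 and is
   synthesized roughly as (register numbers illustrative):
     lis  9,0x1234       # ud4 << 16, sign-extended
     ori  9,9,0x5678     # | ud3
     sldi 9,9,32         # shift into the high half
     oris 9,9,0x9abc     # | (ud2 << 16)
     ori  3,9,0xdef0     # | ud1
   The earlier cases catch constants needing fewer instructions.  */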
9526 /* Helper for the following. Get rid of [r+r] memory refs
9527 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9529 static void
9530 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9532 if (MEM_P (operands[0])
9533 && !REG_P (XEXP (operands[0], 0))
9534 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9535 GET_MODE (operands[0]), false))
9536 operands[0]
9537 = replace_equiv_address (operands[0],
9538 copy_addr_to_reg (XEXP (operands[0], 0)));
9540 if (MEM_P (operands[1])
9541 && !REG_P (XEXP (operands[1], 0))
9542 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9543 GET_MODE (operands[1]), false))
9544 operands[1]
9545 = replace_equiv_address (operands[1],
9546 copy_addr_to_reg (XEXP (operands[1], 0)));
9549 /* Generate a vector of constants to permute MODE for a little-endian
9550 storage operation by swapping the two halves of a vector. */
9551 static rtvec
9552 rs6000_const_vec (machine_mode mode)
9554 int i, subparts;
9555 rtvec v;
9557 switch (mode)
9559 case E_V1TImode:
9560 subparts = 1;
9561 break;
9562 case E_V2DFmode:
9563 case E_V2DImode:
9564 subparts = 2;
9565 break;
9566 case E_V4SFmode:
9567 case E_V4SImode:
9568 subparts = 4;
9569 break;
9570 case E_V8HImode:
9571 subparts = 8;
9572 break;
9573 case E_V16QImode:
9574 subparts = 16;
9575 break;
9576 default:
9577 gcc_unreachable ();
9580 v = rtvec_alloc (subparts);
9582 for (i = 0; i < subparts / 2; ++i)
9583 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9584 for (i = subparts / 2; i < subparts; ++i)
9585 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9587 return v;
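/* E.g. for V4SImode this returns the permutation { 2, 3, 0, 1 },
   which swaps the two 64-bit halves of the vector.  */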
9590 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9591 store operation. */
9592 void
9593 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9595 /* Scalar permutations are easier to express in integer modes than in
9596 floating-point modes, so cast them here. We use V1TImode instead
9597 of TImode to ensure that the values don't go through GPRs. */
9598 if (FLOAT128_VECTOR_P (mode))
9600 dest = gen_lowpart (V1TImode, dest);
9601 source = gen_lowpart (V1TImode, source);
9602 mode = V1TImode;
9605 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9606 scalar. */
9607 if (mode == TImode || mode == V1TImode)
9608 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9609 GEN_INT (64))));
9610 else
9612 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9613 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9617 /* Emit a little-endian load from vector memory location SOURCE to VSX
9618 register DEST in mode MODE. The load is done with two permuting
9619 insns that represent an lxvd2x and an xxpermdi. */
9620 void
9621 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9623 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9624 V1TImode). */
9625 if (mode == TImode || mode == V1TImode)
9627 mode = V2DImode;
9628 dest = gen_lowpart (V2DImode, dest);
9629 source = adjust_address (source, V2DImode, 0);
9632 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9633 rs6000_emit_le_vsx_permute (tmp, source, mode);
9634 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9637 /* Emit a little-endian store to vector memory location DEST from VSX
9638 register SOURCE in mode MODE. The store is done with two permuting
9639 insns that represent an xxpermdi and an stxvd2x. */
9640 void
9641 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9643 /* This should never be called during or after LRA, because it does
9644 not re-permute the source register. It is intended only for use
9645 during expand. */
9646 gcc_assert (!lra_in_progress && !reload_completed);
9648 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9649 V1TImode). */
9650 if (mode == TImode || mode == V1TImode)
9652 mode = V2DImode;
9653 dest = adjust_address (dest, V2DImode, 0);
9654 source = gen_lowpart (V2DImode, source);
9657 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9658 rs6000_emit_le_vsx_permute (tmp, source, mode);
9659 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9662 /* Emit a sequence representing a little-endian VSX load or store,
9663 moving data from SOURCE to DEST in mode MODE. This is done
9664 separately from rs6000_emit_move to ensure it is called only
9665 during expand. LE VSX loads and stores introduced later are
9666 handled with a split. The expand-time RTL generation allows
9667 us to optimize away redundant pairs of register-permutes. */
9668 void
9669 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9671 gcc_assert (!BYTES_BIG_ENDIAN
9672 && VECTOR_MEM_VSX_P (mode)
9673 && !TARGET_P9_VECTOR
9674 && !gpr_or_gpr_p (dest, source)
9675 && (MEM_P (source) ^ MEM_P (dest)));
9677 if (MEM_P (source))
9679 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9680 rs6000_emit_le_vsx_load (dest, source, mode);
9682 else
9684 if (!REG_P (source))
9685 source = force_reg (mode, source);
9686 rs6000_emit_le_vsx_store (dest, source, mode);
9690 /* Return whether an SFmode or SImode move can be done without converting one
9691 mode to another. This arises when we have:
9693 (SUBREG:SF (REG:SI ...))
9694 (SUBREG:SI (REG:SF ...))
9696 and one of the values is in a floating point/vector register, where SFmode
9697 scalars are stored in DFmode format. */
9699 bool
9700 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9702 if (TARGET_ALLOW_SF_SUBREG)
9703 return true;
9705 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9706 return true;
9708 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9709 return true;
9711 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9712 if (SUBREG_P (dest))
9714 rtx dest_subreg = SUBREG_REG (dest);
9715 rtx src_subreg = SUBREG_REG (src);
9716 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9719 return false;
9723 /* Helper function to change moves with:
9725 (SUBREG:SF (REG:SI)) and
9726 (SUBREG:SI (REG:SF))
9728 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9729 values are stored as DFmode values in the VSX registers. We need to convert
9730 the bits before we can use a direct move or operate on the bits in the
9731 vector register as an integer type.
9733 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9735 static bool
9736 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9738 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9739 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9740 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9742 rtx inner_source = SUBREG_REG (source);
9743 machine_mode inner_mode = GET_MODE (inner_source);
9745 if (mode == SImode && inner_mode == SFmode)
9747 emit_insn (gen_movsi_from_sf (dest, inner_source));
9748 return true;
9751 if (mode == SFmode && inner_mode == SImode)
9753 emit_insn (gen_movsf_from_si (dest, inner_source));
9754 return true;
9758 return false;
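/* For instance, (set (reg:SI 3) (subreg:SI (reg:SF 33) 0)) is emitted
   as a movsi_from_sf insn, which converts the DFmode representation
   held in the VSX register back to SFmode bits before the direct
   move; the register numbers here are illustrative.  */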
9761 /* Emit a move from SOURCE to DEST in mode MODE. */
9762 void
9763 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9765 rtx operands[2];
9766 operands[0] = dest;
9767 operands[1] = source;
9769 if (TARGET_DEBUG_ADDR)
9771 fprintf (stderr,
9772 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9773 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9774 GET_MODE_NAME (mode),
9775 lra_in_progress,
9776 reload_completed,
9777 can_create_pseudo_p ());
9778 debug_rtx (dest);
9779 fprintf (stderr, "source:\n");
9780 debug_rtx (source);
9783 /* Check that we get CONST_WIDE_INT only when we should. */
9784 if (CONST_WIDE_INT_P (operands[1])
9785 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9786 gcc_unreachable ();
9788 #ifdef HAVE_AS_GNU_ATTRIBUTE
9789 /* If we use a long double type, set the flags in .gnu_attribute that say
9790 what the long double type is. This is to allow the linker's warning
9791 message for the wrong long double to be useful, even if the function does
9792 not do a call (for example, doing a 128-bit add on power9 if the long
9793 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
9794 are used when they aren't the default long double type. */
9795 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9797 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9798 rs6000_passes_float = rs6000_passes_long_double = true;
9800 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9801 rs6000_passes_float = rs6000_passes_long_double = true;
9803 #endif
9805 /* See if we need to special case SImode/SFmode SUBREG moves. */
9806 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9807 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9808 return;
9810 /* Check if GCC is setting up a block move that will end up using FP
9811 registers as temporaries. We must make sure this is acceptable. */
9812 if (MEM_P (operands[0])
9813 && MEM_P (operands[1])
9814 && mode == DImode
9815 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9816 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9817 && ! (rs6000_slow_unaligned_access (SImode,
9818 (MEM_ALIGN (operands[0]) > 32
9819 ? 32 : MEM_ALIGN (operands[0])))
9820 || rs6000_slow_unaligned_access (SImode,
9821 (MEM_ALIGN (operands[1]) > 32
9822 ? 32 : MEM_ALIGN (operands[1]))))
9823 && ! MEM_VOLATILE_P (operands [0])
9824 && ! MEM_VOLATILE_P (operands [1]))
9826 emit_move_insn (adjust_address (operands[0], SImode, 0),
9827 adjust_address (operands[1], SImode, 0));
9828 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9829 adjust_address (copy_rtx (operands[1]), SImode, 4));
9830 return;
9833 if (can_create_pseudo_p () && MEM_P (operands[0])
9834 && !gpc_reg_operand (operands[1], mode))
9835 operands[1] = force_reg (mode, operands[1]);
9837 /* Recognize the case where operand[1] is a reference to thread-local
9838 data and load its address to a register. */
9839 if (tls_referenced_p (operands[1]))
9841 enum tls_model model;
9842 rtx tmp = operands[1];
9843 rtx addend = NULL;
9845 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9847 addend = XEXP (XEXP (tmp, 0), 1);
9848 tmp = XEXP (XEXP (tmp, 0), 0);
9851 gcc_assert (SYMBOL_REF_P (tmp));
9852 model = SYMBOL_REF_TLS_MODEL (tmp);
9853 gcc_assert (model != 0);
9855 tmp = rs6000_legitimize_tls_address (tmp, model);
9856 if (addend)
9858 tmp = gen_rtx_PLUS (mode, tmp, addend);
9859 tmp = force_operand (tmp, operands[0]);
9861 operands[1] = tmp;
9864 /* 128-bit constant floating-point values on Darwin should really be loaded
9865 as two parts. However, this premature splitting is a problem when DFmode
9866 values can go into Altivec registers. */
9867 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9868 && !reg_addr[DFmode].scalar_in_vmx_p)
9870 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9871 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9872 DFmode);
9873 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9874 GET_MODE_SIZE (DFmode)),
9875 simplify_gen_subreg (DFmode, operands[1], mode,
9876 GET_MODE_SIZE (DFmode)),
9877 DFmode);
9878 return;
9881 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9882 p1:SD) if p1 is not of floating point class and p0 is spilled, as
9883 we can have no analogous movsd_store for this. */
9884 if (lra_in_progress && mode == DDmode
9885 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9886 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9887 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9888 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9890 enum reg_class cl;
9891 int regno = REGNO (SUBREG_REG (operands[1]));
9893 if (!HARD_REGISTER_NUM_P (regno))
9895 cl = reg_preferred_class (regno);
9896 regno = reg_renumber[regno];
9897 if (regno < 0)
9898 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9900 if (regno >= 0 && ! FP_REGNO_P (regno))
9902 mode = SDmode;
9903 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9904 operands[1] = SUBREG_REG (operands[1]);
9907 if (lra_in_progress
9908 && mode == SDmode
9909 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9910 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9911 && (REG_P (operands[1])
9912 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9914 int regno = reg_or_subregno (operands[1]);
9915 enum reg_class cl;
9917 if (!HARD_REGISTER_NUM_P (regno))
9919 cl = reg_preferred_class (regno);
9920 gcc_assert (cl != NO_REGS);
9921 regno = reg_renumber[regno];
9922 if (regno < 0)
9923 regno = ira_class_hard_regs[cl][0];
9925 if (FP_REGNO_P (regno))
9927 if (GET_MODE (operands[0]) != DDmode)
9928 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9929 emit_insn (gen_movsd_store (operands[0], operands[1]));
9931 else if (INT_REGNO_P (regno))
9932 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9933 else
9934 gcc_unreachable ();
9935 return;
9937 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9938 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
9939 we can have no analogous movsd_load for this. */
9940 if (lra_in_progress && mode == DDmode
9941 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9942 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9943 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9944 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9946 enum reg_class cl;
9947 int regno = REGNO (SUBREG_REG (operands[0]));
9949 if (!HARD_REGISTER_NUM_P (regno))
9951 cl = reg_preferred_class (regno);
9952 regno = reg_renumber[regno];
9953 if (regno < 0)
9954 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9956 if (regno >= 0 && ! FP_REGNO_P (regno))
9958 mode = SDmode;
9959 operands[0] = SUBREG_REG (operands[0]);
9960 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9963 if (lra_in_progress
9964 && mode == SDmode
9965 && (REG_P (operands[0])
9966 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9967 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9968 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9970 int regno = reg_or_subregno (operands[0]);
9971 enum reg_class cl;
9973 if (!HARD_REGISTER_NUM_P (regno))
9975 cl = reg_preferred_class (regno);
9976 gcc_assert (cl != NO_REGS);
9977 regno = reg_renumber[regno];
9978 if (regno < 0)
9979 regno = ira_class_hard_regs[cl][0];
9981 if (FP_REGNO_P (regno))
9983 if (GET_MODE (operands[1]) != DDmode)
9984 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9985 emit_insn (gen_movsd_load (operands[0], operands[1]));
9987 else if (INT_REGNO_P (regno))
9988 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9989 else
9990 gcc_unreachable ();
9991 return;
9994 /* FIXME: In the long term, this switch statement should go away
9995 and be replaced by a sequence of tests based on things like
9996 mode == Pmode. */
9997 switch (mode)
9999 case E_HImode:
10000 case E_QImode:
10001 if (CONSTANT_P (operands[1])
10002 && !CONST_INT_P (operands[1]))
10003 operands[1] = force_const_mem (mode, operands[1]);
10004 break;
10006 case E_TFmode:
10007 case E_TDmode:
10008 case E_IFmode:
10009 case E_KFmode:
10010 if (FLOAT128_2REG_P (mode))
10011 rs6000_eliminate_indexed_memrefs (operands);
10012 /* fall through */
10014 case E_DFmode:
10015 case E_DDmode:
10016 case E_SFmode:
10017 case E_SDmode:
10018 if (CONSTANT_P (operands[1])
10019 && ! easy_fp_constant (operands[1], mode))
10020 operands[1] = force_const_mem (mode, operands[1]);
10021 break;
10023 case E_V16QImode:
10024 case E_V8HImode:
10025 case E_V4SFmode:
10026 case E_V4SImode:
10027 case E_V2DFmode:
10028 case E_V2DImode:
10029 case E_V1TImode:
10030 if (CONSTANT_P (operands[1])
10031 && !easy_vector_constant (operands[1], mode))
10032 operands[1] = force_const_mem (mode, operands[1]);
10033 break;
10035 case E_POImode:
10036 case E_PXImode:
10037 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
10038 error ("%qs is an opaque type, and you cannot set it to other values",
10039 (mode == POImode) ? "__vector_pair" : "__vector_quad");
10040 break;
10042 case E_SImode:
10043 case E_DImode:
10044 /* Use the default pattern for the address of ELF small data. */
10045 if (TARGET_ELF
10046 && mode == Pmode
10047 && DEFAULT_ABI == ABI_V4
10048 && (SYMBOL_REF_P (operands[1])
10049 || GET_CODE (operands[1]) == CONST)
10050 && small_data_operand (operands[1], mode))
10052 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10053 return;
10056 /* Use the default pattern for loading up PC-relative addresses. */
10057 if (TARGET_PCREL && mode == Pmode
10058 && pcrel_local_or_external_address (operands[1], Pmode))
10060 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10061 return;
10064 if (DEFAULT_ABI == ABI_V4
10065 && mode == Pmode && mode == SImode
10066 && flag_pic == 1 && got_operand (operands[1], mode))
10068 emit_insn (gen_movsi_got (operands[0], operands[1]));
10069 return;
10072 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10073 && TARGET_NO_TOC_OR_PCREL
10074 && ! flag_pic
10075 && mode == Pmode
10076 && CONSTANT_P (operands[1])
10077 && GET_CODE (operands[1]) != HIGH
10078 && !CONST_INT_P (operands[1]))
10080 rtx target = (!can_create_pseudo_p ()
10081 ? operands[0]
10082 : gen_reg_rtx (mode));
10084 /* If this is a function address on -mcall-aixdesc,
10085 convert it to the address of the descriptor. */
10086 if (DEFAULT_ABI == ABI_AIX
10087 && SYMBOL_REF_P (operands[1])
10088 && XSTR (operands[1], 0)[0] == '.')
10090 const char *name = XSTR (operands[1], 0);
10091 rtx new_ref;
10092 while (*name == '.')
10093 name++;
10094 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10095 CONSTANT_POOL_ADDRESS_P (new_ref)
10096 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10097 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10098 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10099 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10100 operands[1] = new_ref;
10103 if (DEFAULT_ABI == ABI_DARWIN)
10105 #if TARGET_MACHO
10106 /* This is not PIC code, but could require the subset of
10107 indirections used by -mdynamic-no-pic. */
10108 if (MACHO_DYNAMIC_NO_PIC_P)
10110 /* Take care of any required data indirection. */
10111 operands[1] = rs6000_machopic_legitimize_pic_address (
10112 operands[1], mode, operands[0]);
10113 if (operands[0] != operands[1])
10114 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10115 return;
10117 #endif
10118 emit_insn (gen_macho_high (Pmode, target, operands[1]));
10119 emit_insn (gen_macho_low (Pmode, operands[0],
10120 target, operands[1]));
10121 return;
10124 emit_insn (gen_elf_high (target, operands[1]));
10125 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10126 return;
10129 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10130 and we have put it in the TOC, we just need to make a TOC-relative
10131 reference to it. */
10132 if (TARGET_TOC
10133 && SYMBOL_REF_P (operands[1])
10134 && use_toc_relative_ref (operands[1], mode))
10135 operands[1] = create_TOC_reference (operands[1], operands[0]);
10136 else if (mode == Pmode
10137 && CONSTANT_P (operands[1])
10138 && GET_CODE (operands[1]) != HIGH
10139 && ((REG_P (operands[0])
10140 && FP_REGNO_P (REGNO (operands[0])))
10141 || !CONST_INT_P (operands[1])
10142 || (num_insns_constant (operands[1], mode)
10143 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10144 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10145 && (TARGET_CMODEL == CMODEL_SMALL
10146 || can_create_pseudo_p ()
10147 || (REG_P (operands[0])
10148 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10151 #if TARGET_MACHO
10152 /* Darwin uses a special PIC legitimizer. */
10153 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10155 operands[1] =
10156 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10157 operands[0]);
10158 if (operands[0] != operands[1])
10159 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10160 return;
10162 #endif
10164 /* If we are to limit the number of things we put in the TOC and
10165 this is a symbol plus a constant we can add in one insn,
10166 just put the symbol in the TOC and add the constant. */
10167 if (GET_CODE (operands[1]) == CONST
10168 && TARGET_NO_SUM_IN_TOC
10169 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10170 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10171 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10172 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
10173 && ! side_effects_p (operands[0]))
10175 rtx sym =
10176 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10177 rtx other = XEXP (XEXP (operands[1], 0), 1);
10179 sym = force_reg (mode, sym);
10180 emit_insn (gen_add3_insn (operands[0], sym, other));
10181 return;
10184 operands[1] = force_const_mem (mode, operands[1]);
10186 if (TARGET_TOC
10187 && SYMBOL_REF_P (XEXP (operands[1], 0))
10188 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10190 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10191 operands[0]);
10192 operands[1] = gen_const_mem (mode, tocref);
10193 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10196 break;
10198 case E_TImode:
10199 if (!VECTOR_MEM_VSX_P (TImode))
10200 rs6000_eliminate_indexed_memrefs (operands);
10201 break;
10203 case E_PTImode:
10204 rs6000_eliminate_indexed_memrefs (operands);
10205 break;
10207 default:
10208 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10211 /* Above, we may have called force_const_mem which may have returned
10212 an invalid address. If we can, fix this up; otherwise, reload will
10213 have to deal with it. */
10214 if (MEM_P (operands[1]))
10215 operands[1] = validize_mem (operands[1]);
10217 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10221 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
10222 static void
10223 init_float128_ibm (machine_mode mode)
10225 if (!TARGET_XL_COMPAT)
10227 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
10228 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
10229 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
10230 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
10232 if (!TARGET_HARD_FLOAT)
10234 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
10235 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
10236 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
10237 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
10238 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
10239 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
10240 set_optab_libfunc (le_optab, mode, "__gcc_qle");
10241 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
10243 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
10244 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
10245 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
10246 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
10247 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
10248 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
10249 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
10250 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10253 else
10255 set_optab_libfunc (add_optab, mode, "_xlqadd");
10256 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10257 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10258 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10261 /* Add various conversions for IFmode to use the traditional TFmode
10262 names. */
10263 if (mode == IFmode)
10265 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10266 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10267 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10268 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10269 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10270 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10272 if (TARGET_POWERPC64)
10274 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10275 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10276 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10277 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10282 /* Create a decl for either complex long double multiply or complex long double
10283 divide when long double is IEEE 128-bit floating point. We can't use
10284 __multc3 and __divtc3 because the original long double using IBM extended
10285 double used those names. The complex multiply/divide functions are encoded
10286 as builtin functions with a complex result and 4 scalar inputs. */
10288 static void
10289 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10291 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10292 name, NULL_TREE);
10294 set_builtin_decl (fncode, fndecl, true);
10296 if (TARGET_DEBUG_BUILTIN)
10297 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10299 return;
10302 /* Set up IEEE 128-bit floating point routines. Use different names if the
10303 arguments can be passed in a vector register. The historical PowerPC
10304 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10305 continue to use that if we aren't using vector registers to pass IEEE
10306 128-bit floating point. */
10308 static void
10309 init_float128_ieee (machine_mode mode)
10311 if (FLOAT128_VECTOR_P (mode))
10313 static bool complex_muldiv_init_p = false;
10315 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10316 we have clone or target attributes, this will be called a second
10317 time. We want to create the built-in function only once. */
10318 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10320 complex_muldiv_init_p = true;
10321 built_in_function fncode_mul =
10322 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10323 - MIN_MODE_COMPLEX_FLOAT);
10324 built_in_function fncode_div =
10325 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10326 - MIN_MODE_COMPLEX_FLOAT);
10328 tree fntype = build_function_type_list (complex_long_double_type_node,
10329 long_double_type_node,
10330 long_double_type_node,
10331 long_double_type_node,
10332 long_double_type_node,
10333 NULL_TREE);
10335 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10336 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10339 set_optab_libfunc (add_optab, mode, "__addkf3");
10340 set_optab_libfunc (sub_optab, mode, "__subkf3");
10341 set_optab_libfunc (neg_optab, mode, "__negkf2");
10342 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10343 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10344 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10345 set_optab_libfunc (abs_optab, mode, "__abskf2");
10346 set_optab_libfunc (powi_optab, mode, "__powikf2");
10348 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10349 set_optab_libfunc (ne_optab, mode, "__nekf2");
10350 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10351 set_optab_libfunc (ge_optab, mode, "__gekf2");
10352 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10353 set_optab_libfunc (le_optab, mode, "__lekf2");
10354 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10356 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10357 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10358 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10359 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10361 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10362 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10363 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10365 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10366 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10367 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10369 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10370 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10371 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10372 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10373 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10374 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10376 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10377 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10378 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10379 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10381 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10382 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10383 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10384 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10386 if (TARGET_POWERPC64)
10388 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10389 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10390 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10391 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10395 else
10397 set_optab_libfunc (add_optab, mode, "_q_add");
10398 set_optab_libfunc (sub_optab, mode, "_q_sub");
10399 set_optab_libfunc (neg_optab, mode, "_q_neg");
10400 set_optab_libfunc (smul_optab, mode, "_q_mul");
10401 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10402 if (TARGET_PPC_GPOPT)
10403 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10405 set_optab_libfunc (eq_optab, mode, "_q_feq");
10406 set_optab_libfunc (ne_optab, mode, "_q_fne");
10407 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10408 set_optab_libfunc (ge_optab, mode, "_q_fge");
10409 set_optab_libfunc (lt_optab, mode, "_q_flt");
10410 set_optab_libfunc (le_optab, mode, "_q_fle");
10412 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10413 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10414 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10415 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10416 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10417 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10418 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10419 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10423 static void
10424 rs6000_init_libfuncs (void)
10426 /* __float128 support. */
10427 if (TARGET_FLOAT128_TYPE)
10429 init_float128_ibm (IFmode);
10430 init_float128_ieee (KFmode);
10433 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10434 if (TARGET_LONG_DOUBLE_128)
10436 if (!TARGET_IEEEQUAD)
10437 init_float128_ibm (TFmode);
10439 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10440 else
10441 init_float128_ieee (TFmode);
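/* For example, with -mabi=ieeelongdouble (TFmode is IEEE 128-bit) a
   long double addition is emitted as a call to __addkf3 via the
   optabs set up in init_float128_ieee, while the default IBM long
   double uses __gcc_qadd from init_float128_ibm (absent -mxl-compat,
   which uses the _xlq* names instead).  */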
10445 /* Emit a potentially record-form instruction, setting DST from SRC.
10446 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10447 signed comparison of DST with zero. If DOT is 1, the generated RTL
10448 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10449 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10450 a separate COMPARE. */
10452 void
10453 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10455 if (dot == 0)
10457 emit_move_insn (dst, src);
10458 return;
10461 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10463 emit_move_insn (dst, src);
10464 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10465 return;
10468 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10469 if (dot == 1)
10471 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10472 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10474 else
10476 rtx set = gen_rtx_SET (dst, src);
10477 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
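/* E.g. DOT = 1 with CCREG = CR0 emits the PARALLEL
     [(set cr0 (compare:CC src 0)) (clobber dst)]
   which matches record-form patterns such as "add. dst,a,b"; any
   other CC register falls back to the move plus separate compare
   above.  */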
10482 /* A validation routine: say whether CODE, a condition code, and MODE
10483 match. The other alternatives either don't make sense or should
10484 never be generated. */
10486 void
10487 validate_condition_mode (enum rtx_code code, machine_mode mode)
10489 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10490 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10491 && GET_MODE_CLASS (mode) == MODE_CC);
10493 /* These don't make sense. */
10494 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10495 || mode != CCUNSmode);
10497 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10498 || mode == CCUNSmode);
10500 gcc_assert (mode == CCFPmode
10501 || (code != ORDERED && code != UNORDERED
10502 && code != UNEQ && code != LTGT
10503 && code != UNGT && code != UNLT
10504 && code != UNGE && code != UNLE));
10506 /* These are invalid; the information is not there. */
10507 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10511 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10512 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10513 not zero, store there the bit offset (counted from the right) where
10514 the single stretch of 1 bits begins; and similarly for B, the bit
10515 offset where it ends. */
10517 bool
10518 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10520 unsigned HOST_WIDE_INT val = INTVAL (mask);
10521 unsigned HOST_WIDE_INT bit;
10522 int nb, ne;
10523 int n = GET_MODE_PRECISION (mode);
10525 if (mode != DImode && mode != SImode)
10526 return false;
10528 if (INTVAL (mask) >= 0)
10530 bit = val & -val;
10531 ne = exact_log2 (bit);
10532 nb = exact_log2 (val + bit);
10534 else if (val + 1 == 0)
10536 nb = n;
10537 ne = 0;
10539 else if (val & 1)
10541 val = ~val;
10542 bit = val & -val;
10543 nb = exact_log2 (bit);
10544 ne = exact_log2 (val + bit);
10546 else
10548 bit = val & -val;
10549 ne = exact_log2 (bit);
10550 if (val + bit == 0)
10551 nb = n;
10552 else
10553 nb = 0;
10556 nb--;
10558 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10559 return false;
10561 if (b)
10562 *b = nb;
10563 if (e)
10564 *e = ne;
10566 return true;
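/* Worked example: for SImode MASK = 0x00fff000 the lowest set bit
   gives ne = 12, and val + bit = 0x01000000 gives nb = 24 - 1 = 23,
   so the stretch of ones runs from bit 12 up to bit 23 (counting
   from the right).  Wrap-around masks such as 0xff0000ff (b = 7,
   e = 24) are also accepted here; whether a particular instruction
   can use them is left to the callers below.  */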
10569 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10570 or rldicr instruction, to implement an AND with it in mode MODE. */
10572 bool
10573 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10575 int nb, ne;
10577 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10578 return false;
10580 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10581 does not wrap. */
10582 if (mode == DImode)
10583 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10585 /* For SImode, rlwinm can do everything. */
10586 if (mode == SImode)
10587 return (nb < 32 && ne < 32);
10589 return false;
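/* E.g. in DImode, 0xffffffff00000000 (nb = 63, ne = 32) maps to
   rldicr, 0x00000000ffffffff (nb = 31, ne = 0) to rldicl, and
   0x0000000000ffff00 (nb = 23, ne = 8) to rlwinm, while a mask like
   0x0000ffff00000000 fails all three conditions and needs the
   two-instruction path further below.  */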
10592 /* Return the instruction template for an AND with mask in mode MODE, with
10593 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10595 const char *
10596 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10598 int nb, ne;
10600 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10601 gcc_unreachable ();
10603 if (mode == DImode && ne == 0)
10605 operands[3] = GEN_INT (63 - nb);
10606 if (dot)
10607 return "rldicl. %0,%1,0,%3";
10608 return "rldicl %0,%1,0,%3";
10611 if (mode == DImode && nb == 63)
10613 operands[3] = GEN_INT (63 - ne);
10614 if (dot)
10615 return "rldicr. %0,%1,0,%3";
10616 return "rldicr %0,%1,0,%3";
10619 if (nb < 32 && ne < 32)
10621 operands[3] = GEN_INT (31 - nb);
10622 operands[4] = GEN_INT (31 - ne);
10623 if (dot)
10624 return "rlwinm. %0,%1,0,%3,%4";
10625 return "rlwinm %0,%1,0,%3,%4";
10628 gcc_unreachable ();
10631 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10632 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10633 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10635 bool
10636 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10638 int nb, ne;
10640 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10641 return false;
10643 int n = GET_MODE_PRECISION (mode);
10644 int sh = -1;
10646 if (CONST_INT_P (XEXP (shift, 1)))
10648 sh = INTVAL (XEXP (shift, 1));
10649 if (sh < 0 || sh >= n)
10650 return false;
10653 rtx_code code = GET_CODE (shift);
10655 /* Convert any shift by 0 to a rotate, to simplify below code. */
10656 if (sh == 0)
10657 code = ROTATE;
10659 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10660 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10661 code = ASHIFT;
10662 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10664 code = LSHIFTRT;
10665 sh = n - sh;
10668 /* DImode rotates need rld*. */
10669 if (mode == DImode && code == ROTATE)
10670 return (nb == 63 || ne == 0 || ne == sh);
10672 /* SImode rotates need rlw*. */
10673 if (mode == SImode && code == ROTATE)
10674 return (nb < 32 && ne < 32 && sh < 32);
10676 /* Wrap-around masks are only okay for rotates. */
10677 if (ne > nb)
10678 return false;
10680 /* Variable shifts are only okay for rotates. */
10681 if (sh < 0)
10682 return false;
10684 /* Don't allow ASHIFT if the mask is wrong for that. */
10685 if (code == ASHIFT && ne < sh)
10686 return false;
10688 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10689 if the mask is wrong for that. */
10690 if (nb < 32 && ne < 32 && sh < 32
10691 && !(code == LSHIFTRT && nb >= 32 - sh))
10692 return true;
10694 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10695 if the mask is wrong for that. */
10696 if (code == LSHIFTRT)
10697 sh = 64 - sh;
10698 if (nb == 63 || ne == 0 || ne == sh)
10699 return !(code == LSHIFTRT && nb >= sh);
10701 return false;
10704 /* Return the instruction template for a shift with mask in mode MODE, with
10705 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10707 const char *
10708 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10710 int nb, ne;
10712 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10713 gcc_unreachable ();
10715 if (mode == DImode && ne == 0)
10717 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10718 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10719 operands[3] = GEN_INT (63 - nb);
10720 if (dot)
10721 return "rld%I2cl. %0,%1,%2,%3";
10722 return "rld%I2cl %0,%1,%2,%3";
10725 if (mode == DImode && nb == 63)
10727 operands[3] = GEN_INT (63 - ne);
10728 if (dot)
10729 return "rld%I2cr. %0,%1,%2,%3";
10730 return "rld%I2cr %0,%1,%2,%3";
10733 if (mode == DImode
10734 && GET_CODE (operands[4]) != LSHIFTRT
10735 && CONST_INT_P (operands[2])
10736 && ne == INTVAL (operands[2]))
10738 operands[3] = GEN_INT (63 - nb);
10739 if (dot)
10740 return "rld%I2c. %0,%1,%2,%3";
10741 return "rld%I2c %0,%1,%2,%3";
10744 if (nb < 32 && ne < 32)
10746 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10747 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10748 operands[3] = GEN_INT (31 - nb);
10749 operands[4] = GEN_INT (31 - ne);
10750 /* This insn can also be a 64-bit rotate with mask that really makes
10751 it just a shift right (with mask); the %h below is to adjust for
10752 that situation (shift count is >= 32 in that case). */
10753 if (dot)
10754 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10755 return "rlw%I2nm %0,%1,%h2,%3,%4";
10758 gcc_unreachable ();
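/* For instance, (and:SI (ashift:SI x 8) 0x00ffff00), i.e. nb = 23,
   ne = 8 and a shift count of 8, lands in the last case above and
   produces "rlwinm %0,%1,8,8,23": rotate left by 8 and keep
   (big-endian) mask bits 8..23.  */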
10761 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10762 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10763 ASHIFT, or LSHIFTRT) in mode MODE. */
10765 bool
10766 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10768 int nb, ne;
10770 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10771 return false;
10773 int n = GET_MODE_PRECISION (mode);
10775 int sh = INTVAL (XEXP (shift, 1));
10776 if (sh < 0 || sh >= n)
10777 return false;
10779 rtx_code code = GET_CODE (shift);
10781 /* Convert any shift by 0 to a rotate, to simplify below code. */
10782 if (sh == 0)
10783 code = ROTATE;
10785 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10786 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10787 code = ASHIFT;
10788 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10790 code = LSHIFTRT;
10791 sh = n - sh;
10794 /* DImode rotates need rldimi. */
10795 if (mode == DImode && code == ROTATE)
10796 return (ne == sh);
10798 /* SImode rotates need rlwimi. */
10799 if (mode == SImode && code == ROTATE)
10800 return (nb < 32 && ne < 32 && sh < 32);
10802 /* Wrap-around masks are only okay for rotates. */
10803 if (ne > nb)
10804 return false;
10806 /* Don't allow ASHIFT if the mask is wrong for that. */
10807 if (code == ASHIFT && ne < sh)
10808 return false;
10810 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10811 if the mask is wrong for that. */
10812 if (nb < 32 && ne < 32 && sh < 32
10813 && !(code == LSHIFTRT && nb >= 32 - sh))
10814 return true;
10816 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10817 if the mask is wrong for that. */
10818 if (code == LSHIFTRT)
10819 sh = 64 - sh;
10820 if (ne == sh)
10821 return !(code == LSHIFTRT && nb >= sh);
10823 return false;
10826 /* Return the instruction template for an insert with mask in mode MODE, with
10827 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10829 const char *
10830 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10832 int nb, ne;
10834 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10835 gcc_unreachable ();
10837 /* Prefer rldimi because rlwimi is cracked. */
10838 if (TARGET_POWERPC64
10839 && (!dot || mode == DImode)
10840 && GET_CODE (operands[4]) != LSHIFTRT
10841 && ne == INTVAL (operands[2]))
10843 operands[3] = GEN_INT (63 - nb);
10844 if (dot)
10845 return "rldimi. %0,%1,%2,%3";
10846 return "rldimi %0,%1,%2,%3";
10849 if (nb < 32 && ne < 32)
10851 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10852 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10853 operands[3] = GEN_INT (31 - nb);
10854 operands[4] = GEN_INT (31 - ne);
10855 if (dot)
10856 return "rlwimi. %0,%1,%2,%3,%4";
10857 return "rlwimi %0,%1,%2,%3,%4";
10860 gcc_unreachable ();
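/* As an example, inserting the low 16 bits of %1 at bit 16 of %0 uses
   MASK = 0xffff0000 (nb = 31, ne = 16) and a rotate by 16; the SImode
   case above then yields "rlwimi %0,%1,16,0,15" (on 64-bit targets
   the rldimi form is preferred instead, as noted above).  */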
10863 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10864 using two machine instructions. */
10866 bool
10867 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10869 /* There are two kinds of AND we can handle with two insns:
10870 1) those we can do with two rl* insns;
10871 2) ori[s];xori[s].
10873 We do not handle that last case yet. */
10875 /* If there is just one stretch of ones, we can do it. */
10876 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10877 return true;
10879 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10880 one insn, we can do the whole thing with two. */
10881 unsigned HOST_WIDE_INT val = INTVAL (c);
10882 unsigned HOST_WIDE_INT bit1 = val & -val;
10883 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10884 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10885 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10886 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
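/* Worked example: VAL = 0x00ff00ff has two stretches of ones with a
   hole at bits 8..15.  Here bit1 = 0x1, bit2 = 0x100 (bottom of the
   hole), bit3 = 0x10000 (first bit above the hole), so the tested
   value is VAL + bit3 - bit2 = 0x00ffffff: the hole is filled in,
   leaving a single stretch, and the AND can be done with two
   rl*-style masks (see rs6000_emit_2insn_and below).  */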
10889 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10890 If EXPAND is true, split rotate-and-mask instructions we generate to
10891 their constituent parts as well (this is used during expand); if DOT
10892 is 1, make the last insn a record-form instruction clobbering the
10893 destination GPR and setting the CC reg (from operands[3]); if 2, set
10894 that GPR as well as the CC reg. */
10896 void
10897 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10899 gcc_assert (!(expand && dot));
10901 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10903 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10904 shift right. This generates better code than doing the masks without
10905 shifts, or shifting first right and then left. */
10906 int nb, ne;
10907 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10909 gcc_assert (mode == DImode);
10911 int shift = 63 - nb;
10912 if (expand)
10914 rtx tmp1 = gen_reg_rtx (DImode);
10915 rtx tmp2 = gen_reg_rtx (DImode);
10916 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10917 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10918 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10920 else
10922 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10923 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10924 emit_move_insn (operands[0], tmp);
10925 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10926 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10928 return;
10931 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10932 that does the rest. */
10933 unsigned HOST_WIDE_INT bit1 = val & -val;
10934 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10935 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10936 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10938 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10939 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10941 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10943 /* Two "no-rotate"-and-mask instructions, for SImode. */
10944 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10946 gcc_assert (mode == SImode);
10948 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10949 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10950 emit_move_insn (reg, tmp);
10951 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10952 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10953 return;
10956 gcc_assert (mode == DImode);
10958 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10959 insns; we have to do the first in SImode, because it wraps. */
10960 if (mask2 <= 0xffffffff
10961 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10963 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10964 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10965 GEN_INT (mask1));
10966 rtx reg_low = gen_lowpart (SImode, reg);
10967 emit_move_insn (reg_low, tmp);
10968 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10969 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10970 return;
10973 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10974 at the top end), rotate back and clear the other hole. */
10975 int right = exact_log2 (bit3);
10976 int left = 64 - right;
10978 /* Rotate the mask too. */
10979 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10981 if (expand)
10983 rtx tmp1 = gen_reg_rtx (DImode);
10984 rtx tmp2 = gen_reg_rtx (DImode);
10985 rtx tmp3 = gen_reg_rtx (DImode);
10986 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10987 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10988 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10989 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10991 else
10993 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10994 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10995 emit_move_insn (operands[0], tmp);
10996 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10997 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10998 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
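/* Worked example (editorial addition): continuing val = 0xe7 from above,
   mask2 = val + bit3 - bit2 = 0xff and mask1 = -bit3 + bit2 - 1 = ~0x18,
   i.e. all ones except the hole at bits 3..4.  Since (mask1 & mask2) == val,
   ANDing with mask1 and then with mask2 reproduces the original AND, and
   the three paths above pick whichever pair of rlwinm/rld* insns can
   realize those two masks.  */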
11002 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
11003 for lfq and stfq insns iff the registers are hard registers. */
11005 int
11006 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11008 /* We might have been passed a SUBREG. */
11009 if (!REG_P (reg1) || !REG_P (reg2))
11010 return 0;
11012 /* We might have been passed non-floating-point registers. */
11013 if (!FP_REGNO_P (REGNO (reg1))
11014 || !FP_REGNO_P (REGNO (reg2)))
11015 return 0;
11017 return (REGNO (reg1) == REGNO (reg2) - 1);
11020 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11021 addr1 and addr2 must be in consecutive memory locations
11022 (addr2 == addr1 + 8). */
11024 int
11025 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11027 rtx addr1, addr2;
11028 unsigned int reg1, reg2;
11029 int offset1, offset2;
11031 /* The mems cannot be volatile. */
11032 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11033 return 0;
11035 addr1 = XEXP (mem1, 0);
11036 addr2 = XEXP (mem2, 0);
11038 /* Extract an offset (if used) from the first addr. */
11039 if (GET_CODE (addr1) == PLUS)
11041 /* If not a REG, return zero. */
11042 if (!REG_P (XEXP (addr1, 0)))
11043 return 0;
11044 else
11046 reg1 = REGNO (XEXP (addr1, 0));
11047 /* The offset must be constant! */
11048 if (!CONST_INT_P (XEXP (addr1, 1)))
11049 return 0;
11050 offset1 = INTVAL (XEXP (addr1, 1));
11053 else if (!REG_P (addr1))
11054 return 0;
11055 else
11057 reg1 = REGNO (addr1);
11058 /* This was a simple (mem (reg)) expression. Offset is 0. */
11059 offset1 = 0;
11062 /* And now for the second addr. */
11063 if (GET_CODE (addr2) == PLUS)
11065 /* If not a REG, return zero. */
11066 if (!REG_P (XEXP (addr2, 0)))
11067 return 0;
11068 else
11070 reg2 = REGNO (XEXP (addr2, 0));
11071 /* The offset must be constant. */
11072 if (!CONST_INT_P (XEXP (addr2, 1)))
11073 return 0;
11074 offset2 = INTVAL (XEXP (addr2, 1));
11077 else if (!REG_P (addr2))
11078 return 0;
11079 else
11081 reg2 = REGNO (addr2);
11082 /* This was a simple (mem (reg)) expression. Offset is 0. */
11083 offset2 = 0;
11086 /* Both of these must have the same base register. */
11087 if (reg1 != reg2)
11088 return 0;
11090 /* The offset for the second addr must be 8 more than the first addr. */
11091 if (offset2 != offset1 + 8)
11092 return 0;
11094 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11095 instructions. */
11096 return 1;
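/* Illustrative example (editorial addition): the pair
   (mem:DF (plus (reg r9) (const_int 16))) and
   (mem:DF (plus (reg r9) (const_int 24))) passes -- same base register and
   the second offset exactly 8 larger -- so together with a consecutive FPR
   pair the peephole may fuse the two accesses into one lfq or stfq.  */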
11099 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
11100 need to use DDmode, in all other cases we can use the same mode. */
11101 static machine_mode
11102 rs6000_secondary_memory_needed_mode (machine_mode mode)
11104 if (lra_in_progress && mode == SDmode)
11105 return DDmode;
11106 return mode;
11109 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11110 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11111 only work on the traditional altivec registers, note if an altivec register
11112 was chosen. */
11114 static enum rs6000_reg_type
11115 register_to_reg_type (rtx reg, bool *is_altivec)
11117 HOST_WIDE_INT regno;
11118 enum reg_class rclass;
11120 if (SUBREG_P (reg))
11121 reg = SUBREG_REG (reg);
11123 if (!REG_P (reg))
11124 return NO_REG_TYPE;
11126 regno = REGNO (reg);
11127 if (!HARD_REGISTER_NUM_P (regno))
11129 if (!lra_in_progress && !reload_completed)
11130 return PSEUDO_REG_TYPE;
11132 regno = true_regnum (reg);
11133 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
11134 return PSEUDO_REG_TYPE;
11137 gcc_assert (regno >= 0);
11139 if (is_altivec && ALTIVEC_REGNO_P (regno))
11140 *is_altivec = true;
11142 rclass = rs6000_regno_regclass[regno];
11143 return reg_class_to_reg_type[(int)rclass];
11146 /* Helper function to return the cost of adding a TOC entry address. */
11148 static inline int
11149 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
11151 int ret;
11153 if (TARGET_CMODEL != CMODEL_SMALL)
11154 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
11156 else
11157 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
11159 return ret;
11162 /* Helper function for rs6000_secondary_reload to determine whether the memory
11163 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
11164 needs reloading. Return negative if the memory is not handled by the memory
11165 helper functions and a different reload method should be tried, 0 if no
11166 additional instructions are needed, and positive to give the extra cost of
11167 the memory access. */
11169 static int
11170 rs6000_secondary_reload_memory (rtx addr,
11171 enum reg_class rclass,
11172 machine_mode mode)
11174 int extra_cost = 0;
11175 rtx reg, and_arg, plus_arg0, plus_arg1;
11176 addr_mask_type addr_mask;
11177 const char *type = NULL;
11178 const char *fail_msg = NULL;
11180 if (GPR_REG_CLASS_P (rclass))
11181 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11183 else if (rclass == FLOAT_REGS)
11184 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11186 else if (rclass == ALTIVEC_REGS)
11187 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11189 /* For the combined VSX_REGS, turn off Altivec AND -16. */
11190 else if (rclass == VSX_REGS)
11191 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
11192 & ~RELOAD_REG_AND_M16);
11194 /* If the register allocator hasn't made up its mind yet on the register
11195 class to use, settle on defaults. */
11196 else if (rclass == NO_REGS)
11198 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
11199 & ~RELOAD_REG_AND_M16);
11201 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
11202 addr_mask &= ~(RELOAD_REG_INDEXED
11203 | RELOAD_REG_PRE_INCDEC
11204 | RELOAD_REG_PRE_MODIFY);
11207 else
11208 addr_mask = 0;
11210 /* If the register isn't valid in this register class, just return now. */
11211 if ((addr_mask & RELOAD_REG_VALID) == 0)
11213 if (TARGET_DEBUG_ADDR)
11215 fprintf (stderr,
11216 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11217 "not valid in class\n",
11218 GET_MODE_NAME (mode), reg_class_names[rclass]);
11219 debug_rtx (addr);
11222 return -1;
11225 switch (GET_CODE (addr))
11227 /* Does the register class support auto update forms for this mode? We
11228 don't need a scratch register, since the powerpc only supports
11229 PRE_INC, PRE_DEC, and PRE_MODIFY. */
11230 case PRE_INC:
11231 case PRE_DEC:
11232 reg = XEXP (addr, 0);
11233 if (!base_reg_operand (addr, GET_MODE (reg)))
11235 fail_msg = "no base register #1";
11236 extra_cost = -1;
11239 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11241 extra_cost = 1;
11242 type = "update";
11244 break;
11246 case PRE_MODIFY:
11247 reg = XEXP (addr, 0);
11248 plus_arg1 = XEXP (addr, 1);
11249 if (!base_reg_operand (reg, GET_MODE (reg))
11250 || GET_CODE (plus_arg1) != PLUS
11251 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11253 fail_msg = "bad PRE_MODIFY";
11254 extra_cost = -1;
11257 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11259 extra_cost = 1;
11260 type = "update";
11262 break;
11264 /* Do we need to simulate AND -16 to clear the bottom address bits used
11265 in VMX load/stores? Only allow the AND for vector sizes. */
11266 case AND:
11267 and_arg = XEXP (addr, 0);
11268 if (GET_MODE_SIZE (mode) != 16
11269 || !CONST_INT_P (XEXP (addr, 1))
11270 || INTVAL (XEXP (addr, 1)) != -16)
11272 fail_msg = "bad Altivec AND #1";
11273 extra_cost = -1;
11276 if (rclass != ALTIVEC_REGS)
11278 if (legitimate_indirect_address_p (and_arg, false))
11279 extra_cost = 1;
11281 else if (legitimate_indexed_address_p (and_arg, false))
11282 extra_cost = 2;
11284 else
11286 fail_msg = "bad Altivec AND #2";
11287 extra_cost = -1;
11290 type = "and";
11292 break;
11294 /* If this is an indirect address, make sure it is a base register. */
11295 case REG:
11296 case SUBREG:
11297 if (!legitimate_indirect_address_p (addr, false))
11299 extra_cost = 1;
11300 type = "move";
11302 break;
11304 /* If this is an indexed address, make sure the register class can handle
11305 indexed addresses for this mode. */
11306 case PLUS:
11307 plus_arg0 = XEXP (addr, 0);
11308 plus_arg1 = XEXP (addr, 1);
11310 /* (plus (plus (reg) (constant)) (constant)) is generated during
11311 push_reload processing, so handle it now. */
11312 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11314 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11316 extra_cost = 1;
11317 type = "offset";
11321 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11322 push_reload processing, so handle it now. */
11323 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11325 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11327 extra_cost = 1;
11328 type = "indexed #2";
11332 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11334 fail_msg = "no base register #2";
11335 extra_cost = -1;
11338 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11340 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11341 || !legitimate_indexed_address_p (addr, false))
11343 extra_cost = 1;
11344 type = "indexed";
11348 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11349 && CONST_INT_P (plus_arg1))
11351 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11353 extra_cost = 1;
11354 type = "vector d-form offset";
11358 /* Make sure the register class can handle offset addresses. */
11359 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11361 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11363 extra_cost = 1;
11364 type = "offset #2";
11368 else
11370 fail_msg = "bad PLUS";
11371 extra_cost = -1;
11374 break;
11376 case LO_SUM:
11377 /* Quad offsets are restricted and can't handle normal addresses. */
11378 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11380 extra_cost = -1;
11381 type = "vector d-form lo_sum";
11384 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11386 fail_msg = "bad LO_SUM";
11387 extra_cost = -1;
11390 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11392 extra_cost = 1;
11393 type = "lo_sum";
11395 break;
11397 /* Static addresses need to create a TOC entry. */
11398 case CONST:
11399 case SYMBOL_REF:
11400 case LABEL_REF:
11401 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11403 extra_cost = -1;
11404 type = "vector d-form lo_sum #2";
11407 else
11409 type = "address";
11410 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11412 break;
11414 /* TOC references look like offsettable memory. */
11415 case UNSPEC:
11416 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11418 fail_msg = "bad UNSPEC";
11419 extra_cost = -1;
11422 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11424 extra_cost = -1;
11425 type = "vector d-form lo_sum #3";
11428 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11430 extra_cost = 1;
11431 type = "toc reference";
11433 break;
11435 default:
11437 fail_msg = "bad address";
11438 extra_cost = -1;
11442 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11444 if (extra_cost < 0)
11445 fprintf (stderr,
11446 "rs6000_secondary_reload_memory error: mode = %s, "
11447 "class = %s, addr_mask = '%s', %s\n",
11448 GET_MODE_NAME (mode),
11449 reg_class_names[rclass],
11450 rs6000_debug_addr_mask (addr_mask, false),
11451 (fail_msg != NULL) ? fail_msg : "<bad address>");
11453 else
11454 fprintf (stderr,
11455 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11456 "addr_mask = '%s', extra cost = %d, %s\n",
11457 GET_MODE_NAME (mode),
11458 reg_class_names[rclass],
11459 rs6000_debug_addr_mask (addr_mask, false),
11460 extra_cost,
11461 (type) ? type : "<none>");
11463 debug_rtx (addr);
11466 return extra_cost;
11469 /* Helper function for rs6000_secondary_reload to return true if a move to a
11470 different register class is really a simple move. */
11472 static bool
11473 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11474 enum rs6000_reg_type from_type,
11475 machine_mode mode)
11477 int size = GET_MODE_SIZE (mode);
11479 /* Add support for various direct moves available. In this function, we only
11480 look at cases where we don't need any extra registers, and one or more
11481 simple move insns are issued. Originally, small integers were not allowed
11482 in FPR/VSX registers. Single precision binary floating point is not a simple
11483 move because we need to convert to the single precision memory layout.
11484 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11485 need special direct move handling, which we do not support yet. */
11486 if (TARGET_DIRECT_MOVE
11487 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11488 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11490 if (TARGET_POWERPC64)
11492 /* ISA 2.07: MTVSRD or MFVSRD. */
11493 if (size == 8)
11494 return true;
11496 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11497 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11498 return true;
11501 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11502 if (TARGET_P8_VECTOR)
11504 if (mode == SImode)
11505 return true;
11507 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11508 return true;
11511 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11512 if (mode == SDmode)
11513 return true;
11516 /* Move to/from SPR. */
11517 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11518 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11519 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11520 return true;
11522 return false;
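/* Editorial examples of the tests above: with TARGET_POWERPC64 on ISA 2.07,
   a DImode GPR<->VSX copy is a single mtvsrd/mfvsrd and an SImode copy a
   single mtvsrwz/mfvsrwz, so no scratch register is needed; TDmode never
   qualifies and falls through to the slower reload paths.  */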
11525 /* Direct move helper function for rs6000_secondary_reload; handle all of the
11526 special direct moves that involve allocating an extra register. Return true
11527 and fill in SRI with the insn code and extra cost of the helper function if
11528 there is such a function, false if not. */
11530 static bool
11531 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11532 enum rs6000_reg_type from_type,
11533 machine_mode mode,
11534 secondary_reload_info *sri,
11535 bool altivec_p)
11537 bool ret = false;
11538 enum insn_code icode = CODE_FOR_nothing;
11539 int cost = 0;
11540 int size = GET_MODE_SIZE (mode);
11542 if (TARGET_POWERPC64 && size == 16)
11544 /* Handle moving 128-bit values from GPRs to VSX registers on
11545 ISA 2.07 (power8, power9) when running in 64-bit mode using
11546 XXPERMDI to glue the two 64-bit values back together. */
11547 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11549 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11550 icode = reg_addr[mode].reload_vsx_gpr;
11553 /* Handle moving 128-bit values from VSX registers to GPRs on
11554 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11555 bottom 64-bit value. */
11556 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11558 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11559 icode = reg_addr[mode].reload_gpr_vsx;
11563 else if (TARGET_POWERPC64 && mode == SFmode)
11565 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11567 cost = 3; /* xscvdpspn, mfvsrd, and. */
11568 icode = reg_addr[mode].reload_gpr_vsx;
11571 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11573 cost = 2; /* mtvsrwz, xscvspdpn. */
11574 icode = reg_addr[mode].reload_vsx_gpr;
11578 else if (!TARGET_POWERPC64 && size == 8)
11580 /* Handle moving 64-bit values from GPRs to floating point registers on
11581 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11582 32-bit values back together. Altivec register classes must be handled
11583 specially since a different instruction is used, and the secondary
11584 reload support requires a single register class in the scratch
11585 register constraint. However, right now TFmode is not allowed in
11586 Altivec registers, so the pattern will never match. */
11587 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11589 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11590 icode = reg_addr[mode].reload_fpr_gpr;
11594 if (icode != CODE_FOR_nothing)
11596 ret = true;
11597 if (sri)
11599 sri->icode = icode;
11600 sri->extra_cost = cost;
11604 return ret;
11607 /* Return whether a move between two register classes can be done either
11608 directly (simple move) or via a pattern that uses a single extra temporary
11609 (using ISA 2.07's direct move in this case). */
11611 static bool
11612 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11613 enum rs6000_reg_type from_type,
11614 machine_mode mode,
11615 secondary_reload_info *sri,
11616 bool altivec_p)
11618 /* Fall back to load/store reloads if either type is not a register. */
11619 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11620 return false;
11622 /* If we haven't allocated registers yet, assume the move can be done for the
11623 standard register types. */
11624 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11625 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11626 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11627 return true;
11629 /* Moves within the same set of registers are simple moves for non-specialized
11630 registers. */
11631 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11632 return true;
11634 /* Check whether a simple move can be done directly. */
11635 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11637 if (sri)
11639 sri->icode = CODE_FOR_nothing;
11640 sri->extra_cost = 0;
11642 return true;
11645 /* Now check if we can do it in a few steps. */
11646 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11647 altivec_p);
11650 /* Inform reload about cases where moving X with a mode MODE to a register in
11651 RCLASS requires an extra scratch or immediate register. Return the class
11652 needed for the immediate register.
11654 For VSX and Altivec, we may need a register to convert sp+offset into
11655 reg+sp.
11657 For misaligned 64-bit gpr loads and stores we need a register to
11658 convert an offset address to indirect. */
11660 static reg_class_t
11661 rs6000_secondary_reload (bool in_p,
11662 rtx x,
11663 reg_class_t rclass_i,
11664 machine_mode mode,
11665 secondary_reload_info *sri)
11667 enum reg_class rclass = (enum reg_class) rclass_i;
11668 reg_class_t ret = ALL_REGS;
11669 enum insn_code icode;
11670 bool default_p = false;
11671 bool done_p = false;
11673 /* Allow subreg of memory before/during reload. */
11674 bool memory_p = (MEM_P (x)
11675 || (!reload_completed && SUBREG_P (x)
11676 && MEM_P (SUBREG_REG (x))));
11678 sri->icode = CODE_FOR_nothing;
11679 sri->t_icode = CODE_FOR_nothing;
11680 sri->extra_cost = 0;
11681 icode = ((in_p)
11682 ? reg_addr[mode].reload_load
11683 : reg_addr[mode].reload_store);
11685 if (REG_P (x) || register_operand (x, mode))
11687 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11688 bool altivec_p = (rclass == ALTIVEC_REGS);
11689 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11691 if (!in_p)
11692 std::swap (to_type, from_type);
11694 /* Can we do a direct move of some sort? */
11695 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11696 altivec_p))
11698 icode = (enum insn_code)sri->icode;
11699 default_p = false;
11700 done_p = true;
11701 ret = NO_REGS;
11705 /* Make sure 0.0 is not reloaded or forced into memory. */
11706 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11708 ret = NO_REGS;
11709 default_p = false;
11710 done_p = true;
11713 /* If this is a scalar floating point value and we want to load it into the
11714 traditional Altivec registers, do it via a move via a traditional floating
11715 point register, unless we have D-form addressing. Also make sure that
11716 non-zero constants use a FPR. */
11717 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11718 && !mode_supports_vmx_dform (mode)
11719 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11720 && (memory_p || CONST_DOUBLE_P (x)))
11722 ret = FLOAT_REGS;
11723 default_p = false;
11724 done_p = true;
11727 /* Handle reload of load/stores if we have reload helper functions. */
11728 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11730 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11731 mode);
11733 if (extra_cost >= 0)
11735 done_p = true;
11736 ret = NO_REGS;
11737 if (extra_cost > 0)
11739 sri->extra_cost = extra_cost;
11740 sri->icode = icode;
11745 /* Handle unaligned loads and stores of integer registers. */
11746 if (!done_p && TARGET_POWERPC64
11747 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11748 && memory_p
11749 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11751 rtx addr = XEXP (x, 0);
11752 rtx off = address_offset (addr);
11754 if (off != NULL_RTX)
11756 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11757 unsigned HOST_WIDE_INT offset = INTVAL (off);
11759 /* We need a secondary reload when our legitimate_address_p
11760 says the address is good (as otherwise the entire address
11761 will be reloaded), and the offset is not a multiple of
11762 four or we have an address wrap. Address wrap will only
11763 occur for LO_SUMs since legitimate_offset_address_p
11764 rejects addresses for 16-byte mems that will wrap. */
11765 if (GET_CODE (addr) == LO_SUM
11766 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11767 && ((offset & 3) != 0
11768 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11769 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11770 && (offset & 3) != 0))
11772 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11773 if (in_p)
11774 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11775 : CODE_FOR_reload_di_load);
11776 else
11777 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11778 : CODE_FOR_reload_di_store);
11779 sri->extra_cost = 2;
11780 ret = NO_REGS;
11781 done_p = true;
11783 else
11784 default_p = true;
11786 else
11787 default_p = true;
11790 if (!done_p && !TARGET_POWERPC64
11791 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11792 && memory_p
11793 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11795 rtx addr = XEXP (x, 0);
11796 rtx off = address_offset (addr);
11798 if (off != NULL_RTX)
11800 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11801 unsigned HOST_WIDE_INT offset = INTVAL (off);
11803 /* We need a secondary reload when our legitimate_address_p
11804 says the address is good (as otherwise the entire address
11805 will be reloaded), and we have a wrap.
11807 legitimate_lo_sum_address_p allows LO_SUM addresses to
11808 have any offset so test for wrap in the low 16 bits.
11810 legitimate_offset_address_p checks for the range
11811 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11812 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11813 [0x7ff4,0x7fff] respectively, so test for the
11814 intersection of these ranges, [0x7ffc,0x7fff] and
11815 [0x7ff4,0x7ff7] respectively.
11817 Note that the address we see here may have been
11818 manipulated by legitimize_reload_address. */
11819 if (GET_CODE (addr) == LO_SUM
11820 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11821 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11823 if (in_p)
11824 sri->icode = CODE_FOR_reload_si_load;
11825 else
11826 sri->icode = CODE_FOR_reload_si_store;
11827 sri->extra_cost = 2;
11828 ret = NO_REGS;
11829 done_p = true;
11831 else
11832 default_p = true;
11834 else
11835 default_p = true;
11838 if (!done_p)
11839 default_p = true;
11841 if (default_p)
11842 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11844 gcc_assert (ret != ALL_REGS);
11846 if (TARGET_DEBUG_ADDR)
11848 fprintf (stderr,
11849 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11850 "mode = %s",
11851 reg_class_names[ret],
11852 in_p ? "true" : "false",
11853 reg_class_names[rclass],
11854 GET_MODE_NAME (mode));
11856 if (reload_completed)
11857 fputs (", after reload", stderr);
11859 if (!done_p)
11860 fputs (", done_p not set", stderr);
11862 if (default_p)
11863 fputs (", default secondary reload", stderr);
11865 if (sri->icode != CODE_FOR_nothing)
11866 fprintf (stderr, ", reload func = %s, extra cost = %d",
11867 insn_data[sri->icode].name, sri->extra_cost);
11869 else if (sri->extra_cost > 0)
11870 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11872 fputs ("\n", stderr);
11873 debug_rtx (x);
11876 return ret;
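/* Illustrative sketch (editorial addition, not part of the original file):
   the LO_SUM wrap test used above, folded into a helper.  For a 16-byte
   access, extra = 16 - UNITS_PER_WORD = 8 on a 64-bit target; an offset of
   0x7ffc gives ((0x7ffc & 0xffff) ^ 0x8000) = 0xfffc >= 0xfff8, so the
   second doubleword would overflow the signed 16-bit displacement and a
   secondary reload is required, while 0x7ff0 gives 0xfff0 < 0xfff8 and
   needs nothing extra.  */
static bool
example_lo_sum_offset_wraps (unsigned long long offset, unsigned int extra)
{
  return ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra;
}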
11879 /* Better tracing for rs6000_secondary_reload_inner. */
11881 static void
11882 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11883 bool store_p)
11885 rtx set, clobber;
11887 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11889 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11890 store_p ? "store" : "load");
11892 if (store_p)
11893 set = gen_rtx_SET (mem, reg);
11894 else
11895 set = gen_rtx_SET (reg, mem);
11897 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11898 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11901 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11902 ATTRIBUTE_NORETURN;
11904 static void
11905 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11906 bool store_p)
11908 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11909 gcc_unreachable ();
11912 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11913 reload helper functions. These were identified in
11914 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11915 reload, it calls the insns:
11916 reload_<RELOAD:mode>_<P:mptrsize>_store
11917 reload_<RELOAD:mode>_<P:mptrsize>_load
11919 which in turn calls this function, to do whatever is necessary to create
11920 valid addresses. */
11922 void
11923 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11925 int regno = true_regnum (reg);
11926 machine_mode mode = GET_MODE (reg);
11927 addr_mask_type addr_mask;
11928 rtx addr;
11929 rtx new_addr;
11930 rtx op_reg, op0, op1;
11931 rtx and_op;
11932 rtx cc_clobber;
11933 rtvec rv;
11935 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11936 || !base_reg_operand (scratch, GET_MODE (scratch)))
11937 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11939 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11940 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11942 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11943 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11945 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11946 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11948 else
11949 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11951 /* Make sure the mode is valid in this register class. */
11952 if ((addr_mask & RELOAD_REG_VALID) == 0)
11953 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11955 if (TARGET_DEBUG_ADDR)
11956 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11958 new_addr = addr = XEXP (mem, 0);
11959 switch (GET_CODE (addr))
11961 /* Does the register class support auto update forms for this mode? If
11962 not, do the update now. We don't need a scratch register, since the
11963 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11964 case PRE_INC:
11965 case PRE_DEC:
11966 op_reg = XEXP (addr, 0);
11967 if (!base_reg_operand (op_reg, Pmode))
11968 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11970 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11972 int delta = GET_MODE_SIZE (mode);
11973 if (GET_CODE (addr) == PRE_DEC)
11974 delta = -delta;
11975 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11976 new_addr = op_reg;
11978 break;
11980 case PRE_MODIFY:
11981 op0 = XEXP (addr, 0);
11982 op1 = XEXP (addr, 1);
11983 if (!base_reg_operand (op0, Pmode)
11984 || GET_CODE (op1) != PLUS
11985 || !rtx_equal_p (op0, XEXP (op1, 0)))
11986 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11988 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11990 emit_insn (gen_rtx_SET (op0, op1));
11991 new_addr = reg;
11993 break;
11995 /* Do we need to simulate AND -16 to clear the bottom address bits used
11996 in VMX load/stores? */
11997 case AND:
11998 op0 = XEXP (addr, 0);
11999 op1 = XEXP (addr, 1);
12000 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12002 if (REG_P (op0) || SUBREG_P (op0))
12003 op_reg = op0;
12005 else if (GET_CODE (op1) == PLUS)
12007 emit_insn (gen_rtx_SET (scratch, op1));
12008 op_reg = scratch;
12011 else
12012 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12014 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12015 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12016 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12017 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12018 new_addr = scratch;
12020 break;
12022 /* If this is an indirect address, make sure it is a base register. */
12023 case REG:
12024 case SUBREG:
12025 if (!base_reg_operand (addr, GET_MODE (addr)))
12027 emit_insn (gen_rtx_SET (scratch, addr));
12028 new_addr = scratch;
12030 break;
12032 /* If this is an indexed address, make sure the register class can handle
12033 indexed addresses for this mode. */
12034 case PLUS:
12035 op0 = XEXP (addr, 0);
12036 op1 = XEXP (addr, 1);
12037 if (!base_reg_operand (op0, Pmode))
12038 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12040 else if (int_reg_operand (op1, Pmode))
12042 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12044 emit_insn (gen_rtx_SET (scratch, addr));
12045 new_addr = scratch;
12049 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
12051 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
12052 || !quad_address_p (addr, mode, false))
12054 emit_insn (gen_rtx_SET (scratch, addr));
12055 new_addr = scratch;
12059 /* Make sure the register class can handle offset addresses. */
12060 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12062 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12064 emit_insn (gen_rtx_SET (scratch, addr));
12065 new_addr = scratch;
12069 else
12070 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12072 break;
12074 case LO_SUM:
12075 op0 = XEXP (addr, 0);
12076 op1 = XEXP (addr, 1);
12077 if (!base_reg_operand (op0, Pmode))
12078 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12080 else if (int_reg_operand (op1, Pmode))
12082 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12084 emit_insn (gen_rtx_SET (scratch, addr));
12085 new_addr = scratch;
12089 /* Quad offsets are restricted and can't handle normal addresses. */
12090 else if (mode_supports_dq_form (mode))
12092 emit_insn (gen_rtx_SET (scratch, addr));
12093 new_addr = scratch;
12096 /* Make sure the register class can handle offset addresses. */
12097 else if (legitimate_lo_sum_address_p (mode, addr, false))
12099 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12101 emit_insn (gen_rtx_SET (scratch, addr));
12102 new_addr = scratch;
12106 else
12107 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12109 break;
12111 case SYMBOL_REF:
12112 case CONST:
12113 case LABEL_REF:
12114 rs6000_emit_move (scratch, addr, Pmode);
12115 new_addr = scratch;
12116 break;
12118 default:
12119 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12122 /* Adjust the address if it changed. */
12123 if (addr != new_addr)
12125 mem = replace_equiv_address_nv (mem, new_addr);
12126 if (TARGET_DEBUG_ADDR)
12127 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
12130 /* Now create the move. */
12131 if (store_p)
12132 emit_insn (gen_rtx_SET (mem, reg));
12133 else
12134 emit_insn (gen_rtx_SET (reg, mem));
12136 return;
12139 /* Convert reloads involving 64-bit gprs and misaligned offset
12140 addressing, or multiple 32-bit gprs and offsets that are too large,
12141 to use indirect addressing. */
12143 void
12144 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
12146 int regno = true_regnum (reg);
12147 enum reg_class rclass;
12148 rtx addr;
12149 rtx scratch_or_premodify = scratch;
12151 if (TARGET_DEBUG_ADDR)
12153 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
12154 store_p ? "store" : "load");
12155 fprintf (stderr, "reg:\n");
12156 debug_rtx (reg);
12157 fprintf (stderr, "mem:\n");
12158 debug_rtx (mem);
12159 fprintf (stderr, "scratch:\n");
12160 debug_rtx (scratch);
12163 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
12164 gcc_assert (MEM_P (mem));
12165 rclass = REGNO_REG_CLASS (regno);
12166 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
12167 addr = XEXP (mem, 0);
12169 if (GET_CODE (addr) == PRE_MODIFY)
12171 gcc_assert (REG_P (XEXP (addr, 0))
12172 && GET_CODE (XEXP (addr, 1)) == PLUS
12173 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
12174 scratch_or_premodify = XEXP (addr, 0);
12175 addr = XEXP (addr, 1);
12177 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
12179 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
12181 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
12183 /* Now create the move. */
12184 if (store_p)
12185 emit_insn (gen_rtx_SET (mem, reg));
12186 else
12187 emit_insn (gen_rtx_SET (reg, mem));
12189 return;
12192 /* Given an rtx X being reloaded into a reg required to be
12193 in class CLASS, return the class of reg to actually use.
12194 In general this is just CLASS; but on some machines
12195 in some cases it is preferable to use a more restrictive class.
12197 On the RS/6000, we have to return NO_REGS when we want to reload a
12198 floating-point CONST_DOUBLE to force it to be copied to memory.
12200 We also don't want to reload integer values into floating-point
12201 registers if we can at all help it. In fact, this can
12202 cause reload to die, if it tries to generate a reload of CTR
12203 into a FP register and discovers it doesn't have the memory location
12204 required.
12206 ??? Would it be a good idea to have reload do the converse, that is
12207 try to reload floating modes into FP registers if possible?
12210 static enum reg_class
12211 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
12213 machine_mode mode = GET_MODE (x);
12214 bool is_constant = CONSTANT_P (x);
12216 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
12217 reload class for it. */
12218 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12219 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
12220 return NO_REGS;
12222 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
12223 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
12224 return NO_REGS;
12226 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
12227 the reloading of address expressions using PLUS into floating point
12228 registers. */
12229 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
12231 if (is_constant)
12233 /* Zero is always allowed in all VSX registers. */
12234 if (x == CONST0_RTX (mode))
12235 return rclass;
12237 /* If this is a vector constant that can be formed with a few Altivec
12238 instructions, we want altivec registers. */
12239 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
12240 return ALTIVEC_REGS;
12242 /* If this is an integer constant that can easily be loaded into
12243 vector registers, allow it. */
12244 if (CONST_INT_P (x))
12246 HOST_WIDE_INT value = INTVAL (x);
12248 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
12249 2.06 can generate it in the Altivec registers with
12250 VSPLTI<x>. */
12251 if (value == -1)
12253 if (TARGET_P8_VECTOR)
12254 return rclass;
12255 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12256 return ALTIVEC_REGS;
12257 else
12258 return NO_REGS;
12261 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12262 a sign extend in the Altivec registers. */
12263 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12264 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12265 return ALTIVEC_REGS;
12268 /* Force constant to memory. */
12269 return NO_REGS;
12272 /* D-form addressing can easily reload the value. */
12273 if (mode_supports_vmx_dform (mode)
12274 || mode_supports_dq_form (mode))
12275 return rclass;
12277 /* If this is a scalar floating point value and we don't have D-form
12278 addressing, prefer the traditional floating point registers so that we
12279 can use D-form (register+offset) addressing. */
12280 if (rclass == VSX_REGS
12281 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12282 return FLOAT_REGS;
12284 /* Prefer the Altivec registers if Altivec is handling the vector
12285 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12286 loads. */
12287 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12288 || mode == V1TImode)
12289 return ALTIVEC_REGS;
12291 return rclass;
12294 if (is_constant || GET_CODE (x) == PLUS)
12296 if (reg_class_subset_p (GENERAL_REGS, rclass))
12297 return GENERAL_REGS;
12298 if (reg_class_subset_p (BASE_REGS, rclass))
12299 return BASE_REGS;
12300 return NO_REGS;
12303 /* For the vector pair and vector quad modes, prefer their natural register
12304 (VSX or FPR) rather than GPR registers. For other integer types, prefer
12305 the GPR registers. */
12306 if (rclass == GEN_OR_FLOAT_REGS)
12308 if (mode == POImode)
12309 return VSX_REGS;
12311 if (mode == PXImode)
12312 return FLOAT_REGS;
12314 if (GET_MODE_CLASS (mode) == MODE_INT)
12315 return GENERAL_REGS;
12318 return rclass;
12321 /* Debug version of rs6000_preferred_reload_class. */
12322 static enum reg_class
12323 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12325 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12327 fprintf (stderr,
12328 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12329 "mode = %s, x:\n",
12330 reg_class_names[ret], reg_class_names[rclass],
12331 GET_MODE_NAME (GET_MODE (x)));
12332 debug_rtx (x);
12334 return ret;
12337 /* If we are copying between FP or AltiVec registers and anything else, we need
12338 a memory location. The exception is when we are targeting ppc64 and the
12339 direct moves between FPRs and GPRs are available. Also, under VSX, you
12340 can copy vector registers from the FP register set to the Altivec register
12341 set and vice versa. */
12343 static bool
12344 rs6000_secondary_memory_needed (machine_mode mode,
12345 reg_class_t from_class,
12346 reg_class_t to_class)
12348 enum rs6000_reg_type from_type, to_type;
12349 bool altivec_p = ((from_class == ALTIVEC_REGS)
12350 || (to_class == ALTIVEC_REGS));
12352 /* If a simple/direct move is available, we don't need secondary memory. */
12353 from_type = reg_class_to_reg_type[(int)from_class];
12354 to_type = reg_class_to_reg_type[(int)to_class];
12356 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12357 (secondary_reload_info *)0, altivec_p))
12358 return false;
12360 /* If we have a floating point or vector register class, we need to use
12361 memory to transfer the data. */
12362 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12363 return true;
12365 return false;
12368 /* Debug version of rs6000_secondary_memory_needed. */
12369 static bool
12370 rs6000_debug_secondary_memory_needed (machine_mode mode,
12371 reg_class_t from_class,
12372 reg_class_t to_class)
12374 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12376 fprintf (stderr,
12377 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12378 "to_class = %s, mode = %s\n",
12379 ret ? "true" : "false",
12380 reg_class_names[from_class],
12381 reg_class_names[to_class],
12382 GET_MODE_NAME (mode));
12384 return ret;
12387 /* Return the register class of a scratch register needed to copy IN into
12388 or out of a register in RCLASS in MODE. If it can be done directly,
12389 NO_REGS is returned. */
12391 static enum reg_class
12392 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12393 rtx in)
12395 int regno;
12397 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12398 #if TARGET_MACHO
12399 && MACHOPIC_INDIRECT
12400 #endif
12403 /* We cannot copy a symbolic operand directly into anything
12404 other than BASE_REGS for TARGET_ELF. So indicate that a
12405 register from BASE_REGS is needed as an intermediate
12406 register.
12408 On Darwin, pic addresses require a load from memory, which
12409 needs a base register. */
12410 if (rclass != BASE_REGS
12411 && (SYMBOL_REF_P (in)
12412 || GET_CODE (in) == HIGH
12413 || GET_CODE (in) == LABEL_REF
12414 || GET_CODE (in) == CONST))
12415 return BASE_REGS;
12418 if (REG_P (in))
12420 regno = REGNO (in);
12421 if (!HARD_REGISTER_NUM_P (regno))
12423 regno = true_regnum (in);
12424 if (!HARD_REGISTER_NUM_P (regno))
12425 regno = -1;
12428 else if (SUBREG_P (in))
12430 regno = true_regnum (in);
12431 if (!HARD_REGISTER_NUM_P (regno))
12432 regno = -1;
12434 else
12435 regno = -1;
12437 /* If we have VSX register moves, prefer moving scalar values between
12438 Altivec registers and GPR by going via an FPR (and then via memory)
12439 instead of reloading the secondary memory address for Altivec moves. */
12440 if (TARGET_VSX
12441 && GET_MODE_SIZE (mode) < 16
12442 && !mode_supports_vmx_dform (mode)
12443 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12444 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12445 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12446 && (regno >= 0 && INT_REGNO_P (regno)))))
12447 return FLOAT_REGS;
12449 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12450 into anything. */
12451 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12452 || (regno >= 0 && INT_REGNO_P (regno)))
12453 return NO_REGS;
12455 /* Constants, memory, and VSX registers can go into VSX registers (both the
12456 traditional floating point and the altivec registers). */
12457 if (rclass == VSX_REGS
12458 && (regno == -1 || VSX_REGNO_P (regno)))
12459 return NO_REGS;
12461 /* Constants, memory, and FP registers can go into FP registers. */
12462 if ((regno == -1 || FP_REGNO_P (regno))
12463 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12464 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12466 /* Memory, and AltiVec registers can go into AltiVec registers. */
12467 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12468 && rclass == ALTIVEC_REGS)
12469 return NO_REGS;
12471 /* We can copy among the CR registers. */
12472 if ((rclass == CR_REGS || rclass == CR0_REGS)
12473 && regno >= 0 && CR_REGNO_P (regno))
12474 return NO_REGS;
12476 /* Otherwise, we need GENERAL_REGS. */
12477 return GENERAL_REGS;
12480 /* Debug version of rs6000_secondary_reload_class. */
12481 static enum reg_class
12482 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12483 machine_mode mode, rtx in)
12485 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12486 fprintf (stderr,
12487 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12488 "mode = %s, input rtx:\n",
12489 reg_class_names[ret], reg_class_names[rclass],
12490 GET_MODE_NAME (mode));
12491 debug_rtx (in);
12493 return ret;
12496 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12498 static bool
12499 rs6000_can_change_mode_class (machine_mode from,
12500 machine_mode to,
12501 reg_class_t rclass)
12503 unsigned from_size = GET_MODE_SIZE (from);
12504 unsigned to_size = GET_MODE_SIZE (to);
12506 if (from_size != to_size)
12508 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12510 if (reg_classes_intersect_p (xclass, rclass))
12512 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12513 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12514 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12515 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12517 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12518 single register under VSX because the scalar part of the register
12519 is in the upper 64-bits, and not the lower 64-bits. Types like
12520 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
12521 IEEE floating point can't overlap, and neither can small
12522 values. */
12524 if (to_float128_vector_p && from_float128_vector_p)
12525 return true;
12527 else if (to_float128_vector_p || from_float128_vector_p)
12528 return false;
12530 /* TDmode in floating-mode registers must always go into a register
12531 pair with the most significant word in the even-numbered register
12532 to match ISA requirements. In little-endian mode, this does not
12533 match subreg numbering, so we cannot allow subregs. */
12534 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12535 return false;
12537 /* Allow SD<->DD changes, since SDmode values are stored in
12538 the low half of the DDmode, just like target-independent
12539 code expects. We need to allow at least SD->DD since
12540 rs6000_secondary_memory_needed_mode asks for that change
12541 to be made for SD reloads. */
12542 if ((to == DDmode && from == SDmode)
12543 || (to == SDmode && from == DDmode))
12544 return true;
12546 if (from_size < 8 || to_size < 8)
12547 return false;
12549 if (from_size == 8 && (8 * to_nregs) != to_size)
12550 return false;
12552 if (to_size == 8 && (8 * from_nregs) != from_size)
12553 return false;
12555 return true;
12557 else
12558 return true;
12561 /* Since the VSX register set includes traditional floating point registers
12562 and altivec registers, just check for the size being different instead of
12563 trying to check whether the modes are vector modes. Otherwise it won't
12564 allow say DF and DI to change classes. For types like TFmode and TDmode
12565 that take 2 64-bit registers, rather than a single 128-bit register, don't
12566 allow subregs of those types to other 128 bit types. */
12567 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12569 unsigned num_regs = (from_size + 15) / 16;
12570 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12571 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12572 return false;
12574 return (from_size == 8 || from_size == 16);
12577 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12578 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12579 return false;
12581 return true;
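/* Consequences of the rules above (editorial summary): SDmode <-> DDmode
   subregs are allowed, which rs6000_secondary_memory_needed_mode relies on
   for SDmode reloads; TDmode subregs are rejected in little-endian mode
   because the even/odd register-pair requirement does not match subreg
   numbering; and, in the FP/VSX register classes, IEEE 128-bit values may
   only change mode to other 128-bit IEEE modes.  */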
12584 /* Debug version of rs6000_can_change_mode_class. */
12585 static bool
12586 rs6000_debug_can_change_mode_class (machine_mode from,
12587 machine_mode to,
12588 reg_class_t rclass)
12590 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12592 fprintf (stderr,
12593 "rs6000_can_change_mode_class, return %s, from = %s, "
12594 "to = %s, rclass = %s\n",
12595 ret ? "true" : "false",
12596 GET_MODE_NAME (from), GET_MODE_NAME (to),
12597 reg_class_names[rclass]);
12599 return ret;
12602 /* Return a string to do a move operation of 128 bits of data. */
12604 const char *
12605 rs6000_output_move_128bit (rtx operands[])
12607 rtx dest = operands[0];
12608 rtx src = operands[1];
12609 machine_mode mode = GET_MODE (dest);
12610 int dest_regno;
12611 int src_regno;
12612 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12613 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12615 if (REG_P (dest))
12617 dest_regno = REGNO (dest);
12618 dest_gpr_p = INT_REGNO_P (dest_regno);
12619 dest_fp_p = FP_REGNO_P (dest_regno);
12620 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12621 dest_vsx_p = dest_fp_p | dest_vmx_p;
12623 else
12625 dest_regno = -1;
12626 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12629 if (REG_P (src))
12631 src_regno = REGNO (src);
12632 src_gpr_p = INT_REGNO_P (src_regno);
12633 src_fp_p = FP_REGNO_P (src_regno);
12634 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12635 src_vsx_p = src_fp_p | src_vmx_p;
12637 else
12639 src_regno = -1;
12640 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12643 /* Register moves. */
12644 if (dest_regno >= 0 && src_regno >= 0)
12646 if (dest_gpr_p)
12648 if (src_gpr_p)
12649 return "#";
12651 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12652 return (WORDS_BIG_ENDIAN
12653 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12654 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12656 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12657 return "#";
12660 else if (TARGET_VSX && dest_vsx_p)
12662 if (src_vsx_p)
12663 return "xxlor %x0,%x1,%x1";
12665 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12666 return (WORDS_BIG_ENDIAN
12667 ? "mtvsrdd %x0,%1,%L1"
12668 : "mtvsrdd %x0,%L1,%1");
12670 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12671 return "#";
12674 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12675 return "vor %0,%1,%1";
12677 else if (dest_fp_p && src_fp_p)
12678 return "#";
12681 /* Loads. */
12682 else if (dest_regno >= 0 && MEM_P (src))
12684 if (dest_gpr_p)
12686 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12687 return "lq %0,%1";
12688 else
12689 return "#";
12692 else if (TARGET_ALTIVEC && dest_vmx_p
12693 && altivec_indexed_or_indirect_operand (src, mode))
12694 return "lvx %0,%y1";
12696 else if (TARGET_VSX && dest_vsx_p)
12698 if (mode_supports_dq_form (mode)
12699 && quad_address_p (XEXP (src, 0), mode, true))
12700 return "lxv %x0,%1";
12702 else if (TARGET_P9_VECTOR)
12703 return "lxvx %x0,%y1";
12705 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12706 return "lxvw4x %x0,%y1";
12708 else
12709 return "lxvd2x %x0,%y1";
12712 else if (TARGET_ALTIVEC && dest_vmx_p)
12713 return "lvx %0,%y1";
12715 else if (dest_fp_p)
12716 return "#";
12719 /* Stores. */
12720 else if (src_regno >= 0 && MEM_P (dest))
12722 if (src_gpr_p)
12724 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12725 return "stq %1,%0";
12726 else
12727 return "#";
12730 else if (TARGET_ALTIVEC && src_vmx_p
12731 && altivec_indexed_or_indirect_operand (dest, mode))
12732 return "stvx %1,%y0";
12734 else if (TARGET_VSX && src_vsx_p)
12736 if (mode_supports_dq_form (mode)
12737 && quad_address_p (XEXP (dest, 0), mode, true))
12738 return "stxv %x1,%0";
12740 else if (TARGET_P9_VECTOR)
12741 return "stxvx %x1,%y0";
12743 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12744 return "stxvw4x %x1,%y0";
12746 else
12747 return "stxvd2x %x1,%y0";
12750 else if (TARGET_ALTIVEC && src_vmx_p)
12751 return "stvx %1,%y0";
12753 else if (src_fp_p)
12754 return "#";
12757 /* Constants. */
12758 else if (dest_regno >= 0
12759 && (CONST_INT_P (src)
12760 || CONST_WIDE_INT_P (src)
12761 || CONST_DOUBLE_P (src)
12762 || GET_CODE (src) == CONST_VECTOR))
12764 if (dest_gpr_p)
12765 return "#";
12767 else if ((dest_vmx_p && TARGET_ALTIVEC)
12768 || (dest_vsx_p && TARGET_VSX))
12769 return output_vec_const_move (operands);
12772 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
12775 /* Validate a 128-bit move. */
12776 bool
12777 rs6000_move_128bit_ok_p (rtx operands[])
12779 machine_mode mode = GET_MODE (operands[0]);
12780 return (gpc_reg_operand (operands[0], mode)
12781 || gpc_reg_operand (operands[1], mode));
12784 /* Return true if a 128-bit move needs to be split. */
12785 bool
12786 rs6000_split_128bit_ok_p (rtx operands[])
12788 if (!reload_completed)
12789 return false;
12791 if (!gpr_or_gpr_p (operands[0], operands[1]))
12792 return false;
12794 if (quad_load_store_p (operands[0], operands[1]))
12795 return false;
12797 return true;
12801 /* Given a comparison operation, return the bit number in CCR to test. We
12802 know this is a valid comparison.
12804 SCC_P is 1 if this is for an scc. That means that %D will have been
12805 used instead of %C, so the bits will be in different places.
12807 Return -1 if OP isn't a valid comparison for some reason. */
12809 int
12810 ccr_bit (rtx op, int scc_p)
12812 enum rtx_code code = GET_CODE (op);
12813 machine_mode cc_mode;
12814 int cc_regnum;
12815 int base_bit;
12816 rtx reg;
12818 if (!COMPARISON_P (op))
12819 return -1;
12821 reg = XEXP (op, 0);
12823 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12824 return -1;
12826 cc_mode = GET_MODE (reg);
12827 cc_regnum = REGNO (reg);
12828 base_bit = 4 * (cc_regnum - CR0_REGNO);
12830 validate_condition_mode (code, cc_mode);
12832 /* When generating a sCOND operation, only positive conditions are
12833 allowed. */
12834 if (scc_p)
12835 switch (code)
12837 case EQ:
12838 case GT:
12839 case LT:
12840 case UNORDERED:
12841 case GTU:
12842 case LTU:
12843 break;
12844 default:
12845 return -1;
12848 switch (code)
12850 case NE:
12851 return scc_p ? base_bit + 3 : base_bit + 2;
12852 case EQ:
12853 return base_bit + 2;
12854 case GT: case GTU: case UNLE:
12855 return base_bit + 1;
12856 case LT: case LTU: case UNGE:
12857 return base_bit;
12858 case ORDERED: case UNORDERED:
12859 return base_bit + 3;
12861 case GE: case GEU:
12862 /* If scc, we will have done a cror to put the bit in the
12863 unordered position. So test that bit. For integer, this is ! LT
12864 unless this is an scc insn. */
12865 return scc_p ? base_bit + 3 : base_bit;
12867 case LE: case LEU:
12868 return scc_p ? base_bit + 3 : base_bit + 1;
12870 default:
12871 return -1;
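/* Illustrative sketch (editorial addition, not part of the original file):
   each CR field is four bits wide, in the order LT, GT, EQ, SO/UN that the
   switch above encodes, so a GT test against field CRn lands on bit
   4*n + 1; e.g. a GT comparison in CR3 tests bit 13.  */
static int
example_ccr_gt_bit (int cr_field)	/* 0..7, e.g. 3 for CR3.  */
{
  return 4 * cr_field + 1;		/* Matches "case GT" above.  */
}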
12875 /* Return the GOT register. */
12877 rtx
12878 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12880 /* The second flow pass currently (June 1999) can't update
12881 regs_ever_live without disturbing other parts of the compiler, so
12882 update it here to make the prolog/epilogue code happy. */
12883 if (!can_create_pseudo_p ()
12884 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12885 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12887 crtl->uses_pic_offset_table = 1;
12889 return pic_offset_table_rtx;
12892 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12894 /* Write out a function code label. */
12896 void
12897 rs6000_output_function_entry (FILE *file, const char *fname)
12899 if (fname[0] != '.')
12901 switch (DEFAULT_ABI)
12903 default:
12904 gcc_unreachable ();
12906 case ABI_AIX:
12907 if (DOT_SYMBOLS)
12908 putc ('.', file);
12909 else
12910 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12911 break;
12913 case ABI_ELFv2:
12914 case ABI_V4:
12915 case ABI_DARWIN:
12916 break;
12920 RS6000_OUTPUT_BASENAME (file, fname);
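/* Example (assumed typical case): under ABI_AIX with dot symbols, the
   entry point of function "foo" is emitted as ".foo", keeping the plain
   name "foo" free for the function descriptor; the other ABIs print the
   name unchanged.  */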
12923 /* Print an operand. Recognize special options, documented below. */
12925 #if TARGET_ELF
12926 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12927 only introduced by the linker, when applying the sda21
12928 relocation. */
12929 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12930 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12931 #else
12932 #define SMALL_DATA_RELOC "sda21"
12933 #define SMALL_DATA_REG 0
12934 #endif
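/* Illustrative use of these macros (hypothetical variable "var"): with
   -msdata=eabi a small-data reference prints as "var@sda21(0)" for the
   linker to resolve, while the sysv flavour prints "var@sdarel(13)"
   against the r13 small-data base.  */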
12936 void
12937 print_operand (FILE *file, rtx x, int code)
12939 int i;
12940 unsigned HOST_WIDE_INT uval;
12942 switch (code)
12944 /* %a is output_address. */
12946 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12947 output_operand. */
12949 case 'A':
12950 /* Write the MMA accumulator number associated with VSX register X. */
12951 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
12952 output_operand_lossage ("invalid %%A value");
12953 else
12954 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
12955 return;
12957 case 'D':
12958 /* Like 'J' but get to the GT bit only. */
12959 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12961 output_operand_lossage ("invalid %%D value");
12962 return;
12965 /* Bit 1 is GT bit. */
12966 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12968 /* Add one for shift count in rlinm for scc. */
12969 fprintf (file, "%d", i + 1);
12970 return;
12972 case 'e':
12973 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12974 if (! INT_P (x))
12976 output_operand_lossage ("invalid %%e value");
12977 return;
12980 uval = INTVAL (x);
12981 if ((uval & 0xffff) == 0 && uval != 0)
12982 putc ('s', file);
12983 return;
12985 case 'E':
12986 /* X is a CR register. Print the number of the EQ bit of the CR. */
12987 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12988 output_operand_lossage ("invalid %%E value");
12989 else
12990 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12991 return;
12993 case 'f':
12994 /* X is a CR register. Print the shift count needed to move it
12995 to the high-order four bits. */
12996 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12997 output_operand_lossage ("invalid %%f value");
12998 else
12999 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13000 return;
13002 case 'F':
13003 /* Similar, but print the count for the rotate in the opposite
13004 direction. */
13005 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13006 output_operand_lossage ("invalid %%F value");
13007 else
13008 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13009 return;
13011 case 'G':
13012 /* X is a constant integer. If it is negative, print "m",
13013 otherwise print "z". This is to make an aze or ame insn. */
13014 if (!CONST_INT_P (x))
13015 output_operand_lossage ("invalid %%G value");
13016 else if (INTVAL (x) >= 0)
13017 putc ('z', file);
13018 else
13019 putc ('m', file);
13020 return;
13022 case 'h':
13023 /* If constant, output low-order five bits. Otherwise, write
13024 normally. */
13025 if (INT_P (x))
13026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13027 else
13028 print_operand (file, x, 0);
13029 return;
13031 case 'H':
13032 /* If constant, output low-order six bits. Otherwise, write
13033 normally. */
13034 if (INT_P (x))
13035 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13036 else
13037 print_operand (file, x, 0);
13038 return;
13040 case 'I':
13041 /* Print `i' if this is a constant, else nothing. */
13042 if (INT_P (x))
13043 putc ('i', file);
13044 return;
13046 case 'j':
13047 /* Write the bit number in CCR for jump. */
13048 i = ccr_bit (x, 0);
13049 if (i == -1)
13050 output_operand_lossage ("invalid %%j code");
13051 else
13052 fprintf (file, "%d", i);
13053 return;
13055 case 'J':
13056 /* Similar, but add one for shift count in rlinm for scc and pass
13057 scc flag to `ccr_bit'. */
13058 i = ccr_bit (x, 1);
13059 if (i == -1)
13060 output_operand_lossage ("invalid %%J code");
13061 else
13062 /* If we want bit 31, write a shift count of zero, not 32. */
13063 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13064 return;
13066 case 'k':
13067 /* X must be a constant. Write the 1's complement of the
13068 constant. */
13069 if (! INT_P (x))
13070 output_operand_lossage ("invalid %%k value");
13071 else
13072 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
13073 return;
13075 case 'K':
13076 /* X must be a symbolic constant on ELF. Write an
13077 expression suitable for an 'addi' that adds in the low 16
13078 bits of the MEM. */
13079 if (GET_CODE (x) == CONST)
13081 if (GET_CODE (XEXP (x, 0)) != PLUS
13082 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
13083 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
13084 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
13085 output_operand_lossage ("invalid %%K value");
13087 print_operand_address (file, x);
13088 fputs ("@l", file);
13089 return;
13091 /* %l is output_asm_label. */
13093 case 'L':
13094 /* Write second word of DImode or DFmode reference. Works on register
13095 or non-indexed memory only. */
13096 if (REG_P (x))
13097 fputs (reg_names[REGNO (x) + 1], file);
13098 else if (MEM_P (x))
13100 machine_mode mode = GET_MODE (x);
13101 /* Handle possible auto-increment. Since it is pre-increment and
13102 we have already done it, we can just use an offset of word. */
13103 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13104 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13105 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13106 UNITS_PER_WORD));
13107 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13108 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
13109 UNITS_PER_WORD));
13110 else
13111 output_address (mode, XEXP (adjust_address_nv (x, SImode,
13112 UNITS_PER_WORD),
13113 0));
13115 if (small_data_operand (x, GET_MODE (x)))
13116 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13117 reg_names[SMALL_DATA_REG]);
13119 return;
13121 case 'N': /* Unused */
13122 /* Write the number of elements in the vector times 4. */
13123 if (GET_CODE (x) != PARALLEL)
13124 output_operand_lossage ("invalid %%N value");
13125 else
13126 fprintf (file, "%d", XVECLEN (x, 0) * 4);
13127 return;
13129 case 'O': /* Unused */
13130 /* Similar, but subtract 1 first. */
13131 if (GET_CODE (x) != PARALLEL)
13132 output_operand_lossage ("invalid %%O value");
13133 else
13134 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
13135 return;
13137 case 'p':
13138 /* X is a CONST_INT that is a power of two. Output the logarithm. */
13139 if (! INT_P (x)
13140 || INTVAL (x) < 0
13141 || (i = exact_log2 (INTVAL (x))) < 0)
13142 output_operand_lossage ("invalid %%p value");
13143 else
13144 fprintf (file, "%d", i);
13145 return;
13147 case 'P':
13148 /* The operand must be an indirect memory reference. The result
13149 is the register name. */
13150 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
13151 || REGNO (XEXP (x, 0)) >= 32)
13152 output_operand_lossage ("invalid %%P value");
13153 else
13154 fputs (reg_names[REGNO (XEXP (x, 0))], file);
13155 return;
13157 case 'q':
13158 /* This outputs the logical code corresponding to a boolean
13159 expression. The expression may have one or both operands
13160 negated (if one, only the first one). For condition register
13161 logical operations, it will also treat the negated
13162 CR codes as NOTs, but not handle NOTs of them. */
13164 const char *const *t = 0;
13165 const char *s;
13166 enum rtx_code code = GET_CODE (x);
13167 static const char * const tbl[3][3] = {
13168 { "and", "andc", "nor" },
13169 { "or", "orc", "nand" },
13170 { "xor", "eqv", "xor" } };
13172 if (code == AND)
13173 t = tbl[0];
13174 else if (code == IOR)
13175 t = tbl[1];
13176 else if (code == XOR)
13177 t = tbl[2];
13178 else
13179 output_operand_lossage ("invalid %%q value");
13181 if (GET_CODE (XEXP (x, 0)) != NOT)
13182 s = t[0];
13183 else
13185 if (GET_CODE (XEXP (x, 1)) == NOT)
13186 s = t[2];
13187 else
13188 s = t[1];
13191 fputs (s, file);
13193 return;
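/* Illustrative mappings (not in the original): (and A B) prints "and",
   (ior (not A) B) prints "orc", and (ior (not A) (not B)) prints "nand"
   by De Morgan's law.  */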
13195 case 'Q':
13196 if (! TARGET_MFCRF)
13197 return;
13198 fputc (',', file);
13199 /* FALLTHRU */
13201 case 'R':
13202 /* X is a CR register. Print the mask for `mtcrf'. */
13203 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13204 output_operand_lossage ("invalid %%R value");
13205 else
13206 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
13207 return;
13209 case 's':
13210 /* Low 5 bits of 32 - value */
13211 if (! INT_P (x))
13212 output_operand_lossage ("invalid %%s value");
13213 else
13214 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
13215 return;
13217 case 't':
13218 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
13219 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13221 output_operand_lossage ("invalid %%t value");
13222 return;
13225 /* Bit 3 is OV bit. */
13226 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
13228 /* If we want bit 31, write a shift count of zero, not 32. */
13229 fprintf (file, "%d", i == 31 ? 0 : i + 1);
13230 return;
13232 case 'T':
13233 /* Print the symbolic name of a branch target register. */
13234 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13235 x = XVECEXP (x, 0, 0);
13236 if (!REG_P (x) || (REGNO (x) != LR_REGNO
13237 && REGNO (x) != CTR_REGNO))
13238 output_operand_lossage ("invalid %%T value");
13239 else if (REGNO (x) == LR_REGNO)
13240 fputs ("lr", file);
13241 else
13242 fputs ("ctr", file);
13243 return;
13245 case 'u':
13246 /* High-order or low-order 16 bits of constant, whichever is non-zero,
13247 for use in unsigned operand. */
13248 if (! INT_P (x))
13250 output_operand_lossage ("invalid %%u value");
13251 return;
13254 uval = INTVAL (x);
13255 if ((uval & 0xffff) == 0)
13256 uval >>= 16;
13258 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
13259 return;
13261 case 'v':
13262 /* High-order 16 bits of constant for use in signed operand. */
13263 if (! INT_P (x))
13264 output_operand_lossage ("invalid %%v value");
13265 else
13266 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
13267 (INTVAL (x) >> 16) & 0xffff);
13268 return;
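/* Examples: for 0x12340000, both %u and %v print 0x1234; for 0x5678,
   %u prints 0x5678 (the nonzero low half) while %v prints 0x0.  */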
13270 case 'U':
13271 /* Print `u' if this has an auto-increment or auto-decrement. */
13272 if (MEM_P (x)
13273 && (GET_CODE (XEXP (x, 0)) == PRE_INC
13274 || GET_CODE (XEXP (x, 0)) == PRE_DEC
13275 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
13276 putc ('u', file);
13277 return;
13279 case 'V':
13280 /* Print the trap code for this operand. */
13281 switch (GET_CODE (x))
13283 case EQ:
13284 fputs ("eq", file); /* 4 */
13285 break;
13286 case NE:
13287 fputs ("ne", file); /* 24 */
13288 break;
13289 case LT:
13290 fputs ("lt", file); /* 16 */
13291 break;
13292 case LE:
13293 fputs ("le", file); /* 20 */
13294 break;
13295 case GT:
13296 fputs ("gt", file); /* 8 */
13297 break;
13298 case GE:
13299 fputs ("ge", file); /* 12 */
13300 break;
13301 case LTU:
13302 fputs ("llt", file); /* 2 */
13303 break;
13304 case LEU:
13305 fputs ("lle", file); /* 6 */
13306 break;
13307 case GTU:
13308 fputs ("lgt", file); /* 1 */
13309 break;
13310 case GEU:
13311 fputs ("lge", file); /* 5 */
13312 break;
13313 default:
13314 output_operand_lossage ("invalid %%V value");
13316 break;
13318 case 'w':
13319 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13320 normally. */
13321 if (INT_P (x))
13322 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13323 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13324 else
13325 print_operand (file, x, 0);
13326 return;
13328 case 'x':
13329 /* X is a FPR or Altivec register used in a VSX context. */
13330 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13331 output_operand_lossage ("invalid %%x value");
13332 else
13334 int reg = REGNO (x);
13335 int vsx_reg = (FP_REGNO_P (reg)
13336 ? reg - 32
13337 : reg - FIRST_ALTIVEC_REGNO + 32);
13339 #ifdef TARGET_REGNAMES
13340 if (TARGET_REGNAMES)
13341 fprintf (file, "%%vs%d", vsx_reg);
13342 else
13343 #endif
13344 fprintf (file, "%d", vsx_reg);
13346 return;
13348 case 'X':
13349 if (MEM_P (x)
13350 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13351 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13352 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13353 putc ('x', file);
13354 return;
13356 case 'Y':
13357 /* Like 'L', for third word of TImode/PTImode */
13358 if (REG_P (x))
13359 fputs (reg_names[REGNO (x) + 2], file);
13360 else if (MEM_P (x))
13362 machine_mode mode = GET_MODE (x);
13363 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13364 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13365 output_address (mode, plus_constant (Pmode,
13366 XEXP (XEXP (x, 0), 0), 8));
13367 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13368 output_address (mode, plus_constant (Pmode,
13369 XEXP (XEXP (x, 0), 0), 8));
13370 else
13371 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13372 if (small_data_operand (x, GET_MODE (x)))
13373 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13374 reg_names[SMALL_DATA_REG]);
13376 return;
13378 case 'z':
13379 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13380 x = XVECEXP (x, 0, 1);
13381 /* X is a SYMBOL_REF. Write out the name preceded by a
13382 period and without any trailing data in brackets. Used for function
13383 names. If we are configured for System V (or the embedded ABI) on
13384 the PowerPC, do not emit the period, since those systems do not use
13385 TOCs and the like. */
13386 if (!SYMBOL_REF_P (x))
13388 output_operand_lossage ("invalid %%z value");
13389 return;
13392 /* For macho, check to see if we need a stub. */
13393 if (TARGET_MACHO)
13395 const char *name = XSTR (x, 0);
13396 #if TARGET_MACHO
13397 if (darwin_symbol_stubs
13398 && MACHOPIC_INDIRECT
13399 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13400 name = machopic_indirection_name (x, /*stub_p=*/true);
13401 #endif
13402 assemble_name (file, name);
13404 else if (!DOT_SYMBOLS)
13405 assemble_name (file, XSTR (x, 0));
13406 else
13407 rs6000_output_function_entry (file, XSTR (x, 0));
13408 return;
13410 case 'Z':
13411 /* Like 'L', for last word of TImode/PTImode. */
13412 if (REG_P (x))
13413 fputs (reg_names[REGNO (x) + 3], file);
13414 else if (MEM_P (x))
13416 machine_mode mode = GET_MODE (x);
13417 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13418 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13419 output_address (mode, plus_constant (Pmode,
13420 XEXP (XEXP (x, 0), 0), 12));
13421 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13422 output_address (mode, plus_constant (Pmode,
13423 XEXP (XEXP (x, 0), 0), 12));
13424 else
13425 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13426 if (small_data_operand (x, GET_MODE (x)))
13427 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13428 reg_names[SMALL_DATA_REG]);
13430 return;
13432 /* Print AltiVec memory operand. */
13433 case 'y':
13435 rtx tmp;
13437 gcc_assert (MEM_P (x));
13439 tmp = XEXP (x, 0);
13441 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13442 && GET_CODE (tmp) == AND
13443 && CONST_INT_P (XEXP (tmp, 1))
13444 && INTVAL (XEXP (tmp, 1)) == -16)
13445 tmp = XEXP (tmp, 0);
13446 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13447 && GET_CODE (tmp) == PRE_MODIFY)
13448 tmp = XEXP (tmp, 1);
13449 if (REG_P (tmp))
13450 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13451 else
13453 if (GET_CODE (tmp) != PLUS
13454 || !REG_P (XEXP (tmp, 0))
13455 || !REG_P (XEXP (tmp, 1)))
13457 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13458 break;
13461 if (REGNO (XEXP (tmp, 0)) == 0)
13462 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13463 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13464 else
13465 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13466 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13468 break;
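/* Illustrative outputs for the AltiVec "RA,RB" form (not in the
   original): (mem (reg 9)) prints "0,9"; (mem (plus (reg 0) (reg 5)))
   prints "5,0", swapping so that r0 lands in the RB slot, because
   RA == 0 in lvx/stvx means a literal zero rather than r0.  */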
13471 case 0:
13472 if (REG_P (x))
13473 fprintf (file, "%s", reg_names[REGNO (x)]);
13474 else if (MEM_P (x))
13476 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13477 know the width from the mode. */
13478 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13479 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13480 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13481 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13482 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13483 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13484 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13485 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13486 else
13487 output_address (GET_MODE (x), XEXP (x, 0));
13489 else if (toc_relative_expr_p (x, false,
13490 &tocrel_base_oac, &tocrel_offset_oac))
13491 /* This hack along with a corresponding hack in
13492 rs6000_output_addr_const_extra arranges to output addends
13493 where the assembler expects to find them. eg.
13494 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13495 without this hack would be output as "x@toc+4". We
13496 want "x+4@toc". */
13497 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13498 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13499 output_addr_const (file, XVECEXP (x, 0, 0));
13500 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13501 output_addr_const (file, XVECEXP (x, 0, 1));
13502 else
13503 output_addr_const (file, x);
13504 return;
13506 case '&':
13507 if (const char *name = get_some_local_dynamic_name ())
13508 assemble_name (file, name);
13509 else
13510 output_operand_lossage ("'%%&' used without any "
13511 "local dynamic TLS references");
13512 return;
13514 default:
13515 output_operand_lossage ("invalid %%xn code");
13519 /* Print the address of an operand. */
13521 void
13522 print_operand_address (FILE *file, rtx x)
13524 if (REG_P (x))
13525 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13527 /* Is it a PC-relative address? */
13528 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13530 HOST_WIDE_INT offset;
13532 if (GET_CODE (x) == CONST)
13533 x = XEXP (x, 0);
13535 if (GET_CODE (x) == PLUS)
13537 offset = INTVAL (XEXP (x, 1));
13538 x = XEXP (x, 0);
13540 else
13541 offset = 0;
13543 output_addr_const (file, x);
13545 if (offset)
13546 fprintf (file, "%+" PRId64, offset);
13548 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13549 fprintf (file, "@got");
13551 fprintf (file, "@pcrel");
13553 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13554 || GET_CODE (x) == LABEL_REF)
13556 output_addr_const (file, x);
13557 if (small_data_operand (x, GET_MODE (x)))
13558 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13559 reg_names[SMALL_DATA_REG]);
13560 else
13561 gcc_assert (!TARGET_TOC);
13563 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13564 && REG_P (XEXP (x, 1)))
13566 if (REGNO (XEXP (x, 0)) == 0)
13567 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13568 reg_names[ REGNO (XEXP (x, 0)) ]);
13569 else
13570 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13571 reg_names[ REGNO (XEXP (x, 1)) ]);
13573 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13574 && CONST_INT_P (XEXP (x, 1)))
13575 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13576 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13577 #if TARGET_MACHO
13578 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13579 && CONSTANT_P (XEXP (x, 1)))
13581 fprintf (file, "lo16(");
13582 output_addr_const (file, XEXP (x, 1));
13583 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13585 #endif
13586 #if TARGET_ELF
13587 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13588 && CONSTANT_P (XEXP (x, 1)))
13590 output_addr_const (file, XEXP (x, 1));
13591 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13593 #endif
13594 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13596 /* This hack along with a corresponding hack in
13597 rs6000_output_addr_const_extra arranges to output addends
13598 where the assembler expects to find them. eg.
13599 (lo_sum (reg 9)
13600 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13601 without this hack would be output as "x@toc+8@l(9)". We
13602 want "x+8@toc@l(9)". */
13603 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13604 if (GET_CODE (x) == LO_SUM)
13605 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13606 else
13607 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13609 else
13610 output_addr_const (file, x);
13613 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13615 bool
13616 rs6000_output_addr_const_extra (FILE *file, rtx x)
13618 if (GET_CODE (x) == UNSPEC)
13619 switch (XINT (x, 1))
13621 case UNSPEC_TOCREL:
13622 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13623 && REG_P (XVECEXP (x, 0, 1))
13624 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13625 output_addr_const (file, XVECEXP (x, 0, 0));
13626 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13628 if (INTVAL (tocrel_offset_oac) >= 0)
13629 fprintf (file, "+");
13630 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13632 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13634 putc ('-', file);
13635 assemble_name (file, toc_label_name);
13636 need_toc_init = 1;
13638 else if (TARGET_ELF)
13639 fputs ("@toc", file);
13640 return true;
13642 #if TARGET_MACHO
13643 case UNSPEC_MACHOPIC_OFFSET:
13644 output_addr_const (file, XVECEXP (x, 0, 0));
13645 putc ('-', file);
13646 machopic_output_function_base_name (file);
13647 return true;
13648 #endif
13650 return false;
13653 /* Target hook for assembling integer objects. The PowerPC version has
13654 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13655 is defined. It also needs to handle DI-mode objects on 64-bit
13656 targets. */
13658 static bool
13659 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13661 #ifdef RELOCATABLE_NEEDS_FIXUP
13662 /* Special handling for SI values. */
13663 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13665 static int recurse = 0;
13667 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13668 the .fixup section. Since the TOC section is already relocated, we
13669 don't need to mark it here. We used to skip the text section, but it
13670 should never be valid for relocated addresses to be placed in the text
13671 section. */
13672 if (DEFAULT_ABI == ABI_V4
13673 && (TARGET_RELOCATABLE || flag_pic > 1)
13674 && in_section != toc_section
13675 && !recurse
13676 && !CONST_SCALAR_INT_P (x)
13677 && CONSTANT_P (x))
13679 char buf[256];
13681 recurse = 1;
13682 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13683 fixuplabelno++;
13684 ASM_OUTPUT_LABEL (asm_out_file, buf);
13685 fprintf (asm_out_file, "\t.long\t(");
13686 output_addr_const (asm_out_file, x);
13687 fprintf (asm_out_file, ")@fixup\n");
13688 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13689 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13690 fprintf (asm_out_file, "\t.long\t");
13691 assemble_name (asm_out_file, buf);
13692 fprintf (asm_out_file, "\n\t.previous\n");
13693 recurse = 0;
13694 return true;
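/* For illustration (label number hypothetical), the code above emits
   something like:
        .LCP0:
                .long   (sym)@fixup
                .section ".fixup","aw"
                .align 2
                .long   .LCP0
                .previous
   so -mrelocatable startup code can relocate the word at .LCP0.  */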
13696 /* Remove initial .'s to turn a -mcall-aixdesc function
13697 address into the address of the descriptor, not the function
13698 itself. */
13699 else if (SYMBOL_REF_P (x)
13700 && XSTR (x, 0)[0] == '.'
13701 && DEFAULT_ABI == ABI_AIX)
13703 const char *name = XSTR (x, 0);
13704 while (*name == '.')
13705 name++;
13707 fprintf (asm_out_file, "\t.long\t%s\n", name);
13708 return true;
13711 #endif /* RELOCATABLE_NEEDS_FIXUP */
13712 return default_assemble_integer (x, size, aligned_p);
13715 /* Return a template string for assembly to emit when making an
13716 external call. FUNOP is the call mem argument operand number. */
13718 static const char *
13719 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13721 /* -Wformat-overflow workaround, without which gcc thinks that %u
13722 might produce 10 digits. */
13723 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13725 char arg[12];
13726 arg[0] = 0;
13727 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13729 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13730 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13731 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13732 sprintf (arg, "(%%&@tlsld)");
13735 /* The magic 32768 offset here corresponds to the offset of
13736 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13737 char z[11];
13738 sprintf (z, "%%z%u%s", funop,
13739 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13740 ? "+32768" : ""));
13742 static char str[32]; /* 1 spare */
13743 if (rs6000_pcrel_p ())
13744 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13745 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13746 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13747 sibcall ? "" : "\n\tnop");
13748 else if (DEFAULT_ABI == ABI_V4)
13749 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13750 flag_pic ? "@plt" : "");
13751 #if TARGET_MACHO
13752 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13753 else if (DEFAULT_ABI == ABI_DARWIN)
13755 /* The cookie is in operand funop + 2. */
13756 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13757 int cookie = INTVAL (operands[funop + 2]);
13758 if (cookie & CALL_LONG)
13760 tree funname = get_identifier (XSTR (operands[funop], 0));
13761 tree labelname = get_prev_label (funname);
13762 gcc_checking_assert (labelname && !sibcall);
13764 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13765 instruction will reach 'foo', otherwise link as 'bl L42'".
13766 "L42" should be a 'branch island', that will do a far jump to
13767 'foo'. Branch islands are generated in
13768 macho_branch_islands(). */
13769 sprintf (str, "jbsr %%z%u,%.10s", funop,
13770 IDENTIFIER_POINTER (labelname));
13772 else
13773 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13774 after the call. */
13775 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13777 #endif
13778 else
13779 gcc_unreachable ();
13780 return str;
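/* Illustrative expansions for a direct call to "foo": PC-relative code
   gives "bl foo@notoc"; ABI_AIX and ABI_ELFv2 give "bl foo" followed by
   a "nop" that the linker may rewrite into a TOC restore; ABI_V4 with
   -fPIC gives "bl foo@plt".  */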
13783 const char *
13784 rs6000_call_template (rtx *operands, unsigned int funop)
13786 return rs6000_call_template_1 (operands, funop, false);
13789 const char *
13790 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13792 return rs6000_call_template_1 (operands, funop, true);
13795 /* As above, for indirect calls. */
13797 static const char *
13798 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13799 bool sibcall)
13801 /* -Wformat-overflow workaround, without which gcc thinks that %u
13802 might produce 10 digits. Note that -Wformat-overflow will not
13803 currently warn here for str[], so do not rely on a warning to
13804 ensure str[] is correctly sized. */
13805 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13807 /* Currently, funop is either 0 or 1. The maximum string is always
13808 a !speculate 64-bit __tls_get_addr call.
13810 ABI_ELFv2, pcrel:
13811 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13812 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13813 . 9 crset 2\n\t
13814 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13815 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13816 . 8 beq%T1l-
13817 .---
13818 .142
13820 ABI_AIX:
13821 . 9 ld 2,%3\n\t
13822 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13823 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13824 . 9 crset 2\n\t
13825 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13826 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13827 . 10 beq%T1l-\n\t
13828 . 10 ld 2,%4(1)
13829 .---
13830 .151
13832 ABI_ELFv2:
13833 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13834 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13835 . 9 crset 2\n\t
13836 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13837 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13838 . 10 beq%T1l-\n\t
13839 . 10 ld 2,%3(1)
13840 .---
13841 .142
13843 ABI_V4:
13844 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13845 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13846 . 9 crset 2\n\t
13847 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13848 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13849 . 8 beq%T1l-
13850 .---
13851 .141 */
13852 static char str[160]; /* 8 spare */
13853 char *s = str;
13854 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13856 if (DEFAULT_ABI == ABI_AIX)
13857 s += sprintf (s,
13858 "l%s 2,%%%u\n\t",
13859 ptrload, funop + 3);
13861 /* We don't need the extra code to stop indirect call speculation if
13862 calling via LR. */
13863 bool speculate = (TARGET_MACHO
13864 || rs6000_speculate_indirect_jumps
13865 || (REG_P (operands[funop])
13866 && REGNO (operands[funop]) == LR_REGNO));
13868 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13870 const char *rel64 = TARGET_64BIT ? "64" : "";
13871 char tls[29];
13872 tls[0] = 0;
13873 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13875 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13876 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13877 rel64, funop + 1);
13878 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13879 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13880 rel64);
13883 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
13884 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13885 && flag_pic == 2 ? "+32768" : "");
13886 if (!speculate)
13888 s += sprintf (s,
13889 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13890 tls, rel64, notoc, funop, addend);
13891 s += sprintf (s, "crset 2\n\t");
13893 s += sprintf (s,
13894 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13895 tls, rel64, notoc, funop, addend);
13897 else if (!speculate)
13898 s += sprintf (s, "crset 2\n\t");
13900 if (rs6000_pcrel_p ())
13902 if (speculate)
13903 sprintf (s, "b%%T%ul", funop);
13904 else
13905 sprintf (s, "beq%%T%ul-", funop);
13907 else if (DEFAULT_ABI == ABI_AIX)
13909 if (speculate)
13910 sprintf (s,
13911 "b%%T%ul\n\t"
13912 "l%s 2,%%%u(1)",
13913 funop, ptrload, funop + 4);
13914 else
13915 sprintf (s,
13916 "beq%%T%ul-\n\t"
13917 "l%s 2,%%%u(1)",
13918 funop, ptrload, funop + 4);
13920 else if (DEFAULT_ABI == ABI_ELFv2)
13922 if (speculate)
13923 sprintf (s,
13924 "b%%T%ul\n\t"
13925 "l%s 2,%%%u(1)",
13926 funop, ptrload, funop + 3);
13927 else
13928 sprintf (s,
13929 "beq%%T%ul-\n\t"
13930 "l%s 2,%%%u(1)",
13931 funop, ptrload, funop + 3);
13933 else
13935 if (speculate)
13936 sprintf (s,
13937 "b%%T%u%s",
13938 funop, sibcall ? "" : "l");
13939 else
13940 sprintf (s,
13941 "beq%%T%u%s-%s",
13942 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13944 return str;
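/* Illustrative result (ELFv2, 64-bit, speculation allowed): an indirect
   call through CTR yields "bctrl" followed by "ld 2,%3(1)", with the
   TOC save slot conventionally at 24(1), restoring r2 after the call.  */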
13947 const char *
13948 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13950 return rs6000_indirect_call_template_1 (operands, funop, false);
13953 const char *
13954 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13956 return rs6000_indirect_call_template_1 (operands, funop, true);
13959 #if HAVE_AS_PLTSEQ
13960 /* Output indirect call insns. WHICH identifies the type of sequence. */
13961 const char *
13962 rs6000_pltseq_template (rtx *operands, int which)
13964 const char *rel64 = TARGET_64BIT ? "64" : "";
13965 char tls[30];
13966 tls[0] = 0;
13967 if (GET_CODE (operands[3]) == UNSPEC)
13969 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13970 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13971 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13972 off, rel64);
13973 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13974 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13975 off, rel64);
13978 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13979 static char str[96]; /* 10 spare */
13980 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13981 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13982 && flag_pic == 2 ? "+32768" : "");
13983 switch (which)
13985 case RS6000_PLTSEQ_TOCSAVE:
13986 sprintf (str,
13987 "st%s\n\t"
13988 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13989 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13990 tls, rel64);
13991 break;
13992 case RS6000_PLTSEQ_PLT16_HA:
13993 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13994 sprintf (str,
13995 "lis %%0,0\n\t"
13996 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13997 tls, off, rel64);
13998 else
13999 sprintf (str,
14000 "addis %%0,%%1,0\n\t"
14001 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14002 tls, off, rel64, addend);
14003 break;
14004 case RS6000_PLTSEQ_PLT16_LO:
14005 sprintf (str,
14006 "l%s %%0,0(%%1)\n\t"
14007 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14008 TARGET_64BIT ? "d" : "wz",
14009 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14010 break;
14011 case RS6000_PLTSEQ_MTCTR:
14012 sprintf (str,
14013 "mtctr %%1\n\t"
14014 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14015 tls, rel64, addend);
14016 break;
14017 case RS6000_PLTSEQ_PLT_PCREL34:
14018 sprintf (str,
14019 "pl%s %%0,0(0),1\n\t"
14020 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14021 TARGET_64BIT ? "d" : "wz",
14022 tls, rel64);
14023 break;
14024 default:
14025 gcc_unreachable ();
14027 return str;
14029 #endif
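/* Rough shape of the inline PLT call sequence these templates build
   (illustrative; register numbers hypothetical):
        addis 9,2,0     # .reloc R_PPC64_PLT16_HA against the callee
        ld 12,0(9)      # .reloc R_PPC64_PLT16_LO_DS
        mtctr 12        # .reloc R_PPC64_PLTSEQ
   The .reloc markers let the linker recognize the group and relax it
   when the callee binds locally.  */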
14031 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14032 /* Emit an assembler directive to set symbol visibility for DECL to
14033 VISIBILITY_TYPE. */
14035 static void
14036 rs6000_assemble_visibility (tree decl, int vis)
14038 if (TARGET_XCOFF)
14039 return;
14041 /* Functions need to have their entry point symbol visibility set as
14042 well as their descriptor symbol visibility. */
14043 if (DEFAULT_ABI == ABI_AIX
14044 && DOT_SYMBOLS
14045 && TREE_CODE (decl) == FUNCTION_DECL)
14047 static const char * const visibility_types[] = {
14048 NULL, "protected", "hidden", "internal"
14051 const char *name, *type;
14053 name = ((* targetm.strip_name_encoding)
14054 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
14055 type = visibility_types[vis];
14057 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
14058 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
14060 else
14061 default_assemble_visibility (decl, vis);
14063 #endif
14065 enum rtx_code
14066 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
14068 /* Reversal of FP compares takes care -- an ordered compare
14069 becomes an unordered compare and vice versa. */
14070 if (mode == CCFPmode
14071 && (!flag_finite_math_only
14072 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
14073 || code == UNEQ || code == LTGT))
14074 return reverse_condition_maybe_unordered (code);
14075 else
14076 return reverse_condition (code);
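/* Example: on CCFPmode with NaNs honored, the reverse of GE is UNLT,
   since !(a >= b) must also hold when a and b are unordered; plain
   reverse_condition would give LT and lose the NaN case.  */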
14079 /* Generate a compare for CODE. Return a brand-new rtx that
14080 represents the result of the compare. */
14082 static rtx
14083 rs6000_generate_compare (rtx cmp, machine_mode mode)
14085 machine_mode comp_mode;
14086 rtx compare_result;
14087 enum rtx_code code = GET_CODE (cmp);
14088 rtx op0 = XEXP (cmp, 0);
14089 rtx op1 = XEXP (cmp, 1);
14091 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14092 comp_mode = CCmode;
14093 else if (FLOAT_MODE_P (mode))
14094 comp_mode = CCFPmode;
14095 else if (code == GTU || code == LTU
14096 || code == GEU || code == LEU)
14097 comp_mode = CCUNSmode;
14098 else if ((code == EQ || code == NE)
14099 && unsigned_reg_p (op0)
14100 && (unsigned_reg_p (op1)
14101 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
14102 /* These are unsigned values; perhaps there will be a later
14103 ordering compare that can be shared with this one. */
14104 comp_mode = CCUNSmode;
14105 else
14106 comp_mode = CCmode;
14108 /* If we have an unsigned compare, make sure we don't have a signed value as
14109 an immediate. */
14110 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
14111 && INTVAL (op1) < 0)
14113 op0 = copy_rtx_if_shared (op0);
14114 op1 = force_reg (GET_MODE (op0), op1);
14115 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
14118 /* First, the compare. */
14119 compare_result = gen_reg_rtx (comp_mode);
14121 /* IEEE 128-bit support in VSX registers when we do not have hardware
14122 support. */
14123 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14125 rtx libfunc = NULL_RTX;
14126 bool check_nan = false;
14127 rtx dest;
14129 switch (code)
14131 case EQ:
14132 case NE:
14133 libfunc = optab_libfunc (eq_optab, mode);
14134 break;
14136 case GT:
14137 case GE:
14138 libfunc = optab_libfunc (ge_optab, mode);
14139 break;
14141 case LT:
14142 case LE:
14143 libfunc = optab_libfunc (le_optab, mode);
14144 break;
14146 case UNORDERED:
14147 case ORDERED:
14148 libfunc = optab_libfunc (unord_optab, mode);
14149 code = (code == UNORDERED) ? NE : EQ;
14150 break;
14152 case UNGE:
14153 case UNGT:
14154 check_nan = true;
14155 libfunc = optab_libfunc (ge_optab, mode);
14156 code = (code == UNGE) ? GE : GT;
14157 break;
14159 case UNLE:
14160 case UNLT:
14161 check_nan = true;
14162 libfunc = optab_libfunc (le_optab, mode);
14163 code = (code == UNLE) ? LE : LT;
14164 break;
14166 case UNEQ:
14167 case LTGT:
14168 check_nan = true;
14169 libfunc = optab_libfunc (eq_optab, mode);
14170 code = (code == UNEQ) ? EQ : NE;
14171 break;
14173 default:
14174 gcc_unreachable ();
14177 gcc_assert (libfunc);
14179 if (!check_nan)
14180 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14181 SImode, op0, mode, op1, mode);
14183 /* The library signals an exception for signalling NaNs, so we need to
14184 handle isgreater, etc. by first checking isordered. */
14185 else
14187 rtx ne_rtx, normal_dest, unord_dest;
14188 rtx unord_func = optab_libfunc (unord_optab, mode);
14189 rtx join_label = gen_label_rtx ();
14190 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14191 rtx unord_cmp = gen_reg_rtx (comp_mode);
14194 /* Test for either value being a NaN. */
14195 gcc_assert (unord_func);
14196 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14197 SImode, op0, mode, op1, mode);
14199 /* Set value (1) if either value is a NaN, and jump to the join
14200 label. */
14201 dest = gen_reg_rtx (SImode);
14202 emit_move_insn (dest, const1_rtx);
14203 emit_insn (gen_rtx_SET (unord_cmp,
14204 gen_rtx_COMPARE (comp_mode, unord_dest,
14205 const0_rtx)));
14207 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14208 emit_jump_insn (gen_rtx_SET (pc_rtx,
14209 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14210 join_ref,
14211 pc_rtx)));
14213 /* Do the normal comparison, knowing that the values are not
14214 NaNs. */
14215 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14216 SImode, op0, mode, op1, mode);
14218 emit_insn (gen_cstoresi4 (dest,
14219 gen_rtx_fmt_ee (code, SImode, normal_dest,
14220 const0_rtx),
14221 normal_dest, const0_rtx));
14223 /* Join NaN and non-NaN paths. Compare dest against 0. */
14224 emit_label (join_label);
14225 code = NE;
14228 emit_insn (gen_rtx_SET (compare_result,
14229 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14232 else
14234 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14235 CLOBBERs to match cmptf_internal2 pattern. */
14236 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
14237 && FLOAT128_IBM_P (GET_MODE (op0))
14238 && TARGET_HARD_FLOAT)
14239 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14240 gen_rtvec (10,
14241 gen_rtx_SET (compare_result,
14242 gen_rtx_COMPARE (comp_mode, op0, op1)),
14243 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14244 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14245 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14246 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14247 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14248 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14249 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14250 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
14251 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
14252 else if (GET_CODE (op1) == UNSPEC
14253 && XINT (op1, 1) == UNSPEC_SP_TEST)
14255 rtx op1b = XVECEXP (op1, 0, 0);
14256 comp_mode = CCEQmode;
14257 compare_result = gen_reg_rtx (CCEQmode);
14258 if (TARGET_64BIT)
14259 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
14260 else
14261 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
14263 else
14264 emit_insn (gen_rtx_SET (compare_result,
14265 gen_rtx_COMPARE (comp_mode, op0, op1)));
14268 validate_condition_mode (code, GET_MODE (compare_result));
14270 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
14274 /* Return the diagnostic message string if the binary operation OP is
14275 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14277 static const char*
14278 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
14279 const_tree type1,
14280 const_tree type2)
14282 machine_mode mode1 = TYPE_MODE (type1);
14283 machine_mode mode2 = TYPE_MODE (type2);
14285 /* For complex modes, use the inner type. */
14286 if (COMPLEX_MODE_P (mode1))
14287 mode1 = GET_MODE_INNER (mode1);
14289 if (COMPLEX_MODE_P (mode2))
14290 mode2 = GET_MODE_INNER (mode2);
14292 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14293 double to intermix unless -mfloat128-convert. */
14294 if (mode1 == mode2)
14295 return NULL;
14297 if (!TARGET_FLOAT128_CVT)
14299 if ((mode1 == KFmode && mode2 == IFmode)
14300 || (mode1 == IFmode && mode2 == KFmode))
14301 return N_("__float128 and __ibm128 cannot be used in the same "
14302 "expression");
14304 if (TARGET_IEEEQUAD
14305 && ((mode1 == IFmode && mode2 == TFmode)
14306 || (mode1 == TFmode && mode2 == IFmode)))
14307 return N_("__ibm128 and long double cannot be used in the same "
14308 "expression");
14310 if (!TARGET_IEEEQUAD
14311 && ((mode1 == KFmode && mode2 == TFmode)
14312 || (mode1 == TFmode && mode2 == KFmode)))
14313 return N_("__float128 and long double cannot be used in the same "
14314 "expression");
14317 return NULL;
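/* Illustrative rejected mix (assuming IBM long double and no
   -mfloat128-convert):
     __float128 f (__float128 a, long double b) { return a + b; }
   draws the "__float128 and long double" diagnostic above, since the
   operands have KFmode and IBM-format TFmode respectively.  */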
14321 /* Expand floating point conversion to/from __float128 and __ibm128. */
14323 void
14324 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14326 machine_mode dest_mode = GET_MODE (dest);
14327 machine_mode src_mode = GET_MODE (src);
14328 convert_optab cvt = unknown_optab;
14329 bool do_move = false;
14330 rtx libfunc = NULL_RTX;
14331 rtx dest2;
14332 typedef rtx (*rtx_2func_t) (rtx, rtx);
14333 rtx_2func_t hw_convert = (rtx_2func_t)0;
14334 size_t kf_or_tf;
14336 struct hw_conv_t {
14337 rtx_2func_t from_df;
14338 rtx_2func_t from_sf;
14339 rtx_2func_t from_si_sign;
14340 rtx_2func_t from_si_uns;
14341 rtx_2func_t from_di_sign;
14342 rtx_2func_t from_di_uns;
14343 rtx_2func_t to_df;
14344 rtx_2func_t to_sf;
14345 rtx_2func_t to_si_sign;
14346 rtx_2func_t to_si_uns;
14347 rtx_2func_t to_di_sign;
14348 rtx_2func_t to_di_uns;
14349 } hw_conversions[2] = {
14350 /* conversions to/from KFmode */
14352 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14353 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14354 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14355 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14356 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14357 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14358 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14359 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14360 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14361 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14362 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14363 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14366 /* conversions to/from TFmode */
14368 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14369 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14370 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14371 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14372 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14373 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14374 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14375 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14376 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14377 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14378 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14379 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14383 if (dest_mode == src_mode)
14384 gcc_unreachable ();
14386 /* Eliminate memory operations. */
14387 if (MEM_P (src))
14388 src = force_reg (src_mode, src);
14390 if (MEM_P (dest))
14392 rtx tmp = gen_reg_rtx (dest_mode);
14393 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14394 rs6000_emit_move (dest, tmp, dest_mode);
14395 return;
14398 /* Convert to IEEE 128-bit floating point. */
14399 if (FLOAT128_IEEE_P (dest_mode))
14401 if (dest_mode == KFmode)
14402 kf_or_tf = 0;
14403 else if (dest_mode == TFmode)
14404 kf_or_tf = 1;
14405 else
14406 gcc_unreachable ();
14408 switch (src_mode)
14410 case E_DFmode:
14411 cvt = sext_optab;
14412 hw_convert = hw_conversions[kf_or_tf].from_df;
14413 break;
14415 case E_SFmode:
14416 cvt = sext_optab;
14417 hw_convert = hw_conversions[kf_or_tf].from_sf;
14418 break;
14420 case E_KFmode:
14421 case E_IFmode:
14422 case E_TFmode:
14423 if (FLOAT128_IBM_P (src_mode))
14424 cvt = sext_optab;
14425 else
14426 do_move = true;
14427 break;
14429 case E_SImode:
14430 if (unsigned_p)
14432 cvt = ufloat_optab;
14433 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14435 else
14437 cvt = sfloat_optab;
14438 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14440 break;
14442 case E_DImode:
14443 if (unsigned_p)
14445 cvt = ufloat_optab;
14446 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14448 else
14450 cvt = sfloat_optab;
14451 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14453 break;
14455 default:
14456 gcc_unreachable ();
14460 /* Convert from IEEE 128-bit floating point. */
14461 else if (FLOAT128_IEEE_P (src_mode))
14463 if (src_mode == KFmode)
14464 kf_or_tf = 0;
14465 else if (src_mode == TFmode)
14466 kf_or_tf = 1;
14467 else
14468 gcc_unreachable ();
14470 switch (dest_mode)
14472 case E_DFmode:
14473 cvt = trunc_optab;
14474 hw_convert = hw_conversions[kf_or_tf].to_df;
14475 break;
14477 case E_SFmode:
14478 cvt = trunc_optab;
14479 hw_convert = hw_conversions[kf_or_tf].to_sf;
14480 break;
14482 case E_KFmode:
14483 case E_IFmode:
14484 case E_TFmode:
14485 if (FLOAT128_IBM_P (dest_mode))
14486 cvt = trunc_optab;
14487 else
14488 do_move = true;
14489 break;
14491 case E_SImode:
14492 if (unsigned_p)
14494 cvt = ufix_optab;
14495 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14497 else
14499 cvt = sfix_optab;
14500 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14502 break;
14504 case E_DImode:
14505 if (unsigned_p)
14507 cvt = ufix_optab;
14508 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14510 else
14512 cvt = sfix_optab;
14513 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14515 break;
14517 default:
14518 gcc_unreachable ();
14522 /* Both IBM format. */
14523 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14524 do_move = true;
14526 else
14527 gcc_unreachable ();
14529 /* Handle conversion between TFmode/KFmode/IFmode. */
14530 if (do_move)
14531 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14533 /* Handle conversion if we have hardware support. */
14534 else if (TARGET_FLOAT128_HW && hw_convert)
14535 emit_insn ((hw_convert) (dest, src));
14537 /* Call an external function to do the conversion. */
14538 else if (cvt != unknown_optab)
14540 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14541 gcc_assert (libfunc != NULL_RTX);
14543 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14544 src, src_mode);
14546 gcc_assert (dest2 != NULL_RTX);
14547 if (!rtx_equal_p (dest, dest2))
14548 emit_move_insn (dest, dest2);
14551 else
14552 gcc_unreachable ();
14554 return;
14558 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14559 can be used as that dest register. Return the dest register. */
14561 rtx
14562 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14564 if (op2 == const0_rtx)
14565 return op1;
14567 if (GET_CODE (scratch) == SCRATCH)
14568 scratch = gen_reg_rtx (mode);
14570 if (logical_operand (op2, mode))
14571 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14572 else
14573 emit_insn (gen_rtx_SET (scratch,
14574 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14576 return scratch;
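/* Sketch of the expansion (illustrative): when op2 is a logical operand
   this emits an XOR such as "xori scratch,op1,C", otherwise a
   subtraction such as "addi scratch,op1,-C" for a small constant;
   either way scratch becomes zero exactly when op1 == op2, ready for a
   later eq/ne test against zero.  */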
14579 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14580 requires this. The result is mode MODE. */
14581 rtx
14582 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14584 rtx cond[2];
14585 int n = 0;
14586 if (code == LTGT || code == LE || code == UNLT)
14587 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14588 if (code == LTGT || code == GE || code == UNGT)
14589 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14590 if (code == LE || code == GE || code == UNEQ)
14591 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14592 if (code == UNLT || code == UNGT || code == UNEQ)
14593 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14595 gcc_assert (n == 2);
14597 rtx cc = gen_reg_rtx (CCEQmode);
14598 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14599 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14601 return cc;
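/* Example: for code == LE the two conditions gathered above are LT and
   EQ, so the cror computes (a < b) | (a == b) into the EQ bit of a
   fresh CCEQ register for a later beq/bne to test.  */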
14604 void
14605 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14607 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14608 rtx_code cond_code = GET_CODE (condition_rtx);
14610 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14611 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14613 else if (cond_code == NE
14614 || cond_code == GE || cond_code == LE
14615 || cond_code == GEU || cond_code == LEU
14616 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14618 rtx not_result = gen_reg_rtx (CCEQmode);
14619 rtx not_op, rev_cond_rtx;
14620 machine_mode cc_mode;
14622 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14624 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14625 SImode, XEXP (condition_rtx, 0), const0_rtx);
14626 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14627 emit_insn (gen_rtx_SET (not_result, not_op));
14628 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14631 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14632 if (op_mode == VOIDmode)
14633 op_mode = GET_MODE (XEXP (operands[1], 1));
14635 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14637 PUT_MODE (condition_rtx, DImode);
14638 convert_move (operands[0], condition_rtx, 0);
14640 else
14642 PUT_MODE (condition_rtx, SImode);
14643 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14647 /* Emit a branch of kind CODE to location LOC. */
14649 void
14650 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14652 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14653 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14654 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14655 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14658 /* Return the string to output a conditional branch to LABEL, which is
14659 the operand template of the label, or NULL if the branch is really a
14660 conditional return.
14662 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14663 condition code register and its mode specifies what kind of
14664 comparison we made.
14666 REVERSED is nonzero if we should reverse the sense of the comparison.
14668 INSN is the insn. */
14670 char *
14671 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14673 static char string[64];
14674 enum rtx_code code = GET_CODE (op);
14675 rtx cc_reg = XEXP (op, 0);
14676 machine_mode mode = GET_MODE (cc_reg);
14677 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14678 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14679 int really_reversed = reversed ^ need_longbranch;
14680 char *s = string;
14681 const char *ccode;
14682 const char *pred;
14683 rtx note;
14685 validate_condition_mode (code, mode);
14687 /* Work out which way this really branches. We could use
14688 reverse_condition_maybe_unordered here always but this
14689 makes the resulting assembler clearer. */
14690 if (really_reversed)
14692 /* Reversal of FP compares takes care -- an ordered compare
14693 becomes an unordered compare and vice versa. */
14694 if (mode == CCFPmode)
14695 code = reverse_condition_maybe_unordered (code);
14696 else
14697 code = reverse_condition (code);
14700 switch (code)
14702 /* Not all of these are actually distinct opcodes, but
14703 we distinguish them for clarity of the resulting assembler. */
14704 case NE: case LTGT:
14705 ccode = "ne"; break;
14706 case EQ: case UNEQ:
14707 ccode = "eq"; break;
14708 case GE: case GEU:
14709 ccode = "ge"; break;
14710 case GT: case GTU: case UNGT:
14711 ccode = "gt"; break;
14712 case LE: case LEU:
14713 ccode = "le"; break;
14714 case LT: case LTU: case UNLT:
14715 ccode = "lt"; break;
14716 case UNORDERED: ccode = "un"; break;
14717 case ORDERED: ccode = "nu"; break;
14718 case UNGE: ccode = "nl"; break;
14719 case UNLE: ccode = "ng"; break;
14720 default:
14721 gcc_unreachable ();
14724 /* Maybe we have a guess as to how likely the branch is. */
14725 pred = "";
14726 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14727 if (note != NULL_RTX)
14729 /* PROB is the difference from 50%. */
14730 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14731 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14733 /* Only hint for highly probable/improbable branches on newer cpus when
14734 we have real profile data, as static prediction overrides processor
14735 dynamic prediction. For older cpus we may as well always hint, but
14736 assume not taken for branches that are very close to 50% as a
14737 mispredicted taken branch is more expensive than a
14738 mispredicted not-taken branch. */
14739 if (rs6000_always_hint
14740 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14741 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14742 && br_prob_note_reliable_p (note)))
14744 if (abs (prob) > REG_BR_PROB_BASE / 20
14745 && ((prob > 0) ^ need_longbranch))
14746 pred = "+";
14747 else
14748 pred = "-";
14752 if (label == NULL)
14753 s += sprintf (s, "b%slr%s ", ccode, pred);
14754 else
14755 s += sprintf (s, "b%s%s ", ccode, pred);
14757 /* We need to escape any '%' characters in the reg_names string.
14758 Assume they'd only be the first character.... */
14759 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14760 *s++ = '%';
14761 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14763 if (label != NULL)
14765 /* If the branch distance was too far, we may have to use an
14766 unconditional branch to go the distance. */
14767 if (need_longbranch)
14768 s += sprintf (s, ",$+8\n\tb %s", label);
14769 else
14770 s += sprintf (s, ",%s", label);
14773 return string;
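/* Illustrative outputs (not in the original): a strongly predicted EQ
   test against cr0 might print "beq+ 0,.L5" (with -mregnames,
   "beq+ cr0,.L5"); if .L5 is beyond the 16-bit branch displacement, the
   sense is inverted and "bne 0,$+8" followed by "b .L5" is emitted
   instead.  */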
14776 /* Return insn for VSX or Altivec comparisons. */
14778 static rtx
14779 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14781 rtx mask;
14782 machine_mode mode = GET_MODE (op0);
14784 switch (code)
14786 default:
14787 break;
14789 case GE:
14790 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14791 return NULL_RTX;
14792 /* FALLTHRU */
14794 case EQ:
14795 case GT:
14796 case GTU:
14797 case ORDERED:
14798 case UNORDERED:
14799 case UNEQ:
14800 case LTGT:
14801 mask = gen_reg_rtx (mode);
14802 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14803 return mask;
14806 return NULL_RTX;
14809 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14810 DMODE is expected destination mode. This is a recursive function. */
14812 static rtx
14813 rs6000_emit_vector_compare (enum rtx_code rcode,
14814 rtx op0, rtx op1,
14815 machine_mode dmode)
14817 rtx mask;
14818 bool swap_operands = false;
14819 bool try_again = false;
14821 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14822 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14824 /* See if the comparison works as is. */
14825 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14826 if (mask)
14827 return mask;
14829 switch (rcode)
14831 case LT:
14832 rcode = GT;
14833 swap_operands = true;
14834 try_again = true;
14835 break;
14836 case LTU:
14837 rcode = GTU;
14838 swap_operands = true;
14839 try_again = true;
14840 break;
14841 case NE:
14842 case UNLE:
14843 case UNLT:
14844 case UNGE:
14845 case UNGT:
14846 /* Invert condition and try again.
14847 e.g., A != B becomes ~(A==B). */
14849 enum rtx_code rev_code;
14850 enum insn_code nor_code;
14851 rtx mask2;
14853 rev_code = reverse_condition_maybe_unordered (rcode);
14854 if (rev_code == UNKNOWN)
14855 return NULL_RTX;
14857 nor_code = optab_handler (one_cmpl_optab, dmode);
14858 if (nor_code == CODE_FOR_nothing)
14859 return NULL_RTX;
14861 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14862 if (!mask2)
14863 return NULL_RTX;
14865 mask = gen_reg_rtx (dmode);
14866 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14867 return mask;
14869 break;
14870 case GE:
14871 case GEU:
14872 case LE:
14873 case LEU:
14874 /* Try GT/GTU/LT/LTU OR EQ */
14876 rtx c_rtx, eq_rtx;
14877 enum insn_code ior_code;
14878 enum rtx_code new_code;
14880 switch (rcode)
14882 case GE:
14883 new_code = GT;
14884 break;
14886 case GEU:
14887 new_code = GTU;
14888 break;
14890 case LE:
14891 new_code = LT;
14892 break;
14894 case LEU:
14895 new_code = LTU;
14896 break;
14898 default:
14899 gcc_unreachable ();
14902 ior_code = optab_handler (ior_optab, dmode);
14903 if (ior_code == CODE_FOR_nothing)
14904 return NULL_RTX;
14906 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14907 if (!c_rtx)
14908 return NULL_RTX;
14910 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14911 if (!eq_rtx)
14912 return NULL_RTX;
14914 mask = gen_reg_rtx (dmode);
14915 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14916 return mask;
14918 break;
14919 default:
14920 return NULL_RTX;
14923 if (try_again)
14925 if (swap_operands)
14926 std::swap (op0, op1);
14928 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14929 if (mask)
14930 return mask;
14933 /* You only get two chances. */
14934 return NULL_RTX;
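/* Illustrative sketch, kept out of the build with #if 0: the rewrites
   above expressed on scalars, with -1/0 results standing in for the
   all-ones/all-zeros element masks that Altivec/VSX compares produce.
   The cmp_* names are hypothetical, not compiler interfaces.  */
#if 0
static int cmp_gt (int a, int b) { return a > b ? -1 : 0; }
static int cmp_eq (int a, int b) { return a == b ? -1 : 0; }
/* LT: swap the operands and use GT.  */
static int cmp_lt (int a, int b) { return cmp_gt (b, a); }
/* NE: invert the reversed condition, A != B == ~(A == B).  */
static int cmp_ne (int a, int b) { return ~cmp_eq (a, b); }
/* GE: OR the strict compare with EQ, A >= B == (A > B) | (A == B).  */
static int cmp_ge (int a, int b) { return cmp_gt (a, b) | cmp_eq (a, b); }
#endif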
14937 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14938 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14939 operands for the relation operation COND. */
14941 int
14942 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14943 rtx cond, rtx cc_op0, rtx cc_op1)
14945 machine_mode dest_mode = GET_MODE (dest);
14946 machine_mode mask_mode = GET_MODE (cc_op0);
14947 enum rtx_code rcode = GET_CODE (cond);
14948 machine_mode cc_mode = CCmode;
14949 rtx mask;
14950 rtx cond2;
14951 bool invert_move = false;
14953 if (VECTOR_UNIT_NONE_P (dest_mode))
14954 return 0;
14956 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14957 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14959 switch (rcode)
14961 /* Swap operands if we can, and fall back to doing the operation as
14962 specified, and doing a NOR to invert the test. */
14963 case NE:
14964 case UNLE:
14965 case UNLT:
14966 case UNGE:
14967 case UNGT:
14968 /* Invert condition and try again.
14969 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14970 invert_move = true;
14971 rcode = reverse_condition_maybe_unordered (rcode);
14972 if (rcode == UNKNOWN)
14973 return 0;
14974 break;
14976 case GE:
14977 case LE:
14978 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14980 /* Invert condition to avoid compound test. */
14981 invert_move = true;
14982 rcode = reverse_condition (rcode);
14984 break;
14986 case GTU:
14987 case GEU:
14988 case LTU:
14989 case LEU:
14990 /* Mark unsigned tests with CCUNSmode. */
14991 cc_mode = CCUNSmode;
14993 /* Invert condition to avoid compound test if necessary. */
14994 if (rcode == GEU || rcode == LEU)
14996 invert_move = true;
14997 rcode = reverse_condition (rcode);
14999 break;
15001 default:
15002 break;
15005 /* Get the vector mask for the given relational operations. */
15006 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
15008 if (!mask)
15009 return 0;
15011 if (invert_move)
15012 std::swap (op_true, op_false);
15014 /* Optimize vec1 == vec2, since we know the mask generates -1/0. */
15015 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
15016 && (GET_CODE (op_true) == CONST_VECTOR
15017 || GET_CODE (op_false) == CONST_VECTOR))
15019 rtx constant_0 = CONST0_RTX (dest_mode);
15020 rtx constant_m1 = CONSTM1_RTX (dest_mode);
15022 if (op_true == constant_m1 && op_false == constant_0)
15024 emit_move_insn (dest, mask);
15025 return 1;
15028 else if (op_true == constant_0 && op_false == constant_m1)
15030 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
15031 return 1;
15034 /* If we can't use the vector comparison directly, perhaps we can use
15035 the mask for the true or false fields, instead of loading up a
15036 constant. */
15037 if (op_true == constant_m1)
15038 op_true = mask;
15040 if (op_false == constant_0)
15041 op_false = mask;
15044 if (!REG_P (op_true) && !SUBREG_P (op_true))
15045 op_true = force_reg (dest_mode, op_true);
15047 if (!REG_P (op_false) && !SUBREG_P (op_false))
15048 op_false = force_reg (dest_mode, op_false);
15050 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
15051 CONST0_RTX (dest_mode));
15052 emit_insn (gen_rtx_SET (dest,
15053 gen_rtx_IF_THEN_ELSE (dest_mode,
15054 cond2,
15055 op_true,
15056 op_false)));
15057 return 1;
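/* Illustrative sketch, kept out of the build with #if 0: the final
   IF_THEN_ELSE above is a per-bit select (vsel/xxsel), which for a
   -1/0 MASK is simply (mask & true) | (~mask & false); the constant
   -1/0 shortcuts above just hand MASK (or its NOT) straight to DEST.
   Hypothetical name.  */
#if 0
static unsigned
example_vsel (unsigned mask, unsigned t, unsigned f)
{
  return (mask & t) | (~mask & f);
}
#endif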
15060 /* Possibly emit the xsmaxcdp and xsmincdp instructions to compute a maximum
15061 or minimum with "C" semantics.
15063 Unless you use -ffast-math, you can't use these instructions to replace
15064 conditions that implicitly reverse the sense of the comparison, because
15065 the comparison might involve a NaN or a signed zero.
15067 I.e. the following can be replaced all of the time
15068 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
15069 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
15070 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
15071 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
15073 The following can be replaced only if -ffast-math is used:
15074 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
15075 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
15076 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
15077 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
15079 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
15080 is nonzero/true, FALSE_COND if it is zero/false.
15082 Return false if we can't generate the appropriate minimum or maximum, and
15083 true if we did emit the minimum or maximum. */
15085 static bool
15086 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15088 enum rtx_code code = GET_CODE (op);
15089 rtx op0 = XEXP (op, 0);
15090 rtx op1 = XEXP (op, 1);
15091 machine_mode compare_mode = GET_MODE (op0);
15092 machine_mode result_mode = GET_MODE (dest);
15093 bool max_p = false;
15095 if (result_mode != compare_mode)
15096 return false;
15098 if (code == GE || code == GT)
15099 max_p = true;
15100 else if (code == LE || code == LT)
15101 max_p = false;
15102 else
15103 return false;
15105 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
15108 /* Only when NaNs and signed-zeros are not in effect, smax could be
15109 used for `op0 < op1 ? op1 : op0`, and smin could be used for
15110 `op0 > op1 ? op1 : op0`. */
15111 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
15112 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
15113 max_p = !max_p;
15115 else
15116 return false;
15118 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
15119 return true;
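/* Illustrative sketch, kept out of the build with #if 0: why the
   swapped forms need -ffast-math.  With x being a NaN the compare is
   false, so the conditional yields the NaN operand, while a plain
   maximum would yield the other operand; honoring NaNs therefore
   forbids flipping max_p for the swapped-operand case.  Hypothetical
   example.  */
#if 0
static double
example_swapped_max (double x, double y)
{
  /* Only a maximum when x is not a NaN: with x == NAN the result
     is x, not y.  */
  return x < y ? y : x;
}
#endif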
15122 /* Possibly emit a floating point conditional move by generating a
15123 compare-and-set-mask instruction followed by an XXSEL select instruction.
15125 Move TRUE_COND to DEST if OP applied to the operands of the last comparison
15126 is nonzero/true, FALSE_COND if it is zero/false.
15128 Return false if the operation cannot be generated, and true if we could
15129 generate the instruction. */
15131 static bool
15132 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15134 enum rtx_code code = GET_CODE (op);
15135 rtx op0 = XEXP (op, 0);
15136 rtx op1 = XEXP (op, 1);
15137 machine_mode result_mode = GET_MODE (dest);
15138 rtx compare_rtx;
15139 rtx cmove_rtx;
15140 rtx clobber_rtx;
15142 if (!can_create_pseudo_p ())
15143 return false;
15145 switch (code)
15147 case EQ:
15148 case GE:
15149 case GT:
15150 break;
15152 case NE:
15153 case LT:
15154 case LE:
15155 code = swap_condition (code);
15156 std::swap (op0, op1);
15157 break;
15159 default:
15160 return false;
15163 /* Generate: [(parallel [(set (dest)
15164 (if_then_else (op (cmp1) (cmp2))
15165 (true)
15166 (false)))
15167 (clobber (scratch))])]. */
15169 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
15170 cmove_rtx = gen_rtx_SET (dest,
15171 gen_rtx_IF_THEN_ELSE (result_mode,
15172 compare_rtx,
15173 true_cond,
15174 false_cond));
15176 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
15177 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15178 gen_rtvec (2, cmove_rtx, clobber_rtx)));
15180 return true;
15183 /* Helper function to return true if the target has a compare-and-set-mask
15184 instruction that can be used with XXSEL to implement a conditional move.
15185 It is also assumed that such a target supports the
15186 "C" minimum and maximum instructions. */
15188 static bool
15189 have_compare_and_set_mask (machine_mode mode)
15191 switch (mode)
15193 case E_SFmode:
15194 case E_DFmode:
15195 return TARGET_P9_MINMAX;
15197 default:
15198 break;
15201 return false;
15204 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
15205 operands of the last comparison is nonzero/true, FALSE_COND if it
15206 is zero/false. Return false if the hardware has no such operation. */
15208 bool
15209 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15211 enum rtx_code code = GET_CODE (op);
15212 rtx op0 = XEXP (op, 0);
15213 rtx op1 = XEXP (op, 1);
15214 machine_mode compare_mode = GET_MODE (op0);
15215 machine_mode result_mode = GET_MODE (dest);
15216 rtx temp;
15217 bool is_against_zero;
15219 /* These modes should always match. */
15220 if (GET_MODE (op1) != compare_mode
15221 /* In the isel case however, we can use a compare immediate, so
15222 op1 may be a small constant. */
15223 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
15224 return false;
15225 if (GET_MODE (true_cond) != result_mode)
15226 return false;
15227 if (GET_MODE (false_cond) != result_mode)
15228 return false;
15230 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
15231 instructions. */
15232 if (have_compare_and_set_mask (compare_mode)
15233 && have_compare_and_set_mask (result_mode))
15235 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
15236 return true;
15238 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
15239 return true;
15242 /* Don't allow using floating point comparisons for integer results for
15243 now. */
15244 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
15245 return false;
15247 /* First, work out if the hardware can do this at all, or
15248 if it's too slow.... */
15249 if (!FLOAT_MODE_P (compare_mode))
15251 if (TARGET_ISEL)
15252 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
15253 return false;
15256 is_against_zero = op1 == CONST0_RTX (compare_mode);
15258 /* A floating-point subtract might overflow, underflow, or produce
15259 an inexact result, thus changing the floating-point flags, so it
15260 can't be generated if we care about that. It's safe if one side
15261 of the construct is zero, since then no subtract will be
15262 generated. */
15263 if (SCALAR_FLOAT_MODE_P (compare_mode)
15264 && flag_trapping_math && ! is_against_zero)
15265 return false;
15267 /* Eliminate half of the comparisons by switching operands, this
15268 makes the remaining code simpler. */
15269 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
15270 || code == LTGT || code == LT || code == UNLE)
15272 code = reverse_condition_maybe_unordered (code);
15273 temp = true_cond;
15274 true_cond = false_cond;
15275 false_cond = temp;
15278 /* UNEQ and LTGT take four instructions for a comparison with zero;
15279 it'll probably be faster to use a branch here too. */
15280 if (code == UNEQ && HONOR_NANS (compare_mode))
15281 return false;
15283 /* We're going to try to implement comparisons by performing
15284 a subtract, then comparing against zero. Unfortunately,
15285 Inf - Inf is NaN which is not zero, and so if we don't
15286 know that the operand is finite and the comparison
15287 would treat EQ differently from UNORDERED, we can't do it. */
15288 if (HONOR_INFINITIES (compare_mode)
15289 && code != GT && code != UNGE
15290 && (!CONST_DOUBLE_P (op1)
15291 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
15292 /* Constructs of the form (a OP b ? a : b) are safe. */
15293 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
15294 || (! rtx_equal_p (op0, true_cond)
15295 && ! rtx_equal_p (op1, true_cond))))
15296 return false;
15298 /* At this point we know we can use fsel. */
15300 /* Don't allow compare_mode other than SFmode or DFmode, for others there
15301 is no fsel instruction. */
15302 if (compare_mode != SFmode && compare_mode != DFmode)
15303 return false;
15305 /* Reduce the comparison to a comparison against zero. */
15306 if (! is_against_zero)
15308 temp = gen_reg_rtx (compare_mode);
15309 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
15310 op0 = temp;
15311 op1 = CONST0_RTX (compare_mode);
15314 /* If we don't care about NaNs we can reduce some of the comparisons
15315 down to faster ones. */
15316 if (! HONOR_NANS (compare_mode))
15317 switch (code)
15319 case GT:
15320 code = LE;
15321 temp = true_cond;
15322 true_cond = false_cond;
15323 false_cond = temp;
15324 break;
15325 case UNGE:
15326 code = GE;
15327 break;
15328 case UNEQ:
15329 code = EQ;
15330 break;
15331 default:
15332 break;
15335 /* Now, reduce everything down to a GE. */
15336 switch (code)
15338 case GE:
15339 break;
15341 case LE:
15342 temp = gen_reg_rtx (compare_mode);
15343 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15344 op0 = temp;
15345 break;
15347 case ORDERED:
15348 temp = gen_reg_rtx (compare_mode);
15349 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15350 op0 = temp;
15351 break;
15353 case EQ:
15354 temp = gen_reg_rtx (compare_mode);
15355 emit_insn (gen_rtx_SET (temp,
15356 gen_rtx_NEG (compare_mode,
15357 gen_rtx_ABS (compare_mode, op0))));
15358 op0 = temp;
15359 break;
15361 case UNGE:
15362 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15363 temp = gen_reg_rtx (result_mode);
15364 emit_insn (gen_rtx_SET (temp,
15365 gen_rtx_IF_THEN_ELSE (result_mode,
15366 gen_rtx_GE (VOIDmode,
15367 op0, op1),
15368 true_cond, false_cond)));
15369 false_cond = true_cond;
15370 true_cond = temp;
15372 temp = gen_reg_rtx (compare_mode);
15373 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15374 op0 = temp;
15375 break;
15377 case GT:
15378 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15379 temp = gen_reg_rtx (result_mode);
15380 emit_insn (gen_rtx_SET (temp,
15381 gen_rtx_IF_THEN_ELSE (result_mode,
15382 gen_rtx_GE (VOIDmode,
15383 op0, op1),
15384 true_cond, false_cond)));
15385 true_cond = false_cond;
15386 false_cond = temp;
15388 temp = gen_reg_rtx (compare_mode);
15389 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15390 op0 = temp;
15391 break;
15393 default:
15394 gcc_unreachable ();
15397 emit_insn (gen_rtx_SET (dest,
15398 gen_rtx_IF_THEN_ELSE (result_mode,
15399 gen_rtx_GE (VOIDmode,
15400 op0, op1),
15401 true_cond, false_cond)));
15402 return true;
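/* Illustrative sketch, kept out of the build with #if 0: the
   reductions above on scalars.  After the optional subtract, every
   case ends as a single "x >= 0.0" test, which is exactly the
   predicate fsel evaluates.  The *0 names are hypothetical.  */
#if 0
#include <math.h>
static int ge0 (double x) { return x >= 0.0; }
static int le0 (double x) { return ge0 (-x); }		/* LE: negate op0 */
static int ordered0 (double x) { return ge0 (fabs (x)); }  /* false only for NaN */
static int eq0 (double x) { return ge0 (-fabs (x)); }	/* -|x| >= 0 only for x == 0 */
#endif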
15405 /* Same as above, but for ints (isel). */
15407 bool
15408 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15410 rtx condition_rtx, cr;
15411 machine_mode mode = GET_MODE (dest);
15412 enum rtx_code cond_code;
15413 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15414 bool signedp;
15416 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15417 return false;
15419 /* We still have to do the compare, because isel doesn't do a
15420 compare; it just looks at the CRx bits set by a previous compare
15421 instruction. */
15422 condition_rtx = rs6000_generate_compare (op, mode);
15423 cond_code = GET_CODE (condition_rtx);
15424 cr = XEXP (condition_rtx, 0);
15425 signedp = GET_MODE (cr) == CCmode;
15427 isel_func = (mode == SImode
15428 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15429 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15431 switch (cond_code)
15433 case LT: case GT: case LTU: case GTU: case EQ:
15434 /* isel handles these directly. */
15435 break;
15437 default:
15438 /* We need to swap the sense of the comparison. */
15440 std::swap (false_cond, true_cond);
15441 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15443 break;
15446 false_cond = force_reg (mode, false_cond);
15447 if (true_cond != const0_rtx)
15448 true_cond = force_reg (mode, true_cond);
15450 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15452 return true;
15455 void
15456 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15458 machine_mode mode = GET_MODE (op0);
15459 enum rtx_code c;
15460 rtx target;
15462 /* VSX/altivec have direct min/max insns. */
15463 if ((code == SMAX || code == SMIN)
15464 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15465 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15467 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15468 return;
15471 if (code == SMAX || code == SMIN)
15472 c = GE;
15473 else
15474 c = GEU;
15476 if (code == SMAX || code == UMAX)
15477 target = emit_conditional_move (dest, c, op0, op1, mode,
15478 op0, op1, mode, 0);
15479 else
15480 target = emit_conditional_move (dest, c, op0, op1, mode,
15481 op1, op0, mode, 0);
15482 gcc_assert (target);
15483 if (target != dest)
15484 emit_move_insn (dest, target);
15487 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15488 COND is true. Mark the jump as unlikely to be taken. */
15490 static void
15491 emit_unlikely_jump (rtx cond, rtx label)
15493 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15494 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15495 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15498 /* A subroutine of the atomic operation splitters. Emit a load-locked
15499 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15500 the zero_extend operation. */
15502 static void
15503 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15505 rtx (*fn) (rtx, rtx) = NULL;
15507 switch (mode)
15509 case E_QImode:
15510 fn = gen_load_lockedqi;
15511 break;
15512 case E_HImode:
15513 fn = gen_load_lockedhi;
15514 break;
15515 case E_SImode:
15516 if (GET_MODE (mem) == QImode)
15517 fn = gen_load_lockedqi_si;
15518 else if (GET_MODE (mem) == HImode)
15519 fn = gen_load_lockedhi_si;
15520 else
15521 fn = gen_load_lockedsi;
15522 break;
15523 case E_DImode:
15524 fn = gen_load_lockeddi;
15525 break;
15526 case E_TImode:
15527 fn = gen_load_lockedti;
15528 break;
15529 default:
15530 gcc_unreachable ();
15532 emit_insn (fn (reg, mem));
15535 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15536 instruction in MODE. */
15538 static void
15539 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15541 rtx (*fn) (rtx, rtx, rtx) = NULL;
15543 switch (mode)
15545 case E_QImode:
15546 fn = gen_store_conditionalqi;
15547 break;
15548 case E_HImode:
15549 fn = gen_store_conditionalhi;
15550 break;
15551 case E_SImode:
15552 fn = gen_store_conditionalsi;
15553 break;
15554 case E_DImode:
15555 fn = gen_store_conditionaldi;
15556 break;
15557 case E_TImode:
15558 fn = gen_store_conditionalti;
15559 break;
15560 default:
15561 gcc_unreachable ();
15564 /* Emit sync before stwcx. to address PPC405 Erratum. */
15565 if (PPC405_ERRATUM77)
15566 emit_insn (gen_hwsync ());
15568 emit_insn (fn (res, mem, val));
15571 /* Expand barriers before and after a load_locked/store_cond sequence. */
15573 static rtx
15574 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15576 rtx addr = XEXP (mem, 0);
15578 if (!legitimate_indirect_address_p (addr, reload_completed)
15579 && !legitimate_indexed_address_p (addr, reload_completed))
15581 addr = force_reg (Pmode, addr);
15582 mem = replace_equiv_address_nv (mem, addr);
15585 switch (model)
15587 case MEMMODEL_RELAXED:
15588 case MEMMODEL_CONSUME:
15589 case MEMMODEL_ACQUIRE:
15590 break;
15591 case MEMMODEL_RELEASE:
15592 case MEMMODEL_ACQ_REL:
15593 emit_insn (gen_lwsync ());
15594 break;
15595 case MEMMODEL_SEQ_CST:
15596 emit_insn (gen_hwsync ());
15597 break;
15598 default:
15599 gcc_unreachable ();
15601 return mem;
15604 static void
15605 rs6000_post_atomic_barrier (enum memmodel model)
15607 switch (model)
15609 case MEMMODEL_RELAXED:
15610 case MEMMODEL_CONSUME:
15611 case MEMMODEL_RELEASE:
15612 break;
15613 case MEMMODEL_ACQUIRE:
15614 case MEMMODEL_ACQ_REL:
15615 case MEMMODEL_SEQ_CST:
15616 emit_insn (gen_isync ());
15617 break;
15618 default:
15619 gcc_unreachable ();
15623 /* A subroutine of the various atomic expanders. For sub-word operations,
15624 we must adjust things to operate on SImode. Given the original MEM,
15625 return a new aligned MEM. Also build and return the quantities by
15626 which to shift and mask. */
15628 static rtx
15629 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15631 rtx addr, align, shift, mask, mem;
15632 HOST_WIDE_INT shift_mask;
15633 machine_mode mode = GET_MODE (orig_mem);
15635 /* For smaller modes, we have to implement this via SImode. */
15636 shift_mask = (mode == QImode ? 0x18 : 0x10);
15638 addr = XEXP (orig_mem, 0);
15639 addr = force_reg (GET_MODE (addr), addr);
15641 /* Aligned memory containing subword. Generate a new memory. We
15642 do not want any of the existing MEM_ATTR data, as we're now
15643 accessing memory outside the original object. */
15644 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15645 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15646 mem = gen_rtx_MEM (SImode, align);
15647 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15648 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15649 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15651 /* Shift amount for subword relative to aligned word. */
15652 shift = gen_reg_rtx (SImode);
15653 addr = gen_lowpart (SImode, addr);
15654 rtx tmp = gen_reg_rtx (SImode);
15655 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15656 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15657 if (BYTES_BIG_ENDIAN)
15658 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15659 shift, 1, OPTAB_LIB_WIDEN);
15660 *pshift = shift;
15662 /* Mask for insertion. */
15663 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15664 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15665 *pmask = mask;
15667 return mem;
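/* Illustrative sketch, kept out of the build with #if 0: the shift and
   mask arithmetic above for a QImode access at address 0x1003 on
   little-endian.  The containing word is loaded from 0x1003 & -4 ==
   0x1000, the shift is (0x1003 << 3) & 0x18 == 24, and the insertion
   mask is 0xff << 24.  On big-endian the shift is XORed with 0x18 so
   the same byte selects the mirrored lane.  Hypothetical names.  */
#if 0
static unsigned
example_subword_mask (unsigned addr, int is_qimode, int big_endian,
		      unsigned *pshift)
{
  unsigned shift_mask = is_qimode ? 0x18 : 0x10;
  unsigned shift = (addr << 3) & shift_mask;	/* bit offset in the word */
  if (big_endian)
    shift ^= shift_mask;
  *pshift = shift;
  return (is_qimode ? 0xffu : 0xffffu) << shift;
}
#endif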
15670 /* A subroutine of the various atomic expanders. For sub-word operands,
15671 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15673 static rtx
15674 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15676 rtx x;
15678 x = gen_reg_rtx (SImode);
15679 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15680 gen_rtx_NOT (SImode, mask),
15681 oldval)));
15683 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15685 return x;
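/* Illustrative sketch, kept out of the build with #if 0: the merge
   above is (oldval & ~mask) | newval, where NEWVAL has already been
   shifted into the field that MASK selects.  Hypothetical name.  */
#if 0
static unsigned
example_merge_subword (unsigned oldval, unsigned newval, unsigned mask)
{
  return (oldval & ~mask) | newval;
}
#endif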
15688 /* A subroutine of the various atomic expanders. For sub-word operands,
15689 extract WIDE to NARROW via SHIFT. */
15691 static void
15692 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15694 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15695 wide, 1, OPTAB_LIB_WIDEN);
15696 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15699 /* Expand an atomic compare and swap operation. */
15701 void
15702 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15704 rtx boolval, retval, mem, oldval, newval, cond;
15705 rtx label1, label2, x, mask, shift;
15706 machine_mode mode, orig_mode;
15707 enum memmodel mod_s, mod_f;
15708 bool is_weak;
15710 boolval = operands[0];
15711 retval = operands[1];
15712 mem = operands[2];
15713 oldval = operands[3];
15714 newval = operands[4];
15715 is_weak = (INTVAL (operands[5]) != 0);
15716 mod_s = memmodel_base (INTVAL (operands[6]));
15717 mod_f = memmodel_base (INTVAL (operands[7]));
15718 orig_mode = mode = GET_MODE (mem);
15720 mask = shift = NULL_RTX;
15721 if (mode == QImode || mode == HImode)
15723 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15724 lwarx and shift/mask operations. With power8, we need to do the
15725 comparison in SImode, but the store is still done in QI/HImode. */
15726 oldval = convert_modes (SImode, mode, oldval, 1);
15728 if (!TARGET_SYNC_HI_QI)
15730 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15732 /* Shift and mask OLDVAL into position within the word. */
15733 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15734 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15736 /* Shift and mask NEWVAL into position within the word. */
15737 newval = convert_modes (SImode, mode, newval, 1);
15738 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15739 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15742 /* Prepare to adjust the return value. */
15743 retval = gen_reg_rtx (SImode);
15744 mode = SImode;
15746 else if (reg_overlap_mentioned_p (retval, oldval))
15747 oldval = copy_to_reg (oldval);
15749 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15750 oldval = copy_to_mode_reg (mode, oldval);
15752 if (reg_overlap_mentioned_p (retval, newval))
15753 newval = copy_to_reg (newval);
15755 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15757 label1 = NULL_RTX;
15758 if (!is_weak)
15760 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15761 emit_label (XEXP (label1, 0));
15763 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15765 emit_load_locked (mode, retval, mem);
15767 x = retval;
15768 if (mask)
15769 x = expand_simple_binop (SImode, AND, retval, mask,
15770 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15772 cond = gen_reg_rtx (CCmode);
15773 /* If we have TImode, synthesize a comparison. */
15774 if (mode != TImode)
15775 x = gen_rtx_COMPARE (CCmode, x, oldval);
15776 else
15778 rtx xor1_result = gen_reg_rtx (DImode);
15779 rtx xor2_result = gen_reg_rtx (DImode);
15780 rtx or_result = gen_reg_rtx (DImode);
15781 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15782 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15783 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15784 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15786 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15787 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15788 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15789 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15792 emit_insn (gen_rtx_SET (cond, x));
15794 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15795 emit_unlikely_jump (x, label2);
15797 x = newval;
15798 if (mask)
15799 x = rs6000_mask_atomic_subword (retval, newval, mask);
15801 emit_store_conditional (orig_mode, cond, mem, x);
15803 if (!is_weak)
15805 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15806 emit_unlikely_jump (x, label1);
15809 if (!is_mm_relaxed (mod_f))
15810 emit_label (XEXP (label2, 0));
15812 rs6000_post_atomic_barrier (mod_s);
15814 if (is_mm_relaxed (mod_f))
15815 emit_label (XEXP (label2, 0));
15817 if (shift)
15818 rs6000_finish_atomic_subword (operands[1], retval, shift);
15819 else if (mode != GET_MODE (operands[1]))
15820 convert_move (operands[1], retval, 1);
15822 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15823 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15824 emit_insn (gen_rtx_SET (boolval, x));
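/* Illustrative sketch: the shape of the code emitted above for a
   strong, word-sized compare-and-swap, as an assembly approximation
   (labels and register names are hypothetical):

	0:	lwarx   r9,0,rMEM	# load-locked
		cmpw    cr0,r9,rOLD
		bne-    cr0,1f		# unlikely: values differ, fail
		stwcx.  rNEW,0,rMEM	# store-conditional
		bne-    cr0,0b		# unlikely: reservation lost, retry
	1:				# CR0.EQ now holds success  */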
15827 /* Expand an atomic exchange operation. */
15829 void
15830 rs6000_expand_atomic_exchange (rtx operands[])
15832 rtx retval, mem, val, cond;
15833 machine_mode mode;
15834 enum memmodel model;
15835 rtx label, x, mask, shift;
15837 retval = operands[0];
15838 mem = operands[1];
15839 val = operands[2];
15840 model = memmodel_base (INTVAL (operands[3]));
15841 mode = GET_MODE (mem);
15843 mask = shift = NULL_RTX;
15844 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15846 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15848 /* Shift and mask VAL into position within the word. */
15849 val = convert_modes (SImode, mode, val, 1);
15850 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15851 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15853 /* Prepare to adjust the return value. */
15854 retval = gen_reg_rtx (SImode);
15855 mode = SImode;
15858 mem = rs6000_pre_atomic_barrier (mem, model);
15860 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15861 emit_label (XEXP (label, 0));
15863 emit_load_locked (mode, retval, mem);
15865 x = val;
15866 if (mask)
15867 x = rs6000_mask_atomic_subword (retval, val, mask);
15869 cond = gen_reg_rtx (CCmode);
15870 emit_store_conditional (mode, cond, mem, x);
15872 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15873 emit_unlikely_jump (x, label);
15875 rs6000_post_atomic_barrier (model);
15877 if (shift)
15878 rs6000_finish_atomic_subword (operands[0], retval, shift);
15881 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15882 to perform. MEM is the memory on which to operate. VAL is the second
15883 operand of the binary operator. BEFORE and AFTER are optional locations to
15884 return the value of MEM either before or after the operation. MODEL_RTX
15885 is a CONST_INT containing the memory model to use. */
15887 void
15888 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15889 rtx orig_before, rtx orig_after, rtx model_rtx)
15891 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15892 machine_mode mode = GET_MODE (mem);
15893 machine_mode store_mode = mode;
15894 rtx label, x, cond, mask, shift;
15895 rtx before = orig_before, after = orig_after;
15897 mask = shift = NULL_RTX;
15898 /* On power8, we want to use SImode for the operation. On previous systems,
15899 do the operation on the containing word and shift/mask to get the proper
15900 byte or halfword. */
15901 if (mode == QImode || mode == HImode)
15903 if (TARGET_SYNC_HI_QI)
15905 val = convert_modes (SImode, mode, val, 1);
15907 /* Prepare to adjust the return value. */
15908 before = gen_reg_rtx (SImode);
15909 if (after)
15910 after = gen_reg_rtx (SImode);
15911 mode = SImode;
15913 else
15915 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15917 /* Shift and mask VAL into position within the word. */
15918 val = convert_modes (SImode, mode, val, 1);
15919 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15920 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15922 switch (code)
15924 case IOR:
15925 case XOR:
15926 /* We've already zero-extended VAL. That is sufficient to
15927 make certain that it does not affect other bits. */
15928 mask = NULL;
15929 break;
15931 case AND:
15932 /* If we make certain that all of the other bits in VAL are
15933 set, that will be sufficient to not affect other bits. */
15934 x = gen_rtx_NOT (SImode, mask);
15935 x = gen_rtx_IOR (SImode, x, val);
15936 emit_insn (gen_rtx_SET (val, x));
15937 mask = NULL;
15938 break;
15940 case NOT:
15941 case PLUS:
15942 case MINUS:
15943 /* These will all affect bits outside the field and need
15944 adjustment via MASK within the loop. */
15945 break;
15947 default:
15948 gcc_unreachable ();
15951 /* Prepare to adjust the return value. */
15952 before = gen_reg_rtx (SImode);
15953 if (after)
15954 after = gen_reg_rtx (SImode);
15955 store_mode = mode = SImode;
15959 mem = rs6000_pre_atomic_barrier (mem, model);
15961 label = gen_label_rtx ();
15962 emit_label (label);
15963 label = gen_rtx_LABEL_REF (VOIDmode, label);
15965 if (before == NULL_RTX)
15966 before = gen_reg_rtx (mode);
15968 emit_load_locked (mode, before, mem);
15970 if (code == NOT)
15972 x = expand_simple_binop (mode, AND, before, val,
15973 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15974 after = expand_simple_unop (mode, NOT, x, after, 1);
15976 else
15978 after = expand_simple_binop (mode, code, before, val,
15979 after, 1, OPTAB_LIB_WIDEN);
15982 x = after;
15983 if (mask)
15985 x = expand_simple_binop (SImode, AND, after, mask,
15986 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15987 x = rs6000_mask_atomic_subword (before, x, mask);
15989 else if (store_mode != mode)
15990 x = convert_modes (store_mode, mode, x, 1);
15992 cond = gen_reg_rtx (CCmode);
15993 emit_store_conditional (store_mode, cond, mem, x);
15995 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15996 emit_unlikely_jump (x, label);
15998 rs6000_post_atomic_barrier (model);
16000 if (shift)
16002 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16003 then do the calculations in a SImode register. */
16004 if (orig_before)
16005 rs6000_finish_atomic_subword (orig_before, before, shift);
16006 if (orig_after)
16007 rs6000_finish_atomic_subword (orig_after, after, shift);
16009 else if (store_mode != mode)
16011 /* QImode/HImode on machines with lbarx/lharx where we do the native
16012 operation and then do the calculations in a SImode register. */
16013 if (orig_before)
16014 convert_move (orig_before, before, 1);
16015 if (orig_after)
16016 convert_move (orig_after, after, 1);
16018 else if (orig_after && after != orig_after)
16019 emit_move_insn (orig_after, after);
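/* Illustrative sketch, kept out of the build with #if 0: why AND needs
   no post-masking in the subword path above.  Forcing every bit
   outside the field to 1 in VAL makes the AND an identity on the
   neighboring bytes of the containing word.  Hypothetical name.  */
#if 0
static unsigned
example_subword_and (unsigned word, unsigned val, unsigned mask)
{
  return word & (val | ~mask);	/* bits outside MASK are unchanged */
}
#endif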
16022 /* Emit instructions to move SRC to DST. Called by splitters for
16023 multi-register moves. It will emit at most one instruction for
16024 each register that is accessed; that is, it won't emit li/lis pairs
16025 (or equivalent for 64-bit code). One of SRC or DST must be a hard
16026 register. */
16028 void
16029 rs6000_split_multireg_move (rtx dst, rtx src)
16031 /* The register number of the first register being moved. */
16032 int reg;
16033 /* The mode that is to be moved. */
16034 machine_mode mode;
16035 /* The mode that the move is being done in, and its size. */
16036 machine_mode reg_mode;
16037 int reg_mode_size;
16038 /* The number of registers that will be moved. */
16039 int nregs;
16041 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
16042 mode = GET_MODE (dst);
16043 nregs = hard_regno_nregs (reg, mode);
16045 /* If we have a vector quad register for MMA, and this is a load or store,
16046 see if we can use vector paired load/stores. */
16047 if (mode == PXImode && TARGET_MMA
16048 && (MEM_P (dst) || MEM_P (src)))
16050 reg_mode = POImode;
16051 nregs /= 2;
16053 /* If we have a vector pair/quad mode, split it into two/four separate
16054 vectors. */
16055 else if (mode == POImode || mode == PXImode)
16056 reg_mode = V1TImode;
16057 else if (FP_REGNO_P (reg))
16058 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
16059 (TARGET_HARD_FLOAT ? DFmode : SFmode);
16060 else if (ALTIVEC_REGNO_P (reg))
16061 reg_mode = V16QImode;
16062 else
16063 reg_mode = word_mode;
16064 reg_mode_size = GET_MODE_SIZE (reg_mode);
16066 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
16068 /* TDmode residing in FP registers is special, since the ISA requires that
16069 the lower-numbered word of a register pair is always the most significant
16070 word, even in little-endian mode. This does not match the usual subreg
16071 semantics, so we cannot use simplify_gen_subreg in those cases. Access
16072 the appropriate constituent registers "by hand" in little-endian mode.
16074 Note we do not need to check for destructive overlap here since TDmode
16075 can only reside in even/odd register pairs. */
16076 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
16078 rtx p_src, p_dst;
16079 int i;
16081 for (i = 0; i < nregs; i++)
16083 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
16084 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
16085 else
16086 p_src = simplify_gen_subreg (reg_mode, src, mode,
16087 i * reg_mode_size);
16089 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
16090 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
16091 else
16092 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
16093 i * reg_mode_size);
16095 emit_insn (gen_rtx_SET (p_dst, p_src));
16098 return;
16101 /* The __vector_pair and __vector_quad modes are multi-register modes,
16102 so if we have to load or store the registers, we have to be careful to
16103 properly swap them if we're in little endian mode below. This means
16104 the last register gets the first memory location. */
16105 if (mode == POImode || mode == PXImode)
16107 if (MEM_P (dst))
16109 unsigned offset = 0;
16110 unsigned size = GET_MODE_SIZE (reg_mode);
16112 /* If we are reading an accumulator register, we have to
16113 deprime it before we can access it. */
16114 if (TARGET_MMA
16115 && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
16116 emit_insn (gen_mma_xxmfacc (src, src));
16118 for (int i = 0; i < nregs; i++)
16120 unsigned subreg = (WORDS_BIG_ENDIAN)
16121 ? i * size : (nregs - 1 - i) * size;
16122 rtx dst2 = adjust_address (dst, reg_mode, offset);
16123 rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg);
16124 offset += size;
16125 emit_insn (gen_rtx_SET (dst2, src2));
16128 return;
16131 if (MEM_P (src))
16133 unsigned offset = 0;
16134 unsigned size = GET_MODE_SIZE (reg_mode);
16136 for (int i = 0; i < nregs; i++)
16138 unsigned subreg = (WORDS_BIG_ENDIAN)
16139 ? i * size : (nregs - 1 - i) * size;
16140 rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg);
16141 rtx src2 = adjust_address (src, reg_mode, offset);
16142 offset += size;
16143 emit_insn (gen_rtx_SET (dst2, src2));
16146 /* If we are writing an accumulator register, we have to
16147 prime it after we've written it. */
16148 if (TARGET_MMA
16149 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
16150 emit_insn (gen_mma_xxmtacc (dst, dst));
16152 return;
16155 if (GET_CODE (src) == UNSPEC)
16157 gcc_assert (REG_P (dst)
16158 && FP_REGNO_P (REGNO (dst))
16159 && XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC);
16161 reg_mode = GET_MODE (XVECEXP (src, 0, 0));
16162 for (int i = 0; i < XVECLEN (src, 0); i++)
16164 rtx dst_i = gen_rtx_REG (reg_mode, reg + i);
16165 emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
16168 /* We are writing an accumulator register, so we have to
16169 prime it after we've written it. */
16170 emit_insn (gen_mma_xxmtacc (dst, dst));
16172 return;
16175 /* Register -> register moves can use common code. */
16178 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
16180 /* If we are reading an accumulator register, we have to
16181 deprime it before we can access it. */
16182 if (TARGET_MMA
16183 && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
16184 emit_insn (gen_mma_xxmfacc (src, src));
16186 /* Move register range backwards, if we might have destructive
16187 overlap. */
16188 int i;
16189 for (i = nregs - 1; i >= 0; i--)
16190 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16191 i * reg_mode_size),
16192 simplify_gen_subreg (reg_mode, src, mode,
16193 i * reg_mode_size)));
16195 /* If we are writing an accumulator register, we have to
16196 prime it after we've written it. */
16197 if (TARGET_MMA
16198 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
16199 emit_insn (gen_mma_xxmtacc (dst, dst));
16201 else
16203 int i;
16204 int j = -1;
16205 bool used_update = false;
16206 rtx restore_basereg = NULL_RTX;
16208 if (MEM_P (src) && INT_REGNO_P (reg))
16210 rtx breg;
16212 if (GET_CODE (XEXP (src, 0)) == PRE_INC
16213 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
16215 rtx delta_rtx;
16216 breg = XEXP (XEXP (src, 0), 0);
16217 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
16218 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
16219 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
16220 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16221 src = replace_equiv_address (src, breg);
16223 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
16225 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
16227 rtx basereg = XEXP (XEXP (src, 0), 0);
16228 if (TARGET_UPDATE)
16230 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
16231 emit_insn (gen_rtx_SET (ndst,
16232 gen_rtx_MEM (reg_mode,
16233 XEXP (src, 0))));
16234 used_update = true;
16236 else
16237 emit_insn (gen_rtx_SET (basereg,
16238 XEXP (XEXP (src, 0), 1)));
16239 src = replace_equiv_address (src, basereg);
16241 else
16243 rtx basereg = gen_rtx_REG (Pmode, reg);
16244 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
16245 src = replace_equiv_address (src, basereg);
16249 breg = XEXP (src, 0);
16250 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
16251 breg = XEXP (breg, 0);
16253 /* If the base register we are using to address memory is
16254 also a destination reg, then change that register last. */
16255 if (REG_P (breg)
16256 && REGNO (breg) >= REGNO (dst)
16257 && REGNO (breg) < REGNO (dst) + nregs)
16258 j = REGNO (breg) - REGNO (dst);
16260 else if (MEM_P (dst) && INT_REGNO_P (reg))
16262 rtx breg;
16264 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
16265 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
16267 rtx delta_rtx;
16268 breg = XEXP (XEXP (dst, 0), 0);
16269 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
16270 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
16271 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
16273 /* We have to update the breg before doing the store.
16274 Use store with update, if available. */
16276 if (TARGET_UPDATE)
16278 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16279 emit_insn (TARGET_32BIT
16280 ? (TARGET_POWERPC64
16281 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
16282 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
16283 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
16284 used_update = true;
16286 else
16287 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
16288 dst = replace_equiv_address (dst, breg);
16290 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
16291 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
16293 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
16295 rtx basereg = XEXP (XEXP (dst, 0), 0);
16296 if (TARGET_UPDATE)
16298 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
16299 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
16300 XEXP (dst, 0)),
16301 nsrc));
16302 used_update = true;
16304 else
16305 emit_insn (gen_rtx_SET (basereg,
16306 XEXP (XEXP (dst, 0), 1)));
16307 dst = replace_equiv_address (dst, basereg);
16309 else
16311 rtx basereg = XEXP (XEXP (dst, 0), 0);
16312 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
16313 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
16314 && REG_P (basereg)
16315 && REG_P (offsetreg)
16316 && REGNO (basereg) != REGNO (offsetreg));
16317 if (REGNO (basereg) == 0)
16319 rtx tmp = offsetreg;
16320 offsetreg = basereg;
16321 basereg = tmp;
16323 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
16324 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
16325 dst = replace_equiv_address (dst, basereg);
16328 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
16329 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
16332 /* If we are reading an accumulator register, we have to
16333 deprime it before we can access it. */
16334 if (TARGET_MMA && REG_P (src)
16335 && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src)))
16336 emit_insn (gen_mma_xxmfacc (src, src));
16338 for (i = 0; i < nregs; i++)
16340 /* Calculate index to next subword. */
16341 ++j;
16342 if (j == nregs)
16343 j = 0;
16345 /* If compiler already emitted move of first word by
16346 store with update, no need to do anything. */
16347 if (j == 0 && used_update)
16348 continue;
16350 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
16351 j * reg_mode_size),
16352 simplify_gen_subreg (reg_mode, src, mode,
16353 j * reg_mode_size)));
16356 /* If we are writing an accumulator register, we have to
16357 prime it after we've written it. */
16358 if (TARGET_MMA && REG_P (dst)
16359 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst)))
16360 emit_insn (gen_mma_xxmtacc (dst, dst));
16362 if (restore_basereg != NULL_RTX)
16363 emit_insn (restore_basereg);
16367 static GTY(()) alias_set_type TOC_alias_set = -1;
16369 alias_set_type
16370 get_TOC_alias_set (void)
16372 if (TOC_alias_set == -1)
16373 TOC_alias_set = new_alias_set ();
16374 return TOC_alias_set;
16377 /* The mode the ABI uses for a word. This is not the same as word_mode
16378 for -m32 -mpowerpc64. This is used to implement various target hooks. */
16380 static scalar_int_mode
16381 rs6000_abi_word_mode (void)
16383 return TARGET_32BIT ? SImode : DImode;
16386 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
16387 static char *
16388 rs6000_offload_options (void)
16390 if (TARGET_64BIT)
16391 return xstrdup ("-foffload-abi=lp64");
16392 else
16393 return xstrdup ("-foffload-abi=ilp32");
16397 /* A quick summary of the various types of 'constant-pool tables'
16398 under PowerPC:
16400     Target      Flags                Name              One table per
16401     AIX         (none)               AIX TOC           object file
16402     AIX         -mfull-toc           AIX TOC           object file
16403     AIX         -mminimal-toc        AIX minimal TOC   translation unit
16404     SVR4/EABI   (none)               SVR4 SDATA        object file
16405     SVR4/EABI   -fpic                SVR4 pic          object file
16406     SVR4/EABI   -fPIC                SVR4 PIC          translation unit
16407     SVR4/EABI   -mrelocatable        EABI TOC          function
16408     SVR4/EABI   -maix                AIX TOC           object file
16409     SVR4/EABI   -maix -mminimal-toc
16410                                      AIX minimal TOC   translation unit
16412     Name              Reg.   Set by   made by   addrs?   fp?       sum?
16415     AIX TOC           2      crt0     as        Y        option    option
16416     AIX minimal TOC   30     prolog   gcc       Y        Y         option
16417     SVR4 SDATA        13     crt0     gcc       N        Y         N
16418     SVR4 pic          30     prolog   ld        Y        not yet   N
16419     SVR4 PIC          30     prolog   gcc       Y        option    option
16420     EABI TOC          30     prolog   gcc       Y        option    option
16424 /* Hash functions for the hash table. */
16426 static unsigned
16427 rs6000_hash_constant (rtx k)
16429 enum rtx_code code = GET_CODE (k);
16430 machine_mode mode = GET_MODE (k);
16431 unsigned result = (code << 3) ^ mode;
16432 const char *format;
16433 int flen, fidx;
16435 format = GET_RTX_FORMAT (code);
16436 flen = strlen (format);
16437 fidx = 0;
16439 switch (code)
16441 case LABEL_REF:
16442 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
16444 case CONST_WIDE_INT:
16446 int i;
16447 flen = CONST_WIDE_INT_NUNITS (k);
16448 for (i = 0; i < flen; i++)
16449 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16450 return result;
16453 case CONST_DOUBLE:
16454 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16456 case CODE_LABEL:
16457 fidx = 3;
16458 break;
16460 default:
16461 break;
16464 for (; fidx < flen; fidx++)
16465 switch (format[fidx])
16467 case 's':
16469 unsigned i, len;
16470 const char *str = XSTR (k, fidx);
16471 len = strlen (str);
16472 result = result * 613 + len;
16473 for (i = 0; i < len; i++)
16474 result = result * 613 + (unsigned) str[i];
16475 break;
16477 case 'u':
16478 case 'e':
16479 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16480 break;
16481 case 'i':
16482 case 'n':
16483 result = result * 613 + (unsigned) XINT (k, fidx);
16484 break;
16485 case 'w':
16486 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16487 result = result * 613 + (unsigned) XWINT (k, fidx);
16488 else
16490 size_t i;
16491 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16492 result = result * 613 + (unsigned) (XWINT (k, fidx)
16493 >> CHAR_BIT * i);
16495 break;
16496 case '0':
16497 break;
16498 default:
16499 gcc_unreachable ();
16502 return result;
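/* Illustrative sketch, kept out of the build with #if 0: the mixing
   scheme above is a multiply-accumulate hash using the odd constants
   613 (for fields) and 1231 (for sub-rtx recursion); the 's' case is
   equivalent to this.  Hypothetical name.  */
#if 0
static unsigned
example_hash_string (unsigned h, const char *str)
{
  unsigned len = 0;
  while (str[len])
    len++;
  h = h * 613 + len;			/* mix in the length first */
  for (unsigned i = 0; i < len; i++)
    h = h * 613 + (unsigned) str[i];	/* then each character */
  return h;
}
#endif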
16505 hashval_t
16506 toc_hasher::hash (toc_hash_struct *thc)
16508 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16511 /* Compare H1 and H2 for equivalence. */
16513 bool
16514 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16516 rtx r1 = h1->key;
16517 rtx r2 = h2->key;
16519 if (h1->key_mode != h2->key_mode)
16520 return 0;
16522 return rtx_equal_p (r1, r2);
16525 /* These are the names given by the C++ front-end to vtables, and
16526 vtable-like objects. Ideally, this logic should not be here;
16527 instead, there should be some programmatic way of inquiring as
16528 to whether or not an object is a vtable. */
16530 #define VTABLE_NAME_P(NAME) \
16531 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16532 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16533 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16534 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16535 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
16537 #ifdef NO_DOLLAR_IN_LABEL
16538 /* Return a GGC-allocated character string translating dollar signs in
16539 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16541 const char *
16542 rs6000_xcoff_strip_dollar (const char *name)
16544 char *strip, *p;
16545 const char *q;
16546 size_t len;
16548 q = (const char *) strchr (name, '$');
16550 if (q == 0 || q == name)
16551 return name;
16553 len = strlen (name);
16554 strip = XALLOCAVEC (char, len + 1);
16555 strcpy (strip, name);
16556 p = strip + (q - name);
16557 while (p)
16559 *p = '_';
16560 p = strchr (p + 1, '$');
16563 return ggc_alloc_string (strip, len);
16565 #endif
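/* Usage sketch (illustrative only): rs6000_xcoff_strip_dollar ("f$o$o")
   yields "f_o_o", while a name whose only '$' is its first character,
   such as "$foo", is returned unchanged.  */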
16567 void
16568 rs6000_output_symbol_ref (FILE *file, rtx x)
16570 const char *name = XSTR (x, 0);
16572 /* Currently C++ toc references to vtables can be emitted before it
16573 is decided whether the vtable is public or private. If this is
16574 the case, then the linker will eventually complain that there is
16575 a reference to an unknown section. Thus, for vtables only,
16576 we emit the TOC reference to reference the identifier and not the
16577 symbol. */
16578 if (VTABLE_NAME_P (name))
16580 RS6000_OUTPUT_BASENAME (file, name);
16582 else
16583 assemble_name (file, name);
16586 /* Output a TOC entry. We derive the entry name from what is being
16587 written. */
16589 void
16590 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16592 char buf[256];
16593 const char *name = buf;
16594 rtx base = x;
16595 HOST_WIDE_INT offset = 0;
16597 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16599 /* When the linker won't eliminate them, don't output duplicate
16600 TOC entries (this happens on AIX if there is any kind of TOC,
16601 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16602 CODE_LABELs. */
16603 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16605 struct toc_hash_struct *h;
16607 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16608 time because GGC is not initialized at that point. */
16609 if (toc_hash_table == NULL)
16610 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16612 h = ggc_alloc<toc_hash_struct> ();
16613 h->key = x;
16614 h->key_mode = mode;
16615 h->labelno = labelno;
16617 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16618 if (*found == NULL)
16619 *found = h;
16620 else /* This is indeed a duplicate.
16621 Set this label equal to that label. */
16623 fputs ("\t.set ", file);
16624 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16625 fprintf (file, "%d,", labelno);
16626 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16627 fprintf (file, "%d\n", ((*found)->labelno));
16629 #ifdef HAVE_AS_TLS
16630 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16631 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16632 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16634 fputs ("\t.set ", file);
16635 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16636 fprintf (file, "%d,", labelno);
16637 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16638 fprintf (file, "%d\n", ((*found)->labelno));
16640 #endif
16641 return;
16645 /* If we're going to put a double constant in the TOC, make sure it's
16646 aligned properly when strict alignment is on. */
16647 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16648 && STRICT_ALIGNMENT
16649 && GET_MODE_BITSIZE (mode) >= 64
16650 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
16651 ASM_OUTPUT_ALIGN (file, 3);
16654 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16656 /* Handle FP constants specially. Note that if we have a minimal
16657 TOC, things we put here aren't actually in the TOC, so we can allow
16658 FP constants. */
16659 if (CONST_DOUBLE_P (x)
16660 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16661 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16663 long k[4];
16665 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16666 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16667 else
16668 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16670 if (TARGET_64BIT)
16672 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16673 fputs (DOUBLE_INT_ASM_OP, file);
16674 else
16675 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16676 k[0] & 0xffffffff, k[1] & 0xffffffff,
16677 k[2] & 0xffffffff, k[3] & 0xffffffff);
16678 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16679 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16680 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16681 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16682 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16683 return;
16685 else
16687 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16688 fputs ("\t.long ", file);
16689 else
16690 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16691 k[0] & 0xffffffff, k[1] & 0xffffffff,
16692 k[2] & 0xffffffff, k[3] & 0xffffffff);
16693 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16694 k[0] & 0xffffffff, k[1] & 0xffffffff,
16695 k[2] & 0xffffffff, k[3] & 0xffffffff);
16696 return;
16699 else if (CONST_DOUBLE_P (x)
16700 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16702 long k[2];
16704 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16705 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16706 else
16707 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16709 if (TARGET_64BIT)
16711 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16712 fputs (DOUBLE_INT_ASM_OP, file);
16713 else
16714 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16715 k[0] & 0xffffffff, k[1] & 0xffffffff);
16716 fprintf (file, "0x%lx%08lx\n",
16717 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16718 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16719 return;
16721 else
16723 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16724 fputs ("\t.long ", file);
16725 else
16726 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16727 k[0] & 0xffffffff, k[1] & 0xffffffff);
16728 fprintf (file, "0x%lx,0x%lx\n",
16729 k[0] & 0xffffffff, k[1] & 0xffffffff);
16730 return;
16733 else if (CONST_DOUBLE_P (x)
16734 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16736 long l;
16738 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16739 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16740 else
16741 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16743 if (TARGET_64BIT)
16745 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16746 fputs (DOUBLE_INT_ASM_OP, file);
16747 else
16748 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16749 if (WORDS_BIG_ENDIAN)
16750 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16751 else
16752 fprintf (file, "0x%lx\n", l & 0xffffffff);
16753 return;
16755 else
16757 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16758 fputs ("\t.long ", file);
16759 else
16760 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16761 fprintf (file, "0x%lx\n", l & 0xffffffff);
16762 return;
16765 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16767 unsigned HOST_WIDE_INT low;
16768 HOST_WIDE_INT high;
16770 low = INTVAL (x) & 0xffffffff;
16771 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16773 /* TOC entries are always Pmode-sized, so when big-endian
16774 smaller integer constants in the TOC need to be padded.
16775 (This is still a win over putting the constants in
16776 a separate constant pool, because then we'd have
16777 to have both a TOC entry _and_ the actual constant.)
16779 For a 32-bit target, CONST_INT values are loaded and shifted
16780 entirely within `low' and can be stored in one TOC entry. */
16782 /* It would be easy to make this work, but it doesn't now. */
16783 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16785 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16787 low |= high << 32;
16788 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16789 high = (HOST_WIDE_INT) low >> 32;
16790 low &= 0xffffffff;
16793 if (TARGET_64BIT)
16795 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16796 fputs (DOUBLE_INT_ASM_OP, file);
16797 else
16798 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16799 (long) high & 0xffffffff, (long) low & 0xffffffff);
16800 fprintf (file, "0x%lx%08lx\n",
16801 (long) high & 0xffffffff, (long) low & 0xffffffff);
16802 return;
16804 else
16806 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16808 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16809 fputs ("\t.long ", file);
16810 else
16811 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16812 (long) high & 0xffffffff, (long) low & 0xffffffff);
16813 fprintf (file, "0x%lx,0x%lx\n",
16814 (long) high & 0xffffffff, (long) low & 0xffffffff);
16816 else
16818 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16819 fputs ("\t.long ", file);
16820 else
16821 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16822 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16824 return;
16828 if (GET_CODE (x) == CONST)
16830 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16831 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16833 base = XEXP (XEXP (x, 0), 0);
16834 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16837 switch (GET_CODE (base))
16839 case SYMBOL_REF:
16840 name = XSTR (base, 0);
16841 break;
16843 case LABEL_REF:
16844 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16845 CODE_LABEL_NUMBER (XEXP (base, 0)));
16846 break;
16848 case CODE_LABEL:
16849 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16850 break;
16852 default:
16853 gcc_unreachable ();
16856 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16857 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16858 else
16860 fputs ("\t.tc ", file);
16861 RS6000_OUTPUT_BASENAME (file, name);
16863 if (offset < 0)
16864 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16865 else if (offset)
16866 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16868 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16869 after other TOC symbols, reducing overflow of small TOC access
16870 to [TC] symbols. */
16871 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16872 ? "[TE]," : "[TC],", file);
16875 /* Currently C++ toc references to vtables can be emitted before it
16876 is decided whether the vtable is public or private. If this is
16877 the case, then the linker will eventually complain that there is
16878 a TOC reference to an unknown section. Thus, for vtables only,
16879	we emit the TOC reference against the symbol and not the
16880 section. */
16881 if (VTABLE_NAME_P (name))
16883 RS6000_OUTPUT_BASENAME (file, name);
16884 if (offset < 0)
16885 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16886 else if (offset > 0)
16887 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16889 else
16890 output_addr_const (file, x);
16892 #if HAVE_AS_TLS
16893 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16895 switch (SYMBOL_REF_TLS_MODEL (base))
16897 case 0:
16898 break;
16899 case TLS_MODEL_LOCAL_EXEC:
16900 fputs ("@le", file);
16901 break;
16902 case TLS_MODEL_INITIAL_EXEC:
16903 fputs ("@ie", file);
16904 break;
16905 /* Use global-dynamic for local-dynamic. */
16906 case TLS_MODEL_GLOBAL_DYNAMIC:
16907 case TLS_MODEL_LOCAL_DYNAMIC:
16908 putc ('\n', file);
16909 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16910 fputs ("\t.tc .", file);
16911 RS6000_OUTPUT_BASENAME (file, name);
16912 fputs ("[TC],", file);
16913 output_addr_const (file, x);
16914 fputs ("@m", file);
16915 break;
16916 default:
16917 gcc_unreachable ();
16920 #endif
16922 putc ('\n', file);
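/* As a worked example of the DFmode branch above: for the constant 1.0,
   REAL_VALUE_TO_TARGET_DOUBLE produces k[0] = 0x3ff00000 and
   k[1] = 0x00000000, so a 64-bit target writes "0x3ff0000000000000"
   after the directive chosen above.  The standalone sketch below (a
   plain ISO C program, independent of GCC internals, assuming the host
   represents double in IEEE-754 binary64) reproduces the word split:

     #include <stdio.h>
     #include <string.h>
     #include <stdint.h>

     int
     main (void)
     {
       double d = 1.0;
       uint64_t bits;
       memcpy (&bits, &d, sizeof bits);       // inspect the representation
       long hi = (long) (bits >> 32);         // plays the role of k[0]
       long lo = (long) (bits & 0xffffffff);  // plays the role of k[1]
       printf ("0x%lx%08lx\n", hi & 0xffffffff, lo & 0xffffffff);
       return 0;
     }

   which prints "0x3ff0000000000000", matching the TOC entry emitted
   for big-endian word order.  */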
16925 /* Output an assembler pseudo-op to write an ASCII string of N characters
16926 starting at P to FILE.
16928 On the RS/6000, we have to do this using the .byte operation and
16929 write out special characters outside the quoted string.
16930 Also, the assembler is broken; very long strings are truncated,
16931 so we must artificially break them up early. */
16933 void
16934 output_ascii (FILE *file, const char *p, int n)
16936 char c;
16937 int i, count_string;
16938 const char *for_string = "\t.byte \"";
16939 const char *for_decimal = "\t.byte ";
16940 const char *to_close = NULL;
16942 count_string = 0;
16943 for (i = 0; i < n; i++)
16945 c = *p++;
16946 if (c >= ' ' && c < 0177)
16948 if (for_string)
16949 fputs (for_string, file);
16950 putc (c, file);
16952 /* Write two quotes to get one. */
16953 if (c == '"')
16955 putc (c, file);
16956 ++count_string;
16959 for_string = NULL;
16960 for_decimal = "\"\n\t.byte ";
16961 to_close = "\"\n";
16962 ++count_string;
16964 if (count_string >= 512)
16966 fputs (to_close, file);
16968 for_string = "\t.byte \"";
16969 for_decimal = "\t.byte ";
16970 to_close = NULL;
16971 count_string = 0;
16974 else
16976 if (for_decimal)
16977 fputs (for_decimal, file);
16978 fprintf (file, "%d", c);
16980 for_string = "\n\t.byte \"";
16981 for_decimal = ", ";
16982 to_close = "\n";
16983 count_string = 0;
16987 /* Now close the string if we have written one. Then end the line. */
16988 if (to_close)
16989 fputs (to_close, file);
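/* For example, output_ascii (file, "ab\"c\n", 5) walks the characters
   and emits

	.byte "ab""c"
	.byte 10

   that is, the printable run is quoted (with the embedded quote
   doubled) and the newline falls back to a decimal .byte, exactly as
   the state variables for_string/for_decimal/to_close arrange above.
   Quoted runs longer than 512 characters are closed and restarted to
   stay within the assembler's limits.  */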
16992 /* Generate a unique section name for FILENAME for a section type
16993 represented by SECTION_DESC. Output goes into BUF.
16995 SECTION_DESC can be any string, as long as it is different for each
16996 possible section type.
16998 We name the section in the same manner as xlc. The name begins with an
16999 underscore followed by the filename (after stripping any leading directory
17000 names) with the last period replaced by the string SECTION_DESC. If
17001 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17002 the name. */
17004 void
17005 rs6000_gen_section_name (char **buf, const char *filename,
17006 const char *section_desc)
17008 const char *q, *after_last_slash, *last_period = 0;
17009 char *p;
17010 int len;
17012 after_last_slash = filename;
17013 for (q = filename; *q; q++)
17015 if (*q == '/')
17016 after_last_slash = q + 1;
17017 else if (*q == '.')
17018 last_period = q;
17021 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17022 *buf = (char *) xmalloc (len);
17024 p = *buf;
17025 *p++ = '_';
17027 for (q = after_last_slash; *q; q++)
17029 if (q == last_period)
17031 strcpy (p, section_desc);
17032 p += strlen (section_desc);
17033 break;
17036 else if (ISALNUM (*q))
17037 *p++ = *q;
17040 if (last_period == 0)
17041 strcpy (p, section_desc);
17042 else
17043 *p = '\0';
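/* For example, rs6000_gen_section_name (&buf, "../src/my-file.c", "bss_")
   strips the directory prefix, drops the non-alphanumeric '-', and
   replaces the final period with the descriptor, yielding "_myfilebss_".
   A filename without any period, such as "myfile", gets the descriptor
   appended instead, also yielding "_myfilebss_".  */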
17046 /* Emit profile function. */
17048 void
17049 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17051 /* Non-standard profiling for kernels, which just saves LR then calls
17052 _mcount without worrying about arg saves. The idea is to change
17053 the function prologue as little as possible as it isn't easy to
17054 account for arg save/restore code added just for _mcount. */
17055 if (TARGET_PROFILE_KERNEL)
17056 return;
17058 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17060 #ifndef NO_PROFILE_COUNTERS
17061 # define NO_PROFILE_COUNTERS 0
17062 #endif
17063 if (NO_PROFILE_COUNTERS)
17064 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17065 LCT_NORMAL, VOIDmode);
17066 else
17068 char buf[30];
17069 const char *label_name;
17070 rtx fun;
17072 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17073 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17074 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17076 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17077 LCT_NORMAL, VOIDmode, fun, Pmode);
17080 else if (DEFAULT_ABI == ABI_DARWIN)
17082 const char *mcount_name = RS6000_MCOUNT;
17083 int caller_addr_regno = LR_REGNO;
17085 /* Be conservative and always set this, at least for now. */
17086 crtl->uses_pic_offset_table = 1;
17088 #if TARGET_MACHO
17089 /* For PIC code, set up a stub and collect the caller's address
17090 from r0, which is where the prologue puts it. */
17091 if (MACHOPIC_INDIRECT
17092 && crtl->uses_pic_offset_table)
17093 caller_addr_regno = 0;
17094 #endif
17095 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17096 LCT_NORMAL, VOIDmode,
17097 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17101 /* Write function profiler code. */
17103 void
17104 output_function_profiler (FILE *file, int labelno)
17106 char buf[100];
17108 switch (DEFAULT_ABI)
17110 default:
17111 gcc_unreachable ();
17113 case ABI_V4:
17114 if (!TARGET_32BIT)
17116 warning (0, "no profiling of 64-bit code for this ABI");
17117 return;
17119 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17120 fprintf (file, "\tmflr %s\n", reg_names[0]);
17121 if (NO_PROFILE_COUNTERS)
17123 asm_fprintf (file, "\tstw %s,4(%s)\n",
17124 reg_names[0], reg_names[1]);
17126 else if (TARGET_SECURE_PLT && flag_pic)
17128 if (TARGET_LINK_STACK)
17130 char name[32];
17131 get_ppc476_thunk_name (name);
17132 asm_fprintf (file, "\tbl %s\n", name);
17134 else
17135 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17136 asm_fprintf (file, "\tstw %s,4(%s)\n",
17137 reg_names[0], reg_names[1]);
17138 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17139 asm_fprintf (file, "\taddis %s,%s,",
17140 reg_names[12], reg_names[12]);
17141 assemble_name (file, buf);
17142 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17143 assemble_name (file, buf);
17144 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17146 else if (flag_pic == 1)
17148 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17149 asm_fprintf (file, "\tstw %s,4(%s)\n",
17150 reg_names[0], reg_names[1]);
17151 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17152 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17153 assemble_name (file, buf);
17154 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17156 else if (flag_pic > 1)
17158 asm_fprintf (file, "\tstw %s,4(%s)\n",
17159 reg_names[0], reg_names[1]);
17160 /* Now, we need to get the address of the label. */
17161 if (TARGET_LINK_STACK)
17163 char name[32];
17164 get_ppc476_thunk_name (name);
17165 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17166 assemble_name (file, buf);
17167 fputs ("-.\n1:", file);
17168 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17169 asm_fprintf (file, "\taddi %s,%s,4\n",
17170 reg_names[11], reg_names[11]);
17172 else
17174 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17175 assemble_name (file, buf);
17176 fputs ("-.\n1:", file);
17177 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17179 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17180 reg_names[0], reg_names[11]);
17181 asm_fprintf (file, "\tadd %s,%s,%s\n",
17182 reg_names[0], reg_names[0], reg_names[11]);
17184 else
17186 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17187 assemble_name (file, buf);
17188 fputs ("@ha\n", file);
17189 asm_fprintf (file, "\tstw %s,4(%s)\n",
17190 reg_names[0], reg_names[1]);
17191 asm_fprintf (file, "\tla %s,", reg_names[0]);
17192 assemble_name (file, buf);
17193 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17196 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17197 fprintf (file, "\tbl %s%s\n",
17198 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17199 break;
17201 case ABI_AIX:
17202 case ABI_ELFv2:
17203 case ABI_DARWIN:
17204 /* Don't do anything, done in output_profile_hook (). */
17205 break;
17211	/* The following variable holds the last issued insn.  */
17213 static rtx_insn *last_scheduled_insn;
17215 /* The following variable helps to balance issuing of load and
17216	store instructions.  */
17218 static int load_store_pendulum;
17220 /* The following variable helps pair divide insns during scheduling. */
17221 static int divide_cnt;
17222 /* The following variable helps pair and alternate vector and vector load
17223 insns during scheduling. */
17224 static int vec_pairing;
17227 /* Power4 load update and store update instructions are cracked into a
17228 load or store and an integer insn which are executed in the same cycle.
17229 Branches have their own dispatch slot which does not count against the
17230 GCC issue rate, but it changes the program flow so there are no other
17231 instructions to issue in this cycle. */
17233 static int
17234 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17236 last_scheduled_insn = insn;
17237 if (GET_CODE (PATTERN (insn)) == USE
17238 || GET_CODE (PATTERN (insn)) == CLOBBER)
17240 cached_can_issue_more = more;
17241 return cached_can_issue_more;
17244 if (insn_terminates_group_p (insn, current_group))
17246 cached_can_issue_more = 0;
17247 return cached_can_issue_more;
17250	/* If the insn was not recognized (so has no reservation), leave MORE unchanged.  */
17251 if (recog_memoized (insn) < 0)
17252 return more;
17254 if (rs6000_sched_groups)
17256 if (is_microcoded_insn (insn))
17257 cached_can_issue_more = 0;
17258 else if (is_cracked_insn (insn))
17259 cached_can_issue_more = more > 2 ? more - 2 : 0;
17260 else
17261 cached_can_issue_more = more - 1;
17263 return cached_can_issue_more;
17266 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
17267 return 0;
17269 cached_can_issue_more = more - 1;
17270 return cached_can_issue_more;
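/* For example, when scheduling for dispatch groups with MORE == 4: a
   microcoded insn exhausts the group (0 slots left), a cracked insn
   consumes two slots (2 left), and any other recognized insn consumes
   one (3 left).  */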
17273 static int
17274 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
17276 int r = rs6000_variable_issue_1 (insn, more);
17277 if (verbose)
17278 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
17279 return r;
17282 /* Adjust the cost of a scheduling dependency. Return the new cost of
17283 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17285 static int
17286 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
17287 unsigned int)
17289 enum attr_type attr_type;
17291 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
17292 return cost;
17294 switch (dep_type)
17296 case REG_DEP_TRUE:
17298 /* Data dependency; DEP_INSN writes a register that INSN reads
17299 some cycles later. */
17301 /* Separate a load from a narrower, dependent store. */
17302 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
17303 || rs6000_tune == PROCESSOR_POWER10)
17304 && GET_CODE (PATTERN (insn)) == SET
17305 && GET_CODE (PATTERN (dep_insn)) == SET
17306 && MEM_P (XEXP (PATTERN (insn), 1))
17307 && MEM_P (XEXP (PATTERN (dep_insn), 0))
17308 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
17309 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
17310 return cost + 14;
17312 attr_type = get_attr_type (insn);
17314 switch (attr_type)
17316 case TYPE_JMPREG:
17317 /* Tell the first scheduling pass about the latency between
17318 a mtctr and bctr (and mtlr and br/blr). The first
17319 scheduling pass will not know about this latency since
17320 the mtctr instruction, which has the latency associated
17321 to it, will be generated by reload. */
17322 return 4;
17323 case TYPE_BRANCH:
17324 /* Leave some extra cycles between a compare and its
17325 dependent branch, to inhibit expensive mispredicts. */
17326 if ((rs6000_tune == PROCESSOR_PPC603
17327 || rs6000_tune == PROCESSOR_PPC604
17328 || rs6000_tune == PROCESSOR_PPC604e
17329 || rs6000_tune == PROCESSOR_PPC620
17330 || rs6000_tune == PROCESSOR_PPC630
17331 || rs6000_tune == PROCESSOR_PPC750
17332 || rs6000_tune == PROCESSOR_PPC7400
17333 || rs6000_tune == PROCESSOR_PPC7450
17334 || rs6000_tune == PROCESSOR_PPCE5500
17335 || rs6000_tune == PROCESSOR_PPCE6500
17336 || rs6000_tune == PROCESSOR_POWER4
17337 || rs6000_tune == PROCESSOR_POWER5
17338 || rs6000_tune == PROCESSOR_POWER7
17339 || rs6000_tune == PROCESSOR_POWER8
17340 || rs6000_tune == PROCESSOR_POWER9
17341 || rs6000_tune == PROCESSOR_POWER10
17342 || rs6000_tune == PROCESSOR_CELL)
17343 && recog_memoized (dep_insn)
17344 && (INSN_CODE (dep_insn) >= 0))
17346 switch (get_attr_type (dep_insn))
17348 case TYPE_CMP:
17349 case TYPE_FPCOMPARE:
17350 case TYPE_CR_LOGICAL:
17351 return cost + 2;
17352 case TYPE_EXTS:
17353 case TYPE_MUL:
17354 if (get_attr_dot (dep_insn) == DOT_YES)
17355 return cost + 2;
17356 else
17357 break;
17358 case TYPE_SHIFT:
17359 if (get_attr_dot (dep_insn) == DOT_YES
17360 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
17361 return cost + 2;
17362 else
17363 break;
17364 default:
17365 break;
17367 break;
17369 case TYPE_STORE:
17370 case TYPE_FPSTORE:
17371 if ((rs6000_tune == PROCESSOR_POWER6)
17372 && recog_memoized (dep_insn)
17373 && (INSN_CODE (dep_insn) >= 0))
17376 if (GET_CODE (PATTERN (insn)) != SET)
17377	/* If this happens, we would have to extend this to schedule
17378	optimally.  Return the default cost for now.  */
17379 return cost;
17381 /* Adjust the cost for the case where the value written
17382 by a fixed point operation is used as the address
17383 gen value on a store. */
17384 switch (get_attr_type (dep_insn))
17386 case TYPE_LOAD:
17387 case TYPE_CNTLZ:
17389 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17390 return get_attr_sign_extend (dep_insn)
17391 == SIGN_EXTEND_YES ? 6 : 4;
17392 break;
17394 case TYPE_SHIFT:
17396 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17397 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17398 6 : 3;
17399 break;
17401 case TYPE_INTEGER:
17402 case TYPE_ADD:
17403 case TYPE_LOGICAL:
17404 case TYPE_EXTS:
17405 case TYPE_INSERT:
17407 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17408 return 3;
17409 break;
17411 case TYPE_STORE:
17412 case TYPE_FPLOAD:
17413 case TYPE_FPSTORE:
17415 if (get_attr_update (dep_insn) == UPDATE_YES
17416 && ! rs6000_store_data_bypass_p (dep_insn, insn))
17417 return 3;
17418 break;
17420 case TYPE_MUL:
17422 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17423 return 17;
17424 break;
17426 case TYPE_DIV:
17428 if (! rs6000_store_data_bypass_p (dep_insn, insn))
17429 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17430 break;
17432 default:
17433 break;
17436 break;
17438 case TYPE_LOAD:
17439 if ((rs6000_tune == PROCESSOR_POWER6)
17440 && recog_memoized (dep_insn)
17441 && (INSN_CODE (dep_insn) >= 0))
17444 /* Adjust the cost for the case where the value written
17445 by a fixed point instruction is used within the address
17446	gen portion of a subsequent load(u)(x).  */
17447 switch (get_attr_type (dep_insn))
17449 case TYPE_LOAD:
17450 case TYPE_CNTLZ:
17452 if (set_to_load_agen (dep_insn, insn))
17453 return get_attr_sign_extend (dep_insn)
17454 == SIGN_EXTEND_YES ? 6 : 4;
17455 break;
17457 case TYPE_SHIFT:
17459 if (set_to_load_agen (dep_insn, insn))
17460 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17461 6 : 3;
17462 break;
17464 case TYPE_INTEGER:
17465 case TYPE_ADD:
17466 case TYPE_LOGICAL:
17467 case TYPE_EXTS:
17468 case TYPE_INSERT:
17470 if (set_to_load_agen (dep_insn, insn))
17471 return 3;
17472 break;
17474 case TYPE_STORE:
17475 case TYPE_FPLOAD:
17476 case TYPE_FPSTORE:
17478 if (get_attr_update (dep_insn) == UPDATE_YES
17479 && set_to_load_agen (dep_insn, insn))
17480 return 3;
17481 break;
17483 case TYPE_MUL:
17485 if (set_to_load_agen (dep_insn, insn))
17486 return 17;
17487 break;
17489 case TYPE_DIV:
17491 if (set_to_load_agen (dep_insn, insn))
17492 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17493 break;
17495 default:
17496 break;
17499 break;
17501 default:
17502 break;
17505 /* Fall out to return default cost. */
17507 break;
17509 case REG_DEP_OUTPUT:
17510 /* Output dependency; DEP_INSN writes a register that INSN writes some
17511 cycles later. */
17512 if ((rs6000_tune == PROCESSOR_POWER6)
17513 && recog_memoized (dep_insn)
17514 && (INSN_CODE (dep_insn) >= 0))
17516 attr_type = get_attr_type (insn);
17518 switch (attr_type)
17520 case TYPE_FP:
17521 case TYPE_FPSIMPLE:
17522 if (get_attr_type (dep_insn) == TYPE_FP
17523 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17524 return 1;
17525 break;
17526 default:
17527 break;
17530 /* Fall through, no cost for output dependency. */
17531 /* FALLTHRU */
17533 case REG_DEP_ANTI:
17534 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17535 cycles later. */
17536 return 0;
17538 default:
17539 gcc_unreachable ();
17542 return cost;
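/* Two worked examples of the adjustments above: on POWER6, a 32-bit
   divide whose result is used as the address of a dependent store
   (with no store-data bypass) has its cost raised to 45 cycles (57 for
   the 64-bit form); and on the processors listed under TYPE_BRANCH, a
   compare feeding its dependent branch is charged COST + 2 to keep the
   pair apart and inhibit expensive mispredicts.  */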
17545 /* Debug version of rs6000_adjust_cost. */
17547 static int
17548 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17549 int cost, unsigned int dw)
17551 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17553 if (ret != cost)
17555 const char *dep;
17557 switch (dep_type)
17559	default: dep = "unknown dependency"; break;
17560 case REG_DEP_TRUE: dep = "data dependency"; break;
17561 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17562	case REG_DEP_ANTI: dep = "anti dependency"; break;
17565 fprintf (stderr,
17566 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17567 "%s, insn:\n", ret, cost, dep);
17569 debug_rtx (insn);
17572 return ret;
17575	/* The function returns true if INSN is microcoded.
17576 Return false otherwise. */
17578 static bool
17579 is_microcoded_insn (rtx_insn *insn)
17581 if (!insn || !NONDEBUG_INSN_P (insn)
17582 || GET_CODE (PATTERN (insn)) == USE
17583 || GET_CODE (PATTERN (insn)) == CLOBBER)
17584 return false;
17586 if (rs6000_tune == PROCESSOR_CELL)
17587 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17589 if (rs6000_sched_groups
17590 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17592 enum attr_type type = get_attr_type (insn);
17593 if ((type == TYPE_LOAD
17594 && get_attr_update (insn) == UPDATE_YES
17595 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17596 || ((type == TYPE_LOAD || type == TYPE_STORE)
17597 && get_attr_update (insn) == UPDATE_YES
17598 && get_attr_indexed (insn) == INDEXED_YES)
17599 || type == TYPE_MFCR)
17600 return true;
17603 return false;
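/* For example, on POWER4/POWER5 a sign-extending load with update
   (such as lhau) or a load/store with update in indexed form (such as
   lwzux or stwux) is treated as microcoded here, as is any mfcr.  */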
17606 /* The function returns true if INSN is cracked into 2 instructions
17607 by the processor (and therefore occupies 2 issue slots). */
17609 static bool
17610 is_cracked_insn (rtx_insn *insn)
17612 if (!insn || !NONDEBUG_INSN_P (insn)
17613 || GET_CODE (PATTERN (insn)) == USE
17614 || GET_CODE (PATTERN (insn)) == CLOBBER)
17615 return false;
17617 if (rs6000_sched_groups
17618 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17620 enum attr_type type = get_attr_type (insn);
17621 if ((type == TYPE_LOAD
17622 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17623 && get_attr_update (insn) == UPDATE_NO)
17624 || (type == TYPE_LOAD
17625 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17626 && get_attr_update (insn) == UPDATE_YES
17627 && get_attr_indexed (insn) == INDEXED_NO)
17628 || (type == TYPE_STORE
17629 && get_attr_update (insn) == UPDATE_YES
17630 && get_attr_indexed (insn) == INDEXED_NO)
17631 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17632 && get_attr_update (insn) == UPDATE_YES)
17633 || (type == TYPE_CR_LOGICAL
17634 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17635 || (type == TYPE_EXTS
17636 && get_attr_dot (insn) == DOT_YES)
17637 || (type == TYPE_SHIFT
17638 && get_attr_dot (insn) == DOT_YES
17639 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17640 || (type == TYPE_MUL
17641 && get_attr_dot (insn) == DOT_YES)
17642 || type == TYPE_DIV
17643 || (type == TYPE_INSERT
17644 && get_attr_size (insn) == SIZE_32))
17645 return true;
17648 return false;
17651 /* The function returns true if INSN can be issued only from
17652 the branch slot. */
17654 static bool
17655 is_branch_slot_insn (rtx_insn *insn)
17657 if (!insn || !NONDEBUG_INSN_P (insn)
17658 || GET_CODE (PATTERN (insn)) == USE
17659 || GET_CODE (PATTERN (insn)) == CLOBBER)
17660 return false;
17662 if (rs6000_sched_groups)
17664 enum attr_type type = get_attr_type (insn);
17665 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17666 return true;
17667 return false;
17670 return false;
17673	/* The function returns true if OUT_INSN sets a value that is
17674	used in the address generation computation of IN_INSN.  */
17675 static bool
17676 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17678 rtx out_set, in_set;
17680 /* For performance reasons, only handle the simple case where
17681 both loads are a single_set. */
17682 out_set = single_set (out_insn);
17683 if (out_set)
17685 in_set = single_set (in_insn);
17686 if (in_set)
17687 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17690 return false;
17693 /* Try to determine base/offset/size parts of the given MEM.
17694	Return true if successful, false if the values could not all
17695	be determined.
17697 This function only looks for REG or REG+CONST address forms.
17698 REG+REG address form will return false. */
17700 static bool
17701 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17702 HOST_WIDE_INT *size)
17704 rtx addr_rtx;
17705	if (MEM_SIZE_KNOWN_P (mem))
17706 *size = MEM_SIZE (mem);
17707 else
17708 return false;
17710 addr_rtx = (XEXP (mem, 0));
17711 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17712 addr_rtx = XEXP (addr_rtx, 1);
17714 *offset = 0;
17715 while (GET_CODE (addr_rtx) == PLUS
17716 && CONST_INT_P (XEXP (addr_rtx, 1)))
17718 *offset += INTVAL (XEXP (addr_rtx, 1));
17719 addr_rtx = XEXP (addr_rtx, 0);
17721 if (!REG_P (addr_rtx))
17722 return false;
17724 *base = addr_rtx;
17725 return true;
17728	/* Return true if the target storage location of MEM1 is
17729	adjacent to the target storage location of MEM2.  */
17732 static bool
17733 adjacent_mem_locations (rtx mem1, rtx mem2)
17735 rtx reg1, reg2;
17736 HOST_WIDE_INT off1, size1, off2, size2;
17738 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17739 && get_memref_parts (mem2, &reg2, &off2, &size2))
17740 return ((REGNO (reg1) == REGNO (reg2))
17741 && ((off1 + size1 == off2)
17742 || (off2 + size2 == off1)));
17744 return false;
17747 /* This function returns true if it can be determined that the two MEM
17748 locations overlap by at least 1 byte based on base reg/offset/size. */
17750 static bool
17751 mem_locations_overlap (rtx mem1, rtx mem2)
17753 rtx reg1, reg2;
17754 HOST_WIDE_INT off1, size1, off2, size2;
17756 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17757 && get_memref_parts (mem2, &reg2, &off2, &size2))
17758 return ((REGNO (reg1) == REGNO (reg2))
17759 && (((off1 <= off2) && (off1 + size1 > off2))
17760 || ((off2 <= off1) && (off2 + size2 > off1))));
17762 return false;
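/* Both predicates reduce to interval tests on [offset, offset + size)
   once the base registers match.  A minimal standalone sketch with
   plain integers (no RTL involved, hypothetical helper names):

     #include <stdbool.h>
     #include <stdio.h>

     static bool
     adjacent (long off1, long size1, long off2, long size2)
     {
       return off1 + size1 == off2 || off2 + size2 == off1;
     }

     static bool
     overlap (long off1, long size1, long off2, long size2)
     {
       return (off1 <= off2 && off1 + size1 > off2)
	      || (off2 <= off1 && off2 + size2 > off1);
     }

     int
     main (void)
     {
       printf ("%d\n", adjacent (0, 4, 4, 4)); // 1: [0,4) touches [4,8)
       printf ("%d\n", overlap (0, 8, 4, 4));  // 1: [0,8) covers [4,8)
       printf ("%d\n", overlap (0, 4, 4, 4));  // 0: adjacent, no overlap
       return 0;
     }

   Both real predicates additionally require the same base register,
   which the integer sketch omits.  */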
17765 /* A C statement (sans semicolon) to update the integer scheduling
17766 priority INSN_PRIORITY (INSN). Increase the priority to execute the
17767 INSN earlier, reduce the priority to execute INSN later. Do not
17768 define this macro if you do not need to adjust the scheduling
17769 priorities of insns. */
17771 static int
17772 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17774 rtx load_mem, str_mem;
17775 /* On machines (like the 750) which have asymmetric integer units,
17776 where one integer unit can do multiply and divides and the other
17777 can't, reduce the priority of multiply/divide so it is scheduled
17778 before other integer operations. */
17780 #if 0
17781 if (! INSN_P (insn))
17782 return priority;
17784 if (GET_CODE (PATTERN (insn)) == USE)
17785 return priority;
17787 switch (rs6000_tune) {
17788 case PROCESSOR_PPC750:
17789 switch (get_attr_type (insn))
17791 default:
17792 break;
17794 case TYPE_MUL:
17795 case TYPE_DIV:
17796 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17797 priority, priority);
17798 if (priority >= 0 && priority < 0x01000000)
17799 priority >>= 3;
17800 break;
17803 #endif
17805 if (insn_must_be_first_in_group (insn)
17806 && reload_completed
17807 && current_sched_info->sched_max_insns_priority
17808 && rs6000_sched_restricted_insns_priority)
17811 /* Prioritize insns that can be dispatched only in the first
17812 dispatch slot. */
17813 if (rs6000_sched_restricted_insns_priority == 1)
17814 /* Attach highest priority to insn. This means that in
17815 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17816 precede 'priority' (critical path) considerations. */
17817 return current_sched_info->sched_max_insns_priority;
17818 else if (rs6000_sched_restricted_insns_priority == 2)
17819 /* Increase priority of insn by a minimal amount. This means that in
17820 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17821 considerations precede dispatch-slot restriction considerations. */
17822 return (priority + 1);
17825 if (rs6000_tune == PROCESSOR_POWER6
17826 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17827 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17828 /* Attach highest priority to insn if the scheduler has just issued two
17829 stores and this instruction is a load, or two loads and this instruction
17830 is a store. Power6 wants loads and stores scheduled alternately
17831 when possible */
17832 return current_sched_info->sched_max_insns_priority;
17834 return priority;
17837 /* Return true if the instruction is nonpipelined on the Cell. */
17838 static bool
17839 is_nonpipeline_insn (rtx_insn *insn)
17841 enum attr_type type;
17842 if (!insn || !NONDEBUG_INSN_P (insn)
17843 || GET_CODE (PATTERN (insn)) == USE
17844 || GET_CODE (PATTERN (insn)) == CLOBBER)
17845 return false;
17847 type = get_attr_type (insn);
17848 if (type == TYPE_MUL
17849 || type == TYPE_DIV
17850 || type == TYPE_SDIV
17851 || type == TYPE_DDIV
17852 || type == TYPE_SSQRT
17853 || type == TYPE_DSQRT
17854 || type == TYPE_MFCR
17855 || type == TYPE_MFCRF
17856 || type == TYPE_MFJMPR)
17858 return true;
17860 return false;
17864 /* Return how many instructions the machine can issue per cycle. */
17866 static int
17867 rs6000_issue_rate (void)
17869 /* Unless scheduling for register pressure, use issue rate of 1 for
17870 first scheduling pass to decrease degradation. */
17871 if (!reload_completed && !flag_sched_pressure)
17872 return 1;
17874 switch (rs6000_tune) {
17875 case PROCESSOR_RS64A:
17876 case PROCESSOR_PPC601: /* ? */
17877 case PROCESSOR_PPC7450:
17878 return 3;
17879 case PROCESSOR_PPC440:
17880 case PROCESSOR_PPC603:
17881 case PROCESSOR_PPC750:
17882 case PROCESSOR_PPC7400:
17883 case PROCESSOR_PPC8540:
17884 case PROCESSOR_PPC8548:
17885 case PROCESSOR_CELL:
17886 case PROCESSOR_PPCE300C2:
17887 case PROCESSOR_PPCE300C3:
17888 case PROCESSOR_PPCE500MC:
17889 case PROCESSOR_PPCE500MC64:
17890 case PROCESSOR_PPCE5500:
17891 case PROCESSOR_PPCE6500:
17892 case PROCESSOR_TITAN:
17893 return 2;
17894 case PROCESSOR_PPC476:
17895 case PROCESSOR_PPC604:
17896 case PROCESSOR_PPC604e:
17897 case PROCESSOR_PPC620:
17898 case PROCESSOR_PPC630:
17899 return 4;
17900 case PROCESSOR_POWER4:
17901 case PROCESSOR_POWER5:
17902 case PROCESSOR_POWER6:
17903 case PROCESSOR_POWER7:
17904 return 5;
17905 case PROCESSOR_POWER8:
17906 return 7;
17907 case PROCESSOR_POWER9:
17908 case PROCESSOR_POWER10:
17909 return 6;
17910 default:
17911 return 1;
17915 /* Return how many instructions to look ahead for better insn
17916 scheduling. */
17918 static int
17919 rs6000_use_sched_lookahead (void)
17921 switch (rs6000_tune)
17923 case PROCESSOR_PPC8540:
17924 case PROCESSOR_PPC8548:
17925 return 4;
17927 case PROCESSOR_CELL:
17928 return (reload_completed ? 8 : 0);
17930 default:
17931 return 0;
17935	/* We are choosing an insn from the ready queue.  Return zero if INSN can be
17936 chosen. */
17937 static int
17938 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17940 if (ready_index == 0)
17941 return 0;
17943 if (rs6000_tune != PROCESSOR_CELL)
17944 return 0;
17946 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17948 if (!reload_completed
17949 || is_nonpipeline_insn (insn)
17950 || is_microcoded_insn (insn))
17951 return 1;
17953 return 0;
17956 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17957 and return true. */
17959 static bool
17960 find_mem_ref (rtx pat, rtx *mem_ref)
17962 const char * fmt;
17963 int i, j;
17965 /* stack_tie does not produce any real memory traffic. */
17966 if (tie_operand (pat, VOIDmode))
17967 return false;
17969 if (MEM_P (pat))
17971 *mem_ref = pat;
17972 return true;
17975 /* Recursively process the pattern. */
17976 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17978 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17980 if (fmt[i] == 'e')
17982 if (find_mem_ref (XEXP (pat, i), mem_ref))
17983 return true;
17985 else if (fmt[i] == 'E')
17986 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17988 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17989 return true;
17993 return false;
17996 /* Determine if PAT is a PATTERN of a load insn. */
17998 static bool
17999 is_load_insn1 (rtx pat, rtx *load_mem)
18001	if (!pat)
18002 return false;
18004 if (GET_CODE (pat) == SET)
18005 return find_mem_ref (SET_SRC (pat), load_mem);
18007 if (GET_CODE (pat) == PARALLEL)
18009 int i;
18011 for (i = 0; i < XVECLEN (pat, 0); i++)
18012 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18013 return true;
18016 return false;
18019 /* Determine if INSN loads from memory. */
18021 static bool
18022 is_load_insn (rtx insn, rtx *load_mem)
18024 if (!insn || !INSN_P (insn))
18025 return false;
18027 if (CALL_P (insn))
18028 return false;
18030 return is_load_insn1 (PATTERN (insn), load_mem);
18033 /* Determine if PAT is a PATTERN of a store insn. */
18035 static bool
18036 is_store_insn1 (rtx pat, rtx *str_mem)
18038	if (!pat)
18039 return false;
18041 if (GET_CODE (pat) == SET)
18042 return find_mem_ref (SET_DEST (pat), str_mem);
18044 if (GET_CODE (pat) == PARALLEL)
18046 int i;
18048 for (i = 0; i < XVECLEN (pat, 0); i++)
18049 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18050 return true;
18053 return false;
18056 /* Determine if INSN stores to memory. */
18058 static bool
18059 is_store_insn (rtx insn, rtx *str_mem)
18061 if (!insn || !INSN_P (insn))
18062 return false;
18064 return is_store_insn1 (PATTERN (insn), str_mem);
18067 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18069 static bool
18070 is_power9_pairable_vec_type (enum attr_type type)
18072 switch (type)
18074 case TYPE_VECSIMPLE:
18075 case TYPE_VECCOMPLEX:
18076 case TYPE_VECDIV:
18077 case TYPE_VECCMP:
18078 case TYPE_VECPERM:
18079 case TYPE_VECFLOAT:
18080 case TYPE_VECFDIV:
18081 case TYPE_VECDOUBLE:
18082 return true;
18083 default:
18084 break;
18086 return false;
18089 /* Returns whether the dependence between INSN and NEXT is considered
18090 costly by the given target. */
18092 static bool
18093 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18095 rtx insn;
18096 rtx next;
18097 rtx load_mem, str_mem;
18099 /* If the flag is not enabled - no dependence is considered costly;
18100 allow all dependent insns in the same group.
18101 This is the most aggressive option. */
18102 if (rs6000_sched_costly_dep == no_dep_costly)
18103 return false;
18105 /* If the flag is set to 1 - a dependence is always considered costly;
18106 do not allow dependent instructions in the same group.
18107 This is the most conservative option. */
18108 if (rs6000_sched_costly_dep == all_deps_costly)
18109 return true;
18111 insn = DEP_PRO (dep);
18112 next = DEP_CON (dep);
18114 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18115 && is_load_insn (next, &load_mem)
18116 && is_store_insn (insn, &str_mem))
18117 /* Prevent load after store in the same group. */
18118 return true;
18120 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18121 && is_load_insn (next, &load_mem)
18122 && is_store_insn (insn, &str_mem)
18123 && DEP_TYPE (dep) == REG_DEP_TRUE
18124 && mem_locations_overlap(str_mem, load_mem))
18125 /* Prevent load after store in the same group if it is a true
18126 dependence. */
18127 return true;
18129 /* The flag is set to X; dependences with latency >= X are considered costly,
18130 and will not be scheduled in the same group. */
18131 if (rs6000_sched_costly_dep <= max_dep_latency
18132 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18133 return true;
18135 return false;
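/* For example, if -msched-costly-dep= is given a numeric threshold of
   3, a dependence with latency COST == 5 between insns already
   DISTANCE == 1 cycles apart leaves 4 cycles outstanding, so 4 >= 3
   marks it costly and the two insns will not share a dispatch
   group.  */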
18138 /* Return the next insn after INSN that is found before TAIL is reached,
18139 skipping any "non-active" insns - insns that will not actually occupy
18140 an issue slot. Return NULL_RTX if such an insn is not found. */
18142 static rtx_insn *
18143 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18145 if (insn == NULL_RTX || insn == tail)
18146 return NULL;
18148 while (1)
18150 insn = NEXT_INSN (insn);
18151 if (insn == NULL_RTX || insn == tail)
18152 return NULL;
18154 if (CALL_P (insn)
18155 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18156 || (NONJUMP_INSN_P (insn)
18157 && GET_CODE (PATTERN (insn)) != USE
18158 && GET_CODE (PATTERN (insn)) != CLOBBER
18159 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18160 break;
18162 return insn;
18165 /* Move instruction at POS to the end of the READY list. */
18167 static void
18168 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18170 rtx_insn *tmp;
18171 int i;
18173 tmp = ready[pos];
18174 for (i = pos; i < lastpos; i++)
18175 ready[i] = ready[i + 1];
18176 ready[lastpos] = tmp;
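/* Note this is a rotation, not a swap: with READY = {A, B, C, D},
   POS == 1 and LASTPOS == 3, the insns after POS each shift down one
   slot and B lands at the end, giving {A, C, D, B}.  Since the
   scheduler issues from the end of the ready array, moving an insn to
   the end makes it the next candidate.  A trivially checkable sketch:

     #include <stdio.h>

     static void
     move_to_end (int *a, int pos, int last)
     {
       int tmp = a[pos];
       for (int i = pos; i < last; i++)
	 a[i] = a[i + 1];
       a[last] = tmp;
     }

     int
     main (void)
     {
       int ready[4] = { 10, 20, 30, 40 };
       move_to_end (ready, 1, 3);
       for (int i = 0; i < 4; i++)
	 printf ("%d ", ready[i]);   // prints: 10 30 40 20
       return 0;
     }
   */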
18179 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18181 static int
18182 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18184 /* For Power6, we need to handle some special cases to try and keep the
18185 store queue from overflowing and triggering expensive flushes.
18187 This code monitors how load and store instructions are being issued
18188 and skews the ready list one way or the other to increase the likelihood
18189 that a desired instruction is issued at the proper time.
18191 A couple of things are done. First, we maintain a "load_store_pendulum"
18192 to track the current state of load/store issue.
18194 - If the pendulum is at zero, then no loads or stores have been
18195 issued in the current cycle so we do nothing.
18197 - If the pendulum is 1, then a single load has been issued in this
18198 cycle and we attempt to locate another load in the ready list to
18199 issue with it.
18201 - If the pendulum is -2, then two stores have already been
18202 issued in this cycle, so we increase the priority of the first load
18203	in the ready list to increase its likelihood of being chosen first
18204 in the next cycle.
18206 - If the pendulum is -1, then a single store has been issued in this
18207 cycle and we attempt to locate another store in the ready list to
18208 issue with it, preferring a store to an adjacent memory location to
18209 facilitate store pairing in the store queue.
18211 - If the pendulum is 2, then two loads have already been
18212 issued in this cycle, so we increase the priority of the first store
18213	in the ready list to increase its likelihood of being chosen first
18214 in the next cycle.
18216 - If the pendulum < -2 or > 2, then do nothing.
18218 Note: This code covers the most common scenarios. There exist non
18219 load/store instructions which make use of the LSU and which
18220 would need to be accounted for to strictly model the behavior
18221 of the machine. Those instructions are currently unaccounted
18222 for to help minimize compile time overhead of this code.
18224 int pos;
18225 rtx load_mem, str_mem;
18227 if (is_store_insn (last_scheduled_insn, &str_mem))
18228 /* Issuing a store, swing the load_store_pendulum to the left */
18229 load_store_pendulum--;
18230 else if (is_load_insn (last_scheduled_insn, &load_mem))
18231 /* Issuing a load, swing the load_store_pendulum to the right */
18232 load_store_pendulum++;
18233 else
18234 return cached_can_issue_more;
18236 /* If the pendulum is balanced, or there is only one instruction on
18237 the ready list, then all is well, so return. */
18238 if ((load_store_pendulum == 0) || (lastpos <= 0))
18239 return cached_can_issue_more;
18241 if (load_store_pendulum == 1)
18243 /* A load has been issued in this cycle. Scan the ready list
18244 for another load to issue with it */
18245 pos = lastpos;
18247 while (pos >= 0)
18249 if (is_load_insn (ready[pos], &load_mem))
18251 /* Found a load. Move it to the head of the ready list,
18252	and adjust its priority so that it is more likely to
18253 stay there */
18254 move_to_end_of_ready (ready, pos, lastpos);
18256 if (!sel_sched_p ()
18257 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18258 INSN_PRIORITY (ready[lastpos])++;
18259 break;
18261 pos--;
18264 else if (load_store_pendulum == -2)
18266 /* Two stores have been issued in this cycle. Increase the
18267 priority of the first load in the ready list to favor it for
18268 issuing in the next cycle. */
18269 pos = lastpos;
18271 while (pos >= 0)
18273 if (is_load_insn (ready[pos], &load_mem)
18274 && !sel_sched_p ()
18275 && INSN_PRIORITY_KNOWN (ready[pos]))
18277 INSN_PRIORITY (ready[pos])++;
18279 /* Adjust the pendulum to account for the fact that a load
18280 was found and increased in priority. This is to prevent
18281 increasing the priority of multiple loads */
18282 load_store_pendulum--;
18284 break;
18286 pos--;
18289 else if (load_store_pendulum == -1)
18291 /* A store has been issued in this cycle. Scan the ready list for
18292 another store to issue with it, preferring a store to an adjacent
18293 memory location */
18294 int first_store_pos = -1;
18296 pos = lastpos;
18298 while (pos >= 0)
18300 if (is_store_insn (ready[pos], &str_mem))
18302 rtx str_mem2;
18303 /* Maintain the index of the first store found on the
18304 list */
18305 if (first_store_pos == -1)
18306 first_store_pos = pos;
18308 if (is_store_insn (last_scheduled_insn, &str_mem2)
18309 && adjacent_mem_locations (str_mem, str_mem2))
18311 /* Found an adjacent store. Move it to the head of the
18312	ready list, and adjust its priority so that it is
18313 more likely to stay there */
18314 move_to_end_of_ready (ready, pos, lastpos);
18316 if (!sel_sched_p ()
18317 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18318 INSN_PRIORITY (ready[lastpos])++;
18320 first_store_pos = -1;
18322 break;
18325 pos--;
18328 if (first_store_pos >= 0)
18330 /* An adjacent store wasn't found, but a non-adjacent store was,
18331 so move the non-adjacent store to the front of the ready
18332 list, and adjust its priority so that it is more likely to
18333 stay there. */
18334 move_to_end_of_ready (ready, first_store_pos, lastpos);
18335 if (!sel_sched_p ()
18336 && INSN_PRIORITY_KNOWN (ready[lastpos]))
18337 INSN_PRIORITY (ready[lastpos])++;
18340 else if (load_store_pendulum == 2)
18342 /* Two loads have been issued in this cycle. Increase the priority
18343 of the first store in the ready list to favor it for issuing in
18344 the next cycle. */
18345 pos = lastpos;
18347 while (pos >= 0)
18349 if (is_store_insn (ready[pos], &str_mem)
18350 && !sel_sched_p ()
18351 && INSN_PRIORITY_KNOWN (ready[pos]))
18353 INSN_PRIORITY (ready[pos])++;
18355 /* Adjust the pendulum to account for the fact that a store
18356 was found and increased in priority. This is to prevent
18357 increasing the priority of multiple stores */
18358 load_store_pendulum++;
18360 break;
18362 pos--;
18366 return cached_can_issue_more;
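/* A minimal sketch of the pendulum bookkeeping in isolation, driving
   it with a made-up issue string of 'L' (load) and 'S' (store); the
   real hook also resets the pendulum at the start of each cycle and
   adjusts ready-list priorities as described above:

     #include <stdio.h>

     int
     main (void)
     {
       const char *issued = "LLSSS";
       int pendulum = 0;
       for (const char *p = issued; *p; p++)
	 {
	   pendulum += (*p == 'L') ? 1 : -1;
	   printf ("%c -> pendulum %d\n", *p, pendulum);
	 }
       // Prints 1, 2, 1, 0, -1.  At +2 the hook would boost the first
       // store on the ready list; at -1 it would look for a partner
       // store, preferring one adjacent in memory.
       return 0;
     }
   */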
18369 /* Do Power9 specific sched_reorder2 reordering of ready list. */
18371 static int
18372 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
18374 int pos;
18375 enum attr_type type, type2;
18377 type = get_attr_type (last_scheduled_insn);
18379 /* Try to issue fixed point divides back-to-back in pairs so they will be
18380 routed to separate execution units and execute in parallel. */
18381 if (type == TYPE_DIV && divide_cnt == 0)
18383 /* First divide has been scheduled. */
18384 divide_cnt = 1;
18386 /* Scan the ready list looking for another divide, if found move it
18387 to the end of the list so it is chosen next. */
18388 pos = lastpos;
18389 while (pos >= 0)
18391 if (recog_memoized (ready[pos]) >= 0
18392 && get_attr_type (ready[pos]) == TYPE_DIV)
18394 move_to_end_of_ready (ready, pos, lastpos);
18395 break;
18397 pos--;
18400 else
18402 /* Last insn was the 2nd divide or not a divide, reset the counter. */
18403 divide_cnt = 0;
18405 /* The best dispatch throughput for vector and vector load insns can be
18406 achieved by interleaving a vector and vector load such that they'll
18407 dispatch to the same superslice. If this pairing cannot be achieved
18408 then it is best to pair vector insns together and vector load insns
18409 together.
18411 To aid in this pairing, vec_pairing maintains the current state with
18412 the following values:
18414 0 : Initial state, no vecload/vector pairing has been started.
18416 1 : A vecload or vector insn has been issued and a candidate for
18417 pairing has been found and moved to the end of the ready
18418 list. */
18419 if (type == TYPE_VECLOAD)
18421 /* Issued a vecload. */
18422 if (vec_pairing == 0)
18424 int vecload_pos = -1;
18425 /* We issued a single vecload, look for a vector insn to pair it
18426 with. If one isn't found, try to pair another vecload. */
18427 pos = lastpos;
18428 while (pos >= 0)
18430 if (recog_memoized (ready[pos]) >= 0)
18432 type2 = get_attr_type (ready[pos]);
18433 if (is_power9_pairable_vec_type (type2))
18435 /* Found a vector insn to pair with, move it to the
18436 end of the ready list so it is scheduled next. */
18437 move_to_end_of_ready (ready, pos, lastpos);
18438 vec_pairing = 1;
18439 return cached_can_issue_more;
18441 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18442 /* Remember position of first vecload seen. */
18443 vecload_pos = pos;
18445 pos--;
18447 if (vecload_pos >= 0)
18449 /* Didn't find a vector to pair with but did find a vecload,
18450 move it to the end of the ready list. */
18451 move_to_end_of_ready (ready, vecload_pos, lastpos);
18452 vec_pairing = 1;
18453 return cached_can_issue_more;
18457 else if (is_power9_pairable_vec_type (type))
18459 /* Issued a vector operation. */
18460 if (vec_pairing == 0)
18462 int vec_pos = -1;
18463 /* We issued a single vector insn, look for a vecload to pair it
18464 with. If one isn't found, try to pair another vector. */
18465 pos = lastpos;
18466 while (pos >= 0)
18468 if (recog_memoized (ready[pos]) >= 0)
18470 type2 = get_attr_type (ready[pos]);
18471 if (type2 == TYPE_VECLOAD)
18473 /* Found a vecload insn to pair with, move it to the
18474 end of the ready list so it is scheduled next. */
18475 move_to_end_of_ready (ready, pos, lastpos);
18476 vec_pairing = 1;
18477 return cached_can_issue_more;
18479 else if (is_power9_pairable_vec_type (type2)
18480 && vec_pos == -1)
18481 /* Remember position of first vector insn seen. */
18482 vec_pos = pos;
18484 pos--;
18486 if (vec_pos >= 0)
18488 /* Didn't find a vecload to pair with but did find a vector
18489 insn, move it to the end of the ready list. */
18490 move_to_end_of_ready (ready, vec_pos, lastpos);
18491 vec_pairing = 1;
18492 return cached_can_issue_more;
18497 /* We've either finished a vec/vecload pair, couldn't find an insn to
18498 continue the current pair, or the last insn had nothing to do with
18499	pairing.  In any case, reset the state.  */
18500 vec_pairing = 0;
18503 return cached_can_issue_more;
18506 /* We are about to begin issuing insns for this clock cycle. */
18508 static int
18509 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18510 rtx_insn **ready ATTRIBUTE_UNUSED,
18511 int *pn_ready ATTRIBUTE_UNUSED,
18512 int clock_var ATTRIBUTE_UNUSED)
18514 int n_ready = *pn_ready;
18516 if (sched_verbose)
18517 fprintf (dump, "// rs6000_sched_reorder :\n");
18519	/* Reorder the ready list, if the next insn to be issued
18520	is a nonpipelined insn.  */
18521 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18523 if (is_nonpipeline_insn (ready[n_ready - 1])
18524 && (recog_memoized (ready[n_ready - 2]) > 0))
18525 /* Simply swap first two insns. */
18526 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18529 if (rs6000_tune == PROCESSOR_POWER6)
18530 load_store_pendulum = 0;
18532 return rs6000_issue_rate ();
18535 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18537 static int
18538 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18539 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18541 if (sched_verbose)
18542 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18544 /* Do Power6 dependent reordering if necessary. */
18545 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18546 return power6_sched_reorder2 (ready, *pn_ready - 1);
18548 /* Do Power9 dependent reordering if necessary. */
18549 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18550 && recog_memoized (last_scheduled_insn) >= 0)
18551 return power9_sched_reorder2 (ready, *pn_ready - 1);
18553 return cached_can_issue_more;
18556 /* Return whether the presence of INSN causes a dispatch group termination
18557 of group WHICH_GROUP.
18559 If WHICH_GROUP == current_group, this function will return true if INSN
18560 causes the termination of the current group (i.e, the dispatch group to
18561 which INSN belongs). This means that INSN will be the last insn in the
18562 group it belongs to.
18564 If WHICH_GROUP == previous_group, this function will return true if INSN
18565 causes the termination of the previous group (i.e, the dispatch group that
18566 precedes the group to which INSN belongs). This means that INSN will be
18567 the first insn in the group it belongs to). */
18569 static bool
18570 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18572 bool first, last;
18574 if (! insn)
18575 return false;
18577 first = insn_must_be_first_in_group (insn);
18578 last = insn_must_be_last_in_group (insn);
18580 if (first && last)
18581 return true;
18583 if (which_group == current_group)
18584 return last;
18585 else if (which_group == previous_group)
18586 return first;
18588 return false;
18592 static bool
18593 insn_must_be_first_in_group (rtx_insn *insn)
18595 enum attr_type type;
18597 if (!insn
18598 || NOTE_P (insn)
18599 || DEBUG_INSN_P (insn)
18600 || GET_CODE (PATTERN (insn)) == USE
18601 || GET_CODE (PATTERN (insn)) == CLOBBER)
18602 return false;
18604 switch (rs6000_tune)
18606 case PROCESSOR_POWER5:
18607 if (is_cracked_insn (insn))
18608 return true;
18609 /* FALLTHRU */
18610 case PROCESSOR_POWER4:
18611 if (is_microcoded_insn (insn))
18612 return true;
18614 if (!rs6000_sched_groups)
18615 return false;
18617 type = get_attr_type (insn);
18619 switch (type)
18621 case TYPE_MFCR:
18622 case TYPE_MFCRF:
18623 case TYPE_MTCR:
18624 case TYPE_CR_LOGICAL:
18625 case TYPE_MTJMPR:
18626 case TYPE_MFJMPR:
18627 case TYPE_DIV:
18628 case TYPE_LOAD_L:
18629 case TYPE_STORE_C:
18630 case TYPE_ISYNC:
18631 case TYPE_SYNC:
18632 return true;
18633 default:
18634 break;
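/* As an illustration of the ABI_V4 case: for 32-bit, non-PIC code with
   profile counters enabled and label number 3, the branches above emit
   a sequence along these lines (assuming the usual ELF spelling ".LP3"
   for the internal label and numeric register names):

	mflr 0
	lis 12,.LP3@ha
	stw 0,4(1)
	la 0,.LP3@l(12)
	bl _mcount
   */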
18636 break;
18637 case PROCESSOR_POWER6:
18638 type = get_attr_type (insn);
18640 switch (type)
18642 case TYPE_EXTS:
18643 case TYPE_CNTLZ:
18644 case TYPE_TRAP:
18645 case TYPE_MUL:
18646 case TYPE_INSERT:
18647 case TYPE_FPCOMPARE:
18648 case TYPE_MFCR:
18649 case TYPE_MTCR:
18650 case TYPE_MFJMPR:
18651 case TYPE_MTJMPR:
18652 case TYPE_ISYNC:
18653 case TYPE_SYNC:
18654 case TYPE_LOAD_L:
18655 case TYPE_STORE_C:
18656 return true;
18657 case TYPE_SHIFT:
18658 if (get_attr_dot (insn) == DOT_NO
18659 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18660 return true;
18661 else
18662 break;
18663 case TYPE_DIV:
18664 if (get_attr_size (insn) == SIZE_32)
18665 return true;
18666 else
18667 break;
18668 case TYPE_LOAD:
18669 case TYPE_STORE:
18670 case TYPE_FPLOAD:
18671 case TYPE_FPSTORE:
18672 if (get_attr_update (insn) == UPDATE_YES)
18673 return true;
18674 else
18675 break;
18676 default:
18677 break;
18679 break;
18680 case PROCESSOR_POWER7:
18681 type = get_attr_type (insn);
18683 switch (type)
18685 case TYPE_CR_LOGICAL:
18686 case TYPE_MFCR:
18687 case TYPE_MFCRF:
18688 case TYPE_MTCR:
18689 case TYPE_DIV:
18690 case TYPE_ISYNC:
18691 case TYPE_LOAD_L:
18692 case TYPE_STORE_C:
18693 case TYPE_MFJMPR:
18694 case TYPE_MTJMPR:
18695 return true;
18696 case TYPE_MUL:
18697 case TYPE_SHIFT:
18698 case TYPE_EXTS:
18699 if (get_attr_dot (insn) == DOT_YES)
18700 return true;
18701 else
18702 break;
18703 case TYPE_LOAD:
18704 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18705 || get_attr_update (insn) == UPDATE_YES)
18706 return true;
18707 else
18708 break;
18709 case TYPE_STORE:
18710 case TYPE_FPLOAD:
18711 case TYPE_FPSTORE:
18712 if (get_attr_update (insn) == UPDATE_YES)
18713 return true;
18714 else
18715 break;
18716 default:
18717 break;
18719 break;
18720 case PROCESSOR_POWER8:
18721 type = get_attr_type (insn);
18723 switch (type)
18725 case TYPE_CR_LOGICAL:
18726 case TYPE_MFCR:
18727 case TYPE_MFCRF:
18728 case TYPE_MTCR:
18729 case TYPE_SYNC:
18730 case TYPE_ISYNC:
18731 case TYPE_LOAD_L:
18732 case TYPE_STORE_C:
18733 case TYPE_VECSTORE:
18734 case TYPE_MFJMPR:
18735 case TYPE_MTJMPR:
18736 return true;
18737 case TYPE_SHIFT:
18738 case TYPE_EXTS:
18739 case TYPE_MUL:
18740 if (get_attr_dot (insn) == DOT_YES)
18741 return true;
18742 else
18743 break;
18744 case TYPE_LOAD:
18745 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18746 || get_attr_update (insn) == UPDATE_YES)
18747 return true;
18748 else
18749 break;
18750 case TYPE_STORE:
18751 if (get_attr_update (insn) == UPDATE_YES
18752 && get_attr_indexed (insn) == INDEXED_YES)
18753 return true;
18754 else
18755 break;
18756 default:
18757 break;
18759 break;
18760 default:
18761 break;
18764 return false;
18767 static bool
18768 insn_must_be_last_in_group (rtx_insn *insn)
18770 enum attr_type type;
18772 if (!insn
18773 || NOTE_P (insn)
18774 || DEBUG_INSN_P (insn)
18775 || GET_CODE (PATTERN (insn)) == USE
18776 || GET_CODE (PATTERN (insn)) == CLOBBER)
18777 return false;
18779 switch (rs6000_tune) {
18780 case PROCESSOR_POWER4:
18781 case PROCESSOR_POWER5:
18782 if (is_microcoded_insn (insn))
18783 return true;
18785 if (is_branch_slot_insn (insn))
18786 return true;
18788 break;
18789 case PROCESSOR_POWER6:
18790 type = get_attr_type (insn);
18792 switch (type)
18794 case TYPE_EXTS:
18795 case TYPE_CNTLZ:
18796 case TYPE_TRAP:
18797 case TYPE_MUL:
18798 case TYPE_FPCOMPARE:
18799 case TYPE_MFCR:
18800 case TYPE_MTCR:
18801 case TYPE_MFJMPR:
18802 case TYPE_MTJMPR:
18803 case TYPE_ISYNC:
18804 case TYPE_SYNC:
18805 case TYPE_LOAD_L:
18806 case TYPE_STORE_C:
18807 return true;
18808 case TYPE_SHIFT:
18809 if (get_attr_dot (insn) == DOT_NO
18810 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18811 return true;
18812 else
18813 break;
18814 case TYPE_DIV:
18815 if (get_attr_size (insn) == SIZE_32)
18816 return true;
18817 else
18818 break;
18819 default:
18820 break;
18822 break;
18823 case PROCESSOR_POWER7:
18824 type = get_attr_type (insn);
18826 switch (type)
18828 case TYPE_ISYNC:
18829 case TYPE_SYNC:
18830 case TYPE_LOAD_L:
18831 case TYPE_STORE_C:
18832 return true;
18833 case TYPE_LOAD:
18834 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18835 && get_attr_update (insn) == UPDATE_YES)
18836 return true;
18837 else
18838 break;
18839 case TYPE_STORE:
18840 if (get_attr_update (insn) == UPDATE_YES
18841 && get_attr_indexed (insn) == INDEXED_YES)
18842 return true;
18843 else
18844 break;
18845 default:
18846 break;
18848 break;
18849 case PROCESSOR_POWER8:
18850 type = get_attr_type (insn);
18852 switch (type)
18854 case TYPE_MFCR:
18855 case TYPE_MTCR:
18856 case TYPE_ISYNC:
18857 case TYPE_SYNC:
18858 case TYPE_LOAD_L:
18859 case TYPE_STORE_C:
18860 return true;
18861 case TYPE_LOAD:
18862 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18863 && get_attr_update (insn) == UPDATE_YES)
18864 return true;
18865 else
18866 break;
18867 case TYPE_STORE:
18868 if (get_attr_update (insn) == UPDATE_YES
18869 && get_attr_indexed (insn) == INDEXED_YES)
18870 return true;
18871 else
18872 break;
18873 default:
18874 break;
18876 break;
18877 default:
18878 break;
18881 return false;
18884 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18885 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18887 static bool
18888 is_costly_group (rtx *group_insns, rtx next_insn)
18890 int i;
18891 int issue_rate = rs6000_issue_rate ();
18893 for (i = 0; i < issue_rate; i++)
18895 sd_iterator_def sd_it;
18896 dep_t dep;
18897 rtx insn = group_insns[i];
18899 if (!insn)
18900 continue;
18902 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18904 rtx next = DEP_CON (dep);
18906 if (next == next_insn
18907 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18908 return true;
18912 return false;
18915 /* Utility of the function redefine_groups.
18916 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18917 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18918 to keep it "far" (in a separate group) from GROUP_INSNS, following
18919 one of the following schemes, depending on the value of the flag
18920	-minsert-sched-nops=X:
18921 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18922 in order to force NEXT_INSN into a separate group.
18923 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18924 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18925 insertion (has a group just ended, how many vacant issue slots remain in the
18926 last group, and how many dispatch groups were encountered so far). */
18928 static int
18929 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18930 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18931 int *group_count)
18933 rtx nop;
18934 bool force;
18935 int issue_rate = rs6000_issue_rate ();
18936 bool end = *group_end;
18937 int i;
18939 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18940 return can_issue_more;
18942 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18943 return can_issue_more;
18945 force = is_costly_group (group_insns, next_insn);
18946 if (!force)
18947 return can_issue_more;
18949 if (sched_verbose > 6)
18950 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
18951 *group_count, can_issue_more);
18953 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18955 if (*group_end)
18956 can_issue_more = 0;
18958 /* Since only a branch can be issued in the last issue_slot, it is
18959 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18960 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18961 in this case the last nop will start a new group and the branch
18962 will be forced to the new group. */
18963 if (can_issue_more && !is_branch_slot_insn (next_insn))
18964 can_issue_more--;
18966 /* Do we have a special group ending nop? */
18967 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18968 || rs6000_tune == PROCESSOR_POWER8)
18970 nop = gen_group_ending_nop ();
18971 emit_insn_before (nop, next_insn);
18972 can_issue_more = 0;
18974 else
18975 while (can_issue_more > 0)
18977 nop = gen_nop ();
18978 emit_insn_before (nop, next_insn);
18979 can_issue_more--;
18982 *group_end = true;
18983 return 0;
18986 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18988 int n_nops = rs6000_sched_insert_nops;
18990 /* Nops can't be issued from the branch slot, so the effective
18991 issue_rate for nops is 'issue_rate - 1'. */
18992 if (can_issue_more == 0)
18993 can_issue_more = issue_rate;
18994 can_issue_more--;
18995 if (can_issue_more == 0)
18997 can_issue_more = issue_rate - 1;
18998 (*group_count)++;
18999 end = true;
19000 for (i = 0; i < issue_rate; i++)
19002 group_insns[i] = 0;
19006 while (n_nops > 0)
19008 nop = gen_nop ();
19009 emit_insn_before (nop, next_insn);
19010 if (can_issue_more == issue_rate - 1) /* new group begins */
19011 end = false;
19012 can_issue_more--;
19013 if (can_issue_more == 0)
19015 can_issue_more = issue_rate - 1;
19016 (*group_count)++;
19017 end = true;
19018 for (i = 0; i < issue_rate; i++)
19020 group_insns[i] = 0;
19023 n_nops--;
19026 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19027 can_issue_more++;
19029 /* Is next_insn going to start a new group? */
19030 *group_end
19031 = (end
19032 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19033 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19034 || (can_issue_more < issue_rate &&
19035 insn_terminates_group_p (next_insn, previous_group)));
19036 if (*group_end && end)
19037 (*group_count)--;
19039 if (sched_verbose > 6)
19040 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19041 *group_count, can_issue_more);
19042 return can_issue_more;
19045 return can_issue_more;
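/* Illustrative sketch (not from GCC): a standalone model of the
   exact-regrouping arithmetic above.  Given the number of vacant
   issue slots and whether the next insn can occupy the branch slot,
   it returns how many nops scheme (1) would emit to push the next
   insn into a fresh dispatch group.  */

static int
model_exact_regroup_nops (int can_issue_more, int next_is_branch)
{
  /* A group that has just ended needs no padding at all.  */
  if (can_issue_more == 0)
    return 0;
  /* Only a branch can be issued in the last slot, so a non-branch
     needs one nop fewer; a branch needs 'can_issue_more' nops so that
     the final nop opens the new group that receives the branch.  */
  return next_is_branch ? can_issue_more : can_issue_more - 1;
}

/* For example, with 3 vacant slots a non-branch next insn costs 2
   nops, while a branch costs 3.  */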
19048 /* This function tries to synch the dispatch groups that the compiler "sees"
19049 with the dispatch groups that the processor dispatcher is expected to
19050 form in practice. It tries to achieve this synchronization by forcing the
19051 estimated processor grouping on the compiler (as opposed to the function
19052 'pad_groups' which tries to force the scheduler's grouping on the processor).
19054 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19055 examines the (estimated) dispatch groups that will be formed by the processor
19056 dispatcher. It marks these group boundaries to reflect the estimated
19057 processor grouping, overriding the grouping that the scheduler had marked.
19058 Depending on the value of the flag '-minsert-sched-nops' this function can
19059 force certain insns into separate groups or force a certain distance between
19060 them by inserting nops, for example, if there exists a "costly dependence"
19061 between the insns.
19063 The function estimates the group boundaries that the processor will form as
19064 follows: It keeps track of how many vacant issue slots are available after
19065 each insn. A subsequent insn will start a new group if one of the following
19066 4 cases applies:
19067 - no more vacant issue slots remain in the current dispatch group.
19068 - only the last issue slot, which is the branch slot, is vacant, but the next
19069 insn is not a branch.
19070 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19071 which means that a cracked insn (which occupies two issue slots) can't be
19072 issued in this group.
19073 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19074 start a new group. */
19076 static int
19077 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19078 rtx_insn *tail)
19080 rtx_insn *insn, *next_insn;
19081 int issue_rate;
19082 int can_issue_more;
19083 int slot, i;
19084 bool group_end;
19085 int group_count = 0;
19086 rtx *group_insns;
19088 /* Initialize. */
19089 issue_rate = rs6000_issue_rate ();
19090 group_insns = XALLOCAVEC (rtx, issue_rate);
19091 for (i = 0; i < issue_rate; i++)
19093 group_insns[i] = 0;
19095 can_issue_more = issue_rate;
19096 slot = 0;
19097 insn = get_next_active_insn (prev_head_insn, tail);
19098 group_end = false;
19100 while (insn != NULL_RTX)
19102 slot = (issue_rate - can_issue_more);
19103 group_insns[slot] = insn;
19104 can_issue_more =
19105 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19106 if (insn_terminates_group_p (insn, current_group))
19107 can_issue_more = 0;
19109 next_insn = get_next_active_insn (insn, tail);
19110 if (next_insn == NULL_RTX)
19111 return group_count + 1;
19113 /* Is next_insn going to start a new group? */
19114 group_end
19115 = (can_issue_more == 0
19116 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19117 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19118 || (can_issue_more < issue_rate &&
19119 insn_terminates_group_p (next_insn, previous_group)));
19121 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19122 next_insn, &group_end, can_issue_more,
19123 &group_count);
19125 if (group_end)
19127 group_count++;
19128 can_issue_more = 0;
19129 for (i = 0; i < issue_rate; i++)
19131 group_insns[i] = 0;
19135 if (GET_MODE (next_insn) == TImode && can_issue_more)
19136 PUT_MODE (next_insn, VOIDmode);
19137 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19138 PUT_MODE (next_insn, TImode);
19140 insn = next_insn;
19141 if (can_issue_more == 0)
19142 can_issue_more = issue_rate;
19143 } /* while */
19145 return group_count;
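/* Illustrative sketch (not from GCC): the four new-group conditions
   above, restated as a standalone predicate over a simplified insn
   description.  The struct fields are hypothetical stand-ins for the
   is_branch_slot_insn, is_cracked_insn and insn_terminates_group_p
   tests used in the real code.  */

struct model_insn
{
  int is_branch;	/* can occupy the branch slot */
  int is_cracked;	/* occupies two issue slots */
  int must_be_first;	/* always starts a new dispatch group */
};

static int
model_group_end_p (int can_issue_more, int issue_rate,
		   const struct model_insn *next)
{
  return (can_issue_more == 0
	  || (can_issue_more == 1 && !next->is_branch)
	  || (can_issue_more <= 2 && next->is_cracked)
	  || (can_issue_more < issue_rate && next->must_be_first));
}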
19148 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19149 dispatch group boundaries that the scheduler had marked. Pad with nops
19150 any dispatch groups which have vacant issue slots, in order to force the
19151 scheduler's grouping on the processor dispatcher. The function
19152 returns the number of dispatch groups found. */
19154 static int
19155 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19156 rtx_insn *tail)
19158 rtx_insn *insn, *next_insn;
19159 rtx nop;
19160 int issue_rate;
19161 int can_issue_more;
19162 int group_end;
19163 int group_count = 0;
19165 /* Initialize issue_rate. */
19166 issue_rate = rs6000_issue_rate ();
19167 can_issue_more = issue_rate;
19169 insn = get_next_active_insn (prev_head_insn, tail);
19170 next_insn = get_next_active_insn (insn, tail);
19172 while (insn != NULL_RTX)
19174 can_issue_more =
19175 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19177 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
19179 if (next_insn == NULL_RTX)
19180 break;
19182 if (group_end)
19184 /* If the scheduler had marked group termination at this location
19185 (between insn and next_insn), and neither insn nor next_insn will
19186 force group termination, pad the group with nops to force group
19187 termination. */
19188 if (can_issue_more
19189 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
19190 && !insn_terminates_group_p (insn, current_group)
19191 && !insn_terminates_group_p (next_insn, previous_group))
19193 if (!is_branch_slot_insn (next_insn))
19194 can_issue_more--;
19196 while (can_issue_more)
19198 nop = gen_nop ();
19199 emit_insn_before (nop, next_insn);
19200 can_issue_more--;
19204 can_issue_more = issue_rate;
19205 group_count++;
19208 insn = next_insn;
19209 next_insn = get_next_active_insn (insn, tail);
19212 return group_count;
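/* Illustrative sketch (not from GCC): the padding arithmetic used
   above.  At a scheduler-marked boundary every remaining vacant slot
   is filled with a nop, except that the branch slot is left alone
   when the next insn is not a branch, since nops cannot be issued
   from the branch slot.  */

static int
model_pad_nops (int can_issue_more, int next_is_branch)
{
  if (!next_is_branch && can_issue_more > 0)
    can_issue_more--;
  return can_issue_more;	/* one nop per remaining vacant slot */
}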
19215 /* We're beginning a new block. Initialize data structures as necessary. */
19217 static void
19218 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
19219 int sched_verbose ATTRIBUTE_UNUSED,
19220 int max_ready ATTRIBUTE_UNUSED)
19222 last_scheduled_insn = NULL;
19223 load_store_pendulum = 0;
19224 divide_cnt = 0;
19225 vec_pairing = 0;
19228 /* The following function is called at the end of scheduling BB.
19229 After reload, it inserts nops to enforce insn group bundling. */
19231 static void
19232 rs6000_sched_finish (FILE *dump, int sched_verbose)
19234 int n_groups;
19236 if (sched_verbose)
19237 fprintf (dump, "=== Finishing schedule.\n");
19239 if (reload_completed && rs6000_sched_groups)
19241 /* Do not run sched_finish hook when selective scheduling enabled. */
19242 if (sel_sched_p ())
19243 return;
19245 if (rs6000_sched_insert_nops == sched_finish_none)
19246 return;
19248 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
19249 n_groups = pad_groups (dump, sched_verbose,
19250 current_sched_info->prev_head,
19251 current_sched_info->next_tail);
19252 else
19253 n_groups = redefine_groups (dump, sched_verbose,
19254 current_sched_info->prev_head,
19255 current_sched_info->next_tail);
19257 if (sched_verbose >= 6)
19259 fprintf (dump, "ngroups = %d\n", n_groups);
19260 print_rtl (dump, current_sched_info->prev_head);
19261 fprintf (dump, "Done finish_sched\n");
19266 struct rs6000_sched_context
19268 short cached_can_issue_more;
19269 rtx_insn *last_scheduled_insn;
19270 int load_store_pendulum;
19271 int divide_cnt;
19272 int vec_pairing;
19275 typedef struct rs6000_sched_context rs6000_sched_context_def;
19276 typedef rs6000_sched_context_def *rs6000_sched_context_t;
19278 /* Allocate storage for a new scheduling context. */
19279 static void *
19280 rs6000_alloc_sched_context (void)
19282 return xmalloc (sizeof (rs6000_sched_context_def));
19285 /* If CLEAN_P is true, initialize _SC with clean data;
19286 otherwise initialize it from the global context. */
19287 static void
19288 rs6000_init_sched_context (void *_sc, bool clean_p)
19290 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19292 if (clean_p)
19294 sc->cached_can_issue_more = 0;
19295 sc->last_scheduled_insn = NULL;
19296 sc->load_store_pendulum = 0;
19297 sc->divide_cnt = 0;
19298 sc->vec_pairing = 0;
19300 else
19302 sc->cached_can_issue_more = cached_can_issue_more;
19303 sc->last_scheduled_insn = last_scheduled_insn;
19304 sc->load_store_pendulum = load_store_pendulum;
19305 sc->divide_cnt = divide_cnt;
19306 sc->vec_pairing = vec_pairing;
19310 /* Sets the global scheduling context to the one pointed to by _SC. */
19311 static void
19312 rs6000_set_sched_context (void *_sc)
19314 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
19316 gcc_assert (sc != NULL);
19318 cached_can_issue_more = sc->cached_can_issue_more;
19319 last_scheduled_insn = sc->last_scheduled_insn;
19320 load_store_pendulum = sc->load_store_pendulum;
19321 divide_cnt = sc->divide_cnt;
19322 vec_pairing = sc->vec_pairing;
19325 /* Free _SC. */
19326 static void
19327 rs6000_free_sched_context (void *_sc)
19329 gcc_assert (_sc != NULL);
19331 free (_sc);
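/* Illustrative sketch (not from GCC): how the scheduler is expected
   to drive the four context hooks above, mirroring the
   TARGET_SCHED_*_SCHED_CONTEXT protocol.  The 'demo' function is
   hypothetical.  */

static void
demo_sched_context_protocol (void)
{
  void *ctx = rs6000_alloc_sched_context ();	/* raw storage */
  rs6000_init_sched_context (ctx, true);	/* start from a clean state */
  rs6000_set_sched_context (ctx);		/* install as global state */
  rs6000_free_sched_context (ctx);		/* release it */
}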
19334 static bool
19335 rs6000_sched_can_speculate_insn (rtx_insn *insn)
19337 switch (get_attr_type (insn))
19339 case TYPE_DIV:
19340 case TYPE_SDIV:
19341 case TYPE_DDIV:
19342 case TYPE_VECDIV:
19343 case TYPE_SSQRT:
19344 case TYPE_DSQRT:
19345 return false;
19347 default:
19348 return true;
19352 /* Length in units of the trampoline for entering a nested function. */
19354 int
19355 rs6000_trampoline_size (void)
19357 int ret = 0;
19359 switch (DEFAULT_ABI)
19361 default:
19362 gcc_unreachable ();
19364 case ABI_AIX:
19365 ret = (TARGET_32BIT) ? 12 : 24;
19366 break;
19368 case ABI_ELFv2:
19369 gcc_assert (!TARGET_32BIT);
19370 ret = 32;
19371 break;
19373 case ABI_DARWIN:
19374 case ABI_V4:
19375 ret = (TARGET_32BIT) ? 40 : 48;
19376 break;
19379 return ret;
19382 /* Emit RTL insns to initialize the variable parts of a trampoline.
19383 FNADDR is an RTX for the address of the function's pure code.
19384 CXT is an RTX for the static chain value for the function. */
19386 static void
19387 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
19389 int regsize = (TARGET_32BIT) ? 4 : 8;
19390 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
19391 rtx ctx_reg = force_reg (Pmode, cxt);
19392 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
19394 switch (DEFAULT_ABI)
19396 default:
19397 gcc_unreachable ();
19399 /* Under AIX, just build the 3-word function descriptor. */
19400 case ABI_AIX:
19402 rtx fnmem, fn_reg, toc_reg;
19404 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
19405 error ("you cannot take the address of a nested function if you use "
19406 "the %qs option", "-mno-pointers-to-nested-functions");
19408 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
19409 fn_reg = gen_reg_rtx (Pmode);
19410 toc_reg = gen_reg_rtx (Pmode);
19412 /* Macro to shorten the code expansions below. */
19413 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
19415 m_tramp = replace_equiv_address (m_tramp, addr);
19417 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
19418 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
19419 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
19420 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
19421 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
19423 # undef MEM_PLUS
19425 break;
19427 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
19428 case ABI_ELFv2:
19429 case ABI_DARWIN:
19430 case ABI_V4:
19431 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
19432 LCT_NORMAL, VOIDmode,
19433 addr, Pmode,
19434 GEN_INT (rs6000_trampoline_size ()), SImode,
19435 fnaddr, Pmode,
19436 ctx_reg, Pmode);
19437 break;
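/* Illustrative sketch (not from GCC): the 3-word function descriptor
   built by the ABI_AIX case above.  Field names are hypothetical; the
   trampoline copies the entry address and TOC value from the target
   function's own descriptor and substitutes the static chain.  */

struct model_aix_descriptor
{
  void *entry_point;	/* word 0: address of the function's code */
  void *toc_value;	/* word 1: the function's TOC pointer */
  void *static_chain;	/* word 2: overwritten with CXT above */
};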
19442 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19443 identifier as an argument, so the front end shouldn't look it up. */
19445 static bool
19446 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19448 return is_attribute_p ("altivec", attr_id);
19451 /* Handle the "altivec" attribute. The attribute may have
19452 arguments as follows:
19454 __attribute__((altivec(vector__)))
19455 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19456 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19458 and may appear more than once (e.g., 'vector bool char') in a
19459 given declaration. */
19461 static tree
19462 rs6000_handle_altivec_attribute (tree *node,
19463 tree name ATTRIBUTE_UNUSED,
19464 tree args,
19465 int flags ATTRIBUTE_UNUSED,
19466 bool *no_add_attrs)
19468 tree type = *node, result = NULL_TREE;
19469 machine_mode mode;
19470 int unsigned_p;
19471 char altivec_type
19472 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19473 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19474 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19475 : '?');
19477 while (POINTER_TYPE_P (type)
19478 || TREE_CODE (type) == FUNCTION_TYPE
19479 || TREE_CODE (type) == METHOD_TYPE
19480 || TREE_CODE (type) == ARRAY_TYPE)
19481 type = TREE_TYPE (type);
19483 mode = TYPE_MODE (type);
19485 /* Check for invalid AltiVec type qualifiers. */
19486 if (type == long_double_type_node)
19487 error ("use of %<long double%> in AltiVec types is invalid");
19488 else if (type == boolean_type_node)
19489 error ("use of boolean types in AltiVec types is invalid");
19490 else if (TREE_CODE (type) == COMPLEX_TYPE)
19491 error ("use of %<complex%> in AltiVec types is invalid");
19492 else if (DECIMAL_FLOAT_MODE_P (mode))
19493 error ("use of decimal floating point types in AltiVec types is invalid");
19494 else if (!TARGET_VSX)
19496 if (type == long_unsigned_type_node || type == long_integer_type_node)
19498 if (TARGET_64BIT)
19499 error ("use of %<long%> in AltiVec types is invalid for "
19500 "64-bit code without %qs", "-mvsx");
19501 else if (rs6000_warn_altivec_long)
19502 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19503 "use %<int%>");
19505 else if (type == long_long_unsigned_type_node
19506 || type == long_long_integer_type_node)
19507 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19508 "-mvsx");
19509 else if (type == double_type_node)
19510 error ("use of %<double%> in AltiVec types is invalid without %qs",
19511 "-mvsx");
19514 switch (altivec_type)
19516 case 'v':
19517 unsigned_p = TYPE_UNSIGNED (type);
19518 switch (mode)
19520 case E_TImode:
19521 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19522 break;
19523 case E_DImode:
19524 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19525 break;
19526 case E_SImode:
19527 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19528 break;
19529 case E_HImode:
19530 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19531 break;
19532 case E_QImode:
19533 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19534 break;
19535 case E_SFmode: result = V4SF_type_node; break;
19536 case E_DFmode: result = V2DF_type_node; break;
19537 /* If the user says 'vector int bool', we may be handed the 'bool'
19538 attribute _before_ the 'vector' attribute, and so select the
19539 proper type in the 'b' case below. */
19540 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19541 case E_V2DImode: case E_V2DFmode:
19542 result = type;
19543 default: break;
19545 break;
19546 case 'b':
19547 switch (mode)
19549 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19550 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19551 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19552 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19553 default: break;
19555 break;
19556 case 'p':
19557 switch (mode)
19559 case E_V8HImode: result = pixel_V8HI_type_node;
19560 default: break;
19562 default: break;
19565 /* Propagate qualifiers attached to the element type
19566 onto the vector type. */
19567 if (result && result != type && TYPE_QUALS (type))
19568 result = build_qualified_type (result, TYPE_QUALS (type));
19570 *no_add_attrs = true; /* No need to hang on to the attribute. */
19572 if (result)
19573 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19575 return NULL_TREE;
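/* Illustrative usage (a sketch, assuming -maltivec; not from GCC):
   these attribute forms are what the 'vector' and 'bool' keywords
   expand to, so the declarations below select unsigned_V4SI_type_node
   via the 'v' + E_SImode case and bool_V4SI_type_node via the 'b' +
   E_SImode case respectively.  */

__attribute__((altivec(vector__))) unsigned int demo_vec;   /* vector unsigned int */
__attribute__((altivec(bool__))) unsigned int demo_bool;    /* vector bool int */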
19578 /* AltiVec defines five built-in scalar types that serve as vector
19579 elements; we must teach the compiler how to mangle them. The 128-bit
19580 floating point mangling is target-specific as well. MMA defines
19581 two built-in types to be used as opaque vector types. */
19583 static const char *
19584 rs6000_mangle_type (const_tree type)
19586 type = TYPE_MAIN_VARIANT (type);
19588 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19589 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19590 return NULL;
19592 if (type == bool_char_type_node) return "U6__boolc";
19593 if (type == bool_short_type_node) return "U6__bools";
19594 if (type == pixel_type_node) return "u7__pixel";
19595 if (type == bool_int_type_node) return "U6__booli";
19596 if (type == bool_long_long_type_node) return "U6__boolx";
19598 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19599 return "g";
19600 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19601 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19603 if (type == vector_pair_type_node)
19604 return "u13__vector_pair";
19605 if (type == vector_quad_type_node)
19606 return "u13__vector_quad";
19608 /* For all other types, use the default mangling. */
19609 return NULL;
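/* Illustrative examples (not from GCC): under the rules above, a C++
   prototype such as the hypothetical

     void demo (long double);

   mangles as _Z4demog when long double is IBM 128-bit, while a
   __pixel parameter mangles as "u7__pixel" and an IEEE 128-bit float
   as "u9__ieee128" (or "U10__float128" when ieee128_mangling_gcc_8_1
   is set).  */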
19612 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19613 struct attribute_spec.handler. */
19615 static tree
19616 rs6000_handle_longcall_attribute (tree *node, tree name,
19617 tree args ATTRIBUTE_UNUSED,
19618 int flags ATTRIBUTE_UNUSED,
19619 bool *no_add_attrs)
19621 if (TREE_CODE (*node) != FUNCTION_TYPE
19622 && TREE_CODE (*node) != FIELD_DECL
19623 && TREE_CODE (*node) != TYPE_DECL)
19625 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19626 name);
19627 *no_add_attrs = true;
19630 return NULL_TREE;
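/* Illustrative usage (not from GCC): the attribute attaches to
   function types, so the handler above accepts the first declaration
   below and would warn on the second, setting *no_add_attrs.  */

void demo_far_call (void) __attribute__((longcall));	/* accepted */
/* int demo_bad __attribute__((longcall));  -- warns: the attribute
   only applies to functions.  */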
19633 /* Set longcall attributes on all functions declared when
19634 rs6000_default_long_calls is true. */
19635 static void
19636 rs6000_set_default_type_attributes (tree type)
19638 if (rs6000_default_long_calls
19639 && (TREE_CODE (type) == FUNCTION_TYPE
19640 || TREE_CODE (type) == METHOD_TYPE))
19641 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19642 NULL_TREE,
19643 TYPE_ATTRIBUTES (type));
19645 #if TARGET_MACHO
19646 darwin_set_default_type_attributes (type);
19647 #endif
19650 /* Return a reference suitable for calling a function with the
19651 longcall attribute. */
19653 static rtx
19654 rs6000_longcall_ref (rtx call_ref, rtx arg)
19656 /* System V adds '.' to the internal name, so skip any leading dots. */
19657 const char *call_name = XSTR (call_ref, 0);
19658 if (*call_name == '.')
19660 while (*call_name == '.')
19661 call_name++;
19663 tree node = get_identifier (call_name);
19664 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19667 if (TARGET_PLTSEQ)
19669 rtx base = const0_rtx;
19670 int regno = 12;
19671 if (rs6000_pcrel_p ())
19673 rtx reg = gen_rtx_REG (Pmode, regno);
19674 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
19675 gen_rtvec (3, base, call_ref, arg),
19676 UNSPECV_PLT_PCREL);
19677 emit_insn (gen_rtx_SET (reg, u));
19678 return reg;
19681 if (DEFAULT_ABI == ABI_ELFv2)
19682 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19683 else
19685 if (flag_pic)
19686 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19687 regno = 11;
19689 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19690 may be used by a function global entry point. For SysV4, r11
19691 is used by __glink_PLTresolve lazy resolver entry. */
19692 rtx reg = gen_rtx_REG (Pmode, regno);
19693 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19694 UNSPEC_PLT16_HA);
19695 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
19696 gen_rtvec (3, reg, call_ref, arg),
19697 UNSPECV_PLT16_LO);
19698 emit_insn (gen_rtx_SET (reg, hi));
19699 emit_insn (gen_rtx_SET (reg, lo));
19700 return reg;
19703 return force_reg (Pmode, call_ref);
19706 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19707 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19708 #endif
19710 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19711 struct attribute_spec.handler. */
19712 static tree
19713 rs6000_handle_struct_attribute (tree *node, tree name,
19714 tree args ATTRIBUTE_UNUSED,
19715 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19717 tree *type = NULL;
19718 if (DECL_P (*node))
19720 if (TREE_CODE (*node) == TYPE_DECL)
19721 type = &TREE_TYPE (*node);
19723 else
19724 type = node;
19726 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19727 || TREE_CODE (*type) == UNION_TYPE)))
19729 warning (OPT_Wattributes, "%qE attribute ignored", name);
19730 *no_add_attrs = true;
19733 else if ((is_attribute_p ("ms_struct", name)
19734 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19735 || ((is_attribute_p ("gcc_struct", name)
19736 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19738 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19739 name);
19740 *no_add_attrs = true;
19743 return NULL_TREE;
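/* Illustrative usage (not from GCC): the two attributes are mutually
   exclusive, so the handler above accepts the first declaration below
   and would drop the conflicting attribute from the second with an
   "incompatible attribute ignored" warning.  */

struct __attribute__((ms_struct)) demo_ms { int f : 3; };	/* accepted */
/* struct __attribute__((ms_struct, gcc_struct)) demo_bad { int f; };
   -- the second attribute is ignored.  */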
19746 static bool
19747 rs6000_ms_bitfield_layout_p (const_tree record_type)
19749 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
19750 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19751 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19754 #ifdef USING_ELFOS_H
19756 /* A get_unnamed_section callback, used for switching to toc_section. */
19758 static void
19759 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19761 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19762 && TARGET_MINIMAL_TOC)
19764 if (!toc_initialized)
19766 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19767 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19768 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19769 fprintf (asm_out_file, "\t.tc ");
19770 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19771 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19772 fprintf (asm_out_file, "\n");
19774 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19775 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19776 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19777 fprintf (asm_out_file, " = .+32768\n");
19778 toc_initialized = 1;
19780 else
19781 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19783 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19785 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19786 if (!toc_initialized)
19788 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19789 toc_initialized = 1;
19792 else
19794 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19795 if (!toc_initialized)
19797 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19798 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19799 fprintf (asm_out_file, " = .+32768\n");
19800 toc_initialized = 1;
19805 /* Implement TARGET_ASM_INIT_SECTIONS. */
19807 static void
19808 rs6000_elf_asm_init_sections (void)
19810 toc_section
19811 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19813 sdata2_section
19814 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19815 SDATA2_SECTION_ASM_OP);
19818 /* Implement TARGET_SELECT_RTX_SECTION. */
19820 static section *
19821 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19822 unsigned HOST_WIDE_INT align)
19824 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19825 return toc_section;
19826 else
19827 return default_elf_select_rtx_section (mode, x, align);
19830 /* For a SYMBOL_REF, set generic flags and then perform some
19831 target-specific processing.
19833 When the AIX ABI is requested on a non-AIX system, replace the
19834 function name with the real name (with a leading .) rather than the
19835 function descriptor name. This saves a lot of overriding code to
19836 read the prefixes. */
19838 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19839 static void
19840 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19842 default_encode_section_info (decl, rtl, first);
19844 if (first
19845 && TREE_CODE (decl) == FUNCTION_DECL
19846 && !TARGET_AIX
19847 && DEFAULT_ABI == ABI_AIX)
19849 rtx sym_ref = XEXP (rtl, 0);
19850 size_t len = strlen (XSTR (sym_ref, 0));
19851 char *str = XALLOCAVEC (char, len + 2);
19852 str[0] = '.';
19853 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19854 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19858 static inline bool
19859 compare_section_name (const char *section, const char *templ)
19861 int len;
19863 len = strlen (templ);
19864 return (strncmp (section, templ, len) == 0
19865 && (section[len] == 0 || section[len] == '.'));
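/* Illustrative behavior (not from GCC): the template matches the
   section name itself or any dot-separated subsection, so both
   compare_section_name (".sdata", ".sdata") and
   compare_section_name (".sdata.demo", ".sdata") are true, while
   compare_section_name (".sdata2", ".sdata") is false because the
   character after the prefix is '2', not '.' or NUL.  */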
19868 bool
19869 rs6000_elf_in_small_data_p (const_tree decl)
19871 if (rs6000_sdata == SDATA_NONE)
19872 return false;
19874 /* We want to merge strings, so we never consider them small data. */
19875 if (TREE_CODE (decl) == STRING_CST)
19876 return false;
19878 /* Functions are never in the small data area. */
19879 if (TREE_CODE (decl) == FUNCTION_DECL)
19880 return false;
19882 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19884 const char *section = DECL_SECTION_NAME (decl);
19885 if (compare_section_name (section, ".sdata")
19886 || compare_section_name (section, ".sdata2")
19887 || compare_section_name (section, ".gnu.linkonce.s")
19888 || compare_section_name (section, ".sbss")
19889 || compare_section_name (section, ".sbss2")
19890 || compare_section_name (section, ".gnu.linkonce.sb")
19891 || strcmp (section, ".PPC.EMB.sdata0") == 0
19892 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19893 return true;
19895 else
19897 /* If we are told not to put readonly data in sdata, then don't. */
19898 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19899 && !rs6000_readonly_in_sdata)
19900 return false;
19902 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19904 if (size > 0
19905 && size <= g_switch_value
19906 /* If it's not public, and we're not going to reference it there,
19907 there's no need to put it in the small data section. */
19908 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19909 return true;
19912 return false;
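/* Illustrative behavior (not from GCC): with -G 8 (g_switch_value of
   8), a public 4-byte variable qualifies for small data by size; a
   variable placed explicitly in ".sdata2" or a ".sdata2.*" subsection
   qualifies regardless of size; functions and string constants never
   do.  */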
19915 #endif /* USING_ELFOS_H */
19917 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19919 static bool
19920 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19922 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19925 /* Do not place thread-local symbols refs in the object blocks. */
19927 static bool
19928 rs6000_use_blocks_for_decl_p (const_tree decl)
19930 return !DECL_THREAD_LOCAL_P (decl);
19933 /* Return a REG that occurs in ADDR with coefficient 1.
19934 ADDR can be effectively incremented by incrementing REG.
19936 r0 is special and we must not select it as an address
19937 register by this routine since our caller will try to
19938 increment the returned register via an "la" instruction. */
19940 rtx
19941 find_addr_reg (rtx addr)
19943 while (GET_CODE (addr) == PLUS)
19945 if (REG_P (XEXP (addr, 0))
19946 && REGNO (XEXP (addr, 0)) != 0)
19947 addr = XEXP (addr, 0);
19948 else if (REG_P (XEXP (addr, 1))
19949 && REGNO (XEXP (addr, 1)) != 0)
19950 addr = XEXP (addr, 1);
19951 else if (CONSTANT_P (XEXP (addr, 0)))
19952 addr = XEXP (addr, 1);
19953 else if (CONSTANT_P (XEXP (addr, 1)))
19954 addr = XEXP (addr, 0);
19955 else
19956 gcc_unreachable ();
19958 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19959 return addr;
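/* Illustrative trace (not from GCC): for a hypothetical address
   (plus (plus (reg r9) (reg r0)) (const_int 8)) the loop above first
   discards the constant term, then selects r9 rather than r0 (r0 is
   not usable as a base by the "la" our caller emits), and the final
   assertion guarantees a non-r0 register results.  */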
19962 void
19963 rs6000_fatal_bad_address (rtx op)
19965 fatal_insn ("bad address", op);
19968 #if TARGET_MACHO
19970 vec<branch_island, va_gc> *branch_islands;
19972 /* Remember to generate a branch island for far calls to the given
19973 function. */
19975 static void
19976 add_compiler_branch_island (tree label_name, tree function_name,
19977 int line_number)
19979 branch_island bi = {function_name, label_name, line_number};
19980 vec_safe_push (branch_islands, bi);
19983 /* NO_PREVIOUS_DEF checks the list of branch islands for a previous
19984 definition of the function name. */
19986 static int
19987 no_previous_def (tree function_name)
19989 branch_island *bi;
19990 unsigned ix;
19992 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19993 if (function_name == bi->function_name)
19994 return 0;
19995 return 1;
19998 /* GET_PREV_LABEL gets the label name from the previous definition of
19999 the function. */
20001 static tree
20002 get_prev_label (tree function_name)
20004 branch_island *bi;
20005 unsigned ix;
20007 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20008 if (function_name == bi->function_name)
20009 return bi->label_name;
20010 return NULL_TREE;
20013 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20015 void
20016 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20018 unsigned int length;
20019 char *symbol_name, *lazy_ptr_name;
20020 char *local_label_0;
20021 static unsigned label = 0;
20023 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20024 symb = (*targetm.strip_name_encoding) (symb);
20026 length = strlen (symb);
20027 symbol_name = XALLOCAVEC (char, length + 32);
20028 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20030 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20031 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20033 if (MACHOPIC_PURE)
20035 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20036 fprintf (file, "\t.align 5\n");
20038 fprintf (file, "%s:\n", stub);
20039 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20041 label++;
20042 local_label_0 = XALLOCAVEC (char, 16);
20043 sprintf (local_label_0, "L%u$spb", label);
20045 fprintf (file, "\tmflr r0\n");
20046 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20047 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20048 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20049 lazy_ptr_name, local_label_0);
20050 fprintf (file, "\tmtlr r0\n");
20051 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20052 (TARGET_64BIT ? "ldu" : "lwzu"),
20053 lazy_ptr_name, local_label_0);
20054 fprintf (file, "\tmtctr r12\n");
20055 fprintf (file, "\tbctr\n");
20057 else /* mdynamic-no-pic or mkernel. */
20059 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20060 fprintf (file, "\t.align 4\n");
20062 fprintf (file, "%s:\n", stub);
20063 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20065 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20066 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20067 (TARGET_64BIT ? "ldu" : "lwzu"),
20068 lazy_ptr_name);
20069 fprintf (file, "\tmtctr r12\n");
20070 fprintf (file, "\tbctr\n");
20073 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20074 fprintf (file, "%s:\n", lazy_ptr_name);
20075 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20076 fprintf (file, "%sdyld_stub_binding_helper\n",
20077 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20080 /* Legitimize PIC addresses. If the address is already
20081 position-independent, we return ORIG. Newly generated
20082 position-independent addresses go into a reg. This is REG if
20083 nonzero; otherwise we allocate registers as necessary. */
20085 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
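/* Illustrative arithmetic (not from GCC): SMALL_INT tests for a
   signed 16-bit value by biasing with 0x8000 in unsigned arithmetic:
   -0x8000 wraps to exactly 0 after the bias and 0x7fff becomes
   0xffff, so both pass the < 0x10000 test, while 0x8000 biases to
   0x10000 and fails.  */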
20087 rtx
20088 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20089 rtx reg)
20091 rtx base, offset;
20093 if (reg == NULL && !reload_completed)
20094 reg = gen_reg_rtx (Pmode);
20096 if (GET_CODE (orig) == CONST)
20098 rtx reg_temp;
20100 if (GET_CODE (XEXP (orig, 0)) == PLUS
20101 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20102 return orig;
20104 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20106 /* Use a different reg for the intermediate value, as
20107 it will be marked UNCHANGING. */
20108 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20109 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20110 Pmode, reg_temp);
20111 offset =
20112 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20113 Pmode, reg);
20115 if (CONST_INT_P (offset))
20117 if (SMALL_INT (offset))
20118 return plus_constant (Pmode, base, INTVAL (offset));
20119 else if (!reload_completed)
20120 offset = force_reg (Pmode, offset);
20121 else
20123 rtx mem = force_const_mem (Pmode, orig);
20124 return machopic_legitimize_pic_address (mem, Pmode, reg);
20127 return gen_rtx_PLUS (Pmode, base, offset);
20130 /* Fall back on generic machopic code. */
20131 return machopic_legitimize_pic_address (orig, mode, reg);
20134 /* Output a .machine directive for the Darwin assembler, and call
20135 the generic start_file routine. */
20137 static void
20138 rs6000_darwin_file_start (void)
20140 static const struct
20142 const char *arg;
20143 const char *name;
20144 HOST_WIDE_INT if_set;
20145 } mapping[] = {
20146 { "ppc64", "ppc64", MASK_64BIT },
20147 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
20148 { "power4", "ppc970", 0 },
20149 { "G5", "ppc970", 0 },
20150 { "7450", "ppc7450", 0 },
20151 { "7400", "ppc7400", MASK_ALTIVEC },
20152 { "G4", "ppc7400", 0 },
20153 { "750", "ppc750", 0 },
20154 { "740", "ppc750", 0 },
20155 { "G3", "ppc750", 0 },
20156 { "604e", "ppc604e", 0 },
20157 { "604", "ppc604", 0 },
20158 { "603e", "ppc603", 0 },
20159 { "603", "ppc603", 0 },
20160 { "601", "ppc601", 0 },
20161 { NULL, "ppc", 0 } };
20162 const char *cpu_id = "";
20163 size_t i;
20165 rs6000_file_start ();
20166 darwin_file_start ();
20168 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20170 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
20171 cpu_id = rs6000_default_cpu;
20173 if (global_options_set.x_rs6000_cpu_index)
20174 cpu_id = processor_target_table[rs6000_cpu_index].name;
20176 /* Look through the mapping array. Pick the first name that either
20177 matches the argument, has a bit set in IF_SET that is also set
20178 in the target flags, or has a NULL name. */
20180 i = 0;
20181 while (mapping[i].arg != NULL
20182 && strcmp (mapping[i].arg, cpu_id) != 0
20183 && (mapping[i].if_set & rs6000_isa_flags) == 0)
20184 i++;
20186 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
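/* Illustrative scan (not from GCC): a "7450" cpu_id matches the
   "7450" row by name, so ".machine ppc7450" is emitted; a "G4" target
   with AltiVec enabled stops one row earlier at "7400" because
   MASK_ALTIVEC is set in the ISA flags, which names the same ppc7400;
   an id matching no row and no mask falls through to the terminating
   NULL entry and emits ".machine ppc".  */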
20189 #endif /* TARGET_MACHO */
20191 #if TARGET_ELF
20192 static int
20193 rs6000_elf_reloc_rw_mask (void)
20195 if (flag_pic)
20196 return 3;
20197 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20198 return 2;
20199 else
20200 return 0;
20203 /* Record an element in the table of global constructors. SYMBOL is
20204 a SYMBOL_REF of the function to be called; PRIORITY is a number
20205 between 0 and MAX_INIT_PRIORITY.
20207 This differs from default_named_section_asm_out_constructor in
20208 that we have special handling for -mrelocatable. */
20210 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
20211 static void
20212 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
20214 const char *section = ".ctors";
20215 char buf[18];
20217 if (priority != DEFAULT_INIT_PRIORITY)
20219 sprintf (buf, ".ctors.%.5u",
20220 /* Invert the numbering so the linker puts us in the proper
20221 order; constructors are run from right to left, and the
20222 linker sorts in increasing order. */
20223 MAX_INIT_PRIORITY - priority);
20224 section = buf;
20227 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20228 assemble_align (POINTER_SIZE);
20230 if (DEFAULT_ABI == ABI_V4
20231 && (TARGET_RELOCATABLE || flag_pic > 1))
20233 fputs ("\t.long (", asm_out_file);
20234 output_addr_const (asm_out_file, symbol);
20235 fputs (")@fixup\n", asm_out_file);
20237 else
20238 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
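/* Illustrative arithmetic (not from GCC, assuming MAX_INIT_PRIORITY
   is 65535): a priority-100 constructor lands in ".ctors.65435" and a
   priority-65535 constructor in ".ctors.00000".  The linker's
   increasing sort places .ctors.00000 first, and since .ctors runs
   right to left, the priority-100 constructor executes earlier,
   matching the rule that lower priority numbers initialize first.  */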
20241 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
20242 static void
20243 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
20245 const char *section = ".dtors";
20246 char buf[18];
20248 if (priority != DEFAULT_INIT_PRIORITY)
20250 sprintf (buf, ".dtors.%.5u",
20251 /* Invert the numbering so the linker puts us in the proper
20252 order; constructors are run from right to left, and the
20253 linker sorts in increasing order. */
20254 MAX_INIT_PRIORITY - priority);
20255 section = buf;
20258 switch_to_section (get_section (section, SECTION_WRITE, NULL));
20259 assemble_align (POINTER_SIZE);
20261 if (DEFAULT_ABI == ABI_V4
20262 && (TARGET_RELOCATABLE || flag_pic > 1))
20264 fputs ("\t.long (", asm_out_file);
20265 output_addr_const (asm_out_file, symbol);
20266 fputs (")@fixup\n", asm_out_file);
20268 else
20269 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
20272 void
20273 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
20275 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
20277 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
20278 ASM_OUTPUT_LABEL (file, name);
20279 fputs (DOUBLE_INT_ASM_OP, file);
20280 rs6000_output_function_entry (file, name);
20281 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
20282 if (DOT_SYMBOLS)
20284 fputs ("\t.size\t", file);
20285 assemble_name (file, name);
20286 fputs (",24\n\t.type\t.", file);
20287 assemble_name (file, name);
20288 fputs (",@function\n", file);
20289 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
20291 fputs ("\t.globl\t.", file);
20292 assemble_name (file, name);
20293 putc ('\n', file);
20296 else
20297 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20298 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20299 rs6000_output_function_entry (file, name);
20300 fputs (":\n", file);
20301 return;
20304 int uses_toc;
20305 if (DEFAULT_ABI == ABI_V4
20306 && (TARGET_RELOCATABLE || flag_pic > 1)
20307 && !TARGET_SECURE_PLT
20308 && (!constant_pool_empty_p () || crtl->profile)
20309 && (uses_toc = uses_TOC ()))
20311 char buf[256];
20313 if (uses_toc == 2)
20314 switch_to_other_text_partition ();
20315 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20317 fprintf (file, "\t.long ");
20318 assemble_name (file, toc_label_name);
20319 need_toc_init = 1;
20320 putc ('-', file);
20321 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20322 assemble_name (file, buf);
20323 putc ('\n', file);
20324 if (uses_toc == 2)
20325 switch_to_other_text_partition ();
20328 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20329 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20331 if (TARGET_CMODEL == CMODEL_LARGE
20332 && rs6000_global_entry_point_prologue_needed_p ())
20334 char buf[256];
20336 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
20338 fprintf (file, "\t.quad .TOC.-");
20339 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20340 assemble_name (file, buf);
20341 putc ('\n', file);
20344 if (DEFAULT_ABI == ABI_AIX)
20346 const char *desc_name, *orig_name;
20348 orig_name = (*targetm.strip_name_encoding) (name);
20349 desc_name = orig_name;
20350 while (*desc_name == '.')
20351 desc_name++;
20353 if (TREE_PUBLIC (decl))
20354 fprintf (file, "\t.globl %s\n", desc_name);
20356 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20357 fprintf (file, "%s:\n", desc_name);
20358 fprintf (file, "\t.long %s\n", orig_name);
20359 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
20360 fputs ("\t.long 0\n", file);
20361 fprintf (file, "\t.previous\n");
20363 ASM_OUTPUT_LABEL (file, name);
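/* Illustrative output (a sketch, not from GCC): for a public 64-bit
   ELFv1 function "fn" without dot-symbols, the code above emits
   roughly

	.section ".opd","aw"
	.align 3
   fn:
	.quad .L.fn,.TOC.@tocbase,0
	.previous

   followed by the code label ".L.fn:" in the text section; with
   dot-symbols the entry is ".fn" plus matching .size, .type and
   .globl directives.  */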
20366 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
20367 static void
20368 rs6000_elf_file_end (void)
20370 #ifdef HAVE_AS_GNU_ATTRIBUTE
20371 /* ??? The value emitted depends on options active at file end.
20372 Assume anyone using #pragma or attributes that might change
20373 options knows what they are doing. */
20374 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
20375 && rs6000_passes_float)
20377 int fp;
20379 if (TARGET_HARD_FLOAT)
20380 fp = 1;
20381 else
20382 fp = 2;
20383 if (rs6000_passes_long_double)
20385 if (!TARGET_LONG_DOUBLE_128)
20386 fp |= 2 * 4;
20387 else if (TARGET_IEEEQUAD)
20388 fp |= 3 * 4;
20389 else
20390 fp |= 1 * 4;
20392 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
20394 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
20396 if (rs6000_passes_vector)
20397 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
20398 (TARGET_ALTIVEC_ABI ? 2 : 1));
20399 if (rs6000_returns_struct)
20400 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
20401 aix_struct_return ? 2 : 1);
20403 #endif
20404 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
20405 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
20406 file_end_indicate_exec_stack ();
20407 #endif
20409 if (flag_split_stack)
20410 file_end_indicate_split_stack ();
20412 if (cpu_builtin_p)
20414 /* We have expanded a CPU builtin, so we need to emit a reference to
20415 the special symbol that LIBC uses to declare it supports the
20416 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
20417 switch_to_section (data_section);
20418 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
20419 fprintf (asm_out_file, "\t%s %s\n",
20420 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
20423 #endif
20425 #if TARGET_XCOFF
20427 #ifndef HAVE_XCOFF_DWARF_EXTRAS
20428 #define HAVE_XCOFF_DWARF_EXTRAS 0
20429 #endif
20431 static enum unwind_info_type
20432 rs6000_xcoff_debug_unwind_info (void)
20434 return UI_NONE;
20437 static void
20438 rs6000_xcoff_asm_output_anchor (rtx symbol)
20440 char buffer[100];
20442 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
20443 SYMBOL_REF_BLOCK_OFFSET (symbol));
20444 fprintf (asm_out_file, "%s", SET_ASM_OP);
20445 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20446 fprintf (asm_out_file, ",");
20447 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20448 fprintf (asm_out_file, "\n");
20451 static void
20452 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20454 fputs (GLOBAL_ASM_OP, stream);
20455 RS6000_OUTPUT_BASENAME (stream, name);
20456 putc ('\n', stream);
20459 /* A get_unnamed_section callback, used for read-only sections.
20460 DIRECTIVE points to the section string variable. */
20462 static void
20463 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20465 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20466 *(const char *const *) directive,
20467 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20470 /* Likewise for read-write sections. */
20472 static void
20473 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20475 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20476 *(const char *const *) directive,
20477 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20480 static void
20481 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20483 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20484 *(const char *const *) directive,
20485 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20488 /* A get_unnamed_section callback, used for switching to toc_section. */
20490 static void
20491 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20493 if (TARGET_MINIMAL_TOC)
20495 /* toc_section is always selected at least once from
20496 rs6000_xcoff_file_start, so this is guaranteed to
20497 always be defined once and only once in each file. */
20498 if (!toc_initialized)
20500 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20501 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20502 toc_initialized = 1;
20504 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20505 (TARGET_32BIT ? "" : ",3"));
20507 else
20508 fputs ("\t.toc\n", asm_out_file);
20511 /* Implement TARGET_ASM_INIT_SECTIONS. */
20513 static void
20514 rs6000_xcoff_asm_init_sections (void)
20516 read_only_data_section
20517 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20518 &xcoff_read_only_section_name);
20520 private_data_section
20521 = get_unnamed_section (SECTION_WRITE,
20522 rs6000_xcoff_output_readwrite_section_asm_op,
20523 &xcoff_private_data_section_name);
20525 read_only_private_data_section
20526 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20527 &xcoff_private_rodata_section_name);
20529 tls_data_section
20530 = get_unnamed_section (SECTION_TLS,
20531 rs6000_xcoff_output_tls_section_asm_op,
20532 &xcoff_tls_data_section_name);
20534 tls_private_data_section
20535 = get_unnamed_section (SECTION_TLS,
20536 rs6000_xcoff_output_tls_section_asm_op,
20537 &xcoff_private_data_section_name);
20539 toc_section
20540 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20542 readonly_data_section = read_only_data_section;
20545 static int
20546 rs6000_xcoff_reloc_rw_mask (void)
20548 return 3;
20551 static void
20552 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20553 tree decl ATTRIBUTE_UNUSED)
20555 int smclass;
20556 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20558 if (flags & SECTION_EXCLUDE)
20559 smclass = 4;
20560 else if (flags & SECTION_DEBUG)
20562 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20563 return;
20565 else if (flags & SECTION_CODE)
20566 smclass = 0;
20567 else if (flags & SECTION_TLS)
20568 smclass = 3;
20569 else if (flags & SECTION_WRITE)
20570 smclass = 2;
20571 else
20572 smclass = 1;
20574 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20575 (flags & SECTION_CODE) ? "." : "",
20576 name, suffix[smclass], flags & SECTION_ENTSIZE);
20579 #define IN_NAMED_SECTION(DECL) \
20580 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20581 && DECL_SECTION_NAME (DECL) != NULL)
20583 static section *
20584 rs6000_xcoff_select_section (tree decl, int reloc,
20585 unsigned HOST_WIDE_INT align)
20587 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20588 named section. */
20589 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
20591 resolve_unique_section (decl, reloc, true);
20592 if (IN_NAMED_SECTION (decl))
20593 return get_named_section (decl, NULL, reloc);
20596 if (decl_readonly_section (decl, reloc))
20598 if (TREE_PUBLIC (decl))
20599 return read_only_data_section;
20600 else
20601 return read_only_private_data_section;
20603 else
20605 #if HAVE_AS_TLS
20606 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20608 if (TREE_PUBLIC (decl))
20609 return tls_data_section;
20610 else if (bss_initializer_p (decl))
20612 /* Convert to COMMON to emit in BSS. */
20613 DECL_COMMON (decl) = 1;
20614 return tls_comm_section;
20616 else
20617 return tls_private_data_section;
20619 else
20620 #endif
20621 if (TREE_PUBLIC (decl))
20622 return data_section;
20623 else
20624 return private_data_section;
20628 static void
20629 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20631 const char *name;
20633 /* Use select_section for private data and uninitialized data with
20634 alignment <= BIGGEST_ALIGNMENT. */
20635 if (!TREE_PUBLIC (decl)
20636 || DECL_COMMON (decl)
20637 || (DECL_INITIAL (decl) == NULL_TREE
20638 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20639 || DECL_INITIAL (decl) == error_mark_node
20640 || (flag_zero_initialized_in_bss
20641 && initializer_zerop (DECL_INITIAL (decl))))
20642 return;
20644 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20645 name = (*targetm.strip_name_encoding) (name);
20646 set_decl_section_name (decl, name);
20649 /* Select section for constant in constant pool.
20651 On RS/6000, all constants are in the private read-only data area.
20652 However, if this is being placed in the TOC it must be output as a
20653 toc entry. */
20655 static section *
20656 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20657 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20659 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20660 return toc_section;
20661 else
20662 return read_only_private_data_section;
20665 /* Remove any trailing [DS] or the like from the symbol name. */
20667 static const char *
20668 rs6000_xcoff_strip_name_encoding (const char *name)
20670 size_t len;
20671 if (*name == '*')
20672 name++;
20673 len = strlen (name);
20674 if (name[len - 1] == ']')
20675 return ggc_alloc_string (name, len - 4);
20676 else
20677 return name;
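/* Illustrative behavior (not from GCC): the mapping drops a leading
   '*' and a trailing four-character class suffix, so "*demo[DS]"
   becomes "demo" while a plain "demo" is returned unchanged.  */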
20680 /* Section attributes. AIX is always PIC. */
20682 static unsigned int
20683 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20685 unsigned int align;
20686 unsigned int flags = default_section_type_flags (decl, name, reloc);
20688 /* Align to at least UNIT size. */
20689 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20690 align = MIN_UNITS_PER_WORD;
20691 else
20692 /* Increase alignment of large objects if not already stricter. */
20693 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20694 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20695 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20697 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
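/* Illustrative arithmetic (not from GCC): the alignment travels in
   the SECTION_ENTSIZE bits as a log2, so a 16-byte-aligned object
   stores exact_log2 (16) == 4 there, and a consumer recovers the
   alignment as 1 << (flags & SECTION_ENTSIZE).  */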
20700 /* Output at beginning of assembler file.
20702 Initialize the section names for the RS/6000 at this point.
20704 Specify filename, including full path, to assembler.
20706 We want to go into the TOC section so at least one .toc will be emitted.
20707 Also, in order to output proper .bs/.es pairs, we need at least one static
20708 [RW] section emitted.
20710 Finally, declare mcount when profiling to make the assembler happy. */
20712 static void
20713 rs6000_xcoff_file_start (void)
20715 rs6000_gen_section_name (&xcoff_bss_section_name,
20716 main_input_filename, ".bss_");
20717 rs6000_gen_section_name (&xcoff_private_data_section_name,
20718 main_input_filename, ".rw_");
20719 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20720 main_input_filename, ".rop_");
20721 rs6000_gen_section_name (&xcoff_read_only_section_name,
20722 main_input_filename, ".ro_");
20723 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20724 main_input_filename, ".tls_");
20725 rs6000_gen_section_name (&xcoff_tbss_section_name,
20726 main_input_filename, ".tbss_[UL]");
20728 fputs ("\t.file\t", asm_out_file);
20729 output_quoted_string (asm_out_file, main_input_filename);
20730 fputc ('\n', asm_out_file);
20731 if (write_symbols != NO_DEBUG)
20732 switch_to_section (private_data_section);
20733 switch_to_section (toc_section);
20734 switch_to_section (text_section);
20735 if (profile_flag)
20736 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20737 rs6000_file_start ();
20740 /* Output at end of assembler file.
20741 On the RS/6000, referencing data should automatically pull in text. */
20743 static void
20744 rs6000_xcoff_file_end (void)
20746 switch_to_section (text_section);
20747 fputs ("_section_.text:\n", asm_out_file);
20748 switch_to_section (data_section);
20749 fputs (TARGET_32BIT
20750 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20751 asm_out_file);
20754 struct declare_alias_data
20756 FILE *file;
20757 bool function_descriptor;
20760 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
20762 static bool
20763 rs6000_declare_alias (struct symtab_node *n, void *d)
20765 struct declare_alias_data *data = (struct declare_alias_data *)d;
20766 /* Main symbol is output specially, because varasm machinery does part of
20767 the job for us - we do not need to declare .globl/lglobs and such. */
20768 if (!n->alias || n->weakref)
20769 return false;
20771 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20772 return false;
20774 /* Prevent assemble_alias from trying to use .set pseudo operation
20775 that does not behave as expected by the middle-end. */
20776 TREE_ASM_WRITTEN (n->decl) = true;
20778 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20779 char *buffer = (char *) alloca (strlen (name) + 2);
20780 char *p;
20781 int dollar_inside = 0;
20783 strcpy (buffer, name);
20784 p = strchr (buffer, '$');
20785 while (p) {
20786 *p = '_';
20787 dollar_inside++;
20788 p = strchr (p + 1, '$');
20790 if (TREE_PUBLIC (n->decl))
20792 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20794 if (dollar_inside) {
20795 if (data->function_descriptor)
20796 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20797 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20799 if (data->function_descriptor)
20801 fputs ("\t.globl .", data->file);
20802 RS6000_OUTPUT_BASENAME (data->file, buffer);
20803 putc ('\n', data->file);
20805 fputs ("\t.globl ", data->file);
20806 RS6000_OUTPUT_BASENAME (data->file, buffer);
20807 putc ('\n', data->file);
20809 #ifdef ASM_WEAKEN_DECL
20810 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20811 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20812 #endif
20814 else
20816 if (dollar_inside)
20818 if (data->function_descriptor)
20819 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20820 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20822 if (data->function_descriptor)
20824 fputs ("\t.lglobl .", data->file);
20825 RS6000_OUTPUT_BASENAME (data->file, buffer);
20826 putc ('\n', data->file);
20828 fputs ("\t.lglobl ", data->file);
20829 RS6000_OUTPUT_BASENAME (data->file, buffer);
20830 putc ('\n', data->file);
20832 if (data->function_descriptor)
20833 fputs (".", data->file);
20834 RS6000_OUTPUT_BASENAME (data->file, buffer);
20835 fputs (":\n", data->file);
20836 return false;
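/* Illustrative output (not from GCC): a public non-weak alias named
   "demo$alias" is emitted as the label "demo_alias", preceded by
   '.rename demo_alias,"demo$alias"' so the AIX assembler still sees
   the original dollar name, plus ".globl demo_alias" and the
   "demo_alias:" alternative label in front of the definition.  */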
20840 #ifdef HAVE_GAS_HIDDEN
20841 /* Helper function to calculate visibility of a DECL
20842 and return the value as a const string. */
20844 static const char *
20845 rs6000_xcoff_visibility (tree decl)
20847 static const char * const visibility_types[] = {
20848 "", ",protected", ",hidden", ",internal"
20851 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20852 return visibility_types[vis];
20854 #endif
20857 /* This macro produces the initial definition of a function name.
20858 On the RS/6000, we need to place an extra '.' in the function name and
20859 output the function descriptor.
20860 Dollar signs are converted to underscores.
20862 The csect for the function will have already been created when
20863 text_section was selected. We do have to go back to that csect, however.
20865 The third and fourth parameters to the .function pseudo-op (16 and 044)
20866 are placeholders which no longer have any use.
20868 Because AIX assembler's .set command has unexpected semantics, we output
20869 all aliases as alternative labels in front of the definition. */
20871 void
20872 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20873 {
20874 char *buffer = (char *) alloca (strlen (name) + 1);
20875 char *p;
20876 int dollar_inside = 0;
20877 struct declare_alias_data data = {file, false};
20879 strcpy (buffer, name);
20880 p = strchr (buffer, '$');
20881 while (p) {
20882 *p = '_';
20883 dollar_inside++;
20884 p = strchr (p + 1, '$');
20885 }
20886 if (TREE_PUBLIC (decl))
20887 {
20888 if (!RS6000_WEAK || !DECL_WEAK (decl))
20889 {
20890 if (dollar_inside) {
20891 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
20892 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
20893 }
20894 fputs ("\t.globl .", file);
20895 RS6000_OUTPUT_BASENAME (file, buffer);
20896 #ifdef HAVE_GAS_HIDDEN
20897 fputs (rs6000_xcoff_visibility (decl), file);
20898 #endif
20899 putc ('\n', file);
20900 }
20901 }
20902 else
20903 {
20904 if (dollar_inside) {
20905 fprintf (file, "\t.rename .%s,\".%s\"\n", buffer, name);
20906 fprintf (file, "\t.rename %s,\"%s\"\n", buffer, name);
20907 }
20908 fputs ("\t.lglobl .", file);
20909 RS6000_OUTPUT_BASENAME (file, buffer);
20910 putc ('\n', file);
20911 }
20912 fputs ("\t.csect ", file);
20913 RS6000_OUTPUT_BASENAME (file, buffer);
20914 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20915 RS6000_OUTPUT_BASENAME (file, buffer);
20916 fputs (":\n", file);
20917 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20918 &data, true);
20919 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20920 RS6000_OUTPUT_BASENAME (file, buffer);
20921 fputs (", TOC[tc0], 0\n", file);
20922 in_section = NULL;
20923 switch_to_section (function_section (decl));
20924 putc ('.', file);
20925 RS6000_OUTPUT_BASENAME (file, buffer);
20926 fputs (":\n", file);
20927 data.function_descriptor = true;
20928 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20929 &data, true);
20930 if (!DECL_IGNORED_P (decl))
20931 {
20932 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20933 xcoffout_declare_function (file, decl, buffer);
20934 else if (write_symbols == DWARF2_DEBUG)
20935 {
20936 name = (*targetm.strip_name_encoding) (name);
20937 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20938 }
20939 }
20940 return;
20941 }
20944 /* Output assembly language to globalize a symbol from a DECL,
20945 possibly with visibility. */
20947 void
20948 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20950 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20951 fputs (GLOBAL_ASM_OP, stream);
20952 RS6000_OUTPUT_BASENAME (stream, name);
20953 #ifdef HAVE_GAS_HIDDEN
20954 fputs (rs6000_xcoff_visibility (decl), stream);
20955 #endif
20956 putc ('\n', stream);
20959 /* Output assembly language to define a symbol as COMMON from a DECL,
20960 possibly with visibility. */
20962 void
20963 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20964 tree decl ATTRIBUTE_UNUSED,
20965 const char *name,
20966 unsigned HOST_WIDE_INT size,
20967 unsigned HOST_WIDE_INT align)
20969 unsigned HOST_WIDE_INT align2 = 2;
20971 if (align > 32)
20972 align2 = floor_log2 (align / BITS_PER_UNIT);
20973 else if (size > 4)
20974 align2 = 3;
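/* ALIGN2 above is the log2 of the alignment in bytes that the .comm
   pseudo-op expects: at least 2 (a word), 3 (a doubleword) once SIZE
   exceeds a word, or derived from ALIGN (which is in bits) when that
   is larger than 32.  */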
20976 fputs (COMMON_ASM_OP, stream);
20977 RS6000_OUTPUT_BASENAME (stream, name);
20979 fprintf (stream,
20980 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20981 size, align2);
20983 #ifdef HAVE_GAS_HIDDEN
20984 if (decl != NULL)
20985 fputs (rs6000_xcoff_visibility (decl), stream);
20986 #endif
20987 putc ('\n', stream);
20990 /* This macro produces the initial definition of an object (variable) name.
20991 Because AIX assembler's .set command has unexpected semantics, we output
20992 all aliases as alternative labels in front of the definition. */
20994 void
20995 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20997 struct declare_alias_data data = {file, false};
20998 RS6000_OUTPUT_BASENAME (file, name);
20999 fputs (":\n", file);
21000 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21001 &data, true);
21004 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */
21006 void
21007 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21009 fputs (integer_asm_op (size, FALSE), file);
21010 assemble_name (file, label);
21011 fputs ("-$", file);
21014 /* Output a symbol offset relative to the dbase for the current object.
21015 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21016 signed offsets.
21018 __gcc_unwind_dbase is embedded in all executables/libraries through
21019 libgcc/config/rs6000/crtdbase.S. */
21021 void
21022 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21024 fputs (integer_asm_op (size, FALSE), file);
21025 assemble_name (file, label);
21026 fputs("-__gcc_unwind_dbase", file);
21029 #ifdef HAVE_AS_TLS
21030 static void
21031 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21033 rtx symbol;
21034 int flags;
21035 const char *symname;
21037 default_encode_section_info (decl, rtl, first);
21039 /* Careful not to prod global register variables. */
21040 if (!MEM_P (rtl))
21041 return;
21042 symbol = XEXP (rtl, 0);
21043 if (!SYMBOL_REF_P (symbol))
21044 return;
21046 flags = SYMBOL_REF_FLAGS (symbol);
21048 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21049 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21051 SYMBOL_REF_FLAGS (symbol) = flags;
21053 /* Append mapping class to extern decls. */
21054 symname = XSTR (symbol, 0);
21055 if (decl /* sync condition with assemble_external () */
21056 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
21057 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
21058 || TREE_CODE (decl) == FUNCTION_DECL)
21059 && symname[strlen (symname) - 1] != ']')
21061 char *newname = (char *) alloca (strlen (symname) + 5);
21062 strcpy (newname, symname);
21063 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
21064 ? "[DS]" : "[UA]"));
21065 XSTR (symbol, 0) = ggc_strdup (newname);
21068 #endif /* HAVE_AS_TLS */
21069 #endif /* TARGET_XCOFF */
21071 void
21072 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21073 const char *name, const char *val)
21075 fputs ("\t.weak\t", stream);
21076 RS6000_OUTPUT_BASENAME (stream, name);
21077 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21078 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21080 if (TARGET_XCOFF)
21081 fputs ("[DS]", stream);
21082 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21083 if (TARGET_XCOFF)
21084 fputs (rs6000_xcoff_visibility (decl), stream);
21085 #endif
21086 fputs ("\n\t.weak\t.", stream);
21087 RS6000_OUTPUT_BASENAME (stream, name);
21089 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21090 if (TARGET_XCOFF)
21091 fputs (rs6000_xcoff_visibility (decl), stream);
21092 #endif
21093 fputc ('\n', stream);
21094 if (val)
21096 #ifdef ASM_OUTPUT_DEF
21097 ASM_OUTPUT_DEF (stream, name, val);
21098 #endif
21099 if (decl && TREE_CODE (decl) == FUNCTION_DECL
21100 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
21102 fputs ("\t.set\t.", stream);
21103 RS6000_OUTPUT_BASENAME (stream, name);
21104 fputs (",.", stream);
21105 RS6000_OUTPUT_BASENAME (stream, val);
21106 fputc ('\n', stream);
21112 /* Return true if INSN should not be copied. */
21114 static bool
21115 rs6000_cannot_copy_insn_p (rtx_insn *insn)
21117 return recog_memoized (insn) >= 0
21118 && get_attr_cannot_copy (insn);
21121 /* Compute a (partial) cost for rtx X. Return true if the complete
21122 cost has been computed, and false if subexpressions should be
21123 scanned. In either case, *TOTAL contains the cost result. */
21125 static bool
21126 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
21127 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
21129 int code = GET_CODE (x);
21131 switch (code)
21133 /* On the RS/6000, if it is valid in the insn, it is free. */
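/* For reference, the constraint letters tested below are, roughly
   (see rs6000/constraints.md): 'I' a signed 16-bit constant, 'K' an
   unsigned 16-bit constant, 'L' a signed 16-bit constant shifted
   left 16 bits, 'J' an unsigned 16-bit constant shifted left 16
   bits, and 'P' a constant whose negation is a signed 16-bit
   constant.  */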
21134 case CONST_INT:
21135 if (((outer_code == SET
21136 || outer_code == PLUS
21137 || outer_code == MINUS)
21138 && (satisfies_constraint_I (x)
21139 || satisfies_constraint_L (x)))
21140 || (outer_code == AND
21141 && (satisfies_constraint_K (x)
21142 || (mode == SImode
21143 ? satisfies_constraint_L (x)
21144 : satisfies_constraint_J (x))))
21145 || ((outer_code == IOR || outer_code == XOR)
21146 && (satisfies_constraint_K (x)
21147 || (mode == SImode
21148 ? satisfies_constraint_L (x)
21149 : satisfies_constraint_J (x))))
21150 || outer_code == ASHIFT
21151 || outer_code == ASHIFTRT
21152 || outer_code == LSHIFTRT
21153 || outer_code == ROTATE
21154 || outer_code == ROTATERT
21155 || outer_code == ZERO_EXTRACT
21156 || (outer_code == MULT
21157 && satisfies_constraint_I (x))
21158 || ((outer_code == DIV || outer_code == UDIV
21159 || outer_code == MOD || outer_code == UMOD)
21160 && exact_log2 (INTVAL (x)) >= 0)
21161 || (outer_code == COMPARE
21162 && (satisfies_constraint_I (x)
21163 || satisfies_constraint_K (x)))
21164 || ((outer_code == EQ || outer_code == NE)
21165 && (satisfies_constraint_I (x)
21166 || satisfies_constraint_K (x)
21167 || (mode == SImode
21168 ? satisfies_constraint_L (x)
21169 : satisfies_constraint_J (x))))
21170 || (outer_code == GTU
21171 && satisfies_constraint_I (x))
21172 || (outer_code == LTU
21173 && satisfies_constraint_P (x)))
21175 *total = 0;
21176 return true;
21178 else if ((outer_code == PLUS
21179 && reg_or_add_cint_operand (x, mode))
21180 || (outer_code == MINUS
21181 && reg_or_sub_cint_operand (x, mode))
21182 || ((outer_code == SET
21183 || outer_code == IOR
21184 || outer_code == XOR)
21185 && (INTVAL (x)
21186 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
21188 *total = COSTS_N_INSNS (1);
21189 return true;
21191 /* FALLTHRU */
21193 case CONST_DOUBLE:
21194 case CONST_WIDE_INT:
21195 case CONST:
21196 case HIGH:
21197 case SYMBOL_REF:
21198 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21199 return true;
21201 case MEM:
21202 /* When optimizing for size, MEM should be slightly more expensive
21203 than generating address, e.g., (plus (reg) (const)).
21204 L1 cache latency is about two instructions. */
21205 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
21206 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
21207 *total += COSTS_N_INSNS (100);
21208 return true;
21210 case LABEL_REF:
21211 *total = 0;
21212 return true;
21214 case PLUS:
21215 case MINUS:
21216 if (FLOAT_MODE_P (mode))
21217 *total = rs6000_cost->fp;
21218 else
21219 *total = COSTS_N_INSNS (1);
21220 return false;
21222 case MULT:
21223 if (CONST_INT_P (XEXP (x, 1))
21224 && satisfies_constraint_I (XEXP (x, 1)))
21226 if (INTVAL (XEXP (x, 1)) >= -256
21227 && INTVAL (XEXP (x, 1)) <= 255)
21228 *total = rs6000_cost->mulsi_const9;
21229 else
21230 *total = rs6000_cost->mulsi_const;
21232 else if (mode == SFmode)
21233 *total = rs6000_cost->fp;
21234 else if (FLOAT_MODE_P (mode))
21235 *total = rs6000_cost->dmul;
21236 else if (mode == DImode)
21237 *total = rs6000_cost->muldi;
21238 else
21239 *total = rs6000_cost->mulsi;
21240 return false;
21242 case FMA:
21243 if (mode == SFmode)
21244 *total = rs6000_cost->fp;
21245 else
21246 *total = rs6000_cost->dmul;
21247 break;
21249 case DIV:
21250 case MOD:
21251 if (FLOAT_MODE_P (mode))
21253 *total = mode == DFmode ? rs6000_cost->ddiv
21254 : rs6000_cost->sdiv;
21255 return false;
21257 /* FALLTHRU */
21259 case UDIV:
21260 case UMOD:
21261 if (CONST_INT_P (XEXP (x, 1))
21262 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
21264 if (code == DIV || code == MOD)
21265 /* Shift, addze */
21266 *total = COSTS_N_INSNS (2);
21267 else
21268 /* Shift */
21269 *total = COSTS_N_INSNS (1);
21271 else
21273 if (GET_MODE (XEXP (x, 1)) == DImode)
21274 *total = rs6000_cost->divdi;
21275 else
21276 *total = rs6000_cost->divsi;
21278 /* Add in shift and subtract for MOD unless we have a mod instruction. */
21279 if (!TARGET_MODULO && (code == MOD || code == UMOD))
21280 *total += COSTS_N_INSNS (2);
21281 return false;
21283 case CTZ:
21284 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
21285 return false;
21287 case FFS:
21288 *total = COSTS_N_INSNS (4);
21289 return false;
21291 case POPCOUNT:
21292 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
21293 return false;
21295 case PARITY:
21296 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
21297 return false;
21299 case NOT:
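/* A NOT that feeds a logical operation is free, since it can fold
   into one of the combined logical instructions (e.g. nand, nor,
   eqv, andc, orc).  */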
21300 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
21301 *total = 0;
21302 else
21303 *total = COSTS_N_INSNS (1);
21304 return false;
21306 case AND:
21307 if (CONST_INT_P (XEXP (x, 1)))
21309 rtx left = XEXP (x, 0);
21310 rtx_code left_code = GET_CODE (left);
21312 /* rotate-and-mask: 1 insn. */
21313 if ((left_code == ROTATE
21314 || left_code == ASHIFT
21315 || left_code == LSHIFTRT)
21316 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
21318 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
21319 if (!CONST_INT_P (XEXP (left, 1)))
21320 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
21321 *total += COSTS_N_INSNS (1);
21322 return true;
21325 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
21326 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
21327 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
21328 || (val & 0xffff) == val
21329 || (val & 0xffff0000) == val
21330 || ((val & 0xffff) == 0 && mode == SImode))
21332 *total = rtx_cost (left, mode, AND, 0, speed);
21333 *total += COSTS_N_INSNS (1);
21334 return true;
21337 /* 2 insns. */
21338 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
21340 *total = rtx_cost (left, mode, AND, 0, speed);
21341 *total += COSTS_N_INSNS (2);
21342 return true;
21346 *total = COSTS_N_INSNS (1);
21347 return false;
21349 case IOR:
21350 /* FIXME */
21351 *total = COSTS_N_INSNS (1);
21352 return true;
21354 case CLZ:
21355 case XOR:
21356 case ZERO_EXTRACT:
21357 *total = COSTS_N_INSNS (1);
21358 return false;
21360 case ASHIFT:
21361 /* The EXTSWSLI instruction is a combined instruction. Don't count both
21362 the sign extend and shift separately within the insn. */
21363 if (TARGET_EXTSWSLI && mode == DImode
21364 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
21365 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
21367 *total = 0;
21368 return false;
21370 /* fall through */
21372 case ASHIFTRT:
21373 case LSHIFTRT:
21374 case ROTATE:
21375 case ROTATERT:
21376 /* Handle mul_highpart. */
21377 if (outer_code == TRUNCATE
21378 && GET_CODE (XEXP (x, 0)) == MULT)
21380 if (mode == DImode)
21381 *total = rs6000_cost->muldi;
21382 else
21383 *total = rs6000_cost->mulsi;
21384 return true;
21386 else if (outer_code == AND)
21387 *total = 0;
21388 else
21389 *total = COSTS_N_INSNS (1);
21390 return false;
21392 case SIGN_EXTEND:
21393 case ZERO_EXTEND:
21394 if (MEM_P (XEXP (x, 0)))
21395 *total = 0;
21396 else
21397 *total = COSTS_N_INSNS (1);
21398 return false;
21400 case COMPARE:
21401 case NEG:
21402 case ABS:
21403 if (!FLOAT_MODE_P (mode))
21405 *total = COSTS_N_INSNS (1);
21406 return false;
21408 /* FALLTHRU */
21410 case FLOAT:
21411 case UNSIGNED_FLOAT:
21412 case FIX:
21413 case UNSIGNED_FIX:
21414 case FLOAT_TRUNCATE:
21415 *total = rs6000_cost->fp;
21416 return false;
21418 case FLOAT_EXTEND:
21419 if (mode == DFmode)
21420 *total = rs6000_cost->sfdf_convert;
21421 else
21422 *total = rs6000_cost->fp;
21423 return false;
21425 case CALL:
21426 case IF_THEN_ELSE:
21427 if (!speed)
21429 *total = COSTS_N_INSNS (1);
21430 return true;
21432 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
21434 *total = rs6000_cost->fp;
21435 return false;
21437 break;
21439 case NE:
21440 case EQ:
21441 case GTU:
21442 case LTU:
21443 /* Carry bit requires mode == Pmode.
21444 NEG or PLUS already counted so only add one. */
21445 if (mode == Pmode
21446 && (outer_code == NEG || outer_code == PLUS))
21448 *total = COSTS_N_INSNS (1);
21449 return true;
21451 /* FALLTHRU */
21453 case GT:
21454 case LT:
21455 case UNORDERED:
21456 if (outer_code == SET)
21458 if (XEXP (x, 1) == const0_rtx)
21460 *total = COSTS_N_INSNS (2);
21461 return true;
21463 else
21465 *total = COSTS_N_INSNS (3);
21466 return false;
21469 /* CC COMPARE. */
21470 if (outer_code == COMPARE)
21472 *total = 0;
21473 return true;
21475 break;
21477 default:
21478 break;
21481 return false;
21484 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */
21486 static bool
21487 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21488 int opno, int *total, bool speed)
21490 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21492 fprintf (stderr,
21493 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21494 "opno = %d, total = %d, speed = %s, x:\n",
21495 ret ? "complete" : "scan inner",
21496 GET_MODE_NAME (mode),
21497 GET_RTX_NAME (outer_code),
21498 opno,
21499 *total,
21500 speed ? "true" : "false");
21502 debug_rtx (x);
21504 return ret;
21507 static int
21508 rs6000_insn_cost (rtx_insn *insn, bool speed)
21510 if (recog_memoized (insn) < 0)
21511 return 0;
21513 /* If we are optimizing for size, just use the length. */
21514 if (!speed)
21515 return get_attr_length (insn);
21517 /* Use the cost if provided. */
21518 int cost = get_attr_cost (insn);
21519 if (cost > 0)
21520 return cost;
21522 /* If the insn tells us how many insns there are, use that. Otherwise use
21523 the length/4. Adjust the insn length to remove the extra size that
21524 prefixed instructions take. */
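/* ADJUST_INSN_LENGTH yields the extra bytes attributed to a prefixed
   instruction; subtracting them first keeps length / 4 an estimate of
   the number of machine instructions rather than of 4-byte words.  */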
21525 int n = get_attr_num_insns (insn);
21526 if (n == 0)
21528 int length = get_attr_length (insn);
21529 if (get_attr_prefixed (insn) == PREFIXED_YES)
21531 int adjust = 0;
21532 ADJUST_INSN_LENGTH (insn, adjust);
21533 length -= adjust;
21536 n = length / 4;
21539 enum attr_type type = get_attr_type (insn);
21541 switch (type)
21543 case TYPE_LOAD:
21544 case TYPE_FPLOAD:
21545 case TYPE_VECLOAD:
21546 cost = COSTS_N_INSNS (n + 1);
21547 break;
21549 case TYPE_MUL:
21550 switch (get_attr_size (insn))
21552 case SIZE_8:
21553 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21554 break;
21555 case SIZE_16:
21556 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21557 break;
21558 case SIZE_32:
21559 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21560 break;
21561 case SIZE_64:
21562 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21563 break;
21564 default:
21565 gcc_unreachable ();
21567 break;
21568 case TYPE_DIV:
21569 switch (get_attr_size (insn))
21571 case SIZE_32:
21572 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21573 break;
21574 case SIZE_64:
21575 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21576 break;
21577 default:
21578 gcc_unreachable ();
21580 break;
21582 case TYPE_FP:
21583 cost = n * rs6000_cost->fp;
21584 break;
21585 case TYPE_DMUL:
21586 cost = n * rs6000_cost->dmul;
21587 break;
21588 case TYPE_SDIV:
21589 cost = n * rs6000_cost->sdiv;
21590 break;
21591 case TYPE_DDIV:
21592 cost = n * rs6000_cost->ddiv;
21593 break;
21595 case TYPE_SYNC:
21596 case TYPE_LOAD_L:
21597 case TYPE_MFCR:
21598 case TYPE_MFCRF:
21599 cost = COSTS_N_INSNS (n + 2);
21600 break;
21602 default:
21603 cost = COSTS_N_INSNS (n);
21606 return cost;
21609 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21611 static int
21612 rs6000_debug_address_cost (rtx x, machine_mode mode,
21613 addr_space_t as, bool speed)
21615 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21617 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21618 ret, speed ? "true" : "false");
21619 debug_rtx (x);
21621 return ret;
21625 /* A C expression returning the cost of moving data from a register of class
21626 CLASS1 to one of CLASS2. */
21628 static int
21629 rs6000_register_move_cost (machine_mode mode,
21630 reg_class_t from, reg_class_t to)
21632 int ret;
21633 reg_class_t rclass;
21635 if (TARGET_DEBUG_COST)
21636 dbg_cost_ctrl++;
21638 /* If we have VSX, we can easily move between FPR or Altivec registers,
21639 otherwise we can only easily move within classes.
21640 Do this first so we give best-case answers for union classes
21641 containing both gprs and vsx regs. */
21642 HARD_REG_SET to_vsx, from_vsx;
21643 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21644 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21645 if (!hard_reg_set_empty_p (to_vsx)
21646 && !hard_reg_set_empty_p (from_vsx)
21647 && (TARGET_VSX
21648 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21650 int reg = FIRST_FPR_REGNO;
21651 if (TARGET_VSX
21652 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21653 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21654 reg = FIRST_ALTIVEC_REGNO;
21655 ret = 2 * hard_regno_nregs (reg, mode);
21658 /* Moves from/to GENERAL_REGS. */
21659 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21660 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21662 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21664 if (TARGET_DIRECT_MOVE)
21666 /* Keep the cost for direct moves above that for within
21667 a register class even if the actual processor cost is
21668 comparable. We do this because a direct move insn
21669 can't be a nop, whereas with ideal register
21670 allocation a move within the same class might turn
21671 out to be a nop. */
21672 if (rs6000_tune == PROCESSOR_POWER9
21673 || rs6000_tune == PROCESSOR_POWER10)
21674 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21675 else
21676 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21677 /* SFmode requires a conversion when moving between gprs
21678 and vsx. */
21679 if (mode == SFmode)
21680 ret += 2;
21682 else
21683 ret = (rs6000_memory_move_cost (mode, rclass, false)
21684 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21687 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21688 shift. */
21689 else if (rclass == CR_REGS)
21690 ret = 4;
21692 /* For those processors that have slow LR/CTR moves, make them more
21693 expensive than memory in order to bias spills to memory.  */
21694 else if ((rs6000_tune == PROCESSOR_POWER6
21695 || rs6000_tune == PROCESSOR_POWER7
21696 || rs6000_tune == PROCESSOR_POWER8
21697 || rs6000_tune == PROCESSOR_POWER9)
21698 && reg_class_subset_p (rclass, SPECIAL_REGS))
21699 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21701 else
21702 /* A move will cost one instruction per GPR moved. */
21703 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21706 /* Everything else has to go through GENERAL_REGS. */
21707 else
21708 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21709 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21711 if (TARGET_DEBUG_COST)
21713 if (dbg_cost_ctrl == 1)
21714 fprintf (stderr,
21715 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21716 ret, GET_MODE_NAME (mode), reg_class_names[from],
21717 reg_class_names[to]);
21718 dbg_cost_ctrl--;
21721 return ret;
21724 /* A C expression returning the cost of moving data of MODE from a register to
21725 or from memory. */
21727 static int
21728 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21729 bool in ATTRIBUTE_UNUSED)
21731 int ret;
21733 if (TARGET_DEBUG_COST)
21734 dbg_cost_ctrl++;
21736 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21737 ret = 4 * hard_regno_nregs (0, mode);
21738 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21739 || reg_classes_intersect_p (rclass, VSX_REGS)))
21740 ret = 4 * hard_regno_nregs (32, mode);
21741 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21742 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21743 else
21744 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21746 if (TARGET_DEBUG_COST)
21748 if (dbg_cost_ctrl == 1)
21749 fprintf (stderr,
21750 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21751 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21752 dbg_cost_ctrl--;
21755 return ret;
21758 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21760 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21761 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21762 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21763 move cost between GENERAL_REGS and VSX_REGS low.
21765 It might seem reasonable to use a union class. After all, if usage
21766 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21767 rather than memory. However, in cases where register pressure of
21768 both is high, like the cactus_adm spec test, allowing
21769 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21770 the first scheduling pass. This is partly due to an allocno of
21771 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21772 class, which gives too high a pressure for GENERAL_REGS and too low
21773 for VSX_REGS. So, force a choice of the subclass here.
21775 The best class is also the union if GENERAL_REGS and VSX_REGS have
21776 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21777 allocno class, since trying to narrow down the class by regno mode
21778 is prone to error. For example, SImode is allowed in VSX regs and
21779 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21780 it would be wrong to choose an allocno of GENERAL_REGS based on
21781 SImode. */
21783 static reg_class_t
21784 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21785 reg_class_t allocno_class,
21786 reg_class_t best_class)
21788 switch (allocno_class)
21790 case GEN_OR_VSX_REGS:
21791 /* best_class must be a subset of allocno_class. */
21792 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21793 || best_class == GEN_OR_FLOAT_REGS
21794 || best_class == VSX_REGS
21795 || best_class == ALTIVEC_REGS
21796 || best_class == FLOAT_REGS
21797 || best_class == GENERAL_REGS
21798 || best_class == BASE_REGS);
21799 /* Use best_class but choose wider classes when copying from the
21800 wider class to best_class is cheap. This mimics IRA choice
21801 of allocno class. */
21802 if (best_class == BASE_REGS)
21803 return GENERAL_REGS;
21804 if (TARGET_VSX
21805 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21806 return VSX_REGS;
21807 return best_class;
21809 default:
21810 break;
21813 return allocno_class;
21816 /* Returns a code for a target-specific builtin that implements
21817 reciprocal of the function, or NULL_TREE if not available. */
21819 static tree
21820 rs6000_builtin_reciprocal (tree fndecl)
21822 switch (DECL_MD_FUNCTION_CODE (fndecl))
21824 case VSX_BUILTIN_XVSQRTDP:
21825 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21826 return NULL_TREE;
21828 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21830 case VSX_BUILTIN_XVSQRTSP:
21831 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21832 return NULL_TREE;
21834 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21836 default:
21837 return NULL_TREE;
21841 /* Load up a constant. If the mode is a vector mode, splat the value across
21842 all of the vector elements. */
21844 static rtx
21845 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21847 rtx reg;
21849 if (mode == SFmode || mode == DFmode)
21851 rtx d = const_double_from_real_value (dconst, mode);
21852 reg = force_reg (mode, d);
21854 else if (mode == V4SFmode)
21856 rtx d = const_double_from_real_value (dconst, SFmode);
21857 rtvec v = gen_rtvec (4, d, d, d, d);
21858 reg = gen_reg_rtx (mode);
21859 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21861 else if (mode == V2DFmode)
21863 rtx d = const_double_from_real_value (dconst, DFmode);
21864 rtvec v = gen_rtvec (2, d, d);
21865 reg = gen_reg_rtx (mode);
21866 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21868 else
21869 gcc_unreachable ();
21871 return reg;
21874 /* Generate an FMA instruction. */
21876 static void
21877 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21879 machine_mode mode = GET_MODE (target);
21880 rtx dst;
21882 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21883 gcc_assert (dst != NULL);
21885 if (dst != target)
21886 emit_move_insn (target, dst);
21889 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21891 static void
21892 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21894 machine_mode mode = GET_MODE (dst);
21895 rtx r;
21897 /* This is a tad more complicated, since the fnma_optab is for
21898 a different expression: fma(-m1, m2, a), which is the same
21899 thing except in the case of signed zeros.
21901 Fortunately we know that if FMA is supported that FNMSUB is
21902 also supported in the ISA. Just expand it directly. */
21904 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21906 r = gen_rtx_NEG (mode, a);
21907 r = gen_rtx_FMA (mode, m1, m2, r);
21908 r = gen_rtx_NEG (mode, r);
21909 emit_insn (gen_rtx_SET (dst, r));
21912 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21913 add a reg_note saying that this was a division. Support both scalar and
21914 vector divide. Assumes no trapping math and finite arguments. */
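/* A sketch of the math, as implemented below: given the hardware
   reciprocal estimate x0 ~= 1/d, each refinement pass computes
       e = 1 - d*x          (residual error)
       x' = x + e*x         (i.e. x' = x * (2 - d*x))
   and the final pass folds in the numerator:
       u = n*x,  v = n - d*u,  n/d ~= u + v*x.  */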
21916 void
21917 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21919 machine_mode mode = GET_MODE (dst);
21920 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21921 int i;
21923 /* Low precision estimates guarantee 5 bits of accuracy. High
21924 precision estimates guarantee 14 bits of accuracy. SFmode
21925 requires 23 bits of accuracy. DFmode requires 52 bits of
21926 accuracy. Each pass at least doubles the accuracy, leading
21927 to the following. */
21928 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21929 if (mode == DFmode || mode == V2DFmode)
21930 passes++;
21932 enum insn_code code = optab_handler (smul_optab, mode);
21933 insn_gen_fn gen_mul = GEN_FCN (code);
21935 gcc_assert (code != CODE_FOR_nothing);
21937 one = rs6000_load_constant_and_splat (mode, dconst1);
21939 /* x0 = 1./d estimate */
21940 x0 = gen_reg_rtx (mode);
21941 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21942 UNSPEC_FRES)));
21944 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21945 if (passes > 1) {
21947 /* e0 = 1. - d * x0 */
21948 e0 = gen_reg_rtx (mode);
21949 rs6000_emit_nmsub (e0, d, x0, one);
21951 /* x1 = x0 + e0 * x0 */
21952 x1 = gen_reg_rtx (mode);
21953 rs6000_emit_madd (x1, e0, x0, x0);
21955 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21956 ++i, xprev = xnext, eprev = enext) {
21958 /* enext = eprev * eprev */
21959 enext = gen_reg_rtx (mode);
21960 emit_insn (gen_mul (enext, eprev, eprev));
21962 /* xnext = xprev + enext * xprev */
21963 xnext = gen_reg_rtx (mode);
21964 rs6000_emit_madd (xnext, enext, xprev, xprev);
21967 } else
21968 xprev = x0;
21970 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21972 /* u = n * xprev */
21973 u = gen_reg_rtx (mode);
21974 emit_insn (gen_mul (u, n, xprev));
21976 /* v = n - (d * u) */
21977 v = gen_reg_rtx (mode);
21978 rs6000_emit_nmsub (v, d, u, n);
21980 /* dst = (v * xprev) + u */
21981 rs6000_emit_madd (dst, v, xprev, u);
21983 if (note_p)
21984 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
21987 /* Goldschmidt's Algorithm for single/double-precision floating point
21988 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
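/* A sketch of the iteration, as implemented below: from the hardware
   estimate e ~= 1/sqrt(src), form g = e*src ~= sqrt(src) and
   h = e/2 ~= 1/(2*sqrt(src)).  Each pass computes the residual
   t = 1/2 - g*h and updates g' = g + t*g, h' = h + t*h.  The final g
   approximates sqrt(src); 2*h approximates 1/sqrt(src).  */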
21990 void
21991 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21993 machine_mode mode = GET_MODE (src);
21994 rtx e = gen_reg_rtx (mode);
21995 rtx g = gen_reg_rtx (mode);
21996 rtx h = gen_reg_rtx (mode);
21998 /* Low precision estimates guarantee 5 bits of accuracy. High
21999 precision estimates guarantee 14 bits of accuracy. SFmode
22000 requires 23 bits of accuracy. DFmode requires 52 bits of
22001 accuracy. Each pass at least doubles the accuracy, leading
22002 to the following. */
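/* Concretely: a 14-bit estimate doubled once gives 28 >= 23 bits
   (one pass for SFmode) and doubled twice gives 56 >= 52 bits (two
   passes for DFmode); a 5-bit estimate needs three and four passes
   respectively (40 >= 23, 80 >= 52).  */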
22003 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22004 if (mode == DFmode || mode == V2DFmode)
22005 passes++;
22007 int i;
22008 rtx mhalf;
22009 enum insn_code code = optab_handler (smul_optab, mode);
22010 insn_gen_fn gen_mul = GEN_FCN (code);
22012 gcc_assert (code != CODE_FOR_nothing);
22014 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22016 /* e = rsqrt estimate */
22017 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22018 UNSPEC_RSQRT)));
22020 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22021 if (!recip)
22023 rtx zero = force_reg (mode, CONST0_RTX (mode));
22025 if (mode == SFmode)
22027 rtx target = emit_conditional_move (e, GT, src, zero, mode,
22028 e, zero, mode, 0);
22029 if (target != e)
22030 emit_move_insn (e, target);
22032 else
22034 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22035 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22039 /* g = sqrt estimate. */
22040 emit_insn (gen_mul (g, e, src));
22041 /* h = 1/(2*sqrt) estimate. */
22042 emit_insn (gen_mul (h, e, mhalf));
22044 if (recip)
22046 if (passes == 1)
22048 rtx t = gen_reg_rtx (mode);
22049 rs6000_emit_nmsub (t, g, h, mhalf);
22050 /* Apply correction directly to 1/rsqrt estimate. */
22051 rs6000_emit_madd (dst, e, t, e);
22053 else
22055 for (i = 0; i < passes; i++)
22057 rtx t1 = gen_reg_rtx (mode);
22058 rtx g1 = gen_reg_rtx (mode);
22059 rtx h1 = gen_reg_rtx (mode);
22061 rs6000_emit_nmsub (t1, g, h, mhalf);
22062 rs6000_emit_madd (g1, g, t1, g);
22063 rs6000_emit_madd (h1, h, t1, h);
22065 g = g1;
22066 h = h1;
22068 /* Multiply by 2 for 1/rsqrt. */
22069 emit_insn (gen_add3_insn (dst, h, h));
22072 else
22074 rtx t = gen_reg_rtx (mode);
22075 rs6000_emit_nmsub (t, g, h, mhalf);
22076 rs6000_emit_madd (dst, g, t, g);
22079 return;
22082 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22083 (Power7) targets. DST is the target, and SRC is the argument operand. */
22085 void
22086 rs6000_emit_popcount (rtx dst, rtx src)
22088 machine_mode mode = GET_MODE (dst);
22089 rtx tmp1, tmp2;
22091 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22092 if (TARGET_POPCNTD)
22094 if (mode == SImode)
22095 emit_insn (gen_popcntdsi2 (dst, src));
22096 else
22097 emit_insn (gen_popcntddi2 (dst, src));
22098 return;
22101 tmp1 = gen_reg_rtx (mode);
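/* Without popcntw/popcntd, sum the per-byte counts from popcntb with
   a multiply: multiplying by 0x01010101 (or the 64-bit equivalent)
   accumulates the sum of all byte counts into the most significant
   byte, which the final right shift (24 or 56) extracts.  */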
22103 if (mode == SImode)
22105 emit_insn (gen_popcntbsi2 (tmp1, src));
22106 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
22107 NULL_RTX, 0);
22108 tmp2 = force_reg (SImode, tmp2);
22109 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
22111 else
22113 emit_insn (gen_popcntbdi2 (tmp1, src));
22114 tmp2 = expand_mult (DImode, tmp1,
22115 GEN_INT ((HOST_WIDE_INT)
22116 0x01010101 << 32 | 0x01010101),
22117 NULL_RTX, 0);
22118 tmp2 = force_reg (DImode, tmp2);
22119 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
22124 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22125 target, and SRC is the argument operand. */
22127 void
22128 rs6000_emit_parity (rtx dst, rtx src)
22130 machine_mode mode = GET_MODE (dst);
22131 rtx tmp;
22133 tmp = gen_reg_rtx (mode);
22135 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22136 if (TARGET_CMPB)
22138 if (mode == SImode)
22140 emit_insn (gen_popcntbsi2 (tmp, src));
22141 emit_insn (gen_paritysi2_cmpb (dst, tmp));
22143 else
22145 emit_insn (gen_popcntbdi2 (tmp, src));
22146 emit_insn (gen_paritydi2_cmpb (dst, tmp));
22148 return;
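/* Without prtyw/prtyd, fold with xors: bit 0 of each popcntb byte
   count is that byte's parity, and xoring the upper half into the
   lower half preserves overall parity, so successive folds leave the
   parity of the whole value in bit 0, which the final AND with 1
   extracts.  */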
22151 if (mode == SImode)
22153 /* Is mult+shift >= shift+xor+shift+xor? */
22154 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
22156 rtx tmp1, tmp2, tmp3, tmp4;
22158 tmp1 = gen_reg_rtx (SImode);
22159 emit_insn (gen_popcntbsi2 (tmp1, src));
22161 tmp2 = gen_reg_rtx (SImode);
22162 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
22163 tmp3 = gen_reg_rtx (SImode);
22164 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
22166 tmp4 = gen_reg_rtx (SImode);
22167 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
22168 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
22170 else
22171 rs6000_emit_popcount (tmp, src);
22172 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
22174 else
22176 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
22177 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
22179 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
22181 tmp1 = gen_reg_rtx (DImode);
22182 emit_insn (gen_popcntbdi2 (tmp1, src));
22184 tmp2 = gen_reg_rtx (DImode);
22185 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
22186 tmp3 = gen_reg_rtx (DImode);
22187 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
22189 tmp4 = gen_reg_rtx (DImode);
22190 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
22191 tmp5 = gen_reg_rtx (DImode);
22192 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
22194 tmp6 = gen_reg_rtx (DImode);
22195 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
22196 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
22198 else
22199 rs6000_emit_popcount (tmp, src);
22200 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
22204 /* Expand an Altivec constant permutation for little-endian mode.
22205 OP0 and OP1 are the input vectors and TARGET is the output vector.
22206 SEL specifies the constant permutation vector.
22208 There are two issues: First, the two input operands must be
22209 swapped so that together they form a double-wide array in LE
22210 order. Second, the vperm instruction has surprising behavior
22211 in LE mode: it interprets the elements of the source vectors
22212 in BE mode ("left to right") and interprets the elements of
22213 the destination vector in LE mode ("right to left"). To
22214 correct for this, we must subtract each element of the permute
22215 control vector from 31.
22217 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
22218 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
22219 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
22220 serve as the permute control vector. Then, in BE mode,
22222 vperm 9,10,11,12
22224 places the desired result in vr9. However, in LE mode the
22225 vector contents will be
22227 vr10 = 00000003 00000002 00000001 00000000
22228 vr11 = 00000007 00000006 00000005 00000004
22230 The result of the vperm using the same permute control vector is
22232 vr9 = 05000000 07000000 01000000 03000000
22234 That is, the leftmost 4 bytes of vr10 are interpreted as the
22235 source for the rightmost 4 bytes of vr9, and so on.
22237 If we change the permute control vector to
22239 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
22241 and issue
22243 vperm 9,11,10,12
22245 we get the desired
22247 vr9 = 00000006 00000004 00000002 00000000. */
22249 static void
22250 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
22251 const vec_perm_indices &sel)
22253 unsigned int i;
22254 rtx perm[16];
22255 rtx constv, unspec;
22257 /* Unpack and adjust the constant selector. */
22258 for (i = 0; i < 16; ++i)
22260 unsigned int elt = 31 - (sel[i] & 31);
22261 perm[i] = GEN_INT (elt);
22264 /* Expand to a permute, swapping the inputs and using the
22265 adjusted selector. */
22266 if (!REG_P (op0))
22267 op0 = force_reg (V16QImode, op0);
22268 if (!REG_P (op1))
22269 op1 = force_reg (V16QImode, op1);
22271 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
22272 constv = force_reg (V16QImode, constv);
22273 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
22274 UNSPEC_VPERM);
22275 if (!REG_P (target))
22277 rtx tmp = gen_reg_rtx (V16QImode);
22278 emit_move_insn (tmp, unspec);
22279 unspec = tmp;
22282 emit_move_insn (target, unspec);
22285 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
22286 permute control vector. But here it's not a constant, so we must
22287 generate a vector NAND or NOR to do the adjustment. */
22289 void
22290 altivec_expand_vec_perm_le (rtx operands[4])
22292 rtx notx, iorx, unspec;
22293 rtx target = operands[0];
22294 rtx op0 = operands[1];
22295 rtx op1 = operands[2];
22296 rtx sel = operands[3];
22297 rtx tmp = target;
22298 rtx norreg = gen_reg_rtx (V16QImode);
22299 machine_mode mode = GET_MODE (target);
22301 /* Get everything in regs so the pattern matches. */
22302 if (!REG_P (op0))
22303 op0 = force_reg (mode, op0);
22304 if (!REG_P (op1))
22305 op1 = force_reg (mode, op1);
22306 if (!REG_P (sel))
22307 sel = force_reg (V16QImode, sel);
22308 if (!REG_P (target))
22309 tmp = gen_reg_rtx (mode);
22311 if (TARGET_P9_VECTOR)
22313 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
22314 UNSPEC_VPERMR);
22316 else
22318 /* Invert the selector with a VNAND if available, else a VNOR.
22319 The VNAND is preferred for future fusion opportunities. */
22320 notx = gen_rtx_NOT (V16QImode, sel);
22321 iorx = (TARGET_P8_VECTOR
22322 ? gen_rtx_IOR (V16QImode, notx, notx)
22323 : gen_rtx_AND (V16QImode, notx, notx));
22324 emit_insn (gen_rtx_SET (norreg, iorx));
22326 /* Permute with operands reversed and adjusted selector. */
22327 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
22328 UNSPEC_VPERM);
22331 /* Copy into target, possibly by way of a register. */
22332 if (!REG_P (target))
22334 emit_move_insn (tmp, unspec);
22335 unspec = tmp;
22338 emit_move_insn (target, unspec);
22341 /* Expand an Altivec constant permutation. Return true if we match
22342 an efficient implementation; false to fall back to VPERM.
22344 OP0 and OP1 are the input vectors and TARGET is the output vector.
22345 SEL specifies the constant permutation vector. */
22347 static bool
22348 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
22349 const vec_perm_indices &sel)
22351 struct altivec_perm_insn {
22352 HOST_WIDE_INT mask;
22353 enum insn_code impl;
22354 unsigned char perm[16];
22356 static const struct altivec_perm_insn patterns[] = {
22357 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
22358 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
22359 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
22360 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
22361 { OPTION_MASK_ALTIVEC,
22362 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
22363 : CODE_FOR_altivec_vmrglb_direct),
22364 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
22365 { OPTION_MASK_ALTIVEC,
22366 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
22367 : CODE_FOR_altivec_vmrglh_direct),
22368 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
22369 { OPTION_MASK_ALTIVEC,
22370 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
22371 : CODE_FOR_altivec_vmrglw_direct),
22372 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
22373 { OPTION_MASK_ALTIVEC,
22374 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
22375 : CODE_FOR_altivec_vmrghb_direct),
22376 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
22377 { OPTION_MASK_ALTIVEC,
22378 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
22379 : CODE_FOR_altivec_vmrghh_direct),
22380 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
22381 { OPTION_MASK_ALTIVEC,
22382 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
22383 : CODE_FOR_altivec_vmrghw_direct),
22384 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
22385 { OPTION_MASK_P8_VECTOR,
22386 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
22387 : CODE_FOR_p8_vmrgow_v4sf_direct),
22388 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
22389 { OPTION_MASK_P8_VECTOR,
22390 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
22391 : CODE_FOR_p8_vmrgew_v4sf_direct),
22392 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
22395 unsigned int i, j, elt, which;
22396 unsigned char perm[16];
22397 rtx x;
22398 bool one_vec;
22400 /* Unpack the constant selector. */
22401 for (i = which = 0; i < 16; ++i)
22403 elt = sel[i] & 31;
22404 which |= (elt < 16 ? 1 : 2);
22405 perm[i] = elt;
22408 /* Simplify the constant selector based on operands. */
22409 switch (which)
22411 default:
22412 gcc_unreachable ();
22414 case 3:
22415 one_vec = false;
22416 if (!rtx_equal_p (op0, op1))
22417 break;
22418 /* FALLTHRU */
22420 case 2:
22421 for (i = 0; i < 16; ++i)
22422 perm[i] &= 15;
22423 op0 = op1;
22424 one_vec = true;
22425 break;
22427 case 1:
22428 op1 = op0;
22429 one_vec = true;
22430 break;
22433 /* Look for splat patterns. */
22434 if (one_vec)
22436 elt = perm[0];
22438 for (i = 0; i < 16; ++i)
22439 if (perm[i] != elt)
22440 break;
22441 if (i == 16)
22443 if (!BYTES_BIG_ENDIAN)
22444 elt = 15 - elt;
22445 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22446 return true;
22449 if (elt % 2 == 0)
22451 for (i = 0; i < 16; i += 2)
22452 if (perm[i] != elt || perm[i + 1] != elt + 1)
22453 break;
22454 if (i == 16)
22456 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22457 x = gen_reg_rtx (V8HImode);
22458 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22459 GEN_INT (field)));
22460 emit_move_insn (target, gen_lowpart (V16QImode, x));
22461 return true;
22465 if (elt % 4 == 0)
22467 for (i = 0; i < 16; i += 4)
22468 if (perm[i] != elt
22469 || perm[i + 1] != elt + 1
22470 || perm[i + 2] != elt + 2
22471 || perm[i + 3] != elt + 3)
22472 break;
22473 if (i == 16)
22475 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22476 x = gen_reg_rtx (V4SImode);
22477 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22478 GEN_INT (field)));
22479 emit_move_insn (target, gen_lowpart (V16QImode, x));
22480 return true;
22485 /* Look for merge and pack patterns. */
22486 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22488 bool swapped;
22490 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22491 continue;
22493 elt = patterns[j].perm[0];
22494 if (perm[0] == elt)
22495 swapped = false;
22496 else if (perm[0] == elt + 16)
22497 swapped = true;
22498 else
22499 continue;
22500 for (i = 1; i < 16; ++i)
22502 elt = patterns[j].perm[i];
22503 if (swapped)
22504 elt = (elt >= 16 ? elt - 16 : elt + 16);
22505 else if (one_vec && elt >= 16)
22506 elt -= 16;
22507 if (perm[i] != elt)
22508 break;
22510 if (i == 16)
22512 enum insn_code icode = patterns[j].impl;
22513 machine_mode omode = insn_data[icode].operand[0].mode;
22514 machine_mode imode = insn_data[icode].operand[1].mode;
22516 /* For little-endian, don't use vpkuwum and vpkuhum if the
22517 underlying vector type is not V4SI and V8HI, respectively.
22518 For example, using vpkuwum with a V8HI picks up the even
22519 halfwords (BE numbering) when the even halfwords (LE
22520 numbering) are what we need. */
22521 if (!BYTES_BIG_ENDIAN
22522 && icode == CODE_FOR_altivec_vpkuwum_direct
22523 && ((REG_P (op0)
22524 && GET_MODE (op0) != V4SImode)
22525 || (SUBREG_P (op0)
22526 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22527 continue;
22528 if (!BYTES_BIG_ENDIAN
22529 && icode == CODE_FOR_altivec_vpkuhum_direct
22530 && ((REG_P (op0)
22531 && GET_MODE (op0) != V8HImode)
22532 || (SUBREG_P (op0)
22533 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22534 continue;
22536 /* For little-endian, the two input operands must be swapped
22537 (or swapped back) to ensure proper right-to-left numbering
22538 from 0 to 2N-1. */
22539 if (swapped ^ !BYTES_BIG_ENDIAN)
22540 std::swap (op0, op1);
22541 if (imode != V16QImode)
22543 op0 = gen_lowpart (imode, op0);
22544 op1 = gen_lowpart (imode, op1);
22546 if (omode == V16QImode)
22547 x = target;
22548 else
22549 x = gen_reg_rtx (omode);
22550 emit_insn (GEN_FCN (icode) (x, op0, op1));
22551 if (omode != V16QImode)
22552 emit_move_insn (target, gen_lowpart (V16QImode, x));
22553 return true;
22557 if (!BYTES_BIG_ENDIAN)
22559 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22560 return true;
22563 return false;
22566 /* Expand a VSX Permute Doubleword constant permutation.
22567 Return true if we match an efficient implementation. */
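/* PERM0 and PERM1 are doubleword selectors in the range 0..3, in the
   style of the xxpermdi encoding: 0 and 1 select a doubleword of OP0,
   2 and 3 a doubleword of OP1.  */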
22569 static bool
22570 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22571 unsigned char perm0, unsigned char perm1)
22573 rtx x;
22575 /* If both selectors come from the same operand, fold to single op. */
22576 if ((perm0 & 2) == (perm1 & 2))
22578 if (perm0 & 2)
22579 op0 = op1;
22580 else
22581 op1 = op0;
22583 /* If both operands are equal, fold to simpler permutation. */
22584 if (rtx_equal_p (op0, op1))
22586 perm0 = perm0 & 1;
22587 perm1 = (perm1 & 1) + 2;
22589 /* If the first selector comes from the second operand, swap. */
22590 else if (perm0 & 2)
22592 if (perm1 & 2)
22593 return false;
22594 perm0 -= 2;
22595 perm1 += 2;
22596 std::swap (op0, op1);
22598 /* If the second selector does not come from the second operand, fail. */
22599 else if ((perm1 & 2) == 0)
22600 return false;
22602 /* Success! */
22603 if (target != NULL)
22605 machine_mode vmode, dmode;
22606 rtvec v;
22608 vmode = GET_MODE (target);
22609 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22610 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22611 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22612 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22613 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22614 emit_insn (gen_rtx_SET (target, x));
22616 return true;
22619 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22621 static bool
22622 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22623 rtx op1, const vec_perm_indices &sel)
22625 bool testing_p = !target;
22627 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22628 if (TARGET_ALTIVEC && testing_p)
22629 return true;
22631 /* Check for ps_merge* or xxpermdi insns. */
22632 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22634 if (testing_p)
22636 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22637 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22639 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22640 return true;
22643 if (TARGET_ALTIVEC)
22645 /* Force the target-independent code to lower to V16QImode. */
22646 if (vmode != V16QImode)
22647 return false;
22648 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22649 return true;
22652 return false;
22655 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22656 OP0 and OP1 are the input vectors and TARGET is the output vector.
22657 PERM specifies the constant permutation vector. */
22659 static void
22660 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22661 machine_mode vmode, const vec_perm_builder &perm)
22663 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22664 if (x != target)
22665 emit_move_insn (target, x);
22668 /* Expand an extract even operation. */
22670 void
22671 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22673 machine_mode vmode = GET_MODE (target);
22674 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22675 vec_perm_builder perm (nelt, nelt, 1);
22677 for (i = 0; i < nelt; i++)
22678 perm.quick_push (i * 2);
22680 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22683 /* Expand a vector interleave operation. */
22685 void
22686 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22688 machine_mode vmode = GET_MODE (target);
22689 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22690 vec_perm_builder perm (nelt, nelt, 1);
22692 high = (highp ? 0 : nelt / 2);
22693 for (i = 0; i < nelt / 2; i++)
22695 perm.quick_push (i + high);
22696 perm.quick_push (i + nelt + high);
22699 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22702 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22703 void
22704 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22706 HOST_WIDE_INT hwi_scale (scale);
22707 REAL_VALUE_TYPE r_pow;
22708 rtvec v = rtvec_alloc (2);
22709 rtx elt;
22710 rtx scale_vec = gen_reg_rtx (V2DFmode);
22711 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22712 elt = const_double_from_real_value (r_pow, DFmode);
22713 RTVEC_ELT (v, 0) = elt;
22714 RTVEC_ELT (v, 1) = elt;
22715 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22716 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22719 /* Return an RTX representing where to find the function value of a
22720 function returning MODE. */
22721 static rtx
22722 rs6000_complex_function_value (machine_mode mode)
22724 unsigned int regno;
22725 rtx r1, r2;
22726 machine_mode inner = GET_MODE_INNER (mode);
22727 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22729 if (TARGET_FLOAT128_TYPE
22730 && (mode == KCmode
22731 || (mode == TCmode && TARGET_IEEEQUAD)))
22732 regno = ALTIVEC_ARG_RETURN;
22734 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22735 regno = FP_ARG_RETURN;
22737 else
22739 regno = GP_ARG_RETURN;
22741 /* 32-bit is OK since it'll go in r3/r4. */
22742 if (TARGET_32BIT && inner_bytes >= 4)
22743 return gen_rtx_REG (mode, regno);
22746 if (inner_bytes >= 8)
22747 return gen_rtx_REG (mode, regno);
22749 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22750 const0_rtx);
22751 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22752 GEN_INT (inner_bytes));
22753 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22756 /* Return an rtx describing a return value of MODE as a PARALLEL
22757 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22758 stride REG_STRIDE. */
22760 static rtx
22761 rs6000_parallel_return (machine_mode mode,
22762 int n_elts, machine_mode elt_mode,
22763 unsigned int regno, unsigned int reg_stride)
22765 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22767 int i;
22768 for (i = 0; i < n_elts; i++)
22770 rtx r = gen_rtx_REG (elt_mode, regno);
22771 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22772 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22773 regno += reg_stride;
22776 return par;
22779 /* Target hook for TARGET_FUNCTION_VALUE.
22781 An integer value is in r3 and a floating-point value is in fp1,
22782 unless -msoft-float. */
22784 static rtx
22785 rs6000_function_value (const_tree valtype,
22786 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22787 bool outgoing ATTRIBUTE_UNUSED)
22789 machine_mode mode;
22790 unsigned int regno;
22791 machine_mode elt_mode;
22792 int n_elts;
22794 /* Special handling for structs in darwin64. */
22795 if (TARGET_MACHO
22796 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22798 CUMULATIVE_ARGS valcum;
22799 rtx valret;
22801 valcum.words = 0;
22802 valcum.fregno = FP_ARG_MIN_REG;
22803 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22804 /* Do a trial code generation as if this were going to be passed as
22805 an argument; if any part goes in memory, we return NULL. */
22806 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22807 if (valret)
22808 return valret;
22809 /* Otherwise fall through to standard ABI rules. */
22812 mode = TYPE_MODE (valtype);
22814 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22815 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22817 int first_reg, n_regs;
22819 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22821 /* _Decimal128 must use even/odd register pairs. */
22822 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22823 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22825 else
22827 first_reg = ALTIVEC_ARG_RETURN;
22828 n_regs = 1;
22831 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22834 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI.  */
22835 if (TARGET_32BIT && TARGET_POWERPC64)
22836 switch (mode)
22838 default:
22839 break;
22840 case E_DImode:
22841 case E_SCmode:
22842 case E_DCmode:
22843 case E_TCmode:
22844 int count = GET_MODE_SIZE (mode) / 4;
22845 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22848 if ((INTEGRAL_TYPE_P (valtype)
22849 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22850 || POINTER_TYPE_P (valtype))
22851 mode = TARGET_32BIT ? SImode : DImode;
22853 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22854 /* _Decimal128 must use an even/odd register pair. */
22855 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22856 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22857 && !FLOAT128_VECTOR_P (mode))
22858 regno = FP_ARG_RETURN;
22859 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22860 && targetm.calls.split_complex_arg)
22861 return rs6000_complex_function_value (mode);
22862 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22863 return register is used in both cases, and we won't see V2DImode/V2DFmode
22864 for pure altivec, combine the two cases. */
22865 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode))
22866 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22867 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22868 regno = ALTIVEC_ARG_RETURN;
22869 else
22870 regno = GP_ARG_RETURN;
22872 return gen_rtx_REG (mode, regno);
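/* Illustrative examples (not in the original source), assuming the
   64-bit ELFv2 ABI with hard float and AltiVec enabled:

     long f (void);        // value returned in r3 (GP_ARG_RETURN)
     double g (void);      // value returned in f1 (FP_ARG_RETURN)
     vector int h (void);  // value returned in v2 (ALTIVEC_ARG_RETURN)

   and a struct of two doubles is a homogeneous aggregate, returned in
   f1/f2 through rs6000_parallel_return above.  */
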
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return values need to be split in the -mpowerpc64, 32-bit
     ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
           && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}

/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */

static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_ALTIVEC)
        pressure_classes[n++] = ALTIVEC_REGS;
      if (TARGET_HARD_FLOAT)
        pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}

/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          ? ! frame_pointer_needed
          : from == RS6000_PIC_OFFSET_TABLE_REGNUM
            ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
              || constant_pool_empty_p ()
            : true);
}

/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
        offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
             ? info->fixed_size + info->vars_size + info->parm_size
             : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}

/* Fill in sizes of registers used by unwinder.  */

static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
         The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
        {
          int column = DWARF_REG_TO_UNWIND_COLUMN
                (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
          HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

          emit_move_insn (adjust_address (mem, mode, offset), value);
        }
    }
}

/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      if (regno <= 31)
        return regno;
      if (FP_REGNO_P (regno))
        return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
        return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
        return 108;
      if (regno == CTR_REGNO)
        return 109;
      if (regno == CA_REGNO)
        return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
         translated any combination of CR2, CR3, CR4 saves to a save of CR2.
         The actual code emitted saves the whole of CR, so we map CR2_REGNO
         to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
        return 64;
      if (CR_REGNO_P (regno))
        return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
        return 356;
      if (regno == VSCR_REGNO)
        return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
        return 111;
      if (regno == ARG_POINTER_REGNUM)
        return 67;
      if (regno == 64)
        return 100;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 111;
  if (regno == ARG_POINTER_REGNUM)
    return 67;
  if (regno == 64)
    return 64;

  gcc_unreachable ();
}

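/* Illustrative sketch (not in the original source), assuming a target
   that defines RS6000_USE_DWARF_NUMBERING: for .debug_frame (FORMAT 1)
   the mapping above gives, e.g.,

     r3 -> 3,  f1 -> 33,  lr -> 108,  cr2 -> 64 (the whole CR)

   while the GCC 7 compatibility path used for .eh_frame (FORMAT 2)
   gives lr -> 65 and cr0 -> 68 instead.  */
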
/* target hook eh_return_filter_mode */
static scalar_int_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}

/* Target hook for translate_mode_attribute.  */
static machine_mode
rs6000_translate_mode_attribute (machine_mode mode)
{
  if ((FLOAT128_IEEE_P (mode)
       && ieee128_float_type_node == long_double_type_node)
      || (FLOAT128_IBM_P (mode)
          && ibm128_float_type_node == long_double_type_node))
    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
  return mode;
}

/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Target hook for vector_mode_supported_p.  */
static bool
rs6000_vector_mode_supported_p (machine_mode mode)
{
  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
     128-bit, the compiler might try to widen IEEE 128-bit to IBM
     double-double.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
    return true;

  else
    return false;
}

/* Target hook for floatn_mode.  */
static opt_scalar_float_mode
rs6000_floatn_mode (int n, bool extended)
{
  if (extended)
    {
      switch (n)
        {
        case 32:
          return DFmode;

        case 64:
          if (TARGET_FLOAT128_TYPE)
            return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
          else
            return opt_scalar_float_mode ();

        case 128:
          return opt_scalar_float_mode ();

        default:
          /* Those are the only valid _FloatNx types.  */
          gcc_unreachable ();
        }
    }
  else
    {
      switch (n)
        {
        case 32:
          return SFmode;

        case 64:
          return DFmode;

        case 128:
          if (TARGET_FLOAT128_TYPE)
            return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
          else
            return opt_scalar_float_mode ();

        default:
          return opt_scalar_float_mode ();
        }
    }
}

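/* Illustrative mapping (not in the original source), assuming
   -mfloat128 with the default IBM long double, as implied by the
   switches above:

     _Float32  -> SFmode     _Float32x -> DFmode
     _Float64  -> DFmode     _Float64x -> KFmode
     _Float128 -> KFmode     (no _Float128x)  */
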
/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
        return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
         If/when the default for -mabi=ieeelongdouble is changed, and we want
         to support __ibm128 constants in legacy library code, we may need to
         re-evaluate this decision.  Currently, c-lex.c only supports 'w' and
         'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
         __float80 constants.  */
    }

  return VOIDmode;
}

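/* Illustrative example (not in the original source): with -mfloat128,
   the 'q' suffix selects the IEEE 128-bit mode returned above, e.g.

     __float128 third = 1.0q / 3.0q;

   is parsed with mode KFmode (or TFmode when long double is IEEE
   128-bit).  */
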
/* Target hook for invalid_arg_for_unprototyped_fn.  */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return (!rs6000_darwin64_abi
          && typelist == 0
          && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
          && (funcdecl == NULL_TREE
              || (TREE_CODE (funcdecl) == FUNCTION_DECL
                  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
         ? N_("AltiVec argument passed to unprototyped function")
         : NULL;
}

/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
         ? default_hidden_stack_protect_fail ()
         : default_external_stack_protect_fail ();
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

#if TARGET_ELF
static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
#endif

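/* Illustrative sketch (not in the original source) of how the sanitizer
   runtime uses this offset: with the usual 8-to-1 shadow mapping, the
   shadow byte for ADDR is computed as

     shadow = (addr >> 3) + rs6000_asan_shadow_offset ();

   i.e. plus (1ULL << 41) for 64-bit and (1 << 29) for 32-bit ELF.  */
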
/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;             /* option name */
  HOST_WIDE_INT mask;           /* mask to set */
  bool invert;                  /* invert sense of mask */
  bool valid_target;            /* option is a target option */
};

static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
  { "altivec",                  OPTION_MASK_ALTIVEC,            false, true  },
  { "block-ops-unaligned-vsx",  OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
                                                                false, true  },
  { "block-ops-vector-pair",    OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
                                                                false, true  },
  { "cmpb",                     OPTION_MASK_CMPB,               false, true  },
  { "crypto",                   OPTION_MASK_CRYPTO,             false, true  },
  { "direct-move",              OPTION_MASK_DIRECT_MOVE,        false, true  },
  { "dlmzb",                    OPTION_MASK_DLMZB,              false, true  },
  { "efficient-unaligned-vsx",  OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
                                                                false, true  },
  { "float128",                 OPTION_MASK_FLOAT128_KEYWORD,   false, true  },
  { "float128-hardware",        OPTION_MASK_FLOAT128_HW,        false, true  },
  { "fprnd",                    OPTION_MASK_FPRND,              false, true  },
  { "power10",                  OPTION_MASK_POWER10,            false, true  },
  { "hard-dfp",                 OPTION_MASK_DFP,                false, true  },
  { "htm",                      OPTION_MASK_HTM,                false, true  },
  { "isel",                     OPTION_MASK_ISEL,               false, true  },
  { "mfcrf",                    OPTION_MASK_MFCRF,              false, true  },
  { "mfpgpr",                   0,                              false, true  },
  { "mma",                      OPTION_MASK_MMA,                false, true  },
  { "modulo",                   OPTION_MASK_MODULO,             false, true  },
  { "mulhw",                    OPTION_MASK_MULHW,              false, true  },
  { "multiple",                 OPTION_MASK_MULTIPLE,           false, true  },
  { "pcrel",                    OPTION_MASK_PCREL,              false, true  },
  { "popcntb",                  OPTION_MASK_POPCNTB,            false, true  },
  { "popcntd",                  OPTION_MASK_POPCNTD,            false, true  },
  { "power8-fusion",            OPTION_MASK_P8_FUSION,          false, true  },
  { "power8-fusion-sign",       OPTION_MASK_P8_FUSION_SIGN,     false, true  },
  { "power8-vector",            OPTION_MASK_P8_VECTOR,          false, true  },
  { "power9-minmax",            OPTION_MASK_P9_MINMAX,          false, true  },
  { "power9-misc",              OPTION_MASK_P9_MISC,            false, true  },
  { "power9-vector",            OPTION_MASK_P9_VECTOR,          false, true  },
  { "powerpc-gfxopt",           OPTION_MASK_PPC_GFXOPT,         false, true  },
  { "powerpc-gpopt",            OPTION_MASK_PPC_GPOPT,          false, true  },
  { "prefixed",                 OPTION_MASK_PREFIXED,           false, true  },
  { "quad-memory",              OPTION_MASK_QUAD_MEMORY,        false, true  },
  { "quad-memory-atomic",       OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true  },
  { "recip-precision",          OPTION_MASK_RECIP_PRECISION,    false, true  },
  { "save-toc-indirect",        OPTION_MASK_SAVE_TOC_INDIRECT,  false, true  },
  { "string",                   0,                              false, true  },
  { "update",                   OPTION_MASK_NO_UPDATE,          true,  true  },
  { "vsx",                      OPTION_MASK_VSX,                false, true  },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
  { "aix64",                    OPTION_MASK_64BIT,              false, false },
  { "aix32",                    OPTION_MASK_64BIT,              true,  false },
#else
  { "64",                       OPTION_MASK_64BIT,              false, false },
  { "32",                       OPTION_MASK_64BIT,              true,  false },
#endif
#endif
#ifdef OPTION_MASK_EABI
  { "eabi",                     OPTION_MASK_EABI,               false, false },
#endif
#ifdef OPTION_MASK_LITTLE_ENDIAN
  { "little",                   OPTION_MASK_LITTLE_ENDIAN,      false, false },
  { "big",                      OPTION_MASK_LITTLE_ENDIAN,      true,  false },
#endif
#ifdef OPTION_MASK_RELOCATABLE
  { "relocatable",              OPTION_MASK_RELOCATABLE,        false, false },
#endif
#ifdef OPTION_MASK_STRICT_ALIGN
  { "strict-align",             OPTION_MASK_STRICT_ALIGN,       false, false },
#endif
  { "soft-float",               OPTION_MASK_SOFT_FLOAT,         false, false },
  { "string",                   0,                              false, false },
};

/* Builtin mask mapping for printing the flags.  */
static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{
  { "altivec",           RS6000_BTM_ALTIVEC,     false, false },
  { "vsx",               RS6000_BTM_VSX,         false, false },
  { "fre",               RS6000_BTM_FRE,         false, false },
  { "fres",              RS6000_BTM_FRES,        false, false },
  { "frsqrte",           RS6000_BTM_FRSQRTE,     false, false },
  { "frsqrtes",          RS6000_BTM_FRSQRTES,    false, false },
  { "popcntd",           RS6000_BTM_POPCNTD,     false, false },
  { "cell",              RS6000_BTM_CELL,        false, false },
  { "power8-vector",     RS6000_BTM_P8_VECTOR,   false, false },
  { "power9-vector",     RS6000_BTM_P9_VECTOR,   false, false },
  { "power9-misc",       RS6000_BTM_P9_MISC,     false, false },
  { "crypto",            RS6000_BTM_CRYPTO,      false, false },
  { "htm",               RS6000_BTM_HTM,         false, false },
  { "hard-dfp",          RS6000_BTM_DFP,         false, false },
  { "hard-float",        RS6000_BTM_HARD_FLOAT,  false, false },
  { "long-double-128",   RS6000_BTM_LDBL128,     false, false },
  { "powerpc64",         RS6000_BTM_POWERPC64,   false, false },
  { "float128",          RS6000_BTM_FLOAT128,    false, false },
  { "float128-hw",       RS6000_BTM_FLOAT128_HW, false, false },
  { "mma",               RS6000_BTM_MMA,         false, false },
  { "power10",           RS6000_BTM_P10,         false, false },
};

/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;             /* option name */
  size_t global_offset;         /* offset of the option in global_options.  */
  size_t target_offset;         /* offset of the option in target options.  */
};

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "speculate-indirect-jumps",
    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
};

/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
        {
          bool error_p = false;
          bool not_valid_p = false;
          const char *cpu_opt = NULL;

          p = NULL;
          if (strncmp (q, "cpu=", 4) == 0)
            {
              int cpu_index = rs6000_cpu_name_lookup (q+4);
              if (cpu_index >= 0)
                rs6000_cpu_index = cpu_index;
              else
                {
                  error_p = true;
                  cpu_opt = q+4;
                }
            }
          else if (strncmp (q, "tune=", 5) == 0)
            {
              int tune_index = rs6000_cpu_name_lookup (q+5);
              if (tune_index >= 0)
                rs6000_tune_index = tune_index;
              else
                {
                  error_p = true;
                  cpu_opt = q+5;
                }
            }
          else
            {
              size_t i;
              bool invert = false;
              char *r = q;

              error_p = true;
              if (strncmp (r, "no-", 3) == 0)
                {
                  invert = true;
                  r += 3;
                }

              for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
                if (strcmp (r, rs6000_opt_masks[i].name) == 0)
                  {
                    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

                    if (!rs6000_opt_masks[i].valid_target)
                      not_valid_p = true;
                    else
                      {
                        error_p = false;
                        rs6000_isa_flags_explicit |= mask;

                        /* VSX needs altivec, so -mvsx automagically sets
                           altivec and disables -mavoid-indexed-addresses.  */
                        if (!invert)
                          {
                            if (mask == OPTION_MASK_VSX)
                              {
                                mask |= OPTION_MASK_ALTIVEC;
                                TARGET_AVOID_XFORM = 0;
                              }
                          }

                        if (rs6000_opt_masks[i].invert)
                          invert = !invert;

                        if (invert)
                          rs6000_isa_flags &= ~mask;
                        else
                          rs6000_isa_flags |= mask;
                      }
                    break;
                  }

              if (error_p && !not_valid_p)
                {
                  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
                    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
                      {
                        size_t j = rs6000_opt_vars[i].global_offset;
                        *((int *) ((char *)&global_options + j)) = !invert;
                        error_p = false;
                        not_valid_p = false;
                        break;
                      }
                }
            }

          if (error_p)
            {
              const char *eprefix, *esuffix;

              ret = false;
              if (attr_p)
                {
                  eprefix = "__attribute__((__target__(";
                  esuffix = ")))";
                }
              else
                {
                  eprefix = "#pragma GCC target ";
                  esuffix = "";
                }

              if (cpu_opt)
                error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
                       q, esuffix);
              else if (not_valid_p)
                error ("%s%qs%s is not allowed", eprefix, q, esuffix);
              else
                error ("%s%qs%s is invalid", eprefix, q, esuffix);
            }
        }
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
        {
          tree value = TREE_VALUE (args);
          if (value)
            {
              bool ret2 = rs6000_inner_target_options (value, attr_p);
              if (!ret2)
                ret = false;
            }
          args = TREE_CHAIN (args);
        }
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}

/* Print out the target options as a list for -mdebug=target.  */

static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
        {
          p = NULL;
          fprintf (stderr, "%s\"%s\"", prefix, q);
          prefix = ", ";
        }
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
        {
          tree value = TREE_VALUE (args);
          if (value)
            {
              rs6000_debug_target_options (value, prefix);
              prefix = ", ";
            }
          args = TREE_CHAIN (args);
        }
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();

  return;
}

/* Hook to validate attribute((target("..."))).  */

static bool
rs6000_valid_attribute_p (tree fndecl,
                          tree ARG_UNUSED (name),
                          tree args,
                          int flags)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree func_optimize;

  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  if (TARGET_DEBUG_TARGET)
    {
      tree tname = DECL_NAME (fndecl);
      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
      if (tname)
        fprintf (stderr, "function: %.*s\n",
                 (int) IDENTIFIER_LENGTH (tname),
                 IDENTIFIER_POINTER (tname));
      else
        fprintf (stderr, "function: unknown\n");

      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (flags)
        fprintf (stderr, "flags: 0x%x\n", flags);

      fprintf (stderr, "--------------------\n");
    }

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  old_optimize = build_optimization_node (&global_options,
                                          &global_options_set);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
                             TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options, &global_options_set);
  rs6000_cpu_index = rs6000_tune_index = -1;
  ret = rs6000_inner_target_options (args, true);

  /* Set up any additional state.  */
  if (ret)
    {
      ret = rs6000_option_override_internal (false);
      new_target = build_target_option_node (&global_options,
                                             &global_options_set);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options,
                                          &global_options_set);

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
        DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &global_options_set, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
                             TREE_OPTIMIZATION (old_optimize));

  return ret;
}

/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  */

bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options,
                                             &global_options_set);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
  HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
        {
          fprintf (stderr, "pop_target:\n");
          debug_tree (pop_target);
        }
      else
        fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      cur_tree = ((pop_target)
                  ? pop_target
                  : target_option_default_node);
      cl_target_option_restore (&global_options, &global_options_set,
                                TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
          || !rs6000_option_override_internal (false)
          || (cur_tree = build_target_option_node (&global_options,
                                                   &global_options_set))
             == NULL_TREE)
        {
          if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
            fprintf (stderr, "invalid pragma\n");

          return false;
        }
    }

  target_option_current_node = cur_tree;
  rs6000_activate_target_options (target_option_current_node);

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_bumask = prev_opt->x_rs6000_builtin_mask;
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;
      cur_bumask  = cur_opt->x_rs6000_builtin_mask;

      diff_bumask = (prev_bumask ^ cur_bumask);
      diff_flags  = (prev_flags ^ cur_flags);

      if ((diff_flags != 0) || (diff_bumask != 0))
        {
          /* Delete old macros.  */
          rs6000_target_modify_macros_ptr (false,
                                           prev_flags & diff_flags,
                                           prev_bumask & diff_bumask);

          /* Define new macros.  */
          rs6000_target_modify_macros_ptr (true,
                                           cur_flags & diff_flags,
                                           cur_bumask & diff_bumask);
        }
    }

  return true;
}

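/* Illustrative example (not in the original source) of the pragma this
   hook parses:

     #pragma GCC push_options
     #pragma GCC target ("cpu=power9,power9-vector")
     // code here compiles as if with -mcpu=power9 -mpower9-vector,
     // and macros such as _ARCH_PWR9 are (re)defined accordingly
     #pragma GCC pop_options
*/
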
/* Remember the last target of rs6000_set_current_function.  */
static GTY(()) tree rs6000_previous_fndecl;

/* Restore target's globals from NEW_TREE and invalidate the
   rs6000_previous_fndecl cache.  */

void
rs6000_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, &global_options_set,
                            TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  rs6000_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
rs6000_set_current_function (tree fndecl)
{
  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
        fprintf (stderr, ", fndecl %s (%p)",
                 (DECL_NAME (fndecl)
                  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
                  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
        fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == rs6000_previous_fndecl)
    return;

  tree old_tree;
  if (rs6000_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
  else
    old_tree = target_option_default_node;

  tree new_tree;
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
        new_tree = target_option_current_node;
      else
        new_tree = NULL_TREE;
    }
  else
    {
      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
      if (new_tree == NULL_TREE)
        new_tree = target_option_default_node;
    }

  if (TARGET_DEBUG_TARGET)
    {
      if (new_tree)
        {
          fprintf (stderr, "\nnew fndecl target specific options:\n");
          debug_tree (new_tree);
        }

      if (old_tree)
        {
          fprintf (stderr, "\nold fndecl target specific options:\n");
          debug_tree (old_tree);
        }

      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
        fprintf (stderr, "--------------------\n");
    }

  if (new_tree && old_tree != new_tree)
    rs6000_activate_target_options (new_tree);

  if (fndecl)
    rs6000_previous_fndecl = fndecl;
}

/* Save the current options.  */

static void
rs6000_function_specific_save (struct cl_target_option *ptr,
                               struct gcc_options *opts,
                               struct gcc_options */* opts_set */)
{
  ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
  ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
}

/* Restore the current options.  */

static void
rs6000_function_specific_restore (struct gcc_options *opts,
                                  struct gcc_options */* opts_set */,
                                  struct cl_target_option *ptr)
{
  opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
  opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
  (void) rs6000_option_override_internal (false);
}

/* Print the current options.  */

static void
rs6000_function_specific_print (FILE *file, int indent,
                                struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
                            ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
                            ptr->x_rs6000_isa_flags_explicit);
}

/* Helper function to print the current isa or misc options on a line.
   Note: the original code mixed writes to FILE and stderr; all output
   now consistently goes to FILE.  */

static void
rs6000_print_options_internal (FILE *file,
                               int indent,
                               const char *string,
                               HOST_WIDE_INT flags,
                               const char *prefix,
                               const struct rs6000_opt_mask *opts,
                               size_t num_elements)
{
  size_t i;
  size_t start_column = 0;
  size_t cur_column;
  size_t max_column = 120;
  size_t prefix_len = strlen (prefix);
  size_t comma_len = 0;
  const char *comma = "";

  if (indent)
    start_column += fprintf (file, "%*s", indent, "");

  if (!flags)
    {
      fprintf (file, DEBUG_FMT_S, string, "<none>");
      return;
    }

  start_column += fprintf (file, DEBUG_FMT_WX, string, flags);

  /* Print the various mask options.  */
  cur_column = start_column;
  for (i = 0; i < num_elements; i++)
    {
      bool invert = opts[i].invert;
      const char *name = opts[i].name;
      const char *no_str = "";
      HOST_WIDE_INT mask = opts[i].mask;
      size_t len = comma_len + prefix_len + strlen (name);

      if (!invert)
        {
          if ((flags & mask) == 0)
            {
              no_str = "no-";
              len += strlen ("no-");
            }

          flags &= ~mask;
        }

      else
        {
          if ((flags & mask) != 0)
            {
              no_str = "no-";
              len += strlen ("no-");
            }

          flags |= mask;
        }

      cur_column += len;
      if (cur_column > max_column)
        {
          fprintf (file, ", \\\n%*s", (int)start_column, "");
          cur_column = start_column + len;
          comma = "";
        }

      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
      comma = ", ";
      comma_len = strlen (", ");
    }

  fputs ("\n", file);
}

/* Helper function to print the current isa options on a line.  */

static void
rs6000_print_isa_options (FILE *file, int indent, const char *string,
                          HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "-m",
                                 &rs6000_opt_masks[0],
                                 ARRAY_SIZE (rs6000_opt_masks));
}

static void
rs6000_print_builtin_options (FILE *file, int indent, const char *string,
                              HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "",
                                 &rs6000_builtin_mask_names[0],
                                 ARRAY_SIZE (rs6000_builtin_mask_names));
}

/* If the user used -mno-vsx, we need to turn off all of the implicit ISA
   2.06, 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
   -mupper-regs-df, etc.).

   If the user used -mno-power8-vector, we need to turn off all of the
   implicit ISA 2.07 and 3.0 options that relate to the vector unit.

   If the user used -mno-power9-vector, we need to turn off all of the
   implicit ISA 3.0 options that relate to the vector unit.

   This function does not handle explicit options such as the user specifying
   -mdirect-move.  These are handled in rs6000_option_override_internal, and
   the appropriate error is given if needed.

   We return a mask of all of the implicit options that should not be enabled
   by default.  */

static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void)
{
  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
  size_t i, j;

  static const struct {
    const HOST_WIDE_INT no_flag;        /* flag explicitly turned off.  */
    const HOST_WIDE_INT dep_flags;      /* flags that depend on this option.  */
    const char *const name;             /* name of the switch.  */
  } flags[] = {
    { OPTION_MASK_POWER10,      OTHER_POWER10_MASKS,    "power10"       },
    { OPTION_MASK_P9_VECTOR,    OTHER_P9_VECTOR_MASKS,  "power9-vector" },
    { OPTION_MASK_P8_VECTOR,    OTHER_P8_VECTOR_MASKS,  "power8-vector" },
    { OPTION_MASK_VSX,          OTHER_VSX_VECTOR_MASKS, "vsx"           },
    { OPTION_MASK_ALTIVEC,      OTHER_ALTIVEC_MASKS,    "altivec"       },
  };

  for (i = 0; i < ARRAY_SIZE (flags); i++)
    {
      HOST_WIDE_INT no_flag = flags[i].no_flag;

      if ((rs6000_isa_flags & no_flag) == 0
          && (rs6000_isa_flags_explicit & no_flag) != 0)
        {
          HOST_WIDE_INT dep_flags = flags[i].dep_flags;
          HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
                                     & rs6000_isa_flags
                                     & dep_flags);

          if (set_flags)
            {
              for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
                if ((set_flags & rs6000_opt_masks[j].mask) != 0)
                  {
                    set_flags &= ~rs6000_opt_masks[j].mask;
                    error ("%<-mno-%s%> turns off %<-m%s%>",
                           flags[i].name,
                           rs6000_opt_masks[j].name);
                  }

              gcc_assert (!set_flags);
            }

          rs6000_isa_flags &= ~dep_flags;
          ignore_masks |= no_flag | dep_flags;
        }
    }

  return ignore_masks;
}

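/* Illustrative example (not in the original source): with explicit
   "-mpower9-vector -mno-vsx" on the command line, the loop above emits

     error: '-mno-vsx' turns off '-mpower9-vector'

   and the returned mask keeps the dependent vector options from being
   turned back on by the -mcpu= defaults.  */
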
/* Helper function for printing the function name when debugging.  */

static const char *
get_decl_name (tree fn)
{
  tree name;

  if (!fn)
    return "<null>";

  name = DECL_NAME (fn);
  if (!name)
    return "<no-name>";

  return IDENTIFIER_POINTER (name);
}

/* Return the clone id of the target we are compiling code for in a target
   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
   the priority list for the target clones (ordered from lowest to
   highest).  */

static int
rs6000_clone_priority (tree fndecl)
{
  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  HOST_WIDE_INT isa_masks;
  int ret = CLONE_DEFAULT;
  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
  const char *attrs_str = NULL;

  attrs = TREE_VALUE (TREE_VALUE (attrs));
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for the default function.  Return the ISA needed
     for the function if it is not the default.  */
  if (strcmp (attrs_str, "default") != 0)
    {
      if (fn_opts == NULL_TREE)
        fn_opts = target_option_default_node;

      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
        isa_masks = rs6000_isa_flags;
      else
        isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;

      for (ret = CLONE_MAX - 1; ret != 0; ret--)
        if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
          break;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
             get_decl_name (fndecl), ret);

  return ret;
}

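/* Illustrative example (not in the original source) of the attribute
   this priority function supports:

     __attribute__ ((target_clones ("cpu=power9", "default")))
     double dot (const double *a, const double *b, int n);

   The "cpu=power9" clone gets a higher priority than CLONE_DEFAULT, so
   the dispatcher tries it first on hardware that implements ISA 3.0.  */
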
/* This compares the priority of target features in function DECL1 and DECL2.
   It returns a positive value if DECL1 is higher priority, a negative value
   if DECL2 is higher priority, and 0 if they are the same.  Note, priorities
   are ordered from lowest (CLONE_DEFAULT) to highest (currently
   CLONE_ISA_3_0).  */

static int
rs6000_compare_version_priority (tree decl1, tree decl2)
{
  int priority1 = rs6000_clone_priority (decl1);
  int priority2 = rs6000_clone_priority (decl2);
  int ret = priority1 - priority2;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
             get_decl_name (decl1), get_decl_name (decl2), ret);

  return ret;
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
rs6000_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;
  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
             get_decl_name (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;

  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      const tree decl2 = default_version_info->this_node->decl;
      if (is_function_default_version (decl2))
        break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
        default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
  error_at (DECL_SOURCE_LOCATION (default_node->decl),
            "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
            "exports hardware capability bits");
#else

  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
        = dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
        {
          it_v->dispatcher_resolver = dispatch_decl;
          it_v = it_v->next;
        }
    }
  else
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
                "multiversioning needs ifunc which is not supported "
                "on this target");
    }
#endif

  return dispatch_decl;
}

/* Make the resolver function decl to dispatch the versions of a multi-
   versioned function, DEFAULT_DECL.  Create an empty basic block in the
   resolver and store the pointer in EMPTY_BB.  Return the decl of the resolver
   function.  */

static tree
make_resolver_func (const tree default_decl,
                    const tree dispatch_decl,
                    basic_block *empty_bb)
{
  /* Make the resolver function static.  The resolver function returns
     void *.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
  tree type = build_function_type_list (ptr_type_node, NULL_TREE);
  tree decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
         versioned function will put out a resolver.  Ensure it
         is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (dispatch_decl) = 0;

  /* Build result decl and add to function_decl.  */
  tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
                                           profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  cgraph_node::create_same_body_alias (dispatch_decl, decl);

  return decl;
}

/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
   return a pointer to VERSION_DECL if we are running on a machine that
   supports the index CLONE_ISA hardware architecture bits.  This function will
   be called during version dispatch to decide which function version to
   execute.  It returns the basic block at the end, to which more conditions
   can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
                     int clone_isa, basic_block new_bb)
{
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gimple_seq gseq = bb_seq (new_bb);

  tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
                              build_fold_addr_expr (version_decl));
  tree result_var = create_tmp_var (ptr_type_node);
  gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
  gimple *return_stmt = gimple_build_return (result_var);

  if (clone_isa == CLONE_DEFAULT)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  tree bool_zero = build_int_cst (bool_int_type_node, 0);
  tree cond_var = create_tmp_var (bool_int_type_node);
  tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
  const char *arg_str = rs6000_clone_map[clone_isa].name;
  tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
  gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
  gimple_call_set_lhs (call_cond_stmt, cond_var);

  gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (call_cond_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, call_cond_stmt);

  gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
                                            NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  basic_block bb1 = new_bb;
  edge e12 = split_block (bb1, if_else_stmt);
  basic_block bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  edge e23 = split_block (bb2, return_stmt);
  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  basic_block bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();
  return bb3;
}

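/* Illustrative sketch (not in the original source) of what the blocks
   built above amount to, in C-like pseudo code; dot_power9/dot_default
   are hypothetical clone names, and the "arch_3_00" string comes from
   rs6000_clone_map:

     void *dot_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return dot_power9;     // higher-priority clone
       return dot_default;      // CLONE_DEFAULT fallback
     }
*/
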
/* This function generates the dispatch function for multi-versioned functions.
   DISPATCH_DECL is the function which will contain the dispatch logic.
   FNDECLS holds the function choices for dispatch, passed as a vector.
   EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
   code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
                            void *fndecls_p,
                            basic_block *empty_bb)
{
  int ix;
  tree ele;
  vec<tree> *fndecls;
  tree clones[CLONE_MAX];

  if (TARGET_DEBUG_TARGET)
    fputs ("dispatch_function_versions, top\n", stderr);

  gcc_assert (dispatch_decl != NULL
              && fndecls_p != NULL
              && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  gcc_assert (fndecls->length () >= 2);

  /* The first version in the vector is the default decl.  */
  memset ((void *) clones, '\0', sizeof (clones));
  clones[CLONE_DEFAULT] = (*fndecls)[0];

  /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
     on the PowerPC (on the x86_64, it is not a NOP).  The builtin function
     __builtin_cpu_supports ensures that the TOC fields are setup by requiring
     a recent glibc.  If we ever need to call __builtin_cpu_init, we would
     need to insert the code here to do the call.  */

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      int priority = rs6000_clone_priority (ele);
      if (!clones[priority])
        clones[priority] = ele;
    }

  for (ix = CLONE_MAX - 1; ix >= 0; ix--)
    if (clones[ix])
      {
        if (TARGET_DEBUG_TARGET)
          fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
                   ix, get_decl_name (clones[ix]));

        *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
                                         *empty_bb);
      }

  return 0;
}

/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
rs6000_generate_version_dispatcher_body (void *node_p)
{
  tree resolver;
  basic_block empty_bb;
  struct cgraph_node *node = (cgraph_node *) node_p;
  struct cgraph_function_version_info *ninfo = node->function_version ();

  if (ninfo->dispatcher_resolver)
    return ninfo->dispatcher_resolver;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  /* The first version in the chain corresponds to the default version.  */
  ninfo->dispatcher_resolver = resolver
    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
             get_decl_name (resolver));

  push_cfun (DECL_STRUCT_FUNCTION (resolver));
  auto_vec<tree, 2> fn_ver_vec;

  for (struct cgraph_function_version_info *vinfo = ninfo->next;
       vinfo;
       vinfo = vinfo->next)
    {
      struct cgraph_node *version = vinfo->this_node;
      /* Check for virtual functions here again, as by this time it should
         have been determined if this function needs a vtable index or
         not.  This happens for methods in derived classes that override
         virtual methods in base classes but are not explicitly marked as
         virtual.  */
      if (DECL_VINDEX (version->decl))
        sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (version->decl);
    }

  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver;
}

/* Hook to determine if one function can safely inline another.  */

static bool
rs6000_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If the callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  else
    {
      HOST_WIDE_INT caller_isa;
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
      HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
      HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;

      /* If the caller has option attributes, then use them.
         Otherwise, use the command line options.  */
      if (caller_tree)
        caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
      else
        caller_isa = rs6000_isa_flags;

      /* The callee's options must be a subset of the caller's options, i.e.
         a vsx function may inline an altivec function, but a no-vsx function
         must not inline a vsx function.  However, for those options that the
         callee has explicitly enabled or disabled, then we must enforce that
         the callee's and caller's options match exactly; see PR70010.  */
      if (((caller_isa & callee_isa) == callee_isa)
          && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
        ret = true;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
             get_decl_name (caller), get_decl_name (callee),
             (ret ? "can" : "cannot"));

  return ret;
}

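/* Illustrative examples (not in the original source): a caller built
   with -mvsx may inline a callee marked target("altivec"), because the
   callee's ISA flags are a subset of the caller's; but a callee that
   was explicitly marked target("no-vsx") is only inlined into a caller
   whose VSX setting matches exactly (see PR70010 above).  */
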
/* Allocate a stack temp and fixup the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */

rtx
rs6000_allocate_stack_temp (machine_mode mode,
                            bool offsettable_p,
                            bool reg_reg_p)
{
  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  rtx addr = XEXP (stack, 0);
  int strict_p = reload_completed;

  if (!legitimate_indirect_address_p (addr, strict_p))
    {
      if (offsettable_p
          && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
        stack = replace_equiv_address (stack, copy_addr_to_reg (addr));

      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
        stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
    }

  return stack;
}

/* Given a memory reference, if it is not a reg or reg+reg addressing,
   convert to such a form to deal with memory reference instructions
   like STFIWX and LDBRX that only take reg+reg addressing.  */

rtx
rs6000_force_indexed_or_indirect_mem (rtx x)
{
  machine_mode mode = GET_MODE (x);

  gcc_assert (MEM_P (x));
  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
    {
      rtx addr = XEXP (x, 0);
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx reg = XEXP (addr, 0);
          HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
          rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
          gcc_assert (REG_P (reg));
          emit_insn (gen_add3_insn (reg, reg, size_rtx));
          addr = reg;
        }
      else if (GET_CODE (addr) == PRE_MODIFY)
        {
          rtx reg = XEXP (addr, 0);
          rtx expr = XEXP (addr, 1);
          gcc_assert (REG_P (reg));
          gcc_assert (GET_CODE (expr) == PLUS);
          emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
          addr = reg;
        }

      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0);
          rtx op1 = XEXP (addr, 1);
          op0 = force_reg (Pmode, op0);
          op1 = force_reg (Pmode, op1);
          x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
        }
      else
        x = replace_equiv_address (x, force_reg (Pmode, addr));
    }

  return x;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable; most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  if (CONST_DOUBLE_P (x))
    return easy_fp_constant (x, mode);

  if (GET_CODE (x) == CONST_VECTOR)
    return easy_vector_constant (x, mode);

  return true;
}

/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
        {
          rtx patt = PATTERN (last);

          if (GET_CODE (patt) == SET)
            {
              rtx lhs = XEXP (patt, 0);

              if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
                return true;
            }
        }
    }
  return false;
}

24699 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24701 void
24702 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24704 rtx func = func_desc;
24705 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24706 rtx toc_load = NULL_RTX;
24707 rtx toc_restore = NULL_RTX;
24708 rtx func_addr;
24709 rtx abi_reg = NULL_RTX;
24710 rtx call[5];
24711 int n_call;
24712 rtx insn;
24713 bool is_pltseq_longcall;
24715 if (global_tlsarg)
24716 tlsarg = global_tlsarg;
24718 /* Handle longcall attributes. */
24719 is_pltseq_longcall = false;
24720 if ((INTVAL (cookie) & CALL_LONG) != 0
24721 && GET_CODE (func_desc) == SYMBOL_REF)
24723 func = rs6000_longcall_ref (func_desc, tlsarg);
24724 if (TARGET_PLTSEQ)
24725 is_pltseq_longcall = true;
24728 /* Handle indirect calls. */
24729 if (!SYMBOL_REF_P (func)
24730 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24732 if (!rs6000_pcrel_p ())
24734 /* Save the TOC into its reserved slot before the call,
24735 and prepare to restore it after the call. */
24736 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24737 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24738 gen_rtvec (1, stack_toc_offset),
24739 UNSPEC_TOCSLOT);
24740 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24742 /* Can we optimize saving the TOC in the prologue or
24743 do we need to do it at every call? */
24744 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24745 cfun->machine->save_toc_in_prologue = true;
24746 else
24748 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24749 rtx stack_toc_mem = gen_frame_mem (Pmode,
24750 gen_rtx_PLUS (Pmode, stack_ptr,
24751 stack_toc_offset));
24752 MEM_VOLATILE_P (stack_toc_mem) = 1;
24753 if (is_pltseq_longcall)
24755 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24756 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24757 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24759 else
24760 emit_move_insn (stack_toc_mem, toc_reg);
24764 if (DEFAULT_ABI == ABI_ELFv2)
24766 /* A function pointer in the ELFv2 ABI is just a plain address, but
24767 the ABI requires it to be loaded into r12 before the call. */
24768 func_addr = gen_rtx_REG (Pmode, 12);
24769 emit_move_insn (func_addr, func);
24770 abi_reg = func_addr;
24771 /* Indirect calls via CTR are strongly preferred over indirect
24772 calls via LR, so move the address there. Needed to mark
24773 this insn for linker plt sequence editing too. */
24774 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24775 if (is_pltseq_longcall)
24777 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24778 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24779 emit_insn (gen_rtx_SET (func_addr, mark_func));
24780 v = gen_rtvec (2, func_addr, func_desc);
24781 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24783 else
24784 emit_move_insn (func_addr, abi_reg);
24786 else
24788 /* A function pointer under AIX is a pointer to a data area whose
24789 first word contains the actual address of the function, whose
24790 second word contains a pointer to its TOC, and whose third word
24791 contains a value to place in the static chain register (r11).
24792 Note that if we load the static chain, our "trampoline" need
24793 not have any executable code. */
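/* For illustration, the descriptor can be pictured as the C structure
   below; the type and field names are invented here and do not appear
   in the compiler:

     struct aix_func_desc
     {
       void *code_addr;      -- word 0: address of the function's code
       void *toc;            -- word 1: callee's TOC pointer
       void *static_chain;   -- word 2: value loaded into r11, if used
     };  */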
24795 /* Load up address of the actual function. */
24796 func = force_reg (Pmode, func);
24797 func_addr = gen_reg_rtx (Pmode);
24798 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24800 /* Indirect calls via CTR are strongly preferred over indirect
24801 calls via LR, so move the address there. */
24802 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24803 emit_move_insn (ctr_reg, func_addr);
24804 func_addr = ctr_reg;
24806 /* Prepare to load the TOC of the called function. Note that the
24807 TOC load must happen immediately before the actual call so
24808 that unwinding the TOC registers works correctly. See the
24809 comment in frob_update_context. */
24810 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24811 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24812 gen_rtx_PLUS (Pmode, func,
24813 func_toc_offset));
24814 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24816 /* If we have a static chain, load it up. But, if the call was
24817 originally direct, the 3rd word has not been written since no
24818 trampoline has been built, so we ought not to load it, lest we
24819 override a static chain value. */
24820 if (!(GET_CODE (func_desc) == SYMBOL_REF
24821 && SYMBOL_REF_FUNCTION_P (func_desc))
24822 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24823 && !chain_already_loaded (get_current_sequence ()->next->last))
24825 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24826 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24827 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24828 gen_rtx_PLUS (Pmode, func,
24829 func_sc_offset));
24830 emit_move_insn (sc_reg, func_sc_mem);
24831 abi_reg = sc_reg;
24835 else
24837 /* No TOC register needed for calls from PC-relative callers. */
24838 if (!rs6000_pcrel_p ())
24839 /* Direct calls use the TOC: for local calls, the callee will
24840 assume the TOC register is set; for non-local calls, the
24841 PLT stub needs the TOC register. */
24842 abi_reg = toc_reg;
24843 func_addr = func;
24846 /* Create the call. */
24847 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24848 if (value != NULL_RTX)
24849 call[0] = gen_rtx_SET (value, call[0]);
24850 call[1] = gen_rtx_USE (VOIDmode, cookie);
24851 n_call = 2;
24853 if (toc_load)
24854 call[n_call++] = toc_load;
24855 if (toc_restore)
24856 call[n_call++] = toc_restore;
24858 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24860 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24861 insn = emit_call_insn (insn);
24863 /* Mention all registers defined by the ABI to hold information
24864 as uses in CALL_INSN_FUNCTION_USAGE. */
24865 if (abi_reg)
24866 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
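/* A rough sketch of the result (64-bit AIX offsets; ELFv2 instead puts
   the address itself in r12 and uses 24(1) for the TOC slot).  An
   indirect call through a descriptor in r9 comes out approximately as:

	std 2,40(1)	-- save our TOC (here or in the prologue)
	ld 10,0(9)	-- code address from the descriptor
	ld 11,16(9)	-- static chain, when one is loaded
	mtctr 10
	ld 2,8(9)	-- callee's TOC, immediately before the call
	bctrl
	ld 2,40(1)	-- restore our TOC after returning  */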
24869 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24871 void
24872 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24874 rtx call[2];
24875 rtx insn;
24876 rtx r12 = NULL_RTX;
24877 rtx func_addr = func_desc;
24879 gcc_assert (INTVAL (cookie) == 0);
24881 if (global_tlsarg)
24882 tlsarg = global_tlsarg;
24884 /* For ELFv2, r12 and CTR need to hold the function address
24885 for an indirect call. */
24886 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
24888 r12 = gen_rtx_REG (Pmode, 12);
24889 emit_move_insn (r12, func_desc);
24890 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24891 emit_move_insn (func_addr, r12);
24894 /* Create the call. */
24895 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24896 if (value != NULL_RTX)
24897 call[0] = gen_rtx_SET (value, call[0]);
24899 call[1] = simple_return_rtx;
24901 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24902 insn = emit_call_insn (insn);
24904 /* Note use of the TOC register. */
24905 if (!rs6000_pcrel_p ())
24906 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24907 gen_rtx_REG (Pmode, TOC_REGNUM));
24909 /* Note use of r12. */
24910 if (r12)
24911 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
24914 /* Expand code to perform a call under the SYSV4 ABI. */
24916 void
24917 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24919 rtx func = func_desc;
24920 rtx func_addr;
24921 rtx call[4];
24922 rtx insn;
24923 rtx abi_reg = NULL_RTX;
24924 int n;
24926 if (global_tlsarg)
24927 tlsarg = global_tlsarg;
24929 /* Handle longcall attributes. */
24930 if ((INTVAL (cookie) & CALL_LONG) != 0
24931 && GET_CODE (func_desc) == SYMBOL_REF)
24933 func = rs6000_longcall_ref (func_desc, tlsarg);
24934 /* If the longcall was implemented as an inline PLT call using
24935 PLT unspecs then func will be REG:r11. If not, func will be
24936 a pseudo reg. The inline PLT call sequence supports lazy
24937 linking (and longcalls to functions in dlopen'd libraries).
24938 The other style of longcall doesn't. The lazy linking entry
24939 to the dynamic symbol resolver requires r11 be the function
24940 address (as it is for linker generated PLT stubs). Ensure
24941 r11 stays valid to the bctrl by marking r11 used by the call. */
24942 if (TARGET_PLTSEQ)
24943 abi_reg = func;
24946 /* Handle indirect calls. */
24947 if (GET_CODE (func) != SYMBOL_REF)
24949 func = force_reg (Pmode, func);
24951 /* Indirect calls via CTR are strongly preferred over indirect
24952 calls via LR, so move the address there. That can't be left
24953 to reload because we want to mark every instruction in an
24954 inline PLT call sequence with a reloc, enabling the linker to
24955 edit the sequence back to a direct call when that makes sense. */
24956 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24957 if (abi_reg)
24959 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24960 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24961 emit_insn (gen_rtx_SET (func_addr, mark_func));
24962 v = gen_rtvec (2, func_addr, func_desc);
24963 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24965 else
24966 emit_move_insn (func_addr, func);
24968 else
24969 func_addr = func;
24971 /* Create the call. */
24972 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24973 if (value != NULL_RTX)
24974 call[0] = gen_rtx_SET (value, call[0]);
24976 call[1] = gen_rtx_USE (VOIDmode, cookie);
24977 n = 2;
24978 if (TARGET_SECURE_PLT
24979 && flag_pic
24980 && GET_CODE (func_addr) == SYMBOL_REF
24981 && !SYMBOL_REF_LOCAL_P (func_addr))
24982 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24984 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24986 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24987 insn = emit_call_insn (insn);
24988 if (abi_reg)
24989 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
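/* Schematically (exact relocations and registers vary by subtarget),
   the inline PLT expansion produced here is

	<load the PLT entry for foo into r11>
	mtctr 11
	bctrl

   with each insn carrying an UNSPEC_PLTSEQ marker, so the linker can
   later rewrite the whole sequence into a direct "bl foo" if foo turns
   out to bind locally.  */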
24992 /* Expand code to perform a sibling call under the SysV4 ABI. */
24994 void
24995 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24997 rtx func = func_desc;
24998 rtx func_addr;
24999 rtx call[3];
25000 rtx insn;
25001 rtx abi_reg = NULL_RTX;
25003 if (global_tlsarg)
25004 tlsarg = global_tlsarg;
25006 /* Handle longcall attributes. */
25007 if ((INTVAL (cookie) & CALL_LONG) != 0
25008 && GET_CODE (func_desc) == SYMBOL_REF)
25010 func = rs6000_longcall_ref (func_desc, tlsarg);
25011 /* If the longcall was implemented as an inline PLT call using
25012 PLT unspecs then func will be REG:r11. If not, func will be
25013 a pseudo reg. The inline PLT call sequence supports lazy
25014 linking (and longcalls to functions in dlopen'd libraries).
25015 The other style of longcall doesn't. The lazy linking entry
25016 to the dynamic symbol resolver requires r11 be the function
25017 address (as it is for linker generated PLT stubs). Ensure
25018 r11 stays valid to the bctr by marking r11 used by the call. */
25019 if (TARGET_PLTSEQ)
25020 abi_reg = func;
25023 /* Handle indirect calls. */
25024 if (GET_CODE (func) != SYMBOL_REF)
25026 func = force_reg (Pmode, func);
25028 /* Indirect sibcalls must go via CTR. That can't be left to
25029 reload because we want to mark every instruction in an inline
25030 PLT call sequence with a reloc, enabling the linker to edit
25031 the sequence back to a direct call when that makes sense. */
25032 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25033 if (abi_reg)
25035 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25036 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25037 emit_insn (gen_rtx_SET (func_addr, mark_func));
25038 v = gen_rtvec (2, func_addr, func_desc);
25039 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25041 else
25042 emit_move_insn (func_addr, func);
25044 else
25045 func_addr = func;
25047 /* Create the call. */
25048 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25049 if (value != NULL_RTX)
25050 call[0] = gen_rtx_SET (value, call[0]);
25052 call[1] = gen_rtx_USE (VOIDmode, cookie);
25053 call[2] = simple_return_rtx;
25055 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25056 insn = emit_call_insn (insn);
25057 if (abi_reg)
25058 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25061 #if TARGET_MACHO
25063 /* Expand code to perform a call under the Darwin ABI.
25064 Modulo handling of mlongcall, this is much the same as sysv.
25065 If/when the longcall optimisation is removed, we could drop this
25066 code and use the sysv case (taking care to avoid the tls stuff).
25068 We can use this for sibcalls too, if needed. */
25070 void
25071 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
25072 rtx cookie, bool sibcall)
25074 rtx func = func_desc;
25075 rtx func_addr;
25076 rtx call[3];
25077 rtx insn;
25078 int cookie_val = INTVAL (cookie);
25079 bool make_island = false;
25081 /* Handle longcall attributes; there are two cases for Darwin:
25082 1) Newer linkers are capable of synthesising any branch islands needed.
25083 2) We need a helper branch island synthesised by the compiler.
25084 The second case has mostly been retired and we don't use it for m64.
25085 In fact, it is just an optimisation; we could simply make an indirect
25086 call as sysv does ... but we keep it for backwards compatibility for now.
25087 If we're going to use this, then we need to keep the CALL_LONG bit set,
25088 so that we can pick up the special insn form later. */
25089 if ((cookie_val & CALL_LONG) != 0
25090 && GET_CODE (func_desc) == SYMBOL_REF)
25092 /* FIXME: the longcall opt should not hang off this flag, it is most
25093 likely incorrect for kernel-mode code-generation. */
25094 if (darwin_symbol_stubs && TARGET_32BIT)
25095 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
25096 else
25098 /* The linker is capable of doing this, but the user explicitly
25099 asked for -mlongcall, so we'll do the 'normal' version. */
25100 func = rs6000_longcall_ref (func_desc, NULL_RTX);
25101 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
25105 /* Handle indirect calls. */
25106 if (GET_CODE (func) != SYMBOL_REF)
25108 func = force_reg (Pmode, func);
25110 /* Indirect calls via CTR are strongly preferred over indirect
25111 calls via LR, and are required for indirect sibcalls, so move
25112 the address there. */
25113 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25114 emit_move_insn (func_addr, func);
25116 else
25117 func_addr = func;
25119 /* Create the call. */
25120 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25121 if (value != NULL_RTX)
25122 call[0] = gen_rtx_SET (value, call[0]);
25124 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
25126 if (sibcall)
25127 call[2] = simple_return_rtx;
25128 else
25129 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25131 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
25132 insn = emit_call_insn (insn);
25133 /* Now that we have the debug info in the insn, we can set up the branch island
25134 if we're using one. */
25135 if (make_island)
25137 tree funname = get_identifier (XSTR (func_desc, 0));
25139 if (no_previous_def (funname))
25141 rtx label_rtx = gen_label_rtx ();
25142 char *label_buf, temp_buf[256];
25143 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
25144 CODE_LABEL_NUMBER (label_rtx));
25145 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
25146 tree labelname = get_identifier (label_buf);
25147 add_compiler_branch_island (labelname, funname,
25148 insn_line ((const rtx_insn*)insn));
25152 #endif
25154 void
25155 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25156 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25158 #if TARGET_MACHO
25159 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
25160 #else
25161 gcc_unreachable();
25162 #endif
25166 void
25167 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
25168 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
25170 #if TARGET_MACHO
25171 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
25172 #else
25173 gcc_unreachable();
25174 #endif
25177 /* Return whether we should generate PC-relative code for FNDECL. */
25178 bool
25179 rs6000_fndecl_pcrel_p (const_tree fndecl)
25181 if (DEFAULT_ABI != ABI_ELFv2)
25182 return false;
25184 struct cl_target_option *opts = target_opts_for_fn (fndecl);
25186 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25187 && TARGET_CMODEL == CMODEL_MEDIUM);
25190 /* Return whether we should generate PC-relative code for *FN. */
25191 bool
25192 rs6000_function_pcrel_p (struct function *fn)
25194 if (DEFAULT_ABI != ABI_ELFv2)
25195 return false;
25197 /* Optimize usual case. */
25198 if (fn == cfun)
25199 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25200 && TARGET_CMODEL == CMODEL_MEDIUM);
25202 return rs6000_fndecl_pcrel_p (fn->decl);
25205 /* Return whether we should generate PC-relative code for the current
25206 function. */
25207 bool
25208 rs6000_pcrel_p ()
25210 return (DEFAULT_ABI == ABI_ELFv2
25211 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
25212 && TARGET_CMODEL == CMODEL_MEDIUM);
25216 /* Given an address (ADDR), a mode (MODE), and what the format of the
25217 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
25218 for the address. */
25220 enum insn_form
25221 address_to_insn_form (rtx addr,
25222 machine_mode mode,
25223 enum non_prefixed_form non_prefixed_format)
25225 /* Single register is easy. */
25226 if (REG_P (addr) || SUBREG_P (addr))
25227 return INSN_FORM_BASE_REG;
25229 /* If the non-prefixed instruction format doesn't support offset addressing,
25230 make sure only indexed addressing is allowed.
25232 We special case SDmode so that the register allocator does not try to move
25233 SDmode through GPR registers, but instead uses the 32-bit integer load and
25234 store instructions for the floating point registers. */
25235 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
25237 if (GET_CODE (addr) != PLUS)
25238 return INSN_FORM_BAD;
25240 rtx op0 = XEXP (addr, 0);
25241 rtx op1 = XEXP (addr, 1);
25242 if (!REG_P (op0) && !SUBREG_P (op0))
25243 return INSN_FORM_BAD;
25245 if (!REG_P (op1) && !SUBREG_P (op1))
25246 return INSN_FORM_BAD;
25248 return INSN_FORM_X;
25251 /* Deal with update forms. */
25252 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
25253 return INSN_FORM_UPDATE;
25255 /* Handle PC-relative symbols and labels. Check for both local and
25256 external symbols. Assume labels are always local. TLS symbols
25257 are not PC-relative for rs6000. */
25258 if (TARGET_PCREL)
25260 if (LABEL_REF_P (addr))
25261 return INSN_FORM_PCREL_LOCAL;
25263 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
25265 if (!SYMBOL_REF_LOCAL_P (addr))
25266 return INSN_FORM_PCREL_EXTERNAL;
25267 else
25268 return INSN_FORM_PCREL_LOCAL;
25272 if (GET_CODE (addr) == CONST)
25273 addr = XEXP (addr, 0);
25275 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
25276 if (GET_CODE (addr) == LO_SUM)
25277 return INSN_FORM_LO_SUM;
25279 /* Everything below must be an offset address of some form. */
25280 if (GET_CODE (addr) != PLUS)
25281 return INSN_FORM_BAD;
25283 rtx op0 = XEXP (addr, 0);
25284 rtx op1 = XEXP (addr, 1);
25286 /* Check for indexed addresses. */
25287 if (REG_P (op1) || SUBREG_P (op1))
25289 if (REG_P (op0) || SUBREG_P (op0))
25290 return INSN_FORM_X;
25292 return INSN_FORM_BAD;
25295 if (!CONST_INT_P (op1))
25296 return INSN_FORM_BAD;
25298 HOST_WIDE_INT offset = INTVAL (op1);
25299 if (!SIGNED_INTEGER_34BIT_P (offset))
25300 return INSN_FORM_BAD;
25302 /* Check for local and external PC-relative addresses. Labels are always
25303 local. TLS symbols are not PC-relative for rs6000. */
25304 if (TARGET_PCREL)
25306 if (LABEL_REF_P (op0))
25307 return INSN_FORM_PCREL_LOCAL;
25309 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
25311 if (!SYMBOL_REF_LOCAL_P (op0))
25312 return INSN_FORM_PCREL_EXTERNAL;
25313 else
25314 return INSN_FORM_PCREL_LOCAL;
25318 /* If it isn't PC-relative, the address must use a base register. */
25319 if (!REG_P (op0) && !SUBREG_P (op0))
25320 return INSN_FORM_BAD;
25322 /* Large offsets must be prefixed. */
25323 if (!SIGNED_INTEGER_16BIT_P (offset))
25325 if (TARGET_PREFIXED)
25326 return INSN_FORM_PREFIXED_NUMERIC;
25328 return INSN_FORM_BAD;
25331 /* We have a 16-bit offset, see what default instruction format to use. */
25332 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
25334 unsigned size = GET_MODE_SIZE (mode);
25336 /* On 64-bit systems, assume 64-bit integers need to use DS form
25337 addresses (for LD/STD). VSX vectors need to use DQ form addresses
25338 (for LXV and STXV). TImode is problematic in that its normal usage
25339 is expected to be GPRs where it wants a DS instruction format, but if
25340 it goes into the vector registers, it wants a DQ instruction
25341 format. */
25342 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
25343 non_prefixed_format = NON_PREFIXED_DS;
25345 else if (TARGET_VSX && size >= 16
25346 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
25347 non_prefixed_format = NON_PREFIXED_DQ;
25349 else
25350 non_prefixed_format = NON_PREFIXED_D;
25353 /* Classify the D/DS/DQ-form addresses. */
25354 switch (non_prefixed_format)
25356 /* Instruction format D, all 16 bits are valid. */
25357 case NON_PREFIXED_D:
25358 return INSN_FORM_D;
25360 /* Instruction format DS, bottom 2 bits must be 0. */
25361 case NON_PREFIXED_DS:
25362 if ((offset & 3) == 0)
25363 return INSN_FORM_DS;
25365 else if (TARGET_PREFIXED)
25366 return INSN_FORM_PREFIXED_NUMERIC;
25368 else
25369 return INSN_FORM_BAD;
25371 /* Instruction format DQ, bottom 4 bits must be 0. */
25372 case NON_PREFIXED_DQ:
25373 if ((offset & 15) == 0)
25374 return INSN_FORM_DQ;
25376 else if (TARGET_PREFIXED)
25377 return INSN_FORM_PREFIXED_NUMERIC;
25379 else
25380 return INSN_FORM_BAD;
25382 default:
25383 break;
25386 return INSN_FORM_BAD;
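/* Some worked classifications (assuming a 64-bit target with prefixed
   instructions enabled; register numbers are arbitrary):

     (reg r9)				-> INSN_FORM_BASE_REG
     (plus (reg r9) (reg r10))		-> INSN_FORM_X
     (plus (reg r9) (const_int 8))	-> INSN_FORM_D (DS/DQ by mode)
     (plus (reg r9) (const_int 6))
	with NON_PREFIXED_DS		-> INSN_FORM_PREFIXED_NUMERIC
     (plus (reg r9) (const_int 0x12340))
					-> INSN_FORM_PREFIXED_NUMERIC  */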
25389 /* Helper function to see if we're potentially looking at lfs/stfs.
25390 - PARALLEL containing a SET and a CLOBBER
25391 - stfs:
25392 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
25393 - CLOBBER is a V4SF
25394 - lfs:
25395 - SET is from UNSPEC_SF_FROM_SI to REG:SF
25396 - CLOBBER is a DI
25397 */
25399 static bool
25400 is_lfs_stfs_insn (rtx_insn *insn)
25402 rtx pattern = PATTERN (insn);
25403 if (GET_CODE (pattern) != PARALLEL)
25404 return false;
25406 /* This should be a parallel with exactly one set and one clobber. */
25407 if (XVECLEN (pattern, 0) != 2)
25408 return false;
25410 rtx set = XVECEXP (pattern, 0, 0);
25411 if (GET_CODE (set) != SET)
25412 return false;
25414 rtx clobber = XVECEXP (pattern, 0, 1);
25415 if (GET_CODE (clobber) != CLOBBER)
25416 return false;
25418 /* All we care about is that the destination of the SET is a mem:SI,
25419 the source should be an UNSPEC_SI_FROM_SF, and the clobber
25420 should be a scratch:V4SF. */
25422 rtx dest = SET_DEST (set);
25423 rtx src = SET_SRC (set);
25424 rtx scratch = SET_DEST (clobber);
25426 if (GET_CODE (src) != UNSPEC)
25427 return false;
25429 /* stfs case. */
25430 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
25431 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
25432 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
25433 return true;
25435 /* lfs case. */
25436 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
25437 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
25438 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
25439 return true;
25441 return false;
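/* For reference, the stfs-style pattern accepted above looks roughly
   like:

     (parallel [(set (mem:SI ...)
		     (unspec:SI [(reg:SF ...)] UNSPEC_SI_FROM_SF))
		(clobber (scratch:V4SF))])

   and the lfs case is the mirror image with UNSPEC_SF_FROM_SI, a REG:SF
   destination and a DImode scratch.  */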
25444 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
25445 instruction format (D/DS/DQ) used for offset memory. */
25447 static enum non_prefixed_form
25448 reg_to_non_prefixed (rtx reg, machine_mode mode)
25450 /* If it isn't a register, use the defaults. */
25451 if (!REG_P (reg) && !SUBREG_P (reg))
25452 return NON_PREFIXED_DEFAULT;
25454 unsigned int r = reg_or_subregno (reg);
25456 /* If we have a pseudo, use the default instruction format. */
25457 if (!HARD_REGISTER_NUM_P (r))
25458 return NON_PREFIXED_DEFAULT;
25460 unsigned size = GET_MODE_SIZE (mode);
25462 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
25463 128-bit floating point, and 128-bit integers. Before power9, only indexed
25464 addressing was available for vectors. */
25465 if (FP_REGNO_P (r))
25467 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25468 return NON_PREFIXED_D;
25470 else if (size < 8)
25471 return NON_PREFIXED_X;
25473 else if (TARGET_VSX && size >= 16
25474 && (VECTOR_MODE_P (mode)
25475 || VECTOR_ALIGNMENT_P (mode)
25476 || mode == TImode || mode == CTImode))
25477 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
25479 else
25480 return NON_PREFIXED_DEFAULT;
25483 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
25484 128-bit floating point, and 128-bit integers. Before power9, only indexed
25485 addressing was available. */
25486 else if (ALTIVEC_REGNO_P (r))
25488 if (!TARGET_P9_VECTOR)
25489 return NON_PREFIXED_X;
25491 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
25492 return NON_PREFIXED_DS;
25494 else if (size < 8)
25495 return NON_PREFIXED_X;
25497 else if (TARGET_VSX && size >= 16
25498 && (VECTOR_MODE_P (mode)
25499 || VECTOR_ALIGNMENT_P (mode)
25500 || mode == TImode || mode == CTImode))
25501 return NON_PREFIXED_DQ;
25503 else
25504 return NON_PREFIXED_DEFAULT;
25507 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
25508 otherwise. Assume that any other register, such as LR, CRs, etc. will go
25509 through the GPR registers for memory operations. */
25510 else if (TARGET_POWERPC64 && size >= 8)
25511 return NON_PREFIXED_DS;
25513 return NON_PREFIXED_D;
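/* Two quick examples of this mapping (assuming power9 with VSX): DFmode
   in an FPR yields NON_PREFIXED_D (lfd/stfd), and V2DFmode in an
   Altivec register yields NON_PREFIXED_DQ (lxv/stxv); the same V2DF
   before power9 falls back to NON_PREFIXED_X, i.e. indexed forms like
   lxvd2x only.  */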
25517 /* Whether a load instruction is a prefixed instruction. This is called from
25518 the prefixed attribute processing. */
25520 bool
25521 prefixed_load_p (rtx_insn *insn)
25523 /* Validate the insn to make sure it is a normal load insn. */
25524 extract_insn_cached (insn);
25525 if (recog_data.n_operands < 2)
25526 return false;
25528 rtx reg = recog_data.operand[0];
25529 rtx mem = recog_data.operand[1];
25531 if (!REG_P (reg) && !SUBREG_P (reg))
25532 return false;
25534 if (!MEM_P (mem))
25535 return false;
25537 /* Prefixed load instructions do not support update or indexed forms. */
25538 if (get_attr_indexed (insn) == INDEXED_YES
25539 || get_attr_update (insn) == UPDATE_YES)
25540 return false;
25542 /* LWA uses the DS format instead of the D format that LWZ uses. */
25543 enum non_prefixed_form non_prefixed;
25544 machine_mode reg_mode = GET_MODE (reg);
25545 machine_mode mem_mode = GET_MODE (mem);
25547 if (mem_mode == SImode && reg_mode == DImode
25548 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
25549 non_prefixed = NON_PREFIXED_DS;
25551 else
25552 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25554 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25555 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
25556 else
25557 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25560 /* Whether a store instruction is a prefixed instruction. This is called from
25561 the prefixed attribute processing. */
25563 bool
25564 prefixed_store_p (rtx_insn *insn)
25566 /* Validate the insn to make sure it is a normal store insn. */
25567 extract_insn_cached (insn);
25568 if (recog_data.n_operands < 2)
25569 return false;
25571 rtx mem = recog_data.operand[0];
25572 rtx reg = recog_data.operand[1];
25574 if (!REG_P (reg) && !SUBREG_P (reg))
25575 return false;
25577 if (!MEM_P (mem))
25578 return false;
25580 /* Prefixed store instructions do not support update or indexed forms. */
25581 if (get_attr_indexed (insn) == INDEXED_YES
25582 || get_attr_update (insn) == UPDATE_YES)
25583 return false;
25585 machine_mode mem_mode = GET_MODE (mem);
25586 rtx addr = XEXP (mem, 0);
25587 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25589 /* Need to make sure we aren't looking at an stfs which doesn't look
25590 like the other things reg_to_non_prefixed/address_is_prefixed
25591 look for. */
25592 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
25593 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
25594 else
25595 return address_is_prefixed (addr, mem_mode, non_prefixed);
25598 /* Whether a load immediate or add instruction is a prefixed instruction. This
25599 is called from the prefixed attribute processing. */
25601 bool
25602 prefixed_paddi_p (rtx_insn *insn)
25604 rtx set = single_set (insn);
25605 if (!set)
25606 return false;
25608 rtx dest = SET_DEST (set);
25609 rtx src = SET_SRC (set);
25611 if (!REG_P (dest) && !SUBREG_P (dest))
25612 return false;
25614 /* Is this a load immediate that can't be done with a simple ADDI or
25615 ADDIS? */
25616 if (CONST_INT_P (src))
25617 return (satisfies_constraint_eI (src)
25618 && !satisfies_constraint_I (src)
25619 && !satisfies_constraint_L (src));
25621 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25622 ADDIS? */
25623 if (GET_CODE (src) == PLUS)
25625 rtx op1 = XEXP (src, 1);
25627 return (CONST_INT_P (op1)
25628 && satisfies_constraint_eI (op1)
25629 && !satisfies_constraint_I (op1)
25630 && !satisfies_constraint_L (op1));
25633 /* If not, is it a load of a PC-relative address? */
25634 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25635 return false;
25637 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25638 return false;
25640 enum insn_form iform = address_to_insn_form (src, Pmode,
25641 NON_PREFIXED_DEFAULT);
25643 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
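/* A concrete instance (value chosen for the example): loading 0x12345
   fits neither ADDI (not a signed 16-bit value) nor ADDIS (not a
   shifted 16-bit value), so it is emitted as the prefixed

	pli 9,0x12345

   and this function reports the insn as prefixed.  */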
25646 /* Whether the next instruction needs a 'p' prefix issued before the
25647 instruction is printed out. */
25648 static bool next_insn_prefixed_p;
25650 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25651 outputting the assembler code. On the PowerPC, we remember if the current
25652 insn is a prefixed insn where we need to emit a 'p' before the insn.
25654 In addition, if the insn is part of a PC-relative reference to an external
25655 label optimization, this is recorded also. */
25656 void
25657 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25659 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25660 return;
25663 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25664 We use it to emit a 'p' for prefixed insns that is set in
25665 FINAL_PRESCAN_INSN. */
25666 void
25667 rs6000_asm_output_opcode (FILE *stream)
25669 if (next_insn_prefixed_p)
25670 fprintf (stream, "p");
25672 return;
25675 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25676 should be adjusted to reflect any required changes. This macro is used when
25677 there is some systematic length adjustment required that would be difficult
25678 to express in the length attribute.
25680 On the PowerPC, we use this to adjust the length of an instruction if one or
25681 more prefixed instructions are generated, using the attribute
25682 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25683 hardware requires that a prefixed instruction does not cross a 64-byte
25684 boundary. This means the compiler has to assume the length of the first
25685 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25686 already set for the non-prefixed instruction, we just need to update for the
25687 difference. */
25689 int
25690 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25692 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
25694 rtx pattern = PATTERN (insn);
25695 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25696 && get_attr_prefixed (insn) == PREFIXED_YES)
25698 int num_prefixed = get_attr_max_prefixed_insns (insn);
25699 length += 4 * (num_prefixed + 1);
25703 return length;
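/* Worked example of the adjustment: an insn whose base length is 4 and
   which contains one prefixed instruction (max_prefixed_insns of 1) is
   reported as 4 + 4 * (1 + 1) = 12 bytes, allowing for the 8-byte
   prefixed form plus a possible alignment nop before a 64-byte
   boundary.  */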
25707 #ifdef HAVE_GAS_HIDDEN
25708 # define USE_HIDDEN_LINKONCE 1
25709 #else
25710 # define USE_HIDDEN_LINKONCE 0
25711 #endif
25713 /* Fills in the label name that should be used for a 476 link stack thunk. */
25715 void
25716 get_ppc476_thunk_name (char name[32])
25718 gcc_assert (TARGET_LINK_STACK);
25720 if (USE_HIDDEN_LINKONCE)
25721 sprintf (name, "__ppc476.get_thunk");
25722 else
25723 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25726 /* This function emits the simple thunk routine that is used to preserve
25727 the link stack on the 476 cpu. */
25729 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25730 static void
25731 rs6000_code_end (void)
25733 char name[32];
25734 tree decl;
25736 if (!TARGET_LINK_STACK)
25737 return;
25739 get_ppc476_thunk_name (name);
25741 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25742 build_function_type_list (void_type_node, NULL_TREE));
25743 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25744 NULL_TREE, void_type_node);
25745 TREE_PUBLIC (decl) = 1;
25746 TREE_STATIC (decl) = 1;
25748 #if RS6000_WEAK
25749 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25751 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25752 targetm.asm_out.unique_section (decl, 0);
25753 switch_to_section (get_named_section (decl, NULL, 0));
25754 DECL_WEAK (decl) = 1;
25755 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25756 targetm.asm_out.globalize_label (asm_out_file, name);
25757 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25758 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25760 else
25761 #endif
25763 switch_to_section (text_section);
25764 ASM_OUTPUT_LABEL (asm_out_file, name);
25767 DECL_INITIAL (decl) = make_node (BLOCK);
25768 current_function_decl = decl;
25769 allocate_struct_function (decl, false);
25770 init_function_start (decl);
25771 first_function_block_is_cold = false;
25772 /* Make sure unwind info is emitted for the thunk if needed. */
25773 final_start_function (emit_barrier (), asm_out_file, 1);
25775 fputs ("\tblr\n", asm_out_file);
25777 final_end_function ();
25778 init_insn_lengths ();
25779 free_after_compilation (cfun);
25780 set_cfun (NULL);
25781 current_function_decl = NULL;
25784 /* Add r30 to hard reg set if the prologue sets it up and it is not
25785 pic_offset_table_rtx. */
25787 static void
25788 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25790 if (!TARGET_SINGLE_PIC_BASE
25791 && TARGET_TOC
25792 && TARGET_MINIMAL_TOC
25793 && !constant_pool_empty_p ())
25794 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25795 if (cfun->machine->split_stack_argp_used)
25796 add_to_hard_reg_set (&set->set, Pmode, 12);
25798 /* Make sure the hard reg set doesn't include r2, which was possibly added
25799 via PIC_OFFSET_TABLE_REGNUM. */
25800 if (TARGET_TOC)
25801 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25805 /* Helper function for rs6000_split_logical to emit a logical instruction after
25806 splitting the operation into single GPR registers.
25808 DEST is the destination register.
25809 OP1 and OP2 are the input source registers.
25810 CODE is the base operation (AND, IOR, XOR, NOT).
25811 MODE is the machine mode.
25812 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25813 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25814 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25816 static void
25817 rs6000_split_logical_inner (rtx dest,
25818 rtx op1,
25819 rtx op2,
25820 enum rtx_code code,
25821 machine_mode mode,
25822 bool complement_final_p,
25823 bool complement_op1_p,
25824 bool complement_op2_p)
25826 rtx bool_rtx;
25828 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25829 if (op2 && CONST_INT_P (op2)
25830 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25831 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25833 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25834 HOST_WIDE_INT value = INTVAL (op2) & mask;
25836 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25837 if (code == AND)
25839 if (value == 0)
25841 emit_insn (gen_rtx_SET (dest, const0_rtx));
25842 return;
25845 else if (value == mask)
25847 if (!rtx_equal_p (dest, op1))
25848 emit_insn (gen_rtx_SET (dest, op1));
25849 return;
25853 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25854 into separate ORI/ORIS or XORI/XORIS instructions. */
25855 else if (code == IOR || code == XOR)
25857 if (value == 0)
25859 if (!rtx_equal_p (dest, op1))
25860 emit_insn (gen_rtx_SET (dest, op1));
25861 return;
25866 if (code == AND && mode == SImode
25867 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25869 emit_insn (gen_andsi3 (dest, op1, op2));
25870 return;
25873 if (complement_op1_p)
25874 op1 = gen_rtx_NOT (mode, op1);
25876 if (complement_op2_p)
25877 op2 = gen_rtx_NOT (mode, op2);
25879 /* For canonical RTL, if only one arm is inverted it is the first. */
25880 if (!complement_op1_p && complement_op2_p)
25881 std::swap (op1, op2);
25883 bool_rtx = ((code == NOT)
25884 ? gen_rtx_NOT (mode, op1)
25885 : gen_rtx_fmt_ee (code, mode, op1, op2));
25887 if (complement_final_p)
25888 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25890 emit_insn (gen_rtx_SET (dest, bool_rtx));
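/* A canonicalization example in schematic RTL: an AND with only the
   second input complemented is emitted as

     (set (reg d) (and (not (reg c)) (reg b)))

   i.e. the complemented arm is swapped into the first position, which
   is the canonical form matched by patterns such as "andc".  */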
25893 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25894 operations are split immediately during RTL generation to allow for more
25895 optimizations of the AND/IOR/XOR.
25897 OPERANDS is an array containing the destination and two input operands.
25898 CODE is the base operation (AND, IOR, XOR, NOT).
25899 MODE is the machine mode.
25900 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25901 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25902 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25903 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25904 formation of the AND instructions. */
25906 static void
25907 rs6000_split_logical_di (rtx operands[3],
25908 enum rtx_code code,
25909 bool complement_final_p,
25910 bool complement_op1_p,
25911 bool complement_op2_p)
25913 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25914 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25915 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25916 enum hi_lo { hi = 0, lo = 1 };
25917 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25918 size_t i;
25920 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25921 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25922 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25923 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25925 if (code == NOT)
25926 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25927 else
25929 if (!CONST_INT_P (operands[2]))
25931 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25932 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25934 else
25936 HOST_WIDE_INT value = INTVAL (operands[2]);
25937 HOST_WIDE_INT value_hi_lo[2];
25939 gcc_assert (!complement_final_p);
25940 gcc_assert (!complement_op1_p);
25941 gcc_assert (!complement_op2_p);
25943 value_hi_lo[hi] = value >> 32;
25944 value_hi_lo[lo] = value & lower_32bits;
25946 for (i = 0; i < 2; i++)
25948 HOST_WIDE_INT sub_value = value_hi_lo[i];
25950 if (sub_value & sign_bit)
25951 sub_value |= upper_32bits;
25953 op2_hi_lo[i] = GEN_INT (sub_value);
25955 /* If this is an AND instruction, check to see if we need to load
25956 the value in a register. */
25957 if (code == AND && sub_value != -1 && sub_value != 0
25958 && !and_operand (op2_hi_lo[i], SImode))
25959 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25964 for (i = 0; i < 2; i++)
25966 /* Split large IOR/XOR operations. */
25967 if ((code == IOR || code == XOR)
25968 && CONST_INT_P (op2_hi_lo[i])
25969 && !complement_final_p
25970 && !complement_op1_p
25971 && !complement_op2_p
25972 && !logical_const_operand (op2_hi_lo[i], SImode))
25974 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25975 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25976 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25977 rtx tmp = gen_reg_rtx (SImode);
25979 /* Make sure the constant is sign extended. */
25980 if ((hi_16bits & sign_bit) != 0)
25981 hi_16bits |= upper_32bits;
25983 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25984 code, SImode, false, false, false);
25986 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25987 code, SImode, false, false, false);
25989 else
25990 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25991 code, SImode, complement_final_p,
25992 complement_op1_p, complement_op2_p);
25995 return;
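/* An illustrative split (register numbers arbitrary): on a 32-bit
   target,

     (xor:DI (reg:DI x) (const_int 0x12345678))

   leaves the high word alone (XOR with 0 becomes a move, or nothing
   when input and output coincide), while the low word needs two insns
   because both halves of 0x12345678 are nonzero:

	xoris rN,rN,0x1234
	xori  rN,rN,0x5678  */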
25998 /* Split the insns that make up boolean operations operating on multiple GPR
25999 registers. The boolean MD patterns ensure that the inputs either are
26000 exactly the same as the output registers, or there is no overlap.
26002 OPERANDS is an array containing the destination and two input operands.
26003 CODE is the base operation (AND, IOR, XOR, NOT).
26004 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26005 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26006 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26008 void
26009 rs6000_split_logical (rtx operands[3],
26010 enum rtx_code code,
26011 bool complement_final_p,
26012 bool complement_op1_p,
26013 bool complement_op2_p)
26015 machine_mode mode = GET_MODE (operands[0]);
26016 machine_mode sub_mode;
26017 rtx op0, op1, op2;
26018 int sub_size, regno0, regno1, nregs, i;
26020 /* If this is DImode, use the specialized version that can run before
26021 register allocation. */
26022 if (mode == DImode && !TARGET_POWERPC64)
26024 rs6000_split_logical_di (operands, code, complement_final_p,
26025 complement_op1_p, complement_op2_p);
26026 return;
26029 op0 = operands[0];
26030 op1 = operands[1];
26031 op2 = (code == NOT) ? NULL_RTX : operands[2];
26032 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
26033 sub_size = GET_MODE_SIZE (sub_mode);
26034 regno0 = REGNO (op0);
26035 regno1 = REGNO (op1);
26037 gcc_assert (reload_completed);
26038 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26039 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
26041 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
26042 gcc_assert (nregs > 1);
26044 if (op2 && REG_P (op2))
26045 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
26047 for (i = 0; i < nregs; i++)
26049 int offset = i * sub_size;
26050 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
26051 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
26052 rtx sub_op2 = ((code == NOT)
26053 ? NULL_RTX
26054 : simplify_subreg (sub_mode, op2, mode, offset));
26056 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
26057 complement_final_p, complement_op1_p,
26058 complement_op2_p);
26061 return;
26065 /* Return true if the peephole2 can combine an addis instruction with a
26066 load whose offset pairs with it, so that the two can be fused together
26067 on a power8. */
26069 bool
26070 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
26071 rtx addis_value, /* addis value. */
26072 rtx target, /* target register that is loaded. */
26073 rtx mem) /* bottom part of the memory addr. */
26075 rtx addr;
26076 rtx base_reg;
26078 /* Validate arguments. */
26079 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
26080 return false;
26082 if (!base_reg_operand (target, GET_MODE (target)))
26083 return false;
26085 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
26086 return false;
26088 /* Allow sign/zero extension. */
26089 if (GET_CODE (mem) == ZERO_EXTEND
26090 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
26091 mem = XEXP (mem, 0);
26093 if (!MEM_P (mem))
26094 return false;
26096 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
26097 return false;
26099 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
26100 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
26101 return false;
26103 /* Validate that the register used to load the high value is either the
26104 register being loaded, or we can safely replace its use.
26106 This function is only called from the peephole2 pass and we assume that
26107 there are 2 instructions in the peephole (addis and load), so we want to
26108 check that the target register was not used in the memory address and
26109 that the register holding the addis result is dead after the peephole.
26110 if (REGNO (addis_reg) != REGNO (target))
26112 if (reg_mentioned_p (target, mem))
26113 return false;
26115 if (!peep2_reg_dead_p (2, addis_reg))
26116 return false;
26118 /* If the target register being loaded is the stack pointer, we must
26119 avoid loading any other value into it, even temporarily. */
26120 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
26121 return false;
26124 base_reg = XEXP (addr, 0);
26125 return REGNO (addis_reg) == REGNO (base_reg);
26128 /* During the peephole2 pass, adjust and expand the insns for a load fusion
26129 sequence. We adjust the addis register to use the target register. If the
26130 load sign extends, we adjust the code to do a zero-extending load followed
26131 by an explicit sign extension, since the fusion only covers zero-extending
26132 loads.
26134 The operands are:
26135 operands[0] register set with addis (to be replaced with target)
26136 operands[1] value set via addis
26137 operands[2] target register being loaded
26138 operands[3] D-form memory reference using operands[0]. */
26140 void
26141 expand_fusion_gpr_load (rtx *operands)
26143 rtx addis_value = operands[1];
26144 rtx target = operands[2];
26145 rtx orig_mem = operands[3];
26146 rtx new_addr, new_mem, orig_addr, offset;
26147 enum rtx_code plus_or_lo_sum;
26148 machine_mode target_mode = GET_MODE (target);
26149 machine_mode extend_mode = target_mode;
26150 machine_mode ptr_mode = Pmode;
26151 enum rtx_code extend = UNKNOWN;
26153 if (GET_CODE (orig_mem) == ZERO_EXTEND
26154 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
26156 extend = GET_CODE (orig_mem);
26157 orig_mem = XEXP (orig_mem, 0);
26158 target_mode = GET_MODE (orig_mem);
26161 gcc_assert (MEM_P (orig_mem));
26163 orig_addr = XEXP (orig_mem, 0);
26164 plus_or_lo_sum = GET_CODE (orig_addr);
26165 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
26167 offset = XEXP (orig_addr, 1);
26168 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
26169 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
26171 if (extend != UNKNOWN)
26172 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
26174 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
26175 UNSPEC_FUSION_GPR);
26176 emit_insn (gen_rtx_SET (target, new_mem));
26178 if (extend == SIGN_EXTEND)
26180 int sub_off = ((BYTES_BIG_ENDIAN)
26181 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
26182 : 0);
26183 rtx sign_reg
26184 = simplify_subreg (target_mode, target, extend_mode, sub_off);
26186 emit_insn (gen_rtx_SET (target,
26187 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
26190 return;
26193 /* Emit the addis instruction that will be part of a fused instruction
26194 sequence. */
26196 void
26197 emit_fusion_addis (rtx target, rtx addis_value)
26199 rtx fuse_ops[10];
26200 const char *addis_str = NULL;
26202 /* Emit the addis instruction. */
26203 fuse_ops[0] = target;
26204 if (satisfies_constraint_L (addis_value))
26206 fuse_ops[1] = addis_value;
26207 addis_str = "lis %0,%v1";
26210 else if (GET_CODE (addis_value) == PLUS)
26212 rtx op0 = XEXP (addis_value, 0);
26213 rtx op1 = XEXP (addis_value, 1);
26215 if (REG_P (op0) && CONST_INT_P (op1)
26216 && satisfies_constraint_L (op1))
26218 fuse_ops[1] = op0;
26219 fuse_ops[2] = op1;
26220 addis_str = "addis %0,%1,%v2";
26224 else if (GET_CODE (addis_value) == HIGH)
26226 rtx value = XEXP (addis_value, 0);
26227 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
26229 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
26230 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
26231 if (TARGET_ELF)
26232 addis_str = "addis %0,%2,%1@toc@ha";
26234 else if (TARGET_XCOFF)
26235 addis_str = "addis %0,%1@u(%2)";
26237 else
26238 gcc_unreachable ();
26241 else if (GET_CODE (value) == PLUS)
26243 rtx op0 = XEXP (value, 0);
26244 rtx op1 = XEXP (value, 1);
26246 if (GET_CODE (op0) == UNSPEC
26247 && XINT (op0, 1) == UNSPEC_TOCREL
26248 && CONST_INT_P (op1))
26250 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
26251 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
26252 fuse_ops[3] = op1;
26253 if (TARGET_ELF)
26254 addis_str = "addis %0,%2,%1+%3@toc@ha";
26256 else if (TARGET_XCOFF)
26257 addis_str = "addis %0,%1+%3@u(%2)";
26259 else
26260 gcc_unreachable ();
26264 else if (satisfies_constraint_L (value))
26266 fuse_ops[1] = value;
26267 addis_str = "lis %0,%v1";
26270 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
26272 fuse_ops[1] = value;
26273 addis_str = "lis %0,%1@ha";
26277 if (!addis_str)
26278 fatal_insn ("Could not generate addis value for fusion", addis_value);
26280 output_asm_insn (addis_str, fuse_ops);
26283 /* Emit a D-form load or store instruction that is the second instruction
26284 of a fusion sequence. */
26286 static void
26287 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
26289 rtx fuse_ops[10];
26290 char insn_template[80];
26292 fuse_ops[0] = load_reg;
26293 fuse_ops[1] = addis_reg;
26295 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
26297 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
26298 fuse_ops[2] = offset;
26299 output_asm_insn (insn_template, fuse_ops);
26302 else if (GET_CODE (offset) == UNSPEC
26303 && XINT (offset, 1) == UNSPEC_TOCREL)
26305 if (TARGET_ELF)
26306 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
26308 else if (TARGET_XCOFF)
26309 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26311 else
26312 gcc_unreachable ();
26314 fuse_ops[2] = XVECEXP (offset, 0, 0);
26315 output_asm_insn (insn_template, fuse_ops);
26318 else if (GET_CODE (offset) == PLUS
26319 && GET_CODE (XEXP (offset, 0)) == UNSPEC
26320 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
26321 && CONST_INT_P (XEXP (offset, 1)))
26323 rtx tocrel_unspec = XEXP (offset, 0);
26324 if (TARGET_ELF)
26325 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
26327 else if (TARGET_XCOFF)
26328 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
26330 else
26331 gcc_unreachable ();
26333 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
26334 fuse_ops[3] = XEXP (offset, 1);
26335 output_asm_insn (insn_template, fuse_ops);
26338 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
26340 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
26342 fuse_ops[2] = offset;
26343 output_asm_insn (insn_template, fuse_ops);
26346 else
26347 fatal_insn ("Unable to generate load/store offset for fusion", offset);
26349 return;
26352 /* Given an address, convert it into the addis and load offset parts. Addresses
26353 created during the peephole2 process look like:
26354 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
26355 (unspec [(...)] UNSPEC_TOCREL)) */
26357 static void
26358 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
26360 rtx hi, lo;
26362 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
26364 hi = XEXP (addr, 0);
26365 lo = XEXP (addr, 1);
26367 else
26368 gcc_unreachable ();
26370 *p_hi = hi;
26371 *p_lo = lo;
26374 /* Return a string to fuse an addis instruction with a gpr load into the
26375 same register that the addis instruction set up. The address that is used
26376 is the logical address that was formed during peephole2:
26377 (lo_sum (high) (low-part))
26379 The code is complicated, so we call output_asm_insn directly, and just
26380 return "". */
26382 const char *
26383 emit_fusion_gpr_load (rtx target, rtx mem)
26385 rtx addis_value;
26386 rtx addr;
26387 rtx load_offset;
26388 const char *load_str = NULL;
26389 machine_mode mode;
26391 if (GET_CODE (mem) == ZERO_EXTEND)
26392 mem = XEXP (mem, 0);
26394 gcc_assert (REG_P (target) && MEM_P (mem));
26396 addr = XEXP (mem, 0);
26397 fusion_split_address (addr, &addis_value, &load_offset);
26399 /* Now emit the load instruction to the same register. */
26400 mode = GET_MODE (mem);
26401 switch (mode)
26403 case E_QImode:
26404 load_str = "lbz";
26405 break;
26407 case E_HImode:
26408 load_str = "lhz";
26409 break;
26411 case E_SImode:
26412 case E_SFmode:
26413 load_str = "lwz";
26414 break;
26416 case E_DImode:
26417 case E_DFmode:
26418 gcc_assert (TARGET_POWERPC64);
26419 load_str = "ld";
26420 break;
26422 default:
26423 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
26426 /* Emit the addis instruction. */
26427 emit_fusion_addis (target, addis_value);
26429 /* Emit the D-form load instruction. */
26430 emit_fusion_load (target, target, load_offset, load_str);
26432 return "";
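/* Putting the two halves together, a fused TOC-relative load of some
   global "var" (name invented for the example) is printed as the
   back-to-back pair

	addis 9,2,var@toc@ha
	lwz 9,var@toc@l(9)

   which power8 can fuse because the load both uses and overwrites the
   register that the addis set.  */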
26436 #ifdef RS6000_GLIBC_ATOMIC_FENV
26437 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
26438 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
26439 #endif
26441 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
26443 static void
26444 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
26446 if (!TARGET_HARD_FLOAT)
26448 #ifdef RS6000_GLIBC_ATOMIC_FENV
26449 if (atomic_hold_decl == NULL_TREE)
26451 atomic_hold_decl
26452 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26453 get_identifier ("__atomic_feholdexcept"),
26454 build_function_type_list (void_type_node,
26455 double_ptr_type_node,
26456 NULL_TREE));
26457 TREE_PUBLIC (atomic_hold_decl) = 1;
26458 DECL_EXTERNAL (atomic_hold_decl) = 1;
26461 if (atomic_clear_decl == NULL_TREE)
26463 atomic_clear_decl
26464 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26465 get_identifier ("__atomic_feclearexcept"),
26466 build_function_type_list (void_type_node,
26467 NULL_TREE));
26468 TREE_PUBLIC (atomic_clear_decl) = 1;
26469 DECL_EXTERNAL (atomic_clear_decl) = 1;
26472 tree const_double = build_qualified_type (double_type_node,
26473 TYPE_QUAL_CONST);
26474 tree const_double_ptr = build_pointer_type (const_double);
26475 if (atomic_update_decl == NULL_TREE)
26477 atomic_update_decl
26478 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
26479 get_identifier ("__atomic_feupdateenv"),
26480 build_function_type_list (void_type_node,
26481 const_double_ptr,
26482 NULL_TREE));
26483 TREE_PUBLIC (atomic_update_decl) = 1;
26484 DECL_EXTERNAL (atomic_update_decl) = 1;
26487 tree fenv_var = create_tmp_var_raw (double_type_node);
26488 TREE_ADDRESSABLE (fenv_var) = 1;
26489 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
26490 build4 (TARGET_EXPR, double_type_node, fenv_var,
26491 void_node, NULL_TREE, NULL_TREE));
26493 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
26494 *clear = build_call_expr (atomic_clear_decl, 0);
26495 *update = build_call_expr (atomic_update_decl, 1,
26496 fold_convert (const_double_ptr, fenv_addr));
26497 #endif
26498 return;
26501 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
26502 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
26503 tree call_mffs = build_call_expr (mffs, 0);
26505 /* Generates the equivalent of feholdexcept (&fenv_var)
26507 *fenv_var = __builtin_mffs ();
26508 double fenv_hold;
26509 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
26510 __builtin_mtfsf (0xff, fenv_hold); */
26512 /* Mask to clear everything except for the rounding modes and non-IEEE
26513 arithmetic flag. */
26514 const unsigned HOST_WIDE_INT hold_exception_mask
26515 = HOST_WIDE_INT_C (0xffffffff00000007);
26517 tree fenv_var = create_tmp_var_raw (double_type_node);
26519 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
26520 NULL_TREE, NULL_TREE);
26522 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
26523 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26524 build_int_cst (uint64_type_node,
26525 hold_exception_mask));
26527 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26528 fenv_llu_and);
26530 tree hold_mtfsf = build_call_expr (mtfsf, 2,
26531 build_int_cst (unsigned_type_node, 0xff),
26532 fenv_hold_mtfsf);
26534 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
26536 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
26538 double fenv_clear = __builtin_mffs ();
26539 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
26540 __builtin_mtfsf (0xff, fenv_clear); */
26542 /* Mask to clear the entire lower FPSCR word, including the exception
26543 bits, the exception enables, and the rounding modes. */
26544 const unsigned HOST_WIDE_INT clear_exception_mask
26545 = HOST_WIDE_INT_C (0xffffffff00000000);
26547 tree fenv_clear = create_tmp_var_raw (double_type_node);
26549 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
26550 call_mffs, NULL_TREE, NULL_TREE);
26552 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
26553 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
26554 fenv_clean_llu,
26555 build_int_cst (uint64_type_node,
26556 clear_exception_mask));
26558 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26559 fenv_clear_llu_and);
26561 tree clear_mtfsf = build_call_expr (mtfsf, 2,
26562 build_int_cst (unsigned_type_node, 0xff),
26563 fenv_clear_mtfsf);
26565 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
26567 /* Generates the equivalent of feupdateenv (&fenv_var)
26569 double old_fenv = __builtin_mffs ();
26570 double fenv_update;
26571 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
26572 (*(uint64_t*)fenv_var & 0x1ff80fff);
26573 __builtin_mtfsf (0xff, fenv_update); */
26575 const unsigned HOST_WIDE_INT update_exception_mask
26576 = HOST_WIDE_INT_C (0xffffffff1fffff00);
26577 const unsigned HOST_WIDE_INT new_exception_mask
26578 = HOST_WIDE_INT_C (0x1ff80fff);
26580 tree old_fenv = create_tmp_var_raw (double_type_node);
26581 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
26582 call_mffs, NULL_TREE, NULL_TREE);
26584 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
26585 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26586 build_int_cst (uint64_type_node,
26587 update_exception_mask));
26589 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26590 build_int_cst (uint64_type_node,
26591 new_exception_mask));
26593 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26594 old_llu_and, new_llu_and);
26596 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26597 new_llu_mask);
26599 tree update_mtfsf = build_call_expr (mtfsf, 2,
26600 build_int_cst (unsigned_type_node, 0xff),
26601 fenv_update_mtfsf);
26603 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
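/* An illustrative sketch (not compiler code) of how the three sequences
   built by rs6000_atomic_assign_expand_fenv are used: for a C11 atomic
   compound assignment with floating-point operands, the middle end emits
   roughly

       HOLD;                          // save env, mask FP traps
     retry:
       oldval = load; newval = oldval OP x;
       if (!compare_and_swap (&obj, oldval, newval))
         { CLEAR; goto retry; }       // drop flags from the failed try
       UPDATE;                        // restore env, merge new exceptions

   so HOLD runs once before the exchange loop, CLEAR runs on every retry,
   and UPDATE runs once after the store succeeds.  */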
26606 void
26607 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26609 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26611 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26612 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26614 /* The layout of the destination of the vmrgew instruction is:
26615 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26616 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26617 vmrgew instruction will be correct. */
26618 if (BYTES_BIG_ENDIAN)
26620 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26621 GEN_INT (0)));
26622 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26623 GEN_INT (3)));
26625 else
26627 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26628 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26631 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26632 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26634 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26635 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26637 if (BYTES_BIG_ENDIAN)
26638 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26639 else
26640 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
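/* A scalar model of rs6000_generate_float2_double_code above (illustration
   only, assuming SRC1/SRC2 hold V2DF values and DST holds a V4SF value):

     dst[0] = (float) src1[0];  dst[1] = (float) src1[1];
     dst[2] = (float) src2[0];  dst[3] = (float) src2[1];

   xvcvdpsp leaves each converted double in an even word slot, and vmrgew
   interleaves those even words from the two intermediate vectors.  */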
26643 void
26644 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26646 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26648 rtx_tmp0 = gen_reg_rtx (V2DImode);
26649 rtx_tmp1 = gen_reg_rtx (V2DImode);
26651 /* The layout of the destination of the vmrgew instruction is:
26652 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26653 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26654 vmrgew instruction will be correct. */
26655 if (BYTES_BIG_ENDIAN)
26657 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26658 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26660 else
26662 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26663 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26666 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26667 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26669 if (signed_convert)
26671 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26672 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26674 else
26676 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26677 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26680 if (BYTES_BIG_ENDIAN)
26681 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26682 else
26683 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
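/* rs6000_generate_float2_code above follows the same scalar model, but
   converts V2DI elements with xvcvsxdsp or xvcvuxdsp (illustration only):

     dst[0] = (float) src1[0];  dst[1] = (float) src1[1];
     dst[2] = (float) src2[0];  dst[3] = (float) src2[1];  */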
26686 void
26687 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26688 rtx src2)
26690 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26692 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26693 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26695 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26696 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26698 rtx_tmp2 = gen_reg_rtx (V4SImode);
26699 rtx_tmp3 = gen_reg_rtx (V4SImode);
26701 if (signed_convert)
26703 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26704 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26706 else
26708 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26709 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26712 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
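/* Scalar model of rs6000_generate_vsigned2_code above (illustration only),
   truncating each double to a 32-bit integer, signed or unsigned as
   requested:

     dst[0] = (int) src1[0];  dst[1] = (int) src1[1];
     dst[2] = (int) src2[0];  dst[3] = (int) src2[1];  */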
26715 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26717 static bool
26718 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26719 optimization_type opt_type)
26721 switch (op)
26723 case rsqrt_optab:
26724 return (opt_type == OPTIMIZE_FOR_SPEED
26725 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26727 default:
26728 return true;
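/* For example (illustrative): when RS6000_RECIP_AUTO_RSQRTE_P (SFmode)
   holds and we optimize for speed, 1.0f / sqrtf (x) under -ffast-math can
   expand through the rsqrt optab to a reciprocal square root estimate
   refined by Newton-Raphson steps; under -Os the hook declines and the
   plain square root and divide are kept.  */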
26732 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26734 static HOST_WIDE_INT
26735 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26737 if (TREE_CODE (exp) == STRING_CST
26738 && (STRICT_ALIGNMENT || !optimize_size))
26739 return MAX (align, BITS_PER_WORD);
26740 return align;
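/* E.g. (illustrative): rs6000_constant_alignment aligns a string constant
   such as "hello" to BITS_PER_WORD (32 or 64 bits depending on the ABI) so
   word-sized loads and stores can operate on it, but keeps the smaller
   natural alignment when optimizing for size on targets without
   STRICT_ALIGNMENT.  */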
26743 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26745 static HOST_WIDE_INT
26746 rs6000_starting_frame_offset (void)
26748 if (FRAME_GROWS_DOWNWARD)
26749 return 0;
26750 return RS6000_STARTING_FRAME_OFFSET;
26754 /* Create an alias for a mangled name where we have changed the mangling (in
26755 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26756 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26758 #if TARGET_ELF && RS6000_WEAK
26759 static void
26760 rs6000_globalize_decl_name (FILE * stream, tree decl)
26762 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26764 targetm.asm_out.globalize_label (stream, name);
26766 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26768 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26769 const char *old_name;
26771 ieee128_mangling_gcc_8_1 = true;
26772 lang_hooks.set_decl_assembler_name (decl);
26773 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26774 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26775 ieee128_mangling_gcc_8_1 = false;
26777 if (strcmp (name, old_name) != 0)
26779 fprintf (stream, "\t.weak %s\n", old_name);
26780 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26784 #endif
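/* An illustrative example for rs6000_globalize_decl_name, using a
   hypothetical C++ function f (long double) with -mabi=ieeelongdouble:
   the current mangling gives _Z1fu9__ieee128, and the hook additionally
   emits

       .weak _Z1fU10__float128
       .set  _Z1fU10__float128,_Z1fu9__ieee128

   so objects compiled against the GCC 8.1 mangling still link.  */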
26787 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
26788 function names from <foo>l to <foo>f128 if the default long double type is
26789 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26790 include file switches the names on systems that support long double as IEEE
26791 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26792 In the future, glibc will export names like __ieee128_sinf128 and we can
26793 switch to using those instead of using sinf128, which pollutes the user's
26794 namespace.
26796 This will switch the names for Fortran math functions as well (Fortran does
26797 not use math.h). However, Fortran needs other changes to the compiler and
26798 library before you can switch the real*16 type at compile time.
26800 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26801 only do this if the default is that long double is IBM extended double, and
26802 the user asked for IEEE 128-bit. */
26804 static tree
26805 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26807 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26808 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26810 size_t len = IDENTIFIER_LENGTH (id);
26811 const char *name = IDENTIFIER_POINTER (id);
26813 if (name[len - 1] == 'l')
26815 bool uses_ieee128_p = false;
26816 tree type = TREE_TYPE (decl);
26817 machine_mode ret_mode = TYPE_MODE (type);
26819 /* See if the function returns an IEEE 128-bit floating point type or
26820 complex type. */
26821 if (ret_mode == TFmode || ret_mode == TCmode)
26822 uses_ieee128_p = true;
26823 else
26825 function_args_iterator args_iter;
26826 tree arg;
26828 /* See if the function passes an IEEE 128-bit floating point type
26829 or complex type. */
26830 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26832 machine_mode arg_mode = TYPE_MODE (arg);
26833 if (arg_mode == TFmode || arg_mode == TCmode)
26835 uses_ieee128_p = true;
26836 break;
26841 /* If we passed or returned an IEEE 128-bit floating point type,
26842 change the name. */
26843 if (uses_ieee128_p)
26845 char *name2 = (char *) alloca (len + 4);
26846 memcpy (name2, name, len - 1);
26847 strcpy (name2 + len - 1, "f128");
26848 id = get_identifier (name2);
26853 return id;
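/* For example (illustrative): with IBM extended double as the configured
   default but -mabi=ieeelongdouble in effect, a direct call to
   __builtin_sinl is renamed here from "sinl" to "sinf128", the same name
   math.h would have chosen for an IEEE 128-bit long double.  */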
26856 /* Predict whether the given loop in gimple will be transformed in the RTL
26857 doloop_optimize pass. */
26859 static bool
26860 rs6000_predict_doloop_p (struct loop *loop)
26862 gcc_assert (loop);
26864 /* On rs6000, targetm.can_use_doloop_p is actually
26865 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26866 if (loop->inner != NULL)
26868 if (dump_file && (dump_flags & TDF_DETAILS))
26869 fprintf (dump_file, "Predict doloop failure due to"
26870 " loop nesting.\n");
26871 return false;
26874 return true;
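/* E.g. (illustrative):

     for (i = 0; i < n; i++)      // not predicted: contains an inner loop
       for (j = 0; j < m; j++)    // predicted: innermost
         ...

   Only the inner loop is predicted to become a count-register (bdnz)
   loop.  */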
26877 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
26879 static bool
26880 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
26882 gcc_assert (MEM_P (mem));
26884 /* curr_insn_transform()'s handling of subregs cannot handle AltiVec
26885 AND-style addresses, so don't allow MEMs with those address types to be
26886 substituted as an equivalent expression. See PR93974 for details. */
26887 if (GET_CODE (XEXP (mem, 0)) == AND)
26888 return true;
26890 return false;
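/* The rejected address form is the AltiVec alignment mask (illustrative):

     (mem (and (reg) (const_int -16)))

   which the lvx/stvx patterns use to model their forced 16-byte
   alignment.  */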
26893 /* Implement TARGET_INVALID_CONVERSION. */
26895 static const char *
26896 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
26898 /* Make sure we're working with the canonical types. */
26899 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
26900 fromtype = TYPE_CANONICAL (fromtype);
26901 if (TYPE_CANONICAL (totype) != NULL_TREE)
26902 totype = TYPE_CANONICAL (totype);
26904 machine_mode frommode = TYPE_MODE (fromtype);
26905 machine_mode tomode = TYPE_MODE (totype);
26907 if (frommode != tomode)
26909 /* Do not allow conversions to/from PXImode and POImode types. */
26910 if (frommode == PXImode)
26911 return N_("invalid conversion from type %<__vector_quad%>");
26912 if (tomode == PXImode)
26913 return N_("invalid conversion to type %<__vector_quad%>");
26914 if (frommode == POImode)
26915 return N_("invalid conversion from type %<__vector_pair%>");
26916 if (tomode == POImode)
26917 return N_("invalid conversion to type %<__vector_pair%>");
26919 else if (POINTER_TYPE_P (fromtype) && POINTER_TYPE_P (totype))
26921 /* We really care about the modes of the base types. */
26922 frommode = TYPE_MODE (TREE_TYPE (fromtype));
26923 tomode = TYPE_MODE (TREE_TYPE (totype));
26925 /* Do not allow conversions to/from PXImode and POImode pointer
26926 types, except to/from void pointers. */
26927 if (frommode != tomode
26928 && frommode != VOIDmode
26929 && tomode != VOIDmode)
26931 if (frommode == PXImode)
26932 return N_("invalid conversion from type %<* __vector_quad%>");
26933 if (tomode == PXImode)
26934 return N_("invalid conversion to type %<* __vector_quad%>");
26935 if (frommode == POImode)
26936 return N_("invalid conversion from type %<* __vector_pair%>");
26937 if (tomode == POImode)
26938 return N_("invalid conversion to type %<* __vector_pair%>");
26942 /* Conversion allowed. */
26943 return NULL;
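/* For example (illustrative), both conversions below are rejected by
   rs6000_invalid_conversion, while casts through void * stay valid:

     __vector_quad vq;
     vector double vd = (vector double) vq;        // invalid conversion
     __vector_pair *vpp = (__vector_pair *) &vq;   // invalid pointer conv
*/

/* Return the target bit pattern of the SFmode constant OPERAND as a
   32-bit integer image.  */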
26946 long long
26947 rs6000_const_f32_to_i32 (rtx operand)
26949 long long value;
26950 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
26952 gcc_assert (GET_MODE (operand) == SFmode);
26953 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
26954 return value;
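/* Worked example (illustrative): 1.0f has the IEEE single-precision image
   0x3f800000, so rs6000_const_f32_to_i32 returns 0x3f800000 for the SFmode
   constant 1.0; -0.0f yields 0x80000000.  */

/* Emit an xxspltidp instruction: VALUE is a single-precision image that
   the instruction converts to double precision and splats into both
   elements of DST.  Diagnose subnormal inputs, for which the result is
   undefined.  */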
26957 void
26958 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
26962 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
26963 inform (input_location,
26964 "the result for the xxspltidp instruction is undefined for subnormal input values");
26965 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
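/* For example (illustrative): VALUE 0x40200000 is the image of 2.5f, so
   the emitted instruction loads { 2.5, 2.5 } into the V2DF destination;
   VALUE 0x00000001 is subnormal and triggers the note above.  */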
26968 struct gcc_target targetm = TARGET_INITIALIZER;
26970 #include "gt-rs6000.h"