Use the SIGNED_16BIT_OFFSET_EXTRA_P macro for 16-bit signed tests.
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
bloba4a38822b43ec2d7cb0037499f3d000f53e41ae1
1 /* Subroutines used for code generation on IBM RS/6000.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "gimple-fold.h"
61 #include "gimple-iterator.h"
62 #include "gimple-ssa.h"
63 #include "gimple-walk.h"
64 #include "intl.h"
65 #include "params.h"
66 #include "tm-constrs.h"
67 #include "tree-vectorizer.h"
68 #include "target-globals.h"
69 #include "builtins.h"
70 #include "tree-vector-builder.h"
71 #include "context.h"
72 #include "tree-pass.h"
73 #include "except.h"
74 #if TARGET_XCOFF
75 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
76 #endif
77 #include "case-cfn-macros.h"
78 #include "ppc-auxv.h"
79 #include "tree-ssa-propagate.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "rs6000-internal.h"
84 /* This file should be included last. */
85 #include "target-def.h"
87 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
88 systems will also set long double to be IEEE 128-bit. AIX and Darwin
89 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
90 those systems will not pick up this default. This needs to be after all
91 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
92 properly defined. */
93 #ifndef TARGET_IEEEQUAD_DEFAULT
94 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
95 #define TARGET_IEEEQUAD_DEFAULT 1
96 #else
97 #define TARGET_IEEEQUAD_DEFAULT 0
98 #endif
99 #endif
101 /* Support targetm.vectorize.builtin_mask_for_load. */
102 GTY(()) tree altivec_builtin_mask_for_load;
104 /* Set to nonzero once AIX common-mode calls have been defined. */
105 static GTY(()) int common_mode_defined;
107 #ifdef USING_ELFOS_H
108 /* Counter for labels which are to be placed in .fixup. */
109 int fixuplabelno = 0;
110 #endif
112 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
113 int dot_symbols;
115 /* Specify the machine mode that pointers have. After generation of rtl, the
116 compiler makes no further distinction between pointers and any other objects
117 of this machine mode. */
118 scalar_int_mode rs6000_pmode;
120 #if TARGET_ELF
121 /* Note whether IEEE 128-bit floating point was passed or returned, either as
122 the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
123 floating point. We changed the default C++ mangling for these types and we
124 may want to generate a weak alias of the old mangling (U10__float128) to the
125 new mangling (u9__ieee128). */
126 bool rs6000_passes_ieee128 = false;
127 #endif
129 /* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
130 name used in current releases (i.e. u9__ieee128).  */
131 static bool ieee128_mangling_gcc_8_1;
133 /* Width in bits of a pointer. */
134 unsigned rs6000_pointer_size;
136 #ifdef HAVE_AS_GNU_ATTRIBUTE
137 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
138 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
139 # endif
140 /* Flag whether floating point values have been passed/returned.
141 Note that this doesn't say whether fprs are used, since the
142 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
143 should be set for soft-float values passed in gprs and ieee128
144 values passed in vsx registers. */
145 bool rs6000_passes_float = false;
146 bool rs6000_passes_long_double = false;
147 /* Flag whether vector values have been passed/returned. */
148 bool rs6000_passes_vector = false;
149 /* Flag whether small (<= 8 byte) structures have been returned. */
150 bool rs6000_returns_struct = false;
151 #endif
153 /* Value is TRUE if register/mode pair is acceptable. */
154 static bool rs6000_hard_regno_mode_ok_p
155 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
157 /* Maximum number of registers needed for a given register class and mode. */
158 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
160 /* How many registers are needed for a given register and mode. */
161 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
163 /* Map register number to register class. */
164 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
166 static int dbg_cost_ctrl;
168 /* Built in types. */
169 tree rs6000_builtin_types[RS6000_BTI_MAX];
170 tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
172 /* Flag to say the TOC is initialized */
173 int toc_initialized, need_toc_init;
174 char toc_label_name[10];
176 /* Cached value of rs6000_variable_issue. This is cached in
177 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
178 static short cached_can_issue_more;
180 static GTY(()) section *read_only_data_section;
181 static GTY(()) section *private_data_section;
182 static GTY(()) section *tls_data_section;
183 static GTY(()) section *tls_private_data_section;
184 static GTY(()) section *read_only_private_data_section;
185 static GTY(()) section *sdata2_section;
187 extern GTY(()) section *toc_section;
188 section *toc_section = 0;
190 /* Describe the vector unit used for modes. */
191 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
192 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
194 /* Register classes for various constraints that are based on the target
195 switches. */
196 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
198 /* Describe the alignment of a vector. */
199 int rs6000_vector_align[NUM_MACHINE_MODES];
201 /* Map selected modes to types for builtins. */
202 GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
204 /* What modes to automatically generate reciprocal divide estimate (fre) and
205 reciprocal sqrt (frsqrte) for. */
206 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
234 /* -mrecip options. */
235 static struct
237 const char *string; /* option name */
238 unsigned int mask; /* mask bits to set */
239 } recip_options[] = {
240 { "all", RECIP_ALL },
241 { "none", RECIP_NONE },
242 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
243 | RECIP_V2DF_DIV) },
244 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
245 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
246 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
247 | RECIP_V2DF_RSQRT) },
248 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
249 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
252 /* On PowerPC, we have a limited number of target clones that we care about
253 which means we can use an array to hold the options, rather than having more
254 elaborate data structures to identify each possible variation. Order the
255 clones from the default to the highest ISA. */
256 enum {
257 CLONE_DEFAULT = 0, /* default clone. */
258 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
259 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
260 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
261 CLONE_ISA_3_00, /* ISA 3.00 (power9). */
262 CLONE_MAX
265 /* Map compiler ISA bits into HWCAP names. */
266 struct clone_map {
267 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
268 const char *name; /* name to use in __builtin_cpu_supports. */
271 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
272 { 0, "" }, /* Default options. */
273 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
274 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
275 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
276 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.00 (power9). */
280 /* Newer LIBCs explicitly export this symbol to declare that they provide
281 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
282 reference to this symbol whenever we expand a CPU builtin, so that
283 we never link against an old LIBC. */
284 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
286 /* True if we have expanded a CPU builtin. */
287 bool cpu_builtin_p = false;
289 /* Pointer to function (in rs6000-c.c) that can define or undefine target
290 macros that have changed. Languages that don't support the preprocessor
291 don't link in rs6000-c.c, so we can't call it directly. */
292 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
310 /* Map register class to register type. */
311 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
313 /* First/last register type for the 'normal' register types (i.e. general
314 purpose, floating point, altivec, and VSX registers). */
315 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
317 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
332 /* For setting up register classes, loop through the 3 register classes mapping
333 into real registers, and skip the ANY class, which is just an OR of the
334 bits. */
335 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
336 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
338 /* Map reload register type to a register in the register class. */
339 struct reload_reg_map_type {
340 const char *name; /* Register class name. */
341 int reg; /* Register in the register class. */
344 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
345 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
346 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
347 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
348 { "Any", -1 }, /* RELOAD_REG_ANY. */
351 /* Mask bits for each register class, indexed per mode.  Historically the
352 compiler has been more restrictive which types can do PRE_MODIFY instead of
353 PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
354 typedef unsigned char addr_mask_type;
356 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.  */
357 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers.  */
358 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing.  */
359 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing.  */
360 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid.  */
361 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid.  */
362 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing.  */
363 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited.  */
365 /* Register type masks based on the type, of valid addressing modes. */
366 struct rs6000_reg_addr {
367 enum insn_code reload_load; /* INSN to reload for loading. */
368 enum insn_code reload_store; /* INSN to reload for storing. */
369 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
370 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
371 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
372 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
373 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
376 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
378 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
379 static inline bool
380 mode_supports_pre_incdec_p (machine_mode mode)
382 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
383 != 0);
386 /* Helper function to say whether a mode supports PRE_MODIFY. */
387 static inline bool
388 mode_supports_pre_modify_p (machine_mode mode)
390 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
391 != 0);
394 /* Return true if we have D-form addressing in altivec registers. */
395 static inline bool
396 mode_supports_vmx_dform (machine_mode mode)
398 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
401 /* Return true if we have D-form addressing in VSX registers. This addressing
402 is more limited than normal d-form addressing in that the offset must be
403 aligned on a 16-byte boundary. */
404 static inline bool
405 mode_supports_dq_form (machine_mode mode)
407 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
408 != 0);
411 /* Given that there exists at least one variable that is set (produced)
412 by OUT_INSN and read (consumed) by IN_INSN, return true iff
413 IN_INSN represents one or more memory store operations and none of
414 the variables set by OUT_INSN is used by IN_INSN as the address of a
415 store operation. If either IN_INSN or OUT_INSN does not represent
416 a "single" RTL SET expression (as loosely defined by the
417 implementation of the single_set function) or a PARALLEL with only
418 SETs, CLOBBERs, and USEs inside, this function returns false.
420 This rs6000-specific version of store_data_bypass_p checks for
421 certain conditions that result in assertion failures (and internal
422 compiler errors) in the generic store_data_bypass_p function and
423 returns false rather than calling store_data_bypass_p if one of the
424 problematic conditions is detected. */
427 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
429 rtx out_set, in_set;
430 rtx out_pat, in_pat;
431 rtx out_exp, in_exp;
432 int i, j;
434 in_set = single_set (in_insn);
435 if (in_set)
437 if (MEM_P (SET_DEST (in_set)))
439 out_set = single_set (out_insn);
440 if (!out_set)
442 out_pat = PATTERN (out_insn);
443 if (GET_CODE (out_pat) == PARALLEL)
445 for (i = 0; i < XVECLEN (out_pat, 0); i++)
447 out_exp = XVECEXP (out_pat, 0, i);
448 if ((GET_CODE (out_exp) == CLOBBER)
449 || (GET_CODE (out_exp) == USE))
450 continue;
451 else if (GET_CODE (out_exp) != SET)
452 return false;
458 else
460 in_pat = PATTERN (in_insn);
461 if (GET_CODE (in_pat) != PARALLEL)
462 return false;
464 for (i = 0; i < XVECLEN (in_pat, 0); i++)
466 in_exp = XVECEXP (in_pat, 0, i);
467 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
468 continue;
469 else if (GET_CODE (in_exp) != SET)
470 return false;
472 if (MEM_P (SET_DEST (in_exp)))
474 out_set = single_set (out_insn);
475 if (!out_set)
477 out_pat = PATTERN (out_insn);
478 if (GET_CODE (out_pat) != PARALLEL)
479 return false;
480 for (j = 0; j < XVECLEN (out_pat, 0); j++)
482 out_exp = XVECEXP (out_pat, 0, j);
483 if ((GET_CODE (out_exp) == CLOBBER)
484 || (GET_CODE (out_exp) == USE))
485 continue;
486 else if (GET_CODE (out_exp) != SET)
487 return false;
493 return store_data_bypass_p (out_insn, in_insn);
497 /* Processor costs (relative to an add) */
499 const struct processor_costs *rs6000_cost;
501 /* Instruction size costs on 32bit processors. */
502 static const
503 struct processor_costs size32_cost = {
504 COSTS_N_INSNS (1), /* mulsi */
505 COSTS_N_INSNS (1), /* mulsi_const */
506 COSTS_N_INSNS (1), /* mulsi_const9 */
507 COSTS_N_INSNS (1), /* muldi */
508 COSTS_N_INSNS (1), /* divsi */
509 COSTS_N_INSNS (1), /* divdi */
510 COSTS_N_INSNS (1), /* fp */
511 COSTS_N_INSNS (1), /* dmul */
512 COSTS_N_INSNS (1), /* sdiv */
513 COSTS_N_INSNS (1), /* ddiv */
514 32, /* cache line size */
515 0, /* l1 cache */
516 0, /* l2 cache */
517 0, /* streams */
518 0, /* SF->DF convert */
521 /* Instruction size costs on 64bit processors. */
522 static const
523 struct processor_costs size64_cost = {
524 COSTS_N_INSNS (1), /* mulsi */
525 COSTS_N_INSNS (1), /* mulsi_const */
526 COSTS_N_INSNS (1), /* mulsi_const9 */
527 COSTS_N_INSNS (1), /* muldi */
528 COSTS_N_INSNS (1), /* divsi */
529 COSTS_N_INSNS (1), /* divdi */
530 COSTS_N_INSNS (1), /* fp */
531 COSTS_N_INSNS (1), /* dmul */
532 COSTS_N_INSNS (1), /* sdiv */
533 COSTS_N_INSNS (1), /* ddiv */
534 128, /* cache line size */
535 0, /* l1 cache */
536 0, /* l2 cache */
537 0, /* streams */
538 0, /* SF->DF convert */
541 /* Instruction costs on RS64A processors. */
542 static const
543 struct processor_costs rs64a_cost = {
544 COSTS_N_INSNS (20), /* mulsi */
545 COSTS_N_INSNS (12), /* mulsi_const */
546 COSTS_N_INSNS (8), /* mulsi_const9 */
547 COSTS_N_INSNS (34), /* muldi */
548 COSTS_N_INSNS (65), /* divsi */
549 COSTS_N_INSNS (67), /* divdi */
550 COSTS_N_INSNS (4), /* fp */
551 COSTS_N_INSNS (4), /* dmul */
552 COSTS_N_INSNS (31), /* sdiv */
553 COSTS_N_INSNS (31), /* ddiv */
554 128, /* cache line size */
555 128, /* l1 cache */
556 2048, /* l2 cache */
557 1, /* streams */
558 0, /* SF->DF convert */
561 /* Instruction costs on MPCCORE processors. */
562 static const
563 struct processor_costs mpccore_cost = {
564 COSTS_N_INSNS (2), /* mulsi */
565 COSTS_N_INSNS (2), /* mulsi_const */
566 COSTS_N_INSNS (2), /* mulsi_const9 */
567 COSTS_N_INSNS (2), /* muldi */
568 COSTS_N_INSNS (6), /* divsi */
569 COSTS_N_INSNS (6), /* divdi */
570 COSTS_N_INSNS (4), /* fp */
571 COSTS_N_INSNS (5), /* dmul */
572 COSTS_N_INSNS (10), /* sdiv */
573 COSTS_N_INSNS (17), /* ddiv */
574 32, /* cache line size */
575 4, /* l1 cache */
576 16, /* l2 cache */
577 1, /* streams */
578 0, /* SF->DF convert */
581 /* Instruction costs on PPC403 processors. */
582 static const
583 struct processor_costs ppc403_cost = {
584 COSTS_N_INSNS (4), /* mulsi */
585 COSTS_N_INSNS (4), /* mulsi_const */
586 COSTS_N_INSNS (4), /* mulsi_const9 */
587 COSTS_N_INSNS (4), /* muldi */
588 COSTS_N_INSNS (33), /* divsi */
589 COSTS_N_INSNS (33), /* divdi */
590 COSTS_N_INSNS (11), /* fp */
591 COSTS_N_INSNS (11), /* dmul */
592 COSTS_N_INSNS (11), /* sdiv */
593 COSTS_N_INSNS (11), /* ddiv */
594 32, /* cache line size */
595 4, /* l1 cache */
596 16, /* l2 cache */
597 1, /* streams */
598 0, /* SF->DF convert */
601 /* Instruction costs on PPC405 processors. */
602 static const
603 struct processor_costs ppc405_cost = {
604 COSTS_N_INSNS (5), /* mulsi */
605 COSTS_N_INSNS (4), /* mulsi_const */
606 COSTS_N_INSNS (3), /* mulsi_const9 */
607 COSTS_N_INSNS (5), /* muldi */
608 COSTS_N_INSNS (35), /* divsi */
609 COSTS_N_INSNS (35), /* divdi */
610 COSTS_N_INSNS (11), /* fp */
611 COSTS_N_INSNS (11), /* dmul */
612 COSTS_N_INSNS (11), /* sdiv */
613 COSTS_N_INSNS (11), /* ddiv */
614 32, /* cache line size */
615 16, /* l1 cache */
616 128, /* l2 cache */
617 1, /* streams */
618 0, /* SF->DF convert */
621 /* Instruction costs on PPC440 processors. */
622 static const
623 struct processor_costs ppc440_cost = {
624 COSTS_N_INSNS (3), /* mulsi */
625 COSTS_N_INSNS (2), /* mulsi_const */
626 COSTS_N_INSNS (2), /* mulsi_const9 */
627 COSTS_N_INSNS (3), /* muldi */
628 COSTS_N_INSNS (34), /* divsi */
629 COSTS_N_INSNS (34), /* divdi */
630 COSTS_N_INSNS (5), /* fp */
631 COSTS_N_INSNS (5), /* dmul */
632 COSTS_N_INSNS (19), /* sdiv */
633 COSTS_N_INSNS (33), /* ddiv */
634 32, /* cache line size */
635 32, /* l1 cache */
636 256, /* l2 cache */
637 1, /* streams */
638 0, /* SF->DF convert */
641 /* Instruction costs on PPC476 processors. */
642 static const
643 struct processor_costs ppc476_cost = {
644 COSTS_N_INSNS (4), /* mulsi */
645 COSTS_N_INSNS (4), /* mulsi_const */
646 COSTS_N_INSNS (4), /* mulsi_const9 */
647 COSTS_N_INSNS (4), /* muldi */
648 COSTS_N_INSNS (11), /* divsi */
649 COSTS_N_INSNS (11), /* divdi */
650 COSTS_N_INSNS (6), /* fp */
651 COSTS_N_INSNS (6), /* dmul */
652 COSTS_N_INSNS (19), /* sdiv */
653 COSTS_N_INSNS (33), /* ddiv */
654 32, /* l1 cache line size */
655 32, /* l1 cache */
656 512, /* l2 cache */
657 1, /* streams */
658 0, /* SF->DF convert */
661 /* Instruction costs on PPC601 processors. */
662 static const
663 struct processor_costs ppc601_cost = {
664 COSTS_N_INSNS (5), /* mulsi */
665 COSTS_N_INSNS (5), /* mulsi_const */
666 COSTS_N_INSNS (5), /* mulsi_const9 */
667 COSTS_N_INSNS (5), /* muldi */
668 COSTS_N_INSNS (36), /* divsi */
669 COSTS_N_INSNS (36), /* divdi */
670 COSTS_N_INSNS (4), /* fp */
671 COSTS_N_INSNS (5), /* dmul */
672 COSTS_N_INSNS (17), /* sdiv */
673 COSTS_N_INSNS (31), /* ddiv */
674 32, /* cache line size */
675 32, /* l1 cache */
676 256, /* l2 cache */
677 1, /* streams */
678 0, /* SF->DF convert */
681 /* Instruction costs on PPC603 processors. */
682 static const
683 struct processor_costs ppc603_cost = {
684 COSTS_N_INSNS (5), /* mulsi */
685 COSTS_N_INSNS (3), /* mulsi_const */
686 COSTS_N_INSNS (2), /* mulsi_const9 */
687 COSTS_N_INSNS (5), /* muldi */
688 COSTS_N_INSNS (37), /* divsi */
689 COSTS_N_INSNS (37), /* divdi */
690 COSTS_N_INSNS (3), /* fp */
691 COSTS_N_INSNS (4), /* dmul */
692 COSTS_N_INSNS (18), /* sdiv */
693 COSTS_N_INSNS (33), /* ddiv */
694 32, /* cache line size */
695 8, /* l1 cache */
696 64, /* l2 cache */
697 1, /* streams */
698 0, /* SF->DF convert */
701 /* Instruction costs on PPC604 processors. */
702 static const
703 struct processor_costs ppc604_cost = {
704 COSTS_N_INSNS (4), /* mulsi */
705 COSTS_N_INSNS (4), /* mulsi_const */
706 COSTS_N_INSNS (4), /* mulsi_const9 */
707 COSTS_N_INSNS (4), /* muldi */
708 COSTS_N_INSNS (20), /* divsi */
709 COSTS_N_INSNS (20), /* divdi */
710 COSTS_N_INSNS (3), /* fp */
711 COSTS_N_INSNS (3), /* dmul */
712 COSTS_N_INSNS (18), /* sdiv */
713 COSTS_N_INSNS (32), /* ddiv */
714 32, /* cache line size */
715 16, /* l1 cache */
716 512, /* l2 cache */
717 1, /* streams */
718 0, /* SF->DF convert */
721 /* Instruction costs on PPC604e processors. */
722 static const
723 struct processor_costs ppc604e_cost = {
724 COSTS_N_INSNS (2), /* mulsi */
725 COSTS_N_INSNS (2), /* mulsi_const */
726 COSTS_N_INSNS (2), /* mulsi_const9 */
727 COSTS_N_INSNS (2), /* muldi */
728 COSTS_N_INSNS (20), /* divsi */
729 COSTS_N_INSNS (20), /* divdi */
730 COSTS_N_INSNS (3), /* fp */
731 COSTS_N_INSNS (3), /* dmul */
732 COSTS_N_INSNS (18), /* sdiv */
733 COSTS_N_INSNS (32), /* ddiv */
734 32, /* cache line size */
735 32, /* l1 cache */
736 1024, /* l2 cache */
737 1, /* streams */
738 0, /* SF->DF convert */
741 /* Instruction costs on PPC620 processors. */
742 static const
743 struct processor_costs ppc620_cost = {
744 COSTS_N_INSNS (5), /* mulsi */
745 COSTS_N_INSNS (4), /* mulsi_const */
746 COSTS_N_INSNS (3), /* mulsi_const9 */
747 COSTS_N_INSNS (7), /* muldi */
748 COSTS_N_INSNS (21), /* divsi */
749 COSTS_N_INSNS (37), /* divdi */
750 COSTS_N_INSNS (3), /* fp */
751 COSTS_N_INSNS (3), /* dmul */
752 COSTS_N_INSNS (18), /* sdiv */
753 COSTS_N_INSNS (32), /* ddiv */
754 128, /* cache line size */
755 32, /* l1 cache */
756 1024, /* l2 cache */
757 1, /* streams */
758 0, /* SF->DF convert */
761 /* Instruction costs on PPC630 processors. */
762 static const
763 struct processor_costs ppc630_cost = {
764 COSTS_N_INSNS (5), /* mulsi */
765 COSTS_N_INSNS (4), /* mulsi_const */
766 COSTS_N_INSNS (3), /* mulsi_const9 */
767 COSTS_N_INSNS (7), /* muldi */
768 COSTS_N_INSNS (21), /* divsi */
769 COSTS_N_INSNS (37), /* divdi */
770 COSTS_N_INSNS (3), /* fp */
771 COSTS_N_INSNS (3), /* dmul */
772 COSTS_N_INSNS (17), /* sdiv */
773 COSTS_N_INSNS (21), /* ddiv */
774 128, /* cache line size */
775 64, /* l1 cache */
776 1024, /* l2 cache */
777 1, /* streams */
778 0, /* SF->DF convert */
781 /* Instruction costs on Cell processor. */
782 /* COSTS_N_INSNS (1) ~ one add. */
783 static const
784 struct processor_costs ppccell_cost = {
785 COSTS_N_INSNS (9/2)+2, /* mulsi */
786 COSTS_N_INSNS (6/2), /* mulsi_const */
787 COSTS_N_INSNS (6/2), /* mulsi_const9 */
788 COSTS_N_INSNS (15/2)+2, /* muldi */
789 COSTS_N_INSNS (38/2), /* divsi */
790 COSTS_N_INSNS (70/2), /* divdi */
791 COSTS_N_INSNS (10/2), /* fp */
792 COSTS_N_INSNS (10/2), /* dmul */
793 COSTS_N_INSNS (74/2), /* sdiv */
794 COSTS_N_INSNS (74/2), /* ddiv */
795 128, /* cache line size */
796 32, /* l1 cache */
797 512, /* l2 cache */
798 6, /* streams */
799 0, /* SF->DF convert */
802 /* Instruction costs on PPC750 and PPC7400 processors. */
803 static const
804 struct processor_costs ppc750_cost = {
805 COSTS_N_INSNS (5), /* mulsi */
806 COSTS_N_INSNS (3), /* mulsi_const */
807 COSTS_N_INSNS (2), /* mulsi_const9 */
808 COSTS_N_INSNS (5), /* muldi */
809 COSTS_N_INSNS (17), /* divsi */
810 COSTS_N_INSNS (17), /* divdi */
811 COSTS_N_INSNS (3), /* fp */
812 COSTS_N_INSNS (3), /* dmul */
813 COSTS_N_INSNS (17), /* sdiv */
814 COSTS_N_INSNS (31), /* ddiv */
815 32, /* cache line size */
816 32, /* l1 cache */
817 512, /* l2 cache */
818 1, /* streams */
819 0, /* SF->DF convert */
822 /* Instruction costs on PPC7450 processors. */
823 static const
824 struct processor_costs ppc7450_cost = {
825 COSTS_N_INSNS (4), /* mulsi */
826 COSTS_N_INSNS (3), /* mulsi_const */
827 COSTS_N_INSNS (3), /* mulsi_const9 */
828 COSTS_N_INSNS (4), /* muldi */
829 COSTS_N_INSNS (23), /* divsi */
830 COSTS_N_INSNS (23), /* divdi */
831 COSTS_N_INSNS (5), /* fp */
832 COSTS_N_INSNS (5), /* dmul */
833 COSTS_N_INSNS (21), /* sdiv */
834 COSTS_N_INSNS (35), /* ddiv */
835 32, /* cache line size */
836 32, /* l1 cache */
837 1024, /* l2 cache */
838 1, /* streams */
839 0, /* SF->DF convert */
842 /* Instruction costs on PPC8540 processors.  */
843 static const
844 struct processor_costs ppc8540_cost = {
845 COSTS_N_INSNS (4), /* mulsi */
846 COSTS_N_INSNS (4), /* mulsi_const */
847 COSTS_N_INSNS (4), /* mulsi_const9 */
848 COSTS_N_INSNS (4), /* muldi */
849 COSTS_N_INSNS (19), /* divsi */
850 COSTS_N_INSNS (19), /* divdi */
851 COSTS_N_INSNS (4), /* fp */
852 COSTS_N_INSNS (4), /* dmul */
853 COSTS_N_INSNS (29), /* sdiv */
854 COSTS_N_INSNS (29), /* ddiv */
855 32, /* cache line size */
856 32, /* l1 cache */
857 256, /* l2 cache */
858 1, /* prefetch streams */
859 0, /* SF->DF convert */
862 /* Instruction costs on E300C2 and E300C3 cores.  */
863 static const
864 struct processor_costs ppce300c2c3_cost = {
865 COSTS_N_INSNS (4), /* mulsi */
866 COSTS_N_INSNS (4), /* mulsi_const */
867 COSTS_N_INSNS (4), /* mulsi_const9 */
868 COSTS_N_INSNS (4), /* muldi */
869 COSTS_N_INSNS (19), /* divsi */
870 COSTS_N_INSNS (19), /* divdi */
871 COSTS_N_INSNS (3), /* fp */
872 COSTS_N_INSNS (4), /* dmul */
873 COSTS_N_INSNS (18), /* sdiv */
874 COSTS_N_INSNS (33), /* ddiv */
/* NOTE(review): the cache line size entry (original line 875) appears to have
   been dropped in this copy -- verify against upstream rs6000.c.  */
876 16, /* l1 cache */
877 16, /* l2 cache */
878 1, /* prefetch streams */
879 0, /* SF->DF convert */
882 /* Instruction costs on PPCE500MC processors.  */
883 static const
884 struct processor_costs ppce500mc_cost = {
885 COSTS_N_INSNS (4), /* mulsi */
886 COSTS_N_INSNS (4), /* mulsi_const */
887 COSTS_N_INSNS (4), /* mulsi_const9 */
888 COSTS_N_INSNS (4), /* muldi */
889 COSTS_N_INSNS (14), /* divsi */
890 COSTS_N_INSNS (14), /* divdi */
891 COSTS_N_INSNS (8), /* fp */
892 COSTS_N_INSNS (10), /* dmul */
893 COSTS_N_INSNS (36), /* sdiv */
894 COSTS_N_INSNS (66), /* ddiv */
895 64, /* cache line size */
896 32, /* l1 cache */
897 128, /* l2 cache */
898 1, /* prefetch streams */
899 0, /* SF->DF convert */
902 /* Instruction costs on PPCE500MC64 processors.  */
903 static const
904 struct processor_costs ppce500mc64_cost = {
905 COSTS_N_INSNS (4), /* mulsi */
906 COSTS_N_INSNS (4), /* mulsi_const */
907 COSTS_N_INSNS (4), /* mulsi_const9 */
908 COSTS_N_INSNS (4), /* muldi */
909 COSTS_N_INSNS (14), /* divsi */
910 COSTS_N_INSNS (14), /* divdi */
911 COSTS_N_INSNS (4), /* fp */
912 COSTS_N_INSNS (10), /* dmul */
913 COSTS_N_INSNS (36), /* sdiv */
914 COSTS_N_INSNS (66), /* ddiv */
915 64, /* cache line size */
916 32, /* l1 cache */
917 128, /* l2 cache */
918 1, /* prefetch streams */
919 0, /* SF->DF convert */
922 /* Instruction costs on PPCE5500 processors.  */
923 static const
924 struct processor_costs ppce5500_cost = {
925 COSTS_N_INSNS (5), /* mulsi */
926 COSTS_N_INSNS (5), /* mulsi_const */
927 COSTS_N_INSNS (4), /* mulsi_const9 */
928 COSTS_N_INSNS (5), /* muldi */
929 COSTS_N_INSNS (14), /* divsi */
930 COSTS_N_INSNS (14), /* divdi */
931 COSTS_N_INSNS (7), /* fp */
932 COSTS_N_INSNS (10), /* dmul */
933 COSTS_N_INSNS (36), /* sdiv */
934 COSTS_N_INSNS (66), /* ddiv */
935 64, /* cache line size */
936 32, /* l1 cache */
937 128, /* l2 cache */
938 1, /* prefetch streams */
939 0, /* SF->DF convert */
942 /* Instruction costs on PPCE6500 processors.  */
943 static const
944 struct processor_costs ppce6500_cost = {
945 COSTS_N_INSNS (5), /* mulsi */
946 COSTS_N_INSNS (5), /* mulsi_const */
947 COSTS_N_INSNS (4), /* mulsi_const9 */
948 COSTS_N_INSNS (5), /* muldi */
949 COSTS_N_INSNS (14), /* divsi */
950 COSTS_N_INSNS (14), /* divdi */
951 COSTS_N_INSNS (7), /* fp */
952 COSTS_N_INSNS (10), /* dmul */
953 COSTS_N_INSNS (36), /* sdiv */
954 COSTS_N_INSNS (66), /* ddiv */
955 64, /* cache line size */
956 32, /* l1 cache */
957 128, /* l2 cache */
958 1, /* prefetch streams */
959 0, /* SF->DF convert */
962 /* Instruction costs on AppliedMicro Titan processors.  */
963 static const
964 struct processor_costs titan_cost = {
965 COSTS_N_INSNS (5), /* mulsi */
966 COSTS_N_INSNS (5), /* mulsi_const */
967 COSTS_N_INSNS (5), /* mulsi_const9 */
968 COSTS_N_INSNS (5), /* muldi */
969 COSTS_N_INSNS (18), /* divsi */
970 COSTS_N_INSNS (18), /* divdi */
971 COSTS_N_INSNS (10), /* fp */
972 COSTS_N_INSNS (10), /* dmul */
973 COSTS_N_INSNS (46), /* sdiv */
974 COSTS_N_INSNS (72), /* ddiv */
975 32, /* cache line size */
976 32, /* l1 cache */
977 512, /* l2 cache */
978 1, /* prefetch streams */
979 0, /* SF->DF convert */
982 /* Instruction costs on POWER4 and POWER5 processors. */
983 static const
984 struct processor_costs power4_cost = {
985 COSTS_N_INSNS (3), /* mulsi */
986 COSTS_N_INSNS (2), /* mulsi_const */
987 COSTS_N_INSNS (2), /* mulsi_const9 */
988 COSTS_N_INSNS (4), /* muldi */
989 COSTS_N_INSNS (18), /* divsi */
990 COSTS_N_INSNS (34), /* divdi */
991 COSTS_N_INSNS (3), /* fp */
992 COSTS_N_INSNS (3), /* dmul */
993 COSTS_N_INSNS (17), /* sdiv */
994 COSTS_N_INSNS (17), /* ddiv */
995 128, /* cache line size */
996 32, /* l1 cache */
997 1024, /* l2 cache */
998 8, /* prefetch streams /*/
999 0, /* SF->DF convert */
1002 /* Instruction costs on POWER6 processors. */
1003 static const
1004 struct processor_costs power6_cost = {
1005 COSTS_N_INSNS (8), /* mulsi */
1006 COSTS_N_INSNS (8), /* mulsi_const */
1007 COSTS_N_INSNS (8), /* mulsi_const9 */
1008 COSTS_N_INSNS (8), /* muldi */
1009 COSTS_N_INSNS (22), /* divsi */
1010 COSTS_N_INSNS (28), /* divdi */
1011 COSTS_N_INSNS (3), /* fp */
1012 COSTS_N_INSNS (3), /* dmul */
1013 COSTS_N_INSNS (13), /* sdiv */
1014 COSTS_N_INSNS (16), /* ddiv */
1015 128, /* cache line size */
1016 64, /* l1 cache */
1017 2048, /* l2 cache */
1018 16, /* prefetch streams */
1019 0, /* SF->DF convert */
1022 /* Instruction costs on POWER7 processors. */
1023 static const
1024 struct processor_costs power7_cost = {
1025 COSTS_N_INSNS (2), /* mulsi */
1026 COSTS_N_INSNS (2), /* mulsi_const */
1027 COSTS_N_INSNS (2), /* mulsi_const9 */
1028 COSTS_N_INSNS (2), /* muldi */
1029 COSTS_N_INSNS (18), /* divsi */
1030 COSTS_N_INSNS (34), /* divdi */
1031 COSTS_N_INSNS (3), /* fp */
1032 COSTS_N_INSNS (3), /* dmul */
1033 COSTS_N_INSNS (13), /* sdiv */
1034 COSTS_N_INSNS (16), /* ddiv */
1035 128, /* cache line size */
1036 32, /* l1 cache */
1037 256, /* l2 cache */
1038 12, /* prefetch streams */
1039 COSTS_N_INSNS (3), /* SF->DF convert */
1042 /* Instruction costs on POWER8 processors. */
1043 static const
1044 struct processor_costs power8_cost = {
1045 COSTS_N_INSNS (3), /* mulsi */
1046 COSTS_N_INSNS (3), /* mulsi_const */
1047 COSTS_N_INSNS (3), /* mulsi_const9 */
1048 COSTS_N_INSNS (3), /* muldi */
1049 COSTS_N_INSNS (19), /* divsi */
1050 COSTS_N_INSNS (35), /* divdi */
1051 COSTS_N_INSNS (3), /* fp */
1052 COSTS_N_INSNS (3), /* dmul */
1053 COSTS_N_INSNS (14), /* sdiv */
1054 COSTS_N_INSNS (17), /* ddiv */
1055 128, /* cache line size */
1056 32, /* l1 cache */
1057 256, /* l2 cache */
1058 12, /* prefetch streams */
1059 COSTS_N_INSNS (3), /* SF->DF convert */
1062 /* Instruction costs on POWER9 processors. */
1063 static const
1064 struct processor_costs power9_cost = {
1065 COSTS_N_INSNS (3), /* mulsi */
1066 COSTS_N_INSNS (3), /* mulsi_const */
1067 COSTS_N_INSNS (3), /* mulsi_const9 */
1068 COSTS_N_INSNS (3), /* muldi */
1069 COSTS_N_INSNS (8), /* divsi */
1070 COSTS_N_INSNS (12), /* divdi */
1071 COSTS_N_INSNS (3), /* fp */
1072 COSTS_N_INSNS (3), /* dmul */
1073 COSTS_N_INSNS (13), /* sdiv */
1074 COSTS_N_INSNS (18), /* ddiv */
1075 128, /* cache line size */
1076 32, /* l1 cache */
1077 512, /* l2 cache */
1078 8, /* prefetch streams */
1079 COSTS_N_INSNS (3), /* SF->DF convert */
1082 /* Instruction costs on POWER A2 processors. */
1083 static const
1084 struct processor_costs ppca2_cost = {
1085 COSTS_N_INSNS (16), /* mulsi */
1086 COSTS_N_INSNS (16), /* mulsi_const */
1087 COSTS_N_INSNS (16), /* mulsi_const9 */
1088 COSTS_N_INSNS (16), /* muldi */
1089 COSTS_N_INSNS (22), /* divsi */
1090 COSTS_N_INSNS (28), /* divdi */
1091 COSTS_N_INSNS (3), /* fp */
1092 COSTS_N_INSNS (3), /* dmul */
1093 COSTS_N_INSNS (59), /* sdiv */
1094 COSTS_N_INSNS (72), /* ddiv */
1096 16, /* l1 cache */
1097 2048, /* l2 cache */
1098 16, /* prefetch streams */
1099 0, /* SF->DF convert */
1102 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1103 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1106 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
1107 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1108 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1109 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1110 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1111 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1112 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1113 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1114 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1115 bool);
1116 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1117 unsigned int);
1118 static bool is_microcoded_insn (rtx_insn *);
1119 static bool is_nonpipeline_insn (rtx_insn *);
1120 static bool is_cracked_insn (rtx_insn *);
1121 static bool is_load_insn (rtx, rtx *);
1122 static bool is_store_insn (rtx, rtx *);
1123 static bool set_to_load_agen (rtx_insn *,rtx_insn *);
1124 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1125 static bool insn_must_be_first_in_group (rtx_insn *);
1126 static bool insn_must_be_last_in_group (rtx_insn *);
1127 int easy_vector_constant (rtx, machine_mode);
1128 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1129 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1130 #if TARGET_MACHO
1131 static tree get_prev_label (tree);
1132 #endif
1133 static bool rs6000_mode_dependent_address (const_rtx);
1134 static bool rs6000_debug_mode_dependent_address (const_rtx);
1135 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1136 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1137 machine_mode, rtx);
1138 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1139 machine_mode,
1140 rtx);
1141 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1142 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1143 enum reg_class);
1144 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1145 reg_class_t,
1146 reg_class_t);
1147 static bool rs6000_debug_can_change_mode_class (machine_mode,
1148 machine_mode,
1149 reg_class_t);
1151 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1152 = rs6000_mode_dependent_address;
1154 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1155 machine_mode, rtx)
1156 = rs6000_secondary_reload_class;
1158 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1159 = rs6000_preferred_reload_class;
1161 const int INSN_NOT_AVAILABLE = -1;
1163 static void rs6000_print_isa_options (FILE *, int, const char *,
1164 HOST_WIDE_INT);
1165 static void rs6000_print_builtin_options (FILE *, int, const char *,
1166 HOST_WIDE_INT);
1167 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1169 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1170 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1171 enum rs6000_reg_type,
1172 machine_mode,
1173 secondary_reload_info *,
1174 bool);
1175 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1177 /* Hash table stuff for keeping track of TOC entries. */
1179 struct GTY((for_user)) toc_hash_struct
1181 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1182 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1183 rtx key;
1184 machine_mode key_mode;
1185 int labelno;
1188 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1190 static hashval_t hash (toc_hash_struct *);
1191 static bool equal (toc_hash_struct *, toc_hash_struct *);
1194 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
/* Alternate register names, used when -mregnames is in effect.  */
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
1251 /* Table of valid machine attributes. */
1253 static const struct attribute_spec rs6000_attribute_table[] =
1255 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1256 affects_type_identity, handler, exclude } */
1257 { "altivec", 1, 1, false, true, false, false,
1258 rs6000_handle_altivec_attribute, NULL },
1259 { "longcall", 0, 0, false, true, true, false,
1260 rs6000_handle_longcall_attribute, NULL },
1261 { "shortcall", 0, 0, false, true, true, false,
1262 rs6000_handle_longcall_attribute, NULL },
1263 { "ms_struct", 0, 0, false, false, false, false,
1264 rs6000_handle_struct_attribute, NULL },
1265 { "gcc_struct", 0, 0, false, false, false, false,
1266 rs6000_handle_struct_attribute, NULL },
1267 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1268 SUBTARGET_ATTRIBUTE_TABLE,
1269 #endif
1270 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1273 #ifndef TARGET_PROFILE_KERNEL
1274 #define TARGET_PROFILE_KERNEL 0
1275 #endif
1277 /* Initialize the GCC target structure. */
1278 #undef TARGET_ATTRIBUTE_TABLE
1279 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1280 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1281 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1282 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1283 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1285 #undef TARGET_ASM_ALIGNED_DI_OP
1286 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1288 /* Default unaligned ops are only provided for ELF. Find the ops needed
1289 for non-ELF systems. */
1290 #ifndef OBJECT_FORMAT_ELF
1291 #if TARGET_XCOFF
1292 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1293 64-bit targets. */
1294 #undef TARGET_ASM_UNALIGNED_HI_OP
1295 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1296 #undef TARGET_ASM_UNALIGNED_SI_OP
1297 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1298 #undef TARGET_ASM_UNALIGNED_DI_OP
1299 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1300 #else
1301 /* For Darwin. */
1302 #undef TARGET_ASM_UNALIGNED_HI_OP
1303 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1304 #undef TARGET_ASM_UNALIGNED_SI_OP
1305 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1306 #undef TARGET_ASM_UNALIGNED_DI_OP
1307 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1308 #undef TARGET_ASM_ALIGNED_DI_OP
1309 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1310 #endif
1311 #endif
1313 /* This hook deals with fixups for relocatable code and DI-mode objects
1314 in 64-bit code. */
1315 #undef TARGET_ASM_INTEGER
1316 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1318 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1319 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1320 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1321 #endif
1323 #undef TARGET_SET_UP_BY_PROLOGUE
1324 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1326 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1327 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1328 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1329 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1330 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1331 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1332 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1333 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1334 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1336 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1337 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1339 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1340 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1342 #undef TARGET_INTERNAL_ARG_POINTER
1343 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1345 #undef TARGET_HAVE_TLS
1346 #define TARGET_HAVE_TLS HAVE_AS_TLS
1348 #undef TARGET_CANNOT_FORCE_CONST_MEM
1349 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1351 #undef TARGET_DELEGITIMIZE_ADDRESS
1352 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1354 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1355 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1357 #undef TARGET_LEGITIMATE_COMBINED_INSN
1358 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1360 #undef TARGET_ASM_FUNCTION_PROLOGUE
1361 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1362 #undef TARGET_ASM_FUNCTION_EPILOGUE
1363 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1365 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1366 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1368 #undef TARGET_LEGITIMIZE_ADDRESS
1369 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1371 #undef TARGET_SCHED_VARIABLE_ISSUE
1372 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1374 #undef TARGET_SCHED_ISSUE_RATE
1375 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1376 #undef TARGET_SCHED_ADJUST_COST
1377 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1378 #undef TARGET_SCHED_ADJUST_PRIORITY
1379 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1380 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1381 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1382 #undef TARGET_SCHED_INIT
1383 #define TARGET_SCHED_INIT rs6000_sched_init
1384 #undef TARGET_SCHED_FINISH
1385 #define TARGET_SCHED_FINISH rs6000_sched_finish
1386 #undef TARGET_SCHED_REORDER
1387 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1388 #undef TARGET_SCHED_REORDER2
1389 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1391 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1392 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1394 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1395 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1397 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1398 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1399 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1400 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1401 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1402 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1403 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1404 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1406 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1407 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1409 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1410 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1411 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1412 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1413 rs6000_builtin_support_vector_misalignment
1414 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1415 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1416 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1417 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1418 rs6000_builtin_vectorization_cost
1419 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1420 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1421 rs6000_preferred_simd_mode
1422 #undef TARGET_VECTORIZE_INIT_COST
1423 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1424 #undef TARGET_VECTORIZE_ADD_STMT_COST
1425 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1426 #undef TARGET_VECTORIZE_FINISH_COST
1427 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1428 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1429 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1431 #undef TARGET_INIT_BUILTINS
1432 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1433 #undef TARGET_BUILTIN_DECL
1434 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1436 #undef TARGET_FOLD_BUILTIN
1437 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1438 #undef TARGET_GIMPLE_FOLD_BUILTIN
1439 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1441 #undef TARGET_EXPAND_BUILTIN
1442 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1444 #undef TARGET_MANGLE_TYPE
1445 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1447 #undef TARGET_INIT_LIBFUNCS
1448 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1450 #if TARGET_MACHO
1451 #undef TARGET_BINDS_LOCAL_P
1452 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1453 #endif
1455 #undef TARGET_MS_BITFIELD_LAYOUT_P
1456 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1458 #undef TARGET_ASM_OUTPUT_MI_THUNK
1459 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1461 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1462 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1464 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1465 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1467 #undef TARGET_REGISTER_MOVE_COST
1468 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1469 #undef TARGET_MEMORY_MOVE_COST
1470 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1471 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1472 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1473 rs6000_ira_change_pseudo_allocno_class
1474 #undef TARGET_CANNOT_COPY_INSN_P
1475 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1476 #undef TARGET_RTX_COSTS
1477 #define TARGET_RTX_COSTS rs6000_rtx_costs
1478 #undef TARGET_ADDRESS_COST
1479 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1480 #undef TARGET_INSN_COST
1481 #define TARGET_INSN_COST rs6000_insn_cost
1483 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1484 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1486 #undef TARGET_PROMOTE_FUNCTION_MODE
1487 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1489 #undef TARGET_RETURN_IN_MEMORY
1490 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1492 #undef TARGET_RETURN_IN_MSB
1493 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1495 #undef TARGET_SETUP_INCOMING_VARARGS
1496 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1498 /* Always strict argument naming on rs6000. */
1499 #undef TARGET_STRICT_ARGUMENT_NAMING
1500 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1501 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1502 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1503 #undef TARGET_SPLIT_COMPLEX_ARG
1504 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1505 #undef TARGET_MUST_PASS_IN_STACK
1506 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1507 #undef TARGET_PASS_BY_REFERENCE
1508 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1509 #undef TARGET_ARG_PARTIAL_BYTES
1510 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1511 #undef TARGET_FUNCTION_ARG_ADVANCE
1512 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1513 #undef TARGET_FUNCTION_ARG
1514 #define TARGET_FUNCTION_ARG rs6000_function_arg
1515 #undef TARGET_FUNCTION_ARG_PADDING
1516 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1517 #undef TARGET_FUNCTION_ARG_BOUNDARY
1518 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1520 #undef TARGET_BUILD_BUILTIN_VA_LIST
1521 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1523 #undef TARGET_EXPAND_BUILTIN_VA_START
1524 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1526 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1527 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1529 #undef TARGET_EH_RETURN_FILTER_MODE
1530 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1532 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1533 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1535 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1536 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1538 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1539 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1541 #undef TARGET_FLOATN_MODE
1542 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1544 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1545 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1547 #undef TARGET_MD_ASM_ADJUST
1548 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1550 #undef TARGET_OPTION_OVERRIDE
1551 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1553 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1554 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1555 rs6000_builtin_vectorized_function
1557 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1558 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1559 rs6000_builtin_md_vectorized_function
1561 #undef TARGET_STACK_PROTECT_GUARD
1562 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1564 #if !TARGET_MACHO
1565 #undef TARGET_STACK_PROTECT_FAIL
1566 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1567 #endif
1569 #ifdef HAVE_AS_TLS
1570 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1571 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1572 #endif
1574 /* Use a 32-bit anchor range. This leads to sequences like:
1576 addis tmp,anchor,high
1577 add dest,tmp,low
1579 where tmp itself acts as an anchor, and can be shared between
1580 accesses to the same 64k page. */
1581 #undef TARGET_MIN_ANCHOR_OFFSET
1582 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1583 #undef TARGET_MAX_ANCHOR_OFFSET
1584 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1585 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1586 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1587 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1588 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1590 #undef TARGET_BUILTIN_RECIPROCAL
1591 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1593 #undef TARGET_SECONDARY_RELOAD
1594 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1595 #undef TARGET_SECONDARY_MEMORY_NEEDED
1596 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1597 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1598 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1600 #undef TARGET_LEGITIMATE_ADDRESS_P
1601 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1603 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1604 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1606 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1607 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1609 #undef TARGET_CAN_ELIMINATE
1610 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1612 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1613 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1615 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1616 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1618 #undef TARGET_TRAMPOLINE_INIT
1619 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1621 #undef TARGET_FUNCTION_VALUE
1622 #define TARGET_FUNCTION_VALUE rs6000_function_value
1624 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1625 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1627 #undef TARGET_OPTION_SAVE
1628 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1630 #undef TARGET_OPTION_RESTORE
1631 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1633 #undef TARGET_OPTION_PRINT
1634 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1636 #undef TARGET_CAN_INLINE_P
1637 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1639 #undef TARGET_SET_CURRENT_FUNCTION
1640 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1642 #undef TARGET_LEGITIMATE_CONSTANT_P
1643 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1645 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1646 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1648 #undef TARGET_CAN_USE_DOLOOP_P
1649 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1651 #undef TARGET_PREDICT_DOLOOP_P
1652 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1654 #undef TARGET_HAVE_COUNT_REG_DECR_P
1655 #define TARGET_HAVE_COUNT_REG_DECR_P true
1657 /* 1000000000 is infinite cost in IVOPTs. */
1658 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1659 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1661 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1662 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1664 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1665 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1667 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1668 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1669 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1670 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1671 #undef TARGET_UNWIND_WORD_MODE
1672 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1674 #undef TARGET_OFFLOAD_OPTIONS
1675 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1677 #undef TARGET_C_MODE_FOR_SUFFIX
1678 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1680 #undef TARGET_INVALID_BINARY_OP
1681 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1683 #undef TARGET_OPTAB_SUPPORTED_P
1684 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1686 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1687 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1689 #undef TARGET_COMPARE_VERSION_PRIORITY
1690 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1692 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1693 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1694 rs6000_generate_version_dispatcher_body
1696 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1697 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1698 rs6000_get_function_versions_dispatcher
1700 #undef TARGET_OPTION_FUNCTION_VERSIONS
1701 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1703 #undef TARGET_HARD_REGNO_NREGS
1704 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1705 #undef TARGET_HARD_REGNO_MODE_OK
1706 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1708 #undef TARGET_MODES_TIEABLE_P
1709 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1711 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1712 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1713 rs6000_hard_regno_call_part_clobbered
1715 #undef TARGET_SLOW_UNALIGNED_ACCESS
1716 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1718 #undef TARGET_CAN_CHANGE_MODE_CLASS
1719 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1721 #undef TARGET_CONSTANT_ALIGNMENT
1722 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1724 #undef TARGET_STARTING_FRAME_OFFSET
1725 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1727 #if TARGET_ELF && RS6000_WEAK
1728 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1729 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1730 #endif
1732 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1733 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1735 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1736 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1739 /* Processor table. */
1740 struct rs6000_ptt
1742 const char *const name; /* Canonical processor name. */
1743 const enum processor_type processor; /* Processor type enum value. */
1744 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1747 static struct rs6000_ptt const processor_target_table[] =
1749 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1750 #include "rs6000-cpus.def"
1751 #undef RS6000_CPU
1754 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1755 name is invalid. */
1757 static int
1758 rs6000_cpu_name_lookup (const char *name)
1760 size_t i;
1762 if (name != NULL)
1764 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1765 if (! strcmp (name, processor_target_table[i].name))
1766 return (int)i;
1769 return -1;
1773 /* Return number of consecutive hard regs needed starting at reg REGNO
1774 to hold something of mode MODE.
1775 This is ordinarily the length in words of a value of mode MODE
1776 but can be less for certain modes in special long registers.
1778 POWER and PowerPC GPRs hold 32 bits worth;
1779 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1781 static int
1782 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1784 unsigned HOST_WIDE_INT reg_size;
1786 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1787 128-bit floating point that can go in vector registers, which has VSX
1788 memory addressing. */
1789 if (FP_REGNO_P (regno))
1790 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1791 ? UNITS_PER_VSX_WORD
1792 : UNITS_PER_FP_WORD);
1794 else if (ALTIVEC_REGNO_P (regno))
1795 reg_size = UNITS_PER_ALTIVEC_WORD;
1797 else
1798 reg_size = UNITS_PER_WORD;
1800 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
  /* Last hard register a MODE value starting at REGNO would occupy.  */
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  /* For complex modes, validate placement of the component (inner) mode;
     the per-register-file checks below only care about scalar size.  */
  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* VSX registers that overlap the FPR registers are larger than for non-VSX
     implementations.  Don't allow an item to be split between a FP register
     and an Altivec register.  Allow TImode in all VSX registers if the user
     asked for it.  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || FLOAT128_VECTOR_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || mode == TImode
	  || (TARGET_VADDUQM && mode == V1TImode)))
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	  /* Scalars (size != 16) are only allowed in the Altivec half of the
	     VSX file when scalar_in_vmx_p says so for this mode.  */
	  if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
      if (FLOAT128_VECTOR_P (mode))
	return false;

      /* TDmode (128-bit decimal float) must start on an even FP register.  */
      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  /* ISA 2.07 (power8) vector support allows SImode in FPRs.  */
	  if (TARGET_P8_VECTOR && (mode == SImode))
	    return 1;

	  /* ISA 3.0 (power9) adds byte/halfword loads and stores to VSX.  */
	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
	    return 1;

      return 0;

  /* The CR register can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  /* The carry (CA) bit only holds word-sized integer modes.  */
  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec only in Altivec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* We cannot put non-VSX TImode or PTImode anywhere except general register
     and it must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1895 /* Implement TARGET_HARD_REGNO_NREGS. */
1897 static unsigned int
1898 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1900 return rs6000_hard_regno_nregs[mode][regno];
1903 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1905 static bool
1906 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1908 return rs6000_hard_regno_mode_ok_p[mode][regno];
1911 /* Implement TARGET_MODES_TIEABLE_P.
1913 PTImode cannot tie with other modes because PTImode is restricted to even
1914 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1915 57744).
1917 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1918 128-bit floating point on VSX systems ties with other vectors. */
1920 static bool
1921 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1923 if (mode1 == PTImode)
1924 return mode2 == PTImode;
1925 if (mode2 == PTImode)
1926 return false;
1928 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1929 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1930 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1931 return false;
1933 if (SCALAR_FLOAT_MODE_P (mode1))
1934 return SCALAR_FLOAT_MODE_P (mode2);
1935 if (SCALAR_FLOAT_MODE_P (mode2))
1936 return false;
1938 if (GET_MODE_CLASS (mode1) == MODE_CC)
1939 return GET_MODE_CLASS (mode2) == MODE_CC;
1940 if (GET_MODE_CLASS (mode2) == MODE_CC)
1941 return false;
1943 return true;
1946 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1948 static bool
1949 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1950 machine_mode mode)
1952 if (TARGET_32BIT
1953 && TARGET_POWERPC64
1954 && GET_MODE_SIZE (mode) > 4
1955 && INT_REGNO_P (regno))
1956 return true;
1958 if (TARGET_VSX
1959 && FP_REGNO_P (regno)
1960 && GET_MODE_SIZE (mode) > 8
1961 && !FLOAT128_2REG_P (mode))
1962 return true;
1964 return false;
/* Print interesting facts about registers.  For each hard register in
   [FIRST_REGNO, LAST_REGNO], dump to stderr the modes it can hold (with
   the register count when more than one is needed), whether it is
   call-used and/or fixed, and its register class.  REG_NAME labels each
   output row.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
  int r, m;

  for (r = first_regno; r <= last_regno; ++r)
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      /* LEN tracks the approximate output column; lines wrap past 70.  */
      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	    if (len > 70)
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";

	    /* Modes needing multiple registers are shown as MODE/NREGS.  */
	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			      rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";

      if (call_used_or_fixed_reg_p (r))
	  if (len > 70)
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";

      if (fixed_regs[r])
	  if (len > 70)
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";

      if (len > 70)
	  fprintf (stderr, ",\n\t");
	  comma = "";

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);
      comma = ", ";

      if (len > 70)
	  fprintf (stderr, ",\n\t");
	  comma = "";

      fprintf (stderr, "%sregno = %d\n", comma, r);
2049 static const char *
2050 rs6000_debug_vector_unit (enum rs6000_vector v)
2052 const char *ret;
2054 switch (v)
2056 case VECTOR_NONE: ret = "none"; break;
2057 case VECTOR_ALTIVEC: ret = "altivec"; break;
2058 case VECTOR_VSX: ret = "vsx"; break;
2059 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2060 default: ret = "unknown"; break;
2063 return ret;
2066 /* Inner function printing just the address mask for a particular reload
2067 register class. */
2068 DEBUG_FUNCTION char *
2069 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2071 static char ret[8];
2072 char *p = ret;
2074 if ((mask & RELOAD_REG_VALID) != 0)
2075 *p++ = 'v';
2076 else if (keep_spaces)
2077 *p++ = ' ';
2079 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2080 *p++ = 'm';
2081 else if (keep_spaces)
2082 *p++ = ' ';
2084 if ((mask & RELOAD_REG_INDEXED) != 0)
2085 *p++ = 'i';
2086 else if (keep_spaces)
2087 *p++ = ' ';
2089 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2090 *p++ = 'O';
2091 else if ((mask & RELOAD_REG_OFFSET) != 0)
2092 *p++ = 'o';
2093 else if (keep_spaces)
2094 *p++ = ' ';
2096 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2097 *p++ = '+';
2098 else if (keep_spaces)
2099 *p++ = ' ';
2101 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2102 *p++ = '+';
2103 else if (keep_spaces)
2104 *p++ = ' ';
2106 if ((mask & RELOAD_REG_AND_M16) != 0)
2107 *p++ = '&';
2108 else if (keep_spaces)
2109 *p++ = ' ';
2111 *p = '\0';
2113 return ret;
/* Print the address masks in a human readable fashion.  Dumps one line to
   stderr for mode M: the addr_mask for each reload register class, then
   the reload handlers, upper-register availability, and vector units.  */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
  ssize_t rc;
  int spaces = 0;

  fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
  for (rc = 0; rc < N_RELOAD_REG; rc++)
    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));

  /* SPACES accumulates the width of skipped columns so that later columns
     still line up across modes.  */
  if ((reg_addr[m].reload_store != CODE_FOR_nothing)
      || (reg_addr[m].reload_load != CODE_FOR_nothing))
      fprintf (stderr, "%*s Reload=%c%c", spaces, "",
	       (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
	       (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
      spaces = 0;
  else
    spaces += sizeof (" Reload=sl") - 1;

  if (reg_addr[m].scalar_in_vmx_p)
      fprintf (stderr, "%*s Upper=y", spaces, "");
      spaces = 0;
  else
    spaces += sizeof (" Upper=y") - 1;

  if (rs6000_vector_unit[m] != VECTOR_NONE
      || rs6000_vector_mem[m] != VECTOR_NONE)
      fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
	       spaces, "",
	       rs6000_debug_vector_unit (rs6000_vector_unit[m]),
	       rs6000_debug_vector_unit (rs6000_vector_mem[m]));

  fputs ("\n", stderr);
/* printf formats shared by the -mdebug=reg dump routines below.
   DEBUG_FMT_ID prints a left-justified 32-column label; the _D, _WX and
   _S variants append a decimal int, a zero-padded hex HOST_WIDE_INT, or
   a string value respectively.  */
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
/* Print various interesting information with -mdebug=reg.  Dumps (to
   stderr) the hard register tables, constraint register classes, per-mode
   address masks, mode tieability, reciprocal estimate settings, and the
   active -mcpu/-mtune/ABI/scheduling options.  */
static void
rs6000_debug_reg_global (void)
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  size_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
    QImode,
    HImode,
    SImode,
    DImode,
    TImode,
    PTImode,
    SFmode,
    DFmode,
    TFmode,
    IFmode,
    KFmode,
    SDmode,
    DDmode,
    TDmode,
    V16QImode,
    V8HImode,
    V4SImode,
    V2DImode,
    V1TImode,
    V32QImode,
    V16HImode,
    V8SImode,
    V4DImode,
    V2TImode,
    V4SFmode,
    V2DFmode,
    V8SFmode,
    V4DFmode,
    CCmode,
    CCUNSmode,
    CCEQmode,

  /* Virtual regs we are interested in.  */
  const static struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM, "stack pointer:" },
    { TOC_REGNUM, "toc: " },
    { STATIC_CHAIN_REGNUM, "static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
    { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
    { ARG_POINTER_REGNUM, "arg pointer: " },
    { FRAME_POINTER_REGNUM, "frame pointer:" },
    { FIRST_PSEUDO_REGISTER, "first pseudo: " },
    { FIRST_VIRTUAL_REGISTER, "first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
    { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
    { LAST_VIRTUAL_REGISTER, "last virtual: " },

  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);

  /* Register classes backing the machine-dependent constraints.  */
  fprintf (stderr,
	   "\n"
	   "d reg_class = %s\n"
	   "f reg_class = %s\n"
	   "v reg_class = %s\n"
	   "wa reg_class = %s\n"
	   "we reg_class = %s\n"
	   "wr reg_class = %s\n"
	   "wx reg_class = %s\n"
	   "wA reg_class = %s\n"
	   "\n",
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);

  nl = "\n";
  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    rs6000_debug_print_mode (m);

  fputs ("\n", stderr);

  /* For each mode of interest, list the other modes it ties with.  */
  for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
	      if (first_time)
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));

      if (!first_time)
	fputs ("\n", stderr);

  if (nl)
    fputs (nl, stderr);

  if (rs6000_recip_control)
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	    fprintf (stderr,
		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		     GET_MODE_NAME (m),
		     (RS6000_RECIP_AUTO_RE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));

      fputs ("\n", stderr);

  /* -mcpu/-mtune selections and the ISA flags they enabled.  */
  if (rs6000_cpu_index >= 0)
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
				rs6000_builtin_mask);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");

  /* Scheduler tuning knobs.  */
  switch (rs6000_sched_costly_dep)
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);

  switch (rs6000_sdata)
    default:
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;

  switch (rs6000_traceback)
    case traceback_default: trace_str = "default"; break;
    case traceback_none: trace_str = "none"; break;
    case traceback_part: trace_str = "part"; break;
    case traceback_full: trace_str = "full"; break;
    default: trace_str = "unknown"; break;

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    case CMODEL_SMALL: cmodel_str = "small"; break;
    case CMODEL_MEDIUM: cmodel_str = "medium"; break;
    case CMODEL_LARGE: cmodel_str = "large"; break;
    default: cmodel_str = "unknown"; break;

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    case ABI_NONE: abi_str = "none"; break;
    case ABI_AIX: abi_str = "aix"; break;
    case ABI_ELFv2: abi_str = "ELFv2"; break;
    case ABI_V4: abi_str = "V4"; break;
    case ABI_DARWIN: abi_str = "darwin"; break;
    default: abi_str = "unknown"; break;

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);

  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  if (TARGET_P8_FUSION)
      char options[80];

      strcpy (options, "power8");
      if (TARGET_P8_FUSION_SIGN)
	strcat (options, ", sign");

      fprintf (stderr, DEBUG_FMT_S, "fusion", options);

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  if (rs6000_long_double_type_size > 64)
      fprintf (stderr, DEBUG_FMT_S, "long double type",
	       TARGET_IEEEQUAD ? "IEEE" : "IBM");
      fprintf (stderr, DEBUG_FMT_S, "default long double type",
	       TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");

  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);
  fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
	   (int)RS6000_BUILTIN_COUNT);

  fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
	   (int)TARGET_FLOAT128_ENABLE_TYPE);

  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);

  if (TARGET_DIRECT_MOVE_128)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
/* Update the addr mask bits in reg_addr, which secondary reload and the
   legitimate-address machinery consult to figure out the appropriate
   addressing forms (REG+REG, REG+OFFSET, pre-increment, etc.) for each
   mode in each reload register class.  */

static void
rs6000_setup_reg_addr_masks (void)
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
      machine_mode m2 = (machine_mode) m;
      bool complex_p = false;
      bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
      size_t msize;

      /* Complex modes are classified by their component (inner) mode.  */
      if (COMPLEX_MODE_P (m2))
	  complex_p = true;
	  m2 = GET_MODE_INNER (m2);

      msize = GET_MODE_SIZE (m2);

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	      bool small_int_vsx_p = (small_int_p
				      && (rc == RELOAD_REG_FPR
					  || rc == RELOAD_REG_VMX));

	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  Small integers in VSX registers can only do
		 REG+REG addressing.  */
	      if (small_int_vsx_p)
		addr_mask |= RELOAD_REG_INDEXED;
	      else if (nregs > 1 || m == BLKmode || complex_p)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  If we allow scalars into Altivec registers,
		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.

		 For VSX systems, we don't allow update addressing for
		 DFmode/SFmode if those registers can go in both the
		 traditional floating point registers and Altivec registers.
		 The load/store instructions for the Altivec registers do not
		 have update forms.  If we allowed update addressing, it seems
		 to break IV-OPT code using floating point if the index type is
		 int instead of long (PR target/81550 and target/84042).  */

	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && msize <= 8
		  && !VECTOR_MODE_P (m2)
		  && !FLOAT128_VECTOR_P (m2)
		  && !complex_p
		  && (m != E_DFmode || !TARGET_VSX)
		  && (m != E_SFmode || !TARGET_P8_VECTOR)
		  && !small_int_vsx_p)
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DFmode:
		    case E_DDmode:
		      if (TARGET_HARD_FLOAT)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && msize <= 8
	      && (rc == RELOAD_REG_GPR
		  || ((msize == 8 || m2 == SFmode)
		      && (rc == RELOAD_REG_FPR
			  || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VSX registers can do REG+OFFSET addressing if ISA 3.0
	     instructions are enabled.  The offset for 128-bit VSX registers is
	     only 12-bits.  While GPRs can handle the full offset range, VSX
	     registers can only handle the restricted range.  */
	  else if ((addr_mask != 0) && !indexed_only_p
		   && msize == 16 && TARGET_P9_VECTOR
		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
		       || (m2 == TImode && TARGET_VSX)))
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		addr_mask |= RELOAD_REG_QUAD_OFFSET;

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && msize == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;

      /* RELOAD_REG_ANY is the union over all reload register classes.  */
      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2682 /* Initialize the various global tables that are based on register size. */
2683 static void
2684 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2686 ssize_t r, m, c;
2687 int align64;
2688 int align32;
2690 /* Precalculate REGNO_REG_CLASS. */
2691 rs6000_regno_regclass[0] = GENERAL_REGS;
2692 for (r = 1; r < 32; ++r)
2693 rs6000_regno_regclass[r] = BASE_REGS;
2695 for (r = 32; r < 64; ++r)
2696 rs6000_regno_regclass[r] = FLOAT_REGS;
2698 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2699 rs6000_regno_regclass[r] = NO_REGS;
2701 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2702 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2704 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2705 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2706 rs6000_regno_regclass[r] = CR_REGS;
2708 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2709 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2710 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2711 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2712 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2713 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2714 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2716 /* Precalculate register class to simpler reload register class. We don't
2717 need all of the register classes that are combinations of different
2718 classes, just the simple ones that have constraint letters. */
2719 for (c = 0; c < N_REG_CLASSES; c++)
2720 reg_class_to_reg_type[c] = NO_REG_TYPE;
2722 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2723 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2724 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2725 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2726 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2728 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2733 if (TARGET_VSX)
2735 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2736 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2738 else
2740 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2741 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2744 /* Precalculate the valid memory formats as well as the vector information,
2745 this must be set up before the rs6000_hard_regno_nregs_internal calls
2746 below. */
2747 gcc_assert ((int)VECTOR_NONE == 0);
2748 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2749 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2751 gcc_assert ((int)CODE_FOR_nothing == 0);
2752 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2754 gcc_assert ((int)NO_REGS == 0);
2755 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2757 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2758 believes it can use native alignment or still uses 128-bit alignment. */
2759 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2761 align64 = 64;
2762 align32 = 32;
2764 else
2766 align64 = 128;
2767 align32 = 128;
2770 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2771 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2772 if (TARGET_FLOAT128_TYPE)
2774 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2775 rs6000_vector_align[KFmode] = 128;
2777 if (FLOAT128_IEEE_P (TFmode))
2779 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2780 rs6000_vector_align[TFmode] = 128;
2784 /* V2DF mode, VSX only. */
2785 if (TARGET_VSX)
2787 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2788 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2789 rs6000_vector_align[V2DFmode] = align64;
2792 /* V4SF mode, either VSX or Altivec. */
2793 if (TARGET_VSX)
2795 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2796 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2797 rs6000_vector_align[V4SFmode] = align32;
2799 else if (TARGET_ALTIVEC)
2801 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2802 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2803 rs6000_vector_align[V4SFmode] = align32;
2806 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2807 and stores. */
2808 if (TARGET_ALTIVEC)
2810 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2811 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2812 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2813 rs6000_vector_align[V4SImode] = align32;
2814 rs6000_vector_align[V8HImode] = align32;
2815 rs6000_vector_align[V16QImode] = align32;
2817 if (TARGET_VSX)
2819 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2820 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2821 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2823 else
2825 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2826 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2827 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2831 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2832 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2833 if (TARGET_VSX)
2835 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2836 rs6000_vector_unit[V2DImode]
2837 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2838 rs6000_vector_align[V2DImode] = align64;
2840 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2841 rs6000_vector_unit[V1TImode]
2842 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2843 rs6000_vector_align[V1TImode] = 128;
2846 /* DFmode, see if we want to use the VSX unit. Memory is handled
2847 differently, so don't set rs6000_vector_mem. */
2848 if (TARGET_VSX)
2850 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2851 rs6000_vector_align[DFmode] = 64;
2854 /* SFmode, see if we want to use the VSX unit. */
2855 if (TARGET_P8_VECTOR)
2857 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2858 rs6000_vector_align[SFmode] = 32;
2861 /* Allow TImode in VSX register and set the VSX memory macros. */
2862 if (TARGET_VSX)
2864 rs6000_vector_mem[TImode] = VECTOR_VSX;
2865 rs6000_vector_align[TImode] = align64;
2868 /* Register class constraints for the constraints that depend on compile
2869 switches. When the VSX code was added, different constraints were added
2870 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2871 of the VSX registers are used. The register classes for scalar floating
2872 point types is set, based on whether we allow that type into the upper
2873 (Altivec) registers. GCC has register classes to target the Altivec
2874 registers for load/store operations, to select using a VSX memory
2875 operation instead of the traditional floating point operation. The
2876 constraints are:
2878 d - Register class to use with traditional DFmode instructions.
2879 f - Register class to use with traditional SFmode instructions.
2880 v - Altivec register.
2881 wa - Any VSX register.
2882 wc - Reserved to represent individual CR bits (used in LLVM).
2883 wn - always NO_REGS.
2884 wr - GPR if 64-bit mode is permitted.
2885 wx - Float register if we can do 32-bit int stores. */
2887 if (TARGET_HARD_FLOAT)
2889 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2890 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2893 if (TARGET_VSX)
2894 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2896 /* Add conditional constraints based on various options, to allow us to
2897 collapse multiple insn patterns. */
2898 if (TARGET_ALTIVEC)
2899 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2901 if (TARGET_POWERPC64)
2903 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2904 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2907 if (TARGET_STFIWX)
2908 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2910 /* Support for new direct moves (ISA 3.0 + 64bit). */
2911 if (TARGET_DIRECT_MOVE_128)
2912 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2914 /* Set up the reload helper and direct move functions. */
2915 if (TARGET_VSX || TARGET_ALTIVEC)
2917 if (TARGET_64BIT)
2919 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2920 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2921 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2922 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2923 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2924 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2925 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2926 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2927 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2928 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2929 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2930 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2931 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2932 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2933 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2934 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2935 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2936 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2937 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2938 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2940 if (FLOAT128_VECTOR_P (KFmode))
2942 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2943 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2946 if (FLOAT128_VECTOR_P (TFmode))
2948 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2949 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2952 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2953 available. */
2954 if (TARGET_NO_SDMODE_STACK)
2956 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2957 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2960 if (TARGET_VSX)
2962 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2963 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2966 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2968 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2969 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2970 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2971 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2972 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2973 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2974 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2975 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2976 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2978 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2979 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2980 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2981 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2982 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2983 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2984 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2985 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2986 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2988 if (FLOAT128_VECTOR_P (KFmode))
2990 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2991 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2994 if (FLOAT128_VECTOR_P (TFmode))
2996 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
2997 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3001 else
3003 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3004 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3005 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3006 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3007 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3008 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3009 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3010 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3011 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3012 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3013 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3014 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3015 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3016 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3017 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3018 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3019 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3020 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3021 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3022 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3024 if (FLOAT128_VECTOR_P (KFmode))
3026 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3027 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3030 if (FLOAT128_IEEE_P (TFmode))
3032 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3033 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3036 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3037 available. */
3038 if (TARGET_NO_SDMODE_STACK)
3040 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3041 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3044 if (TARGET_VSX)
3046 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3047 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3050 if (TARGET_DIRECT_MOVE)
3052 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3053 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3054 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3058 reg_addr[DFmode].scalar_in_vmx_p = true;
3059 reg_addr[DImode].scalar_in_vmx_p = true;
3061 if (TARGET_P8_VECTOR)
3063 reg_addr[SFmode].scalar_in_vmx_p = true;
3064 reg_addr[SImode].scalar_in_vmx_p = true;
3066 if (TARGET_P9_VECTOR)
3068 reg_addr[HImode].scalar_in_vmx_p = true;
3069 reg_addr[QImode].scalar_in_vmx_p = true;
3074 /* Precalculate HARD_REGNO_NREGS. */
3075 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3076 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3077 rs6000_hard_regno_nregs[m][r]
3078 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3080 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3081 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3082 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3083 rs6000_hard_regno_mode_ok_p[m][r]
3084 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3086 /* Precalculate CLASS_MAX_NREGS sizes. */
3087 for (c = 0; c < LIM_REG_CLASSES; ++c)
3089 int reg_size;
3091 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3092 reg_size = UNITS_PER_VSX_WORD;
3094 else if (c == ALTIVEC_REGS)
3095 reg_size = UNITS_PER_ALTIVEC_WORD;
3097 else if (c == FLOAT_REGS)
3098 reg_size = UNITS_PER_FP_WORD;
3100 else
3101 reg_size = UNITS_PER_WORD;
3103 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3105 machine_mode m2 = (machine_mode)m;
3106 int reg_size2 = reg_size;
3108 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3109 in VSX. */
3110 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3111 reg_size2 = UNITS_PER_FP_WORD;
3113 rs6000_class_max_nregs[m][c]
3114 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3118 /* Calculate which modes to automatically generate code to use the
3119 reciprocal divide and square root instructions. In the future, possibly
3120 automatically generate the instructions even if the user did not specify
3121 -mrecip. The older machines double precision reciprocal sqrt estimate is
3122 not accurate enough. */
3123 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3124 if (TARGET_FRES)
3125 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3126 if (TARGET_FRE)
3127 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3128 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3129 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3130 if (VECTOR_UNIT_VSX_P (V2DFmode))
3131 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3133 if (TARGET_FRSQRTES)
3134 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3135 if (TARGET_FRSQRTE)
3136 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3137 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3138 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3139 if (VECTOR_UNIT_VSX_P (V2DFmode))
3140 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3142 if (rs6000_recip_control)
3144 if (!flag_finite_math_only)
3145 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3146 "-ffast-math");
3147 if (flag_trapping_math)
3148 warning (0, "%qs requires %qs or %qs", "-mrecip",
3149 "-fno-trapping-math", "-ffast-math");
3150 if (!flag_reciprocal_math)
3151 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3152 "-ffast-math");
3153 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3155 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3156 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3157 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3159 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3160 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3161 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3163 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3164 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3165 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3167 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3168 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3169 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3171 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3172 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3173 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3175 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3176 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3177 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3179 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3180 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3181 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3183 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3184 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3185 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3189 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3190 legitimate address support to figure out the appropriate addressing to
3191 use. */
3192 rs6000_setup_reg_addr_masks ();
3194 if (global_init_p || TARGET_DEBUG_TARGET)
3196 if (TARGET_DEBUG_REG)
3197 rs6000_debug_reg_global ();
3199 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3200 fprintf (stderr,
3201 "SImode variable mult cost = %d\n"
3202 "SImode constant mult cost = %d\n"
3203 "SImode short constant mult cost = %d\n"
3204 "DImode multipliciation cost = %d\n"
3205 "SImode division cost = %d\n"
3206 "DImode division cost = %d\n"
3207 "Simple fp operation cost = %d\n"
3208 "DFmode multiplication cost = %d\n"
3209 "SFmode division cost = %d\n"
3210 "DFmode division cost = %d\n"
3211 "cache line size = %d\n"
3212 "l1 cache size = %d\n"
3213 "l2 cache size = %d\n"
3214 "simultaneous prefetches = %d\n"
3215 "\n",
3216 rs6000_cost->mulsi,
3217 rs6000_cost->mulsi_const,
3218 rs6000_cost->mulsi_const9,
3219 rs6000_cost->muldi,
3220 rs6000_cost->divsi,
3221 rs6000_cost->divdi,
3222 rs6000_cost->fp,
3223 rs6000_cost->dmul,
3224 rs6000_cost->sdiv,
3225 rs6000_cost->ddiv,
3226 rs6000_cost->cache_line_size,
3227 rs6000_cost->l1_cache_size,
3228 rs6000_cost->l2_cache_size,
3229 rs6000_cost->simultaneous_prefetches);
3233 #if TARGET_MACHO
3234 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3236 static void
3237 darwin_rs6000_override_options (void)
3239 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3240 off. */
3241 rs6000_altivec_abi = 1;
3242 TARGET_ALTIVEC_VRSAVE = 1;
3243 rs6000_current_abi = ABI_DARWIN;
3245 if (DEFAULT_ABI == ABI_DARWIN
3246 && TARGET_64BIT)
3247 darwin_one_byte_bool = 1;
3249 if (TARGET_64BIT && ! TARGET_POWERPC64)
3251 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3252 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3255 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3256 optimisation, and will not work with the most generic case (where the
3257 symbol is undefined external, but there is no symbl stub). */
3258 if (TARGET_64BIT)
3259 rs6000_default_long_calls = 0;
3261 /* ld_classic is (so far) still used for kernel (static) code, and supports
3262 the JBSR longcall / branch islands. */
3263 if (flag_mkernel)
3265 rs6000_default_long_calls = 1;
3267 /* Allow a kext author to do -mkernel -mhard-float. */
3268 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3269 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3272 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3273 Altivec. */
3274 if (!flag_mkernel && !flag_apple_kext
3275 && TARGET_64BIT
3276 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3277 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3279 /* Unless the user (not the configurer) has explicitly overridden
3280 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3281 G4 unless targeting the kernel. */
3282 if (!flag_mkernel
3283 && !flag_apple_kext
3284 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3285 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3286 && ! global_options_set.x_rs6000_cpu_index)
3288 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3291 #endif
3293 /* If not otherwise specified by a target, make 'long double' equivalent to
3294 'double'. */
3296 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3297 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3298 #endif
3300 /* Return the builtin mask of the various options used that could affect which
3301 builtins were used. In the past we used target_flags, but we've run out of
3302 bits, and some options are no longer in target_flags. */
3304 HOST_WIDE_INT
3305 rs6000_builtin_mask_calculate (void)
3307 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3308 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3309 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3310 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3311 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3312 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3313 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3314 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3315 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3316 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3317 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3318 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3319 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3320 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3321 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3322 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3323 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3324 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3325 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3326 | ((TARGET_LONG_DOUBLE_128
3327 && TARGET_HARD_FLOAT
3328 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3329 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3330 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
3333 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3334 to clobber the XER[CA] bit because clobbering that bit without telling
3335 the compiler worked just fine with versions of GCC before GCC 5, and
3336 breaking a lot of older code in ways that are hard to track down is
3337 not such a great idea. */
3339 static rtx_insn *
3340 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3341 vec<const char *> &/*constraints*/,
3342 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3344 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3345 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3346 return NULL;
3349 /* Override command line options.
3351 Combine build-specific configuration information with options
3352 specified on the command line to set various state variables which
3353 influence code generation, optimization, and expansion of built-in
3354 functions. Assure that command-line configuration preferences are
3355 compatible with each other and with the build configuration; issue
3356 warnings while adjusting configuration or error messages while
3357 rejecting configuration.
3359 Upon entry to this function:
3361 This function is called once at the beginning of
3362 compilation, and then again at the start and end of compiling
3363 each section of code that has a different configuration, as
3364 indicated, for example, by adding the
3366 __attribute__((__target__("cpu=power9")))
3368 qualifier to a function definition or, for example, by bracketing
3369 code between
3371 #pragma GCC target("altivec")
3375 #pragma GCC reset_options
3377 directives. Parameter global_init_p is true for the initial
3378 invocation, which initializes global variables, and false for all
3379 subsequent invocations.
3382 Various global state information is assumed to be valid. This
3383 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3384 default CPU specified at build configure time, TARGET_DEFAULT,
3385 representing the default set of option flags for the default
3386 target, and global_options_set.x_rs6000_isa_flags, representing
3387 which options were requested on the command line.
3389 Upon return from this function:
3391 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3392 was set by name on the command line. Additionally, if certain
3393 attributes are automatically enabled or disabled by this function
3394 in order to assure compatibility between options and
3395 configuration, the flags associated with those attributes are
3396 also set. By setting these "explicit bits", we avoid the risk
3397 that other code might accidentally overwrite these particular
3398 attributes with "default values".
3400 The various bits of rs6000_isa_flags are set to indicate the
3401 target options that have been selected for the most current
3402 compilation efforts. This has the effect of also turning on the
3403 associated TARGET_XXX values since these are macros which are
3404 generally defined to test the corresponding bit of the
3405 rs6000_isa_flags variable.
3407 The variable rs6000_builtin_mask is set to represent the target
3408 options for the most current compilation efforts, consistent with
3409 the current contents of rs6000_isa_flags. This variable controls
3410 expansion of built-in functions.
3412 Various other global variables and fields of global structures
3413 (over 50 in all) are initialized to reflect the desired options
3414 for the most current compilation efforts. */
3416 static bool
3417 rs6000_option_override_internal (bool global_init_p)
3419 bool ret = true;
3421 HOST_WIDE_INT set_masks;
3422 HOST_WIDE_INT ignore_masks;
3423 int cpu_index = -1;
3424 int tune_index;
3425 struct cl_target_option *main_target_opt
3426 = ((global_init_p || target_option_default_node == NULL)
3427 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3429 /* Print defaults. */
3430 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3431 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3433 /* Remember the explicit arguments. */
3434 if (global_init_p)
3435 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3437 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3438 library functions, so warn about it. The flag may be useful for
3439 performance studies from time to time though, so don't disable it
3440 entirely. */
3441 if (global_options_set.x_rs6000_alignment_flags
3442 && rs6000_alignment_flags == MASK_ALIGN_POWER
3443 && DEFAULT_ABI == ABI_DARWIN
3444 && TARGET_64BIT)
3445 warning (0, "%qs is not supported for 64-bit Darwin;"
3446 " it is incompatible with the installed C and C++ libraries",
3447 "-malign-power");
3449 /* Numerous experiments show that IRA based loop pressure
3450 calculation works better for RTL loop invariant motion on targets
3451 with enough (>= 32) registers. It is an expensive optimization.
3452 So it is on only for peak performance. */
3453 if (optimize >= 3 && global_init_p
3454 && !global_options_set.x_flag_ira_loop_pressure)
3455 flag_ira_loop_pressure = 1;
3457 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3458 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3459 options were already specified. */
3460 if (flag_sanitize & SANITIZE_USER_ADDRESS
3461 && !global_options_set.x_flag_asynchronous_unwind_tables)
3462 flag_asynchronous_unwind_tables = 1;
3464 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3465 loop unroller is active. It is only checked during unrolling, so
3466 we can just set it on by default. */
3467 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3468 flag_variable_expansion_in_unroller = 1;
3470 /* Set the pointer size. */
3471 if (TARGET_64BIT)
3473 rs6000_pmode = DImode;
3474 rs6000_pointer_size = 64;
3476 else
3478 rs6000_pmode = SImode;
3479 rs6000_pointer_size = 32;
3482 /* Some OSs don't support saving the high part of 64-bit registers on context
3483 switch. Other OSs don't support saving Altivec registers. On those OSs,
3484 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3485 if the user wants either, the user must explicitly specify them and we
3486 won't interfere with the user's specification. */
3488 set_masks = POWERPC_MASKS;
3489 #ifdef OS_MISSING_POWERPC64
3490 if (OS_MISSING_POWERPC64)
3491 set_masks &= ~OPTION_MASK_POWERPC64;
3492 #endif
3493 #ifdef OS_MISSING_ALTIVEC
3494 if (OS_MISSING_ALTIVEC)
3495 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3496 | OTHER_VSX_VECTOR_MASKS);
3497 #endif
3499 /* Don't override by the processor default if given explicitly. */
3500 set_masks &= ~rs6000_isa_flags_explicit;
3502 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3503 the cpu in a target attribute or pragma, but did not specify a tuning
3504 option, use the cpu for the tuning option rather than the option specified
3505 with -mtune on the command line. Process a '--with-cpu' configuration
3506 request as an implicit --cpu. */
3507 if (rs6000_cpu_index >= 0)
3508 cpu_index = rs6000_cpu_index;
3509 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3510 cpu_index = main_target_opt->x_rs6000_cpu_index;
3511 else if (OPTION_TARGET_CPU_DEFAULT)
3512 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3514 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3515 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3516 with those from the cpu, except for options that were explicitly set. If
3517 we don't have a cpu, do not override the target bits set in
3518 TARGET_DEFAULT. */
3519 if (cpu_index >= 0)
3521 rs6000_cpu_index = cpu_index;
3522 rs6000_isa_flags &= ~set_masks;
3523 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3524 & set_masks);
3526 else
3528 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3529 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3530 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3531 to using rs6000_isa_flags, we need to do the initialization here.
3533 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3534 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3535 HOST_WIDE_INT flags;
3536 if (TARGET_DEFAULT)
3537 flags = TARGET_DEFAULT;
3538 else
3540 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3541 const char *default_cpu = (!TARGET_POWERPC64
3542 ? "powerpc"
3543 : (BYTES_BIG_ENDIAN
3544 ? "powerpc64"
3545 : "powerpc64le"));
3546 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3547 flags = processor_target_table[default_cpu_index].target_enable;
3549 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3552 if (rs6000_tune_index >= 0)
3553 tune_index = rs6000_tune_index;
3554 else if (cpu_index >= 0)
3555 rs6000_tune_index = tune_index = cpu_index;
3556 else
3558 size_t i;
3559 enum processor_type tune_proc
3560 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3562 tune_index = -1;
3563 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3564 if (processor_target_table[i].processor == tune_proc)
3566 tune_index = i;
3567 break;
3571 if (cpu_index >= 0)
3572 rs6000_cpu = processor_target_table[cpu_index].processor;
3573 else
3574 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3576 gcc_assert (tune_index >= 0);
3577 rs6000_tune = processor_target_table[tune_index].processor;
3579 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3580 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3581 || rs6000_cpu == PROCESSOR_PPCE5500)
3583 if (TARGET_ALTIVEC)
3584 error ("AltiVec not supported in this target");
3587 /* If we are optimizing big endian systems for space, use the load/store
3588 multiple instructions. */
3589 if (BYTES_BIG_ENDIAN && optimize_size)
3590 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3592 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3593 because the hardware doesn't support the instructions used in little
3594 endian mode, and causes an alignment trap. The 750 does not cause an
3595 alignment trap (except when the target is unaligned). */
3597 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3599 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3600 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3601 warning (0, "%qs is not supported on little endian systems",
3602 "-mmultiple");
3605 /* If little-endian, default to -mstrict-align on older processors.
3606 Testing for htm matches power8 and later. */
3607 if (!BYTES_BIG_ENDIAN
3608 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3609 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3611 if (!rs6000_fold_gimple)
3612 fprintf (stderr,
3613 "gimple folding of rs6000 builtins has been disabled.\n");
3615 /* Add some warnings for VSX. */
3616 if (TARGET_VSX)
3618 const char *msg = NULL;
3619 if (!TARGET_HARD_FLOAT)
3621 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3622 msg = N_("%<-mvsx%> requires hardware floating point");
3623 else
3625 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3626 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3629 else if (TARGET_AVOID_XFORM > 0)
3630 msg = N_("%<-mvsx%> needs indexed addressing");
3631 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3632 & OPTION_MASK_ALTIVEC))
3634 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3635 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3636 else
3637 msg = N_("%<-mno-altivec%> disables vsx");
3640 if (msg)
3642 warning (0, msg);
3643 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3644 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3648 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3649 the -mcpu setting to enable options that conflict. */
3650 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3651 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3652 | OPTION_MASK_ALTIVEC
3653 | OPTION_MASK_VSX)) != 0)
3654 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3655 | OPTION_MASK_DIRECT_MOVE)
3656 & ~rs6000_isa_flags_explicit);
3658 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3659 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3661 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3662 off all of the options that depend on those flags. */
3663 ignore_masks = rs6000_disable_incompatible_switches ();
3665 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3666 unless the user explicitly used the -mno-<option> to disable the code. */
3667 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3668 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3669 else if (TARGET_P9_MINMAX)
3671 if (cpu_index >= 0)
3673 if (cpu_index == PROCESSOR_POWER9)
3675 /* legacy behavior: allow -mcpu=power9 with certain
3676 capabilities explicitly disabled. */
3677 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3679 else
3680 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3681 "for <xxx> less than power9", "-mcpu");
3683 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3684 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3685 & rs6000_isa_flags_explicit))
3686 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3687 were explicitly cleared. */
3688 error ("%qs incompatible with explicitly disabled options",
3689 "-mpower9-minmax");
3690 else
3691 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3693 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3694 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3695 else if (TARGET_VSX)
3696 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3697 else if (TARGET_POPCNTD)
3698 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3699 else if (TARGET_DFP)
3700 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3701 else if (TARGET_CMPB)
3702 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3703 else if (TARGET_FPRND)
3704 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3705 else if (TARGET_POPCNTB)
3706 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3707 else if (TARGET_ALTIVEC)
3708 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3710 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3712 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3713 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3714 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3717 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3719 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3720 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3721 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3724 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3726 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3727 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3728 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3731 if (TARGET_P8_VECTOR && !TARGET_VSX)
3733 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3734 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3735 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3736 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3738 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3739 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3740 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3742 else
3744 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3745 not explicit. */
3746 rs6000_isa_flags |= OPTION_MASK_VSX;
3747 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3751 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3753 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3754 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3755 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3758 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3759 silently turn off quad memory mode. */
3760 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3762 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3763 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3765 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3766 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3768 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3769 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3772 /* Non-atomic quad memory load/store are disabled for little endian, since
3773 the words are reversed, but atomic operations can still be done by
3774 swapping the words. */
3775 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3777 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3778 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3779 "mode"));
3781 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3784 /* Assume if the user asked for normal quad memory instructions, they want
3785 the atomic versions as well, unless they explicitly told us not to use quad
3786 word atomic instructions. */
3787 if (TARGET_QUAD_MEMORY
3788 && !TARGET_QUAD_MEMORY_ATOMIC
3789 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3790 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3792 /* If we can shrink-wrap the TOC register save separately, then use
3793 -msave-toc-indirect unless explicitly disabled. */
3794 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3795 && flag_shrink_wrap_separate
3796 && optimize_function_for_speed_p (cfun))
3797 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3799 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3800 generating power8 instructions. Power9 does not optimize power8 fusion
3801 cases. */
3802 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3804 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3805 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3806 else
3807 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3810 /* Setting additional fusion flags turns on base fusion. */
3811 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3813 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3815 if (TARGET_P8_FUSION_SIGN)
3816 error ("%qs requires %qs", "-mpower8-fusion-sign",
3817 "-mpower8-fusion");
3819 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3821 else
3822 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3825 /* Power8 does not fuse sign extended loads with the addis. If we are
3826 optimizing at high levels for speed, convert a sign extended load into a
3827 zero extending load, and an explicit sign extension. */
3828 if (TARGET_P8_FUSION
3829 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3830 && optimize_function_for_speed_p (cfun)
3831 && optimize >= 3)
3832 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3834 /* ISA 3.0 vector instructions include ISA 2.07. */
3835 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3837 /* We prefer to not mention undocumented options in
3838 error messages. However, if users have managed to select
3839 power9-vector without selecting power8-vector, they
3840 already know about undocumented flags. */
3841 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
3842 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3843 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3844 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3846 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3847 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3848 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3850 else
3852 /* OPTION_MASK_P9_VECTOR is explicit and
3853 OPTION_MASK_P8_VECTOR is not explicit. */
3854 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3855 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3859 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
3860 support. If we only have ISA 2.06 support, and the user did not specify
3861 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3862 but we don't enable the full vectorization support */
3863 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3864 TARGET_ALLOW_MOVMISALIGN = 1;
3866 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3868 if (TARGET_ALLOW_MOVMISALIGN > 0
3869 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3870 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3872 TARGET_ALLOW_MOVMISALIGN = 0;
3875 /* Determine when unaligned vector accesses are permitted, and when
3876 they are preferred over masked Altivec loads. Note that if
3877 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3878 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3879 not true. */
3880 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3882 if (!TARGET_VSX)
3884 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3885 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3887 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3890 else if (!TARGET_ALLOW_MOVMISALIGN)
3892 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3893 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3894 "-mallow-movmisalign");
3896 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3900 /* Use long double size to select the appropriate long double. We use
3901 TYPE_PRECISION to differentiate the 3 different long double types. We map
3902 128 into the precision used for TFmode. */
3903 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3904 ? 64
3905 : FLOAT_PRECISION_TFmode);
3907 /* Set long double size before the IEEE 128-bit tests. */
3908 if (!global_options_set.x_rs6000_long_double_type_size)
3910 if (main_target_opt != NULL
3911 && (main_target_opt->x_rs6000_long_double_type_size
3912 != default_long_double_size))
3913 error ("target attribute or pragma changes %<long double%> size");
3914 else
3915 rs6000_long_double_type_size = default_long_double_size;
3917 else if (rs6000_long_double_type_size == 128)
3918 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3919 else if (global_options_set.x_rs6000_ieeequad)
3921 if (global_options.x_rs6000_ieeequad)
3922 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3923 else
3924 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3927 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3928 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3929 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3930 those systems will not pick up this default. Warn if the user changes the
3931 default unless -Wno-psabi. */
3932 if (!global_options_set.x_rs6000_ieeequad)
3933 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3935 else
3937 if (global_options.x_rs6000_ieeequad
3938 && (!TARGET_POPCNTD || !TARGET_VSX))
3939 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3941 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3943 static bool warned_change_long_double;
3944 if (!warned_change_long_double)
3946 warned_change_long_double = true;
3947 if (TARGET_IEEEQUAD)
3948 warning (OPT_Wpsabi, "Using IEEE extended precision "
3949 "%<long double%>");
3950 else
3951 warning (OPT_Wpsabi, "Using IBM extended precision "
3952 "%<long double%>");
3957 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3958 systems.  In GCC 7, we would enable the IEEE 128-bit floating point
3959 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3960 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3961 the keyword as well as the type. */
3962 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3964 /* IEEE 128-bit floating point requires VSX support. */
3965 if (TARGET_FLOAT128_KEYWORD)
3967 if (!TARGET_VSX)
3969 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3970 error ("%qs requires VSX support", "%<-mfloat128%>");
3972 TARGET_FLOAT128_TYPE = 0;
3973 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3974 | OPTION_MASK_FLOAT128_HW);
3976 else if (!TARGET_FLOAT128_TYPE)
3978 TARGET_FLOAT128_TYPE = 1;
3979 warning (0, "The %<-mfloat128%> option may not be fully supported");
3983 /* Enable the __float128 keyword under Linux by default. */
3984 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3985 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3986 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
3988 /* If we are supporting the float128 type and full ISA 3.0 support,
3989 enable -mfloat128-hardware by default. However, don't enable the
3990 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
3991 because sometimes the compiler wants to put things in an integer
3992 container, and if we don't have __int128 support, it is impossible. */
3993 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
3994 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
3995 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
3996 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
3998 if (TARGET_FLOAT128_HW
3999 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4001 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4002 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4004 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4007 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4009 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4010 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4012 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4015 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4016 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4018 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4019 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4020 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4021 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4023 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4026 /* -mpcrel requires prefixed load/store addressing. */
4027 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4029 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4030 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4032 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4035 /* Print the options after updating the defaults. */
4036 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4037 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4039 /* E500mc does "better" if we inline more aggressively. Respect the
4040 user's opinion, though. */
4041 if (rs6000_block_move_inline_limit == 0
4042 && (rs6000_tune == PROCESSOR_PPCE500MC
4043 || rs6000_tune == PROCESSOR_PPCE500MC64
4044 || rs6000_tune == PROCESSOR_PPCE5500
4045 || rs6000_tune == PROCESSOR_PPCE6500))
4046 rs6000_block_move_inline_limit = 128;
4048 /* store_one_arg depends on expand_block_move to handle at least the
4049 size of reg_parm_stack_space. */
4050 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4051 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4053 if (global_init_p)
4055 /* If the appropriate debug option is enabled, replace the target hooks
4056 with debug versions that call the real version and then prints
4057 debugging information. */
4058 if (TARGET_DEBUG_COST)
4060 targetm.rtx_costs = rs6000_debug_rtx_costs;
4061 targetm.address_cost = rs6000_debug_address_cost;
4062 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4065 if (TARGET_DEBUG_ADDR)
4067 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4068 targetm.legitimize_address = rs6000_debug_legitimize_address;
4069 rs6000_secondary_reload_class_ptr
4070 = rs6000_debug_secondary_reload_class;
4071 targetm.secondary_memory_needed
4072 = rs6000_debug_secondary_memory_needed;
4073 targetm.can_change_mode_class
4074 = rs6000_debug_can_change_mode_class;
4075 rs6000_preferred_reload_class_ptr
4076 = rs6000_debug_preferred_reload_class;
4077 rs6000_mode_dependent_address_ptr
4078 = rs6000_debug_mode_dependent_address;
4081 if (rs6000_veclibabi_name)
4083 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4084 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4085 else
4087 error ("unknown vectorization library ABI type (%qs) for "
4088 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4089 ret = false;
4094 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4095 target attribute or pragma which automatically enables both options,
4096 unless the altivec ABI was set. This is set by default for 64-bit, but
4097 not for 32-bit. */
4098 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4100 TARGET_FLOAT128_TYPE = 0;
4101 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4102 | OPTION_MASK_FLOAT128_KEYWORD)
4103 & ~rs6000_isa_flags_explicit);
4106 /* Enable Altivec ABI for AIX -maltivec. */
4107 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4109 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4110 error ("target attribute or pragma changes AltiVec ABI");
4111 else
4112 rs6000_altivec_abi = 1;
4115 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4116 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4117 be explicitly overridden in either case. */
4118 if (TARGET_ELF)
4120 if (!global_options_set.x_rs6000_altivec_abi
4121 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4123 if (main_target_opt != NULL &&
4124 !main_target_opt->x_rs6000_altivec_abi)
4125 error ("target attribute or pragma changes AltiVec ABI");
4126 else
4127 rs6000_altivec_abi = 1;
4131 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4132 So far, the only darwin64 targets are also MACH-O. */
4133 if (TARGET_MACHO
4134 && DEFAULT_ABI == ABI_DARWIN
4135 && TARGET_64BIT)
4137 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4138 error ("target attribute or pragma changes darwin64 ABI");
4139 else
4141 rs6000_darwin64_abi = 1;
4142 /* Default to natural alignment, for better performance. */
4143 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4147 /* Place FP constants in the constant pool instead of TOC
4148 if section anchors enabled. */
4149 if (flag_section_anchors
4150 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4151 TARGET_NO_FP_IN_TOC = 1;
4153 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4154 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4156 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4157 SUBTARGET_OVERRIDE_OPTIONS;
4158 #endif
4159 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4160 SUBSUBTARGET_OVERRIDE_OPTIONS;
4161 #endif
4162 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4163 SUB3TARGET_OVERRIDE_OPTIONS;
4164 #endif
4166 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4167 after the subtarget override options are done. */
4168 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4170 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4171 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4173 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4176 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4177 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4179 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4180 && rs6000_tune != PROCESSOR_POWER5
4181 && rs6000_tune != PROCESSOR_POWER6
4182 && rs6000_tune != PROCESSOR_POWER7
4183 && rs6000_tune != PROCESSOR_POWER8
4184 && rs6000_tune != PROCESSOR_POWER9
4185 && rs6000_tune != PROCESSOR_FUTURE
4186 && rs6000_tune != PROCESSOR_PPCA2
4187 && rs6000_tune != PROCESSOR_CELL
4188 && rs6000_tune != PROCESSOR_PPC476);
4189 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4190 || rs6000_tune == PROCESSOR_POWER5
4191 || rs6000_tune == PROCESSOR_POWER7
4192 || rs6000_tune == PROCESSOR_POWER8);
4193 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4194 || rs6000_tune == PROCESSOR_POWER5
4195 || rs6000_tune == PROCESSOR_POWER6
4196 || rs6000_tune == PROCESSOR_POWER7
4197 || rs6000_tune == PROCESSOR_POWER8
4198 || rs6000_tune == PROCESSOR_POWER9
4199 || rs6000_tune == PROCESSOR_FUTURE
4200 || rs6000_tune == PROCESSOR_PPCE500MC
4201 || rs6000_tune == PROCESSOR_PPCE500MC64
4202 || rs6000_tune == PROCESSOR_PPCE5500
4203 || rs6000_tune == PROCESSOR_PPCE6500);
4205 /* Allow debug switches to override the above settings. These are set to -1
4206 in rs6000.opt to indicate the user hasn't directly set the switch. */
4207 if (TARGET_ALWAYS_HINT >= 0)
4208 rs6000_always_hint = TARGET_ALWAYS_HINT;
4210 if (TARGET_SCHED_GROUPS >= 0)
4211 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4213 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4214 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4216 rs6000_sched_restricted_insns_priority
4217 = (rs6000_sched_groups ? 1 : 0);
4219 /* Handle -msched-costly-dep option. */
4220 rs6000_sched_costly_dep
4221 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4223 if (rs6000_sched_costly_dep_str)
4225 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4226 rs6000_sched_costly_dep = no_dep_costly;
4227 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4228 rs6000_sched_costly_dep = all_deps_costly;
4229 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4230 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4231 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4232 rs6000_sched_costly_dep = store_to_load_dep_costly;
4233 else
4234 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4235 atoi (rs6000_sched_costly_dep_str));
4238 /* Handle -minsert-sched-nops option. */
4239 rs6000_sched_insert_nops
4240 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4242 if (rs6000_sched_insert_nops_str)
4244 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4245 rs6000_sched_insert_nops = sched_finish_none;
4246 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4247 rs6000_sched_insert_nops = sched_finish_pad_groups;
4248 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4249 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4250 else
4251 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4252 atoi (rs6000_sched_insert_nops_str));
4255 /* Handle stack protector */
4256 if (!global_options_set.x_rs6000_stack_protector_guard)
4257 #ifdef TARGET_THREAD_SSP_OFFSET
4258 rs6000_stack_protector_guard = SSP_TLS;
4259 #else
4260 rs6000_stack_protector_guard = SSP_GLOBAL;
4261 #endif
4263 #ifdef TARGET_THREAD_SSP_OFFSET
4264 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4265 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4266 #endif
4268 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4270 char *endp;
4271 const char *str = rs6000_stack_protector_guard_offset_str;
4273 errno = 0;
4274 long offset = strtol (str, &endp, 0);
4275 if (!*str || *endp || errno)
4276 error ("%qs is not a valid number in %qs", str,
4277 "-mstack-protector-guard-offset=");
4279 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4280 || (TARGET_64BIT && (offset & 3)))
4281 error ("%qs is not a valid offset in %qs", str,
4282 "-mstack-protector-guard-offset=");
4284 rs6000_stack_protector_guard_offset = offset;
4287 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4289 const char *str = rs6000_stack_protector_guard_reg_str;
4290 int reg = decode_reg_name (str);
4292 if (!IN_RANGE (reg, 1, 31))
4293 error ("%qs is not a valid base register in %qs", str,
4294 "-mstack-protector-guard-reg=");
4296 rs6000_stack_protector_guard_reg = reg;
4299 if (rs6000_stack_protector_guard == SSP_TLS
4300 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4301 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4303 if (global_init_p)
4305 #ifdef TARGET_REGNAMES
4306 /* If the user desires alternate register names, copy in the
4307 alternate names now. */
4308 if (TARGET_REGNAMES)
4309 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4310 #endif
4312 /* Set aix_struct_return last, after the ABI is determined.
4313 If -maix-struct-return or -msvr4-struct-return was explicitly
4314 used, don't override with the ABI default. */
4315 if (!global_options_set.x_aix_struct_return)
4316 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4318 #if 0
4319 /* IBM XL compiler defaults to unsigned bitfields. */
4320 if (TARGET_XL_COMPAT)
4321 flag_signed_bitfields = 0;
4322 #endif
4324 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4325 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4327 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4329 /* We can only guarantee the availability of DI pseudo-ops when
4330 assembling for 64-bit targets. */
4331 if (!TARGET_64BIT)
4333 targetm.asm_out.aligned_op.di = NULL;
4334 targetm.asm_out.unaligned_op.di = NULL;
4338 /* Set branch target alignment, if not optimizing for size. */
4339 if (!optimize_size)
4341 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4342 aligned 8byte to avoid misprediction by the branch predictor. */
4343 if (rs6000_tune == PROCESSOR_TITAN
4344 || rs6000_tune == PROCESSOR_CELL)
4346 if (flag_align_functions && !str_align_functions)
4347 str_align_functions = "8";
4348 if (flag_align_jumps && !str_align_jumps)
4349 str_align_jumps = "8";
4350 if (flag_align_loops && !str_align_loops)
4351 str_align_loops = "8";
4353 if (rs6000_align_branch_targets)
4355 if (flag_align_functions && !str_align_functions)
4356 str_align_functions = "16";
4357 if (flag_align_jumps && !str_align_jumps)
4358 str_align_jumps = "16";
4359 if (flag_align_loops && !str_align_loops)
4361 can_override_loop_align = 1;
4362 str_align_loops = "16";
4366 if (flag_align_jumps && !str_align_jumps)
4367 str_align_jumps = "16";
4368 if (flag_align_loops && !str_align_loops)
4369 str_align_loops = "16";
4372 /* Arrange to save and restore machine status around nested functions. */
4373 init_machine_status = rs6000_init_machine_status;
4375 /* We should always be splitting complex arguments, but we can't break
4376 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4377 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4378 targetm.calls.split_complex_arg = NULL;
4380 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4381 if (DEFAULT_ABI == ABI_AIX)
4382 targetm.calls.custom_function_descriptors = 0;
4385 /* Initialize rs6000_cost with the appropriate target costs. */
4386 if (optimize_size)
4387 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4388 else
4389 switch (rs6000_tune)
4391 case PROCESSOR_RS64A:
4392 rs6000_cost = &rs64a_cost;
4393 break;
4395 case PROCESSOR_MPCCORE:
4396 rs6000_cost = &mpccore_cost;
4397 break;
4399 case PROCESSOR_PPC403:
4400 rs6000_cost = &ppc403_cost;
4401 break;
4403 case PROCESSOR_PPC405:
4404 rs6000_cost = &ppc405_cost;
4405 break;
4407 case PROCESSOR_PPC440:
4408 rs6000_cost = &ppc440_cost;
4409 break;
4411 case PROCESSOR_PPC476:
4412 rs6000_cost = &ppc476_cost;
4413 break;
4415 case PROCESSOR_PPC601:
4416 rs6000_cost = &ppc601_cost;
4417 break;
4419 case PROCESSOR_PPC603:
4420 rs6000_cost = &ppc603_cost;
4421 break;
4423 case PROCESSOR_PPC604:
4424 rs6000_cost = &ppc604_cost;
4425 break;
4427 case PROCESSOR_PPC604e:
4428 rs6000_cost = &ppc604e_cost;
4429 break;
4431 case PROCESSOR_PPC620:
4432 rs6000_cost = &ppc620_cost;
4433 break;
4435 case PROCESSOR_PPC630:
4436 rs6000_cost = &ppc630_cost;
4437 break;
4439 case PROCESSOR_CELL:
4440 rs6000_cost = &ppccell_cost;
4441 break;
4443 case PROCESSOR_PPC750:
4444 case PROCESSOR_PPC7400:
4445 rs6000_cost = &ppc750_cost;
4446 break;
4448 case PROCESSOR_PPC7450:
4449 rs6000_cost = &ppc7450_cost;
4450 break;
4452 case PROCESSOR_PPC8540:
4453 case PROCESSOR_PPC8548:
4454 rs6000_cost = &ppc8540_cost;
4455 break;
4457 case PROCESSOR_PPCE300C2:
4458 case PROCESSOR_PPCE300C3:
4459 rs6000_cost = &ppce300c2c3_cost;
4460 break;
4462 case PROCESSOR_PPCE500MC:
4463 rs6000_cost = &ppce500mc_cost;
4464 break;
4466 case PROCESSOR_PPCE500MC64:
4467 rs6000_cost = &ppce500mc64_cost;
4468 break;
4470 case PROCESSOR_PPCE5500:
4471 rs6000_cost = &ppce5500_cost;
4472 break;
4474 case PROCESSOR_PPCE6500:
4475 rs6000_cost = &ppce6500_cost;
4476 break;
4478 case PROCESSOR_TITAN:
4479 rs6000_cost = &titan_cost;
4480 break;
4482 case PROCESSOR_POWER4:
4483 case PROCESSOR_POWER5:
4484 rs6000_cost = &power4_cost;
4485 break;
4487 case PROCESSOR_POWER6:
4488 rs6000_cost = &power6_cost;
4489 break;
4491 case PROCESSOR_POWER7:
4492 rs6000_cost = &power7_cost;
4493 break;
4495 case PROCESSOR_POWER8:
4496 rs6000_cost = &power8_cost;
4497 break;
4499 case PROCESSOR_POWER9:
4500 case PROCESSOR_FUTURE:
4501 rs6000_cost = &power9_cost;
4502 break;
4504 case PROCESSOR_PPCA2:
4505 rs6000_cost = &ppca2_cost;
4506 break;
4508 default:
4509 gcc_unreachable ();
4512 if (global_init_p)
4514 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4515 rs6000_cost->simultaneous_prefetches,
4516 global_options.x_param_values,
4517 global_options_set.x_param_values);
4518 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4519 global_options.x_param_values,
4520 global_options_set.x_param_values);
4521 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4522 rs6000_cost->cache_line_size,
4523 global_options.x_param_values,
4524 global_options_set.x_param_values);
4525 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4526 global_options.x_param_values,
4527 global_options_set.x_param_values);
4529 /* Increase loop peeling limits based on performance analysis. */
4530 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4531 global_options.x_param_values,
4532 global_options_set.x_param_values);
4533 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4534 global_options.x_param_values,
4535 global_options_set.x_param_values);
4537 /* Use the 'model' -fsched-pressure algorithm by default. */
4538 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
4539 SCHED_PRESSURE_MODEL,
4540 global_options.x_param_values,
4541 global_options_set.x_param_values);
4543 /* If using typedef char *va_list, signal that
4544 __builtin_va_start (&ap, 0) can be optimized to
4545 ap = __builtin_next_arg (0). */
4546 if (DEFAULT_ABI != ABI_V4)
4547 targetm.expand_builtin_va_start = NULL;
4550 /* If not explicitly specified via option, decide whether to generate indexed
4551 load/store instructions. A value of -1 indicates that the
4552 initial value of this variable has not been overwritten. During
4553 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4554 if (TARGET_AVOID_XFORM == -1)
4555 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4556 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4557 need indexed accesses and the type used is the scalar type of the element
4558 being loaded or stored. */
4559 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4560 && !TARGET_ALTIVEC);
4562 /* Set the -mrecip options. */
4563 if (rs6000_recip_name)
4565 char *p = ASTRDUP (rs6000_recip_name);
4566 char *q;
4567 unsigned int mask, i;
4568 bool invert;
4570 while ((q = strtok (p, ",")) != NULL)
4572 p = NULL;
4573 if (*q == '!')
4575 invert = true;
4576 q++;
4578 else
4579 invert = false;
4581 if (!strcmp (q, "default"))
4582 mask = ((TARGET_RECIP_PRECISION)
4583 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4584 else
4586 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4587 if (!strcmp (q, recip_options[i].string))
4589 mask = recip_options[i].mask;
4590 break;
4593 if (i == ARRAY_SIZE (recip_options))
4595 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4596 invert = false;
4597 mask = 0;
4598 ret = false;
4602 if (invert)
4603 rs6000_recip_control &= ~mask;
4604 else
4605 rs6000_recip_control |= mask;
4609 /* Set the builtin mask of the various options used that could affect which
4610 builtins were used. In the past we used target_flags, but we've run out
4611 of bits, and some options are no longer in target_flags. */
4612 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4613 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4614 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4615 rs6000_builtin_mask);
4617 /* Initialize all of the registers. */
4618 rs6000_init_hard_regno_mode_ok (global_init_p);
4620 /* Save the initial options in case the user does function specific options */
4621 if (global_init_p)
4622 target_option_default_node = target_option_current_node
4623 = build_target_option_node (&global_options);
4625 /* If not explicitly specified via option, decide whether to generate the
4626 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4627 if (TARGET_LINK_STACK == -1)
4628 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4630 /* Deprecate use of -mno-speculate-indirect-jumps. */
4631 if (!rs6000_speculate_indirect_jumps)
4632 warning (0, "%qs is deprecated and not recommended in any circumstances",
4633 "-mno-speculate-indirect-jumps");
4635 return ret;
4638 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4639 define the target cpu type. */
4641 static void
4642 rs6000_option_override (void)
4644 (void) rs6000_option_override_internal (true);
4648 /* Implement targetm.vectorize.builtin_mask_for_load. */
4649 static tree
4650 rs6000_builtin_mask_for_load (void)
4652 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4653 if ((TARGET_ALTIVEC && !TARGET_VSX)
4654 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4655 return altivec_builtin_mask_for_load;
4656 else
4657 return 0;
4660 /* Implement LOOP_ALIGN. */
4661 align_flags
4662 rs6000_loop_align (rtx label)
4664 basic_block bb;
4665 int ninsns;
4667 /* Don't override loop alignment if -falign-loops was specified. */
4668 if (!can_override_loop_align)
4669 return align_loops;
4671 bb = BLOCK_FOR_INSN (label);
4672 ninsns = num_loop_insns(bb->loop_father);
4674 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4675 if (ninsns > 4 && ninsns <= 8
4676 && (rs6000_tune == PROCESSOR_POWER4
4677 || rs6000_tune == PROCESSOR_POWER5
4678 || rs6000_tune == PROCESSOR_POWER6
4679 || rs6000_tune == PROCESSOR_POWER7
4680 || rs6000_tune == PROCESSOR_POWER8))
4681 return align_flags (5);
4682 else
4683 return align_loops;
4686 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4687 after applying N number of iterations. This routine does not determine
4688 how may iterations are required to reach desired alignment. */
4690 static bool
4691 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4693 if (is_packed)
4694 return false;
4696 if (TARGET_32BIT)
4698 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4699 return true;
4701 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4702 return true;
4704 return false;
4706 else
4708 if (TARGET_MACHO)
4709 return false;
4711 /* Assuming that all other types are naturally aligned. CHECKME! */
4712 return true;
4716 /* Return true if the vector misalignment factor is supported by the
4717 target. */
4718 static bool
4719 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4720 const_tree type,
4721 int misalignment,
4722 bool is_packed)
4724 if (TARGET_VSX)
4726 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4727 return true;
4729 /* Return if movmisalign pattern is not supported for this mode. */
4730 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4731 return false;
4733 if (misalignment == -1)
4735 /* Misalignment factor is unknown at compile time but we know
4736 it's word aligned. */
4737 if (rs6000_vector_alignment_reachable (type, is_packed))
4739 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4741 if (element_size == 64 || element_size == 32)
4742 return true;
4745 return false;
4748 /* VSX supports word-aligned vector. */
4749 if (misalignment % 4 == 0)
4750 return true;
4752 return false;
4755 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4756 static int
4757 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4758 tree vectype, int misalign)
4760 unsigned elements;
4761 tree elem_type;
4763 switch (type_of_cost)
4765 case scalar_stmt:
4766 case scalar_load:
4767 case scalar_store:
4768 case vector_stmt:
4769 case vector_load:
4770 case vector_store:
4771 case vec_to_scalar:
4772 case scalar_to_vec:
4773 case cond_branch_not_taken:
4774 return 1;
4776 case vec_perm:
4777 /* Power7 has only one permute unit, make it a bit expensive. */
4778 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4779 return 3;
4780 else
4781 return 1;
4783 case vec_promote_demote:
4784 if (TARGET_VSX)
4785 return 4;
4786 else
4787 return 1;
4789 case cond_branch_taken:
4790 return 3;
4792 case unaligned_load:
4793 case vector_gather_load:
4794 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4795 return 1;
4797 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4799 elements = TYPE_VECTOR_SUBPARTS (vectype);
4800 if (elements == 2)
4801 /* Double word aligned. */
4802 return 2;
4804 if (elements == 4)
4806 switch (misalign)
4808 case 8:
4809 /* Double word aligned. */
4810 return 2;
4812 case -1:
4813 /* Unknown misalignment. */
4814 case 4:
4815 case 12:
4816 /* Word aligned. */
4817 return 22;
4819 default:
4820 gcc_unreachable ();
4825 if (TARGET_ALTIVEC)
4826 /* Misaligned loads are not supported. */
4827 gcc_unreachable ();
4829 return 2;
4831 case unaligned_store:
4832 case vector_scatter_store:
4833 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4834 return 1;
4836 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4838 elements = TYPE_VECTOR_SUBPARTS (vectype);
4839 if (elements == 2)
4840 /* Double word aligned. */
4841 return 2;
4843 if (elements == 4)
4845 switch (misalign)
4847 case 8:
4848 /* Double word aligned. */
4849 return 2;
4851 case -1:
4852 /* Unknown misalignment. */
4853 case 4:
4854 case 12:
4855 /* Word aligned. */
4856 return 23;
4858 default:
4859 gcc_unreachable ();
4864 if (TARGET_ALTIVEC)
4865 /* Misaligned stores are not supported. */
4866 gcc_unreachable ();
4868 return 2;
4870 case vec_construct:
4871 /* This is a rough approximation assuming non-constant elements
4872 constructed into a vector via element insertion. FIXME:
4873 vec_construct is not granular enough for uniformly good
4874 decisions. If the initialization is a splat, this is
4875 cheaper than we estimate. Improve this someday. */
4876 elem_type = TREE_TYPE (vectype);
4877 /* 32-bit vectors loaded into registers are stored as double
4878 precision, so we need 2 permutes, 2 converts, and 1 merge
4879 to construct a vector of short floats from them. */
4880 if (SCALAR_FLOAT_TYPE_P (elem_type)
4881 && TYPE_PRECISION (elem_type) == 32)
4882 return 5;
4883 /* On POWER9, integer vector types are built up in GPRs and then
4884 use a direct move (2 cycles). For POWER8 this is even worse,
4885 as we need two direct moves and a merge, and the direct moves
4886 are five cycles. */
4887 else if (INTEGRAL_TYPE_P (elem_type))
4889 if (TARGET_P9_VECTOR)
4890 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4891 else
4892 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4894 else
4895 /* V2DFmode doesn't need a direct move. */
4896 return 2;
4898 default:
4899 gcc_unreachable ();
4903 /* Implement targetm.vectorize.preferred_simd_mode. */
4905 static machine_mode
4906 rs6000_preferred_simd_mode (scalar_mode mode)
4908 if (TARGET_VSX)
4909 switch (mode)
4911 case E_DFmode:
4912 return V2DFmode;
4913 default:;
4915 if (TARGET_ALTIVEC || TARGET_VSX)
4916 switch (mode)
4918 case E_SFmode:
4919 return V4SFmode;
4920 case E_TImode:
4921 return V1TImode;
4922 case E_DImode:
4923 return V2DImode;
4924 case E_SImode:
4925 return V4SImode;
4926 case E_HImode:
4927 return V8HImode;
4928 case E_QImode:
4929 return V16QImode;
4930 default:;
4932 return word_mode;
/* Per-loop data accumulated by the vectorizer cost hooks.  */
typedef struct _rs6000_cost_data
{
  struct loop *loop_info;	/* Loop being costed; NULL outside a loop.  */
  unsigned cost[3];		/* Indexed by vect_prologue/body/epilogue.  */
} rs6000_cost_data;
4941 /* Test for likely overcommitment of vector hardware resources. If a
4942 loop iteration is relatively large, and too large a percentage of
4943 instructions in the loop are vectorized, the cost model may not
4944 adequately reflect delays from unavailable vector resources.
4945 Penalize the loop body cost for this case. */
4947 static void
4948 rs6000_density_test (rs6000_cost_data *data)
4950 const int DENSITY_PCT_THRESHOLD = 85;
4951 const int DENSITY_SIZE_THRESHOLD = 70;
4952 const int DENSITY_PENALTY = 10;
4953 struct loop *loop = data->loop_info;
4954 basic_block *bbs = get_loop_body (loop);
4955 int nbbs = loop->num_nodes;
4956 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4957 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4958 int i, density_pct;
4960 for (i = 0; i < nbbs; i++)
4962 basic_block bb = bbs[i];
4963 gimple_stmt_iterator gsi;
4965 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4967 gimple *stmt = gsi_stmt (gsi);
4968 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4970 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4971 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4972 not_vec_cost++;
4976 free (bbs);
4977 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4979 if (density_pct > DENSITY_PCT_THRESHOLD
4980 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4982 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4983 if (dump_enabled_p ())
4984 dump_printf_loc (MSG_NOTE, vect_location,
4985 "density %d%%, cost %d exceeds threshold, penalizing "
4986 "loop body cost by %d%%", density_pct,
4987 vec_cost + not_vec_cost, DENSITY_PENALTY);
4991 /* Implement targetm.vectorize.init_cost. */
4993 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
4994 instruction is needed by the vectorization. */
4995 static bool rs6000_vect_nonmem;
4997 static void *
4998 rs6000_init_cost (struct loop *loop_info)
5000 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5001 data->loop_info = loop_info;
5002 data->cost[vect_prologue] = 0;
5003 data->cost[vect_body] = 0;
5004 data->cost[vect_epilogue] = 0;
5005 rs6000_vect_nonmem = false;
5006 return data;
5009 /* Implement targetm.vectorize.add_stmt_cost. */
5011 static unsigned
5012 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5013 struct _stmt_vec_info *stmt_info, int misalign,
5014 enum vect_cost_model_location where)
5016 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5017 unsigned retval = 0;
5019 if (flag_vect_cost_model)
5021 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5022 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5023 misalign);
5024 /* Statements in an inner loop relative to the loop being
5025 vectorized are weighted more heavily. The value here is
5026 arbitrary and could potentially be improved with analysis. */
5027 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5028 count *= 50; /* FIXME. */
5030 retval = (unsigned) (count * stmt_cost);
5031 cost_data->cost[where] += retval;
5033 /* Check whether we're doing something other than just a copy loop.
5034 Not all such loops may be profitably vectorized; see
5035 rs6000_finish_cost. */
5036 if ((kind == vec_to_scalar || kind == vec_perm
5037 || kind == vec_promote_demote || kind == vec_construct
5038 || kind == scalar_to_vec)
5039 || (where == vect_body && kind == vector_stmt))
5040 rs6000_vect_nonmem = true;
5043 return retval;
5046 /* Implement targetm.vectorize.finish_cost. */
5048 static void
5049 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5050 unsigned *body_cost, unsigned *epilogue_cost)
5052 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5054 if (cost_data->loop_info)
5055 rs6000_density_test (cost_data);
5057 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5058 that require versioning for any reason. The vectorization is at
5059 best a wash inside the loop, and the versioning checks make
5060 profitability highly unlikely and potentially quite harmful. */
5061 if (cost_data->loop_info)
5063 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5064 if (!rs6000_vect_nonmem
5065 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5066 && LOOP_REQUIRES_VERSIONING (vec_info))
5067 cost_data->cost[vect_body] += 10000;
5070 *prologue_cost = cost_data->cost[vect_prologue];
5071 *body_cost = cost_data->cost[vect_body];
5072 *epilogue_cost = cost_data->cost[vect_epilogue];
/* Implement targetm.vectorize.destroy_cost_data.  Release the cost
   structure allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
5083 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5084 library with vectorized intrinsics. */
5086 static tree
5087 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5088 tree type_in)
5090 char name[32];
5091 const char *suffix = NULL;
5092 tree fntype, new_fndecl, bdecl = NULL_TREE;
5093 int n_args = 1;
5094 const char *bname;
5095 machine_mode el_mode, in_mode;
5096 int n, in_n;
5098 /* Libmass is suitable for unsafe math only as it does not correctly support
5099 parts of IEEE with the required precision such as denormals. Only support
5100 it if we have VSX to use the simd d2 or f4 functions.
5101 XXX: Add variable length support. */
5102 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5103 return NULL_TREE;
5105 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5106 n = TYPE_VECTOR_SUBPARTS (type_out);
5107 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5108 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5109 if (el_mode != in_mode
5110 || n != in_n)
5111 return NULL_TREE;
5113 switch (fn)
5115 CASE_CFN_ATAN2:
5116 CASE_CFN_HYPOT:
5117 CASE_CFN_POW:
5118 n_args = 2;
5119 gcc_fallthrough ();
5121 CASE_CFN_ACOS:
5122 CASE_CFN_ACOSH:
5123 CASE_CFN_ASIN:
5124 CASE_CFN_ASINH:
5125 CASE_CFN_ATAN:
5126 CASE_CFN_ATANH:
5127 CASE_CFN_CBRT:
5128 CASE_CFN_COS:
5129 CASE_CFN_COSH:
5130 CASE_CFN_ERF:
5131 CASE_CFN_ERFC:
5132 CASE_CFN_EXP2:
5133 CASE_CFN_EXP:
5134 CASE_CFN_EXPM1:
5135 CASE_CFN_LGAMMA:
5136 CASE_CFN_LOG10:
5137 CASE_CFN_LOG1P:
5138 CASE_CFN_LOG2:
5139 CASE_CFN_LOG:
5140 CASE_CFN_SIN:
5141 CASE_CFN_SINH:
5142 CASE_CFN_SQRT:
5143 CASE_CFN_TAN:
5144 CASE_CFN_TANH:
5145 if (el_mode == DFmode && n == 2)
5147 bdecl = mathfn_built_in (double_type_node, fn);
5148 suffix = "d2"; /* pow -> powd2 */
5150 else if (el_mode == SFmode && n == 4)
5152 bdecl = mathfn_built_in (float_type_node, fn);
5153 suffix = "4"; /* powf -> powf4 */
5155 else
5156 return NULL_TREE;
5157 if (!bdecl)
5158 return NULL_TREE;
5159 break;
5161 default:
5162 return NULL_TREE;
5165 gcc_assert (suffix != NULL);
5166 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5167 if (!bname)
5168 return NULL_TREE;
5170 strcpy (name, bname + sizeof ("__builtin_") - 1);
5171 strcat (name, suffix);
5173 if (n_args == 1)
5174 fntype = build_function_type_list (type_out, type_in, NULL);
5175 else if (n_args == 2)
5176 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5177 else
5178 gcc_unreachable ();
5180 /* Build a function declaration for the vectorized function. */
5181 new_fndecl = build_decl (BUILTINS_LOCATION,
5182 FUNCTION_DECL, get_identifier (name), fntype);
5183 TREE_PUBLIC (new_fndecl) = 1;
5184 DECL_EXTERNAL (new_fndecl) = 1;
5185 DECL_IS_NOVOPS (new_fndecl) = 1;
5186 TREE_READONLY (new_fndecl) = 1;
5188 return new_fndecl;
5191 /* Returns a function decl for a vectorized version of the builtin function
5192 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5193 if it is not available. */
5195 static tree
5196 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5197 tree type_in)
5199 machine_mode in_mode, out_mode;
5200 int in_n, out_n;
5202 if (TARGET_DEBUG_BUILTIN)
5203 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5204 combined_fn_name (combined_fn (fn)),
5205 GET_MODE_NAME (TYPE_MODE (type_out)),
5206 GET_MODE_NAME (TYPE_MODE (type_in)));
5208 if (TREE_CODE (type_out) != VECTOR_TYPE
5209 || TREE_CODE (type_in) != VECTOR_TYPE)
5210 return NULL_TREE;
5212 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5213 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5214 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5215 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5217 switch (fn)
5219 CASE_CFN_COPYSIGN:
5220 if (VECTOR_UNIT_VSX_P (V2DFmode)
5221 && out_mode == DFmode && out_n == 2
5222 && in_mode == DFmode && in_n == 2)
5223 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5224 if (VECTOR_UNIT_VSX_P (V4SFmode)
5225 && out_mode == SFmode && out_n == 4
5226 && in_mode == SFmode && in_n == 4)
5227 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5228 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5229 && out_mode == SFmode && out_n == 4
5230 && in_mode == SFmode && in_n == 4)
5231 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5232 break;
5233 CASE_CFN_CEIL:
5234 if (VECTOR_UNIT_VSX_P (V2DFmode)
5235 && out_mode == DFmode && out_n == 2
5236 && in_mode == DFmode && in_n == 2)
5237 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5238 if (VECTOR_UNIT_VSX_P (V4SFmode)
5239 && out_mode == SFmode && out_n == 4
5240 && in_mode == SFmode && in_n == 4)
5241 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5242 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5243 && out_mode == SFmode && out_n == 4
5244 && in_mode == SFmode && in_n == 4)
5245 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5246 break;
5247 CASE_CFN_FLOOR:
5248 if (VECTOR_UNIT_VSX_P (V2DFmode)
5249 && out_mode == DFmode && out_n == 2
5250 && in_mode == DFmode && in_n == 2)
5251 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5252 if (VECTOR_UNIT_VSX_P (V4SFmode)
5253 && out_mode == SFmode && out_n == 4
5254 && in_mode == SFmode && in_n == 4)
5255 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5256 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5257 && out_mode == SFmode && out_n == 4
5258 && in_mode == SFmode && in_n == 4)
5259 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5260 break;
5261 CASE_CFN_FMA:
5262 if (VECTOR_UNIT_VSX_P (V2DFmode)
5263 && out_mode == DFmode && out_n == 2
5264 && in_mode == DFmode && in_n == 2)
5265 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5266 if (VECTOR_UNIT_VSX_P (V4SFmode)
5267 && out_mode == SFmode && out_n == 4
5268 && in_mode == SFmode && in_n == 4)
5269 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5270 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5271 && out_mode == SFmode && out_n == 4
5272 && in_mode == SFmode && in_n == 4)
5273 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5274 break;
5275 CASE_CFN_TRUNC:
5276 if (VECTOR_UNIT_VSX_P (V2DFmode)
5277 && out_mode == DFmode && out_n == 2
5278 && in_mode == DFmode && in_n == 2)
5279 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5280 if (VECTOR_UNIT_VSX_P (V4SFmode)
5281 && out_mode == SFmode && out_n == 4
5282 && in_mode == SFmode && in_n == 4)
5283 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5284 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5285 && out_mode == SFmode && out_n == 4
5286 && in_mode == SFmode && in_n == 4)
5287 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5288 break;
5289 CASE_CFN_NEARBYINT:
5290 if (VECTOR_UNIT_VSX_P (V2DFmode)
5291 && flag_unsafe_math_optimizations
5292 && out_mode == DFmode && out_n == 2
5293 && in_mode == DFmode && in_n == 2)
5294 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5295 if (VECTOR_UNIT_VSX_P (V4SFmode)
5296 && flag_unsafe_math_optimizations
5297 && out_mode == SFmode && out_n == 4
5298 && in_mode == SFmode && in_n == 4)
5299 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5300 break;
5301 CASE_CFN_RINT:
5302 if (VECTOR_UNIT_VSX_P (V2DFmode)
5303 && !flag_trapping_math
5304 && out_mode == DFmode && out_n == 2
5305 && in_mode == DFmode && in_n == 2)
5306 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5307 if (VECTOR_UNIT_VSX_P (V4SFmode)
5308 && !flag_trapping_math
5309 && out_mode == SFmode && out_n == 4
5310 && in_mode == SFmode && in_n == 4)
5311 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5312 break;
5313 default:
5314 break;
5317 /* Generate calls to libmass if appropriate. */
5318 if (rs6000_veclib_handler)
5319 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5321 return NULL_TREE;
5324 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5326 static tree
5327 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5328 tree type_in)
5330 machine_mode in_mode, out_mode;
5331 int in_n, out_n;
5333 if (TARGET_DEBUG_BUILTIN)
5334 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5335 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5336 GET_MODE_NAME (TYPE_MODE (type_out)),
5337 GET_MODE_NAME (TYPE_MODE (type_in)));
5339 if (TREE_CODE (type_out) != VECTOR_TYPE
5340 || TREE_CODE (type_in) != VECTOR_TYPE)
5341 return NULL_TREE;
5343 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5344 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5345 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5346 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5348 enum rs6000_builtins fn
5349 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5350 switch (fn)
5352 case RS6000_BUILTIN_RSQRTF:
5353 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5354 && out_mode == SFmode && out_n == 4
5355 && in_mode == SFmode && in_n == 4)
5356 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5357 break;
5358 case RS6000_BUILTIN_RSQRT:
5359 if (VECTOR_UNIT_VSX_P (V2DFmode)
5360 && out_mode == DFmode && out_n == 2
5361 && in_mode == DFmode && in_n == 2)
5362 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5363 break;
5364 case RS6000_BUILTIN_RECIPF:
5365 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5366 && out_mode == SFmode && out_n == 4
5367 && in_mode == SFmode && in_n == 4)
5368 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5369 break;
5370 case RS6000_BUILTIN_RECIP:
5371 if (VECTOR_UNIT_VSX_P (V2DFmode)
5372 && out_mode == DFmode && out_n == 2
5373 && in_mode == DFmode && in_n == 2)
5374 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5375 break;
5376 default:
5377 break;
5379 return NULL_TREE;
5382 /* Default CPU string for rs6000*_file_start functions. */
5383 static const char *rs6000_default_cpu;
5385 #ifdef USING_ELFOS_H
5386 const char *rs6000_machine;
5388 const char *
5389 rs6000_machine_from_flags (void)
5391 HOST_WIDE_INT flags = rs6000_isa_flags;
5393 /* Disable the flags that should never influence the .machine selection. */
5394 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5396 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5397 return "future";
5398 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5399 return "power9";
5400 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5401 return "power8";
5402 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5403 return "power7";
5404 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5405 return "power6";
5406 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5407 return "power5";
5408 if ((flags & ISA_2_1_MASKS) != 0)
5409 return "power4";
5410 if ((flags & OPTION_MASK_POWERPC64) != 0)
5411 return "ppc64";
5412 return "ppc";
5415 void
5416 emit_asm_machine (void)
5418 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5420 #endif
5422 /* Do anything needed at the start of the asm file. */
5424 static void
5425 rs6000_file_start (void)
5427 char buffer[80];
5428 const char *start = buffer;
5429 FILE *file = asm_out_file;
5431 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5433 default_file_start ();
5435 if (flag_verbose_asm)
5437 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5439 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5441 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5442 start = "";
5445 if (global_options_set.x_rs6000_cpu_index)
5447 fprintf (file, "%s -mcpu=%s", start,
5448 processor_target_table[rs6000_cpu_index].name);
5449 start = "";
5452 if (global_options_set.x_rs6000_tune_index)
5454 fprintf (file, "%s -mtune=%s", start,
5455 processor_target_table[rs6000_tune_index].name);
5456 start = "";
5459 if (PPC405_ERRATUM77)
5461 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5462 start = "";
5465 #ifdef USING_ELFOS_H
5466 switch (rs6000_sdata)
5468 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5469 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5470 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5471 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5474 if (rs6000_sdata && g_switch_value)
5476 fprintf (file, "%s -G %d", start,
5477 g_switch_value);
5478 start = "";
5480 #endif
5482 if (*start == '\0')
5483 putc ('\n', file);
5486 #ifdef USING_ELFOS_H
5487 rs6000_machine = rs6000_machine_from_flags ();
5488 emit_asm_machine ();
5489 #endif
5491 if (DEFAULT_ABI == ABI_ELFv2)
5492 fprintf (file, "\t.abiversion 2\n");
5496 /* Return nonzero if this function is known to have a null epilogue. */
5499 direct_return (void)
5501 if (reload_completed)
5503 rs6000_stack_t *info = rs6000_stack_info ();
5505 if (info->first_gp_reg_save == 32
5506 && info->first_fp_reg_save == 64
5507 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5508 && ! info->lr_save_p
5509 && ! info->cr_save_p
5510 && info->vrsave_size == 0
5511 && ! info->push_p)
5512 return 1;
5515 return 0;
5518 /* Helper for num_insns_constant. Calculate number of instructions to
5519 load VALUE to a single gpr using combinations of addi, addis, ori,
5520 oris and sldi instructions. */
5522 static int
5523 num_insns_constant_gpr (HOST_WIDE_INT value)
5525 /* signed constant loadable with addi */
5526 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5527 return 1;
5529 /* constant loadable with addis */
5530 else if ((value & 0xffff) == 0
5531 && (value >> 31 == -1 || value >> 31 == 0))
5532 return 1;
5534 else if (TARGET_POWERPC64)
5536 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5537 HOST_WIDE_INT high = value >> 31;
5539 if (high == 0 || high == -1)
5540 return 2;
5542 high >>= 1;
5544 if (low == 0)
5545 return num_insns_constant_gpr (high) + 1;
5546 else if (high == 0)
5547 return num_insns_constant_gpr (low) + 1;
5548 else
5549 return (num_insns_constant_gpr (high)
5550 + num_insns_constant_gpr (low) + 1);
5553 else
5554 return 2;
5557 /* Helper for num_insns_constant. Allow constants formed by the
5558 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5559 and handle modes that require multiple gprs. */
5561 static int
5562 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5564 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5565 int total = 0;
5566 while (nregs-- > 0)
5568 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5569 int insns = num_insns_constant_gpr (low);
5570 if (insns > 2
5571 /* We won't get more than 2 from num_insns_constant_gpr
5572 except when TARGET_POWERPC64 and mode is DImode or
5573 wider, so the register mode must be DImode. */
5574 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5575 insns = 2;
5576 total += insns;
5577 value >>= BITS_PER_WORD;
5579 return total;
5582 /* Return the number of instructions it takes to form a constant in as
5583 many gprs are needed for MODE. */
5586 num_insns_constant (rtx op, machine_mode mode)
5588 HOST_WIDE_INT val;
5590 switch (GET_CODE (op))
5592 case CONST_INT:
5593 val = INTVAL (op);
5594 break;
5596 case CONST_WIDE_INT:
5598 int insns = 0;
5599 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5600 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5601 DImode);
5602 return insns;
5605 case CONST_DOUBLE:
5607 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5609 if (mode == SFmode || mode == SDmode)
5611 long l;
5613 if (mode == SDmode)
5614 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5615 else
5616 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5617 /* See the first define_split in rs6000.md handling a
5618 const_double_operand. */
5619 val = l;
5620 mode = SImode;
5622 else if (mode == DFmode || mode == DDmode)
5624 long l[2];
5626 if (mode == DDmode)
5627 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5628 else
5629 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5631 /* See the second (32-bit) and third (64-bit) define_split
5632 in rs6000.md handling a const_double_operand. */
5633 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5634 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5635 mode = DImode;
5637 else if (mode == TFmode || mode == TDmode
5638 || mode == KFmode || mode == IFmode)
5640 long l[4];
5641 int insns;
5643 if (mode == TDmode)
5644 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5645 else
5646 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5648 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5649 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5650 insns = num_insns_constant_multi (val, DImode);
5651 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5652 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5653 insns += num_insns_constant_multi (val, DImode);
5654 return insns;
5656 else
5657 gcc_unreachable ();
5659 break;
5661 default:
5662 gcc_unreachable ();
5665 return num_insns_constant_multi (val, mode);
5668 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5669 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5670 corresponding element of the vector, but for V4SFmode, the
5671 corresponding "float" is interpreted as an SImode integer. */
5673 HOST_WIDE_INT
5674 const_vector_elt_as_int (rtx op, unsigned int elt)
5676 rtx tmp;
5678 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5679 gcc_assert (GET_MODE (op) != V2DImode
5680 && GET_MODE (op) != V2DFmode);
5682 tmp = CONST_VECTOR_ELT (op, elt);
5683 if (GET_MODE (op) == V4SFmode)
5684 tmp = gen_lowpart (SImode, tmp);
5685 return INTVAL (tmp);
5688 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5689 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5690 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5691 all items are set to the same value and contain COPIES replicas of the
5692 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5693 operand and the others are set to the value of the operand's msb. */
5695 static bool
5696 vspltis_constant (rtx op, unsigned step, unsigned copies)
5698 machine_mode mode = GET_MODE (op);
5699 machine_mode inner = GET_MODE_INNER (mode);
5701 unsigned i;
5702 unsigned nunits;
5703 unsigned bitsize;
5704 unsigned mask;
5706 HOST_WIDE_INT val;
5707 HOST_WIDE_INT splat_val;
5708 HOST_WIDE_INT msb_val;
5710 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5711 return false;
5713 nunits = GET_MODE_NUNITS (mode);
5714 bitsize = GET_MODE_BITSIZE (inner);
5715 mask = GET_MODE_MASK (inner);
5717 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5718 splat_val = val;
5719 msb_val = val >= 0 ? 0 : -1;
5721 /* Construct the value to be splatted, if possible. If not, return 0. */
5722 for (i = 2; i <= copies; i *= 2)
5724 HOST_WIDE_INT small_val;
5725 bitsize /= 2;
5726 small_val = splat_val >> bitsize;
5727 mask >>= bitsize;
5728 if (splat_val != ((HOST_WIDE_INT)
5729 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5730 | (small_val & mask)))
5731 return false;
5732 splat_val = small_val;
5735 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5736 if (EASY_VECTOR_15 (splat_val))
5739 /* Also check if we can splat, and then add the result to itself. Do so if
5740 the value is positive, of if the splat instruction is using OP's mode;
5741 for splat_val < 0, the splat and the add should use the same mode. */
5742 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5743 && (splat_val >= 0 || (step == 1 && copies == 1)))
5746 /* Also check if are loading up the most significant bit which can be done by
5747 loading up -1 and shifting the value left by -1. */
5748 else if (EASY_VECTOR_MSB (splat_val, inner))
5751 else
5752 return false;
5754 /* Check if VAL is present in every STEP-th element, and the
5755 other elements are filled with its most significant bit. */
5756 for (i = 1; i < nunits; ++i)
5758 HOST_WIDE_INT desired_val;
5759 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5760 if ((i & (step - 1)) == 0)
5761 desired_val = val;
5762 else
5763 desired_val = msb_val;
5765 if (desired_val != const_vector_elt_as_int (op, elt))
5766 return false;
5769 return true;
5772 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
5773 instruction, filling in the bottom elements with 0 or -1.
5775 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5776 for the number of zeroes to shift in, or negative for the number of 0xff
5777 bytes to shift in.
5779 OP is a CONST_VECTOR. */
5782 vspltis_shifted (rtx op)
5784 machine_mode mode = GET_MODE (op);
5785 machine_mode inner = GET_MODE_INNER (mode);
5787 unsigned i, j;
5788 unsigned nunits;
5789 unsigned mask;
5791 HOST_WIDE_INT val;
5793 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5794 return false;
5796 /* We need to create pseudo registers to do the shift, so don't recognize
5797 shift vector constants after reload. */
5798 if (!can_create_pseudo_p ())
5799 return false;
5801 nunits = GET_MODE_NUNITS (mode);
5802 mask = GET_MODE_MASK (inner);
5804 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5806 /* Check if the value can really be the operand of a vspltis[bhw]. */
5807 if (EASY_VECTOR_15 (val))
5810 /* Also check if we are loading up the most significant bit which can be done
5811 by loading up -1 and shifting the value left by -1. */
5812 else if (EASY_VECTOR_MSB (val, inner))
5815 else
5816 return 0;
5818 /* Check if VAL is present in every STEP-th element until we find elements
5819 that are 0 or all 1 bits. */
5820 for (i = 1; i < nunits; ++i)
5822 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5823 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5825 /* If the value isn't the splat value, check for the remaining elements
5826 being 0/-1. */
5827 if (val != elt_val)
5829 if (elt_val == 0)
5831 for (j = i+1; j < nunits; ++j)
5833 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5834 if (const_vector_elt_as_int (op, elt2) != 0)
5835 return 0;
5838 return (nunits - i) * GET_MODE_SIZE (inner);
5841 else if ((elt_val & mask) == mask)
5843 for (j = i+1; j < nunits; ++j)
5845 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5846 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5847 return 0;
5850 return -((nunits - i) * GET_MODE_SIZE (inner));
5853 else
5854 return 0;
5858 /* If all elements are equal, we don't need to do VLSDOI. */
5859 return 0;
5863 /* Return true if OP is of the given MODE and can be synthesized
5864 with a vspltisb, vspltish or vspltisw. */
5866 bool
5867 easy_altivec_constant (rtx op, machine_mode mode)
5869 unsigned step, copies;
5871 if (mode == VOIDmode)
5872 mode = GET_MODE (op);
5873 else if (mode != GET_MODE (op))
5874 return false;
5876 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5877 constants. */
5878 if (mode == V2DFmode)
5879 return zero_constant (op, mode);
5881 else if (mode == V2DImode)
5883 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5884 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5885 return false;
5887 if (zero_constant (op, mode))
5888 return true;
5890 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5891 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5892 return true;
5894 return false;
5897 /* V1TImode is a special container for TImode. Ignore for now. */
5898 else if (mode == V1TImode)
5899 return false;
5901 /* Start with a vspltisw. */
5902 step = GET_MODE_NUNITS (mode) / 4;
5903 copies = 1;
5905 if (vspltis_constant (op, step, copies))
5906 return true;
5908 /* Then try with a vspltish. */
5909 if (step == 1)
5910 copies <<= 1;
5911 else
5912 step >>= 1;
5914 if (vspltis_constant (op, step, copies))
5915 return true;
5917 /* And finally a vspltisb. */
5918 if (step == 1)
5919 copies <<= 1;
5920 else
5921 step >>= 1;
5923 if (vspltis_constant (op, step, copies))
5924 return true;
5926 if (vspltis_shifted (op) != 0)
5927 return true;
5929 return false;
5932 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5933 result is OP. Abort if it is not possible. */
5936 gen_easy_altivec_constant (rtx op)
5938 machine_mode mode = GET_MODE (op);
5939 int nunits = GET_MODE_NUNITS (mode);
5940 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5941 unsigned step = nunits / 4;
5942 unsigned copies = 1;
5944 /* Start with a vspltisw. */
5945 if (vspltis_constant (op, step, copies))
5946 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5948 /* Then try with a vspltish. */
5949 if (step == 1)
5950 copies <<= 1;
5951 else
5952 step >>= 1;
5954 if (vspltis_constant (op, step, copies))
5955 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5957 /* And finally a vspltisb. */
5958 if (step == 1)
5959 copies <<= 1;
5960 else
5961 step >>= 1;
5963 if (vspltis_constant (op, step, copies))
5964 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5966 gcc_unreachable ();
5969 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
5970 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
5972 Return the number of instructions needed (1 or 2) into the address pointed
5973 via NUM_INSNS_PTR.
5975 Return the constant that is being split via CONSTANT_PTR. */
5977 bool
5978 xxspltib_constant_p (rtx op,
5979 machine_mode mode,
5980 int *num_insns_ptr,
5981 int *constant_ptr)
5983 size_t nunits = GET_MODE_NUNITS (mode);
5984 size_t i;
5985 HOST_WIDE_INT value;
5986 rtx element;
5988 /* Set the returned values to out of bound values. */
5989 *num_insns_ptr = -1;
5990 *constant_ptr = 256;
5992 if (!TARGET_P9_VECTOR)
5993 return false;
5995 if (mode == VOIDmode)
5996 mode = GET_MODE (op);
5998 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
5999 return false;
6001 /* Handle (vec_duplicate <constant>). */
6002 if (GET_CODE (op) == VEC_DUPLICATE)
6004 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6005 && mode != V2DImode)
6006 return false;
6008 element = XEXP (op, 0);
6009 if (!CONST_INT_P (element))
6010 return false;
6012 value = INTVAL (element);
6013 if (!IN_RANGE (value, -128, 127))
6014 return false;
6017 /* Handle (const_vector [...]). */
6018 else if (GET_CODE (op) == CONST_VECTOR)
6020 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6021 && mode != V2DImode)
6022 return false;
6024 element = CONST_VECTOR_ELT (op, 0);
6025 if (!CONST_INT_P (element))
6026 return false;
6028 value = INTVAL (element);
6029 if (!IN_RANGE (value, -128, 127))
6030 return false;
6032 for (i = 1; i < nunits; i++)
6034 element = CONST_VECTOR_ELT (op, i);
6035 if (!CONST_INT_P (element))
6036 return false;
6038 if (value != INTVAL (element))
6039 return false;
6043 /* Handle integer constants being loaded into the upper part of the VSX
6044 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6045 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6046 else if (CONST_INT_P (op))
6048 if (!SCALAR_INT_MODE_P (mode))
6049 return false;
6051 value = INTVAL (op);
6052 if (!IN_RANGE (value, -128, 127))
6053 return false;
6055 if (!IN_RANGE (value, -1, 0))
6057 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6058 return false;
6060 if (EASY_VECTOR_15 (value))
6061 return false;
6065 else
6066 return false;
6068 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6069 sign extend. Special case 0/-1 to allow getting any VSX register instead
6070 of an Altivec register. */
6071 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6072 && EASY_VECTOR_15 (value))
6073 return false;
6075 /* Return # of instructions and the constant byte for XXSPLTIB. */
6076 if (mode == V16QImode)
6077 *num_insns_ptr = 1;
6079 else if (IN_RANGE (value, -1, 0))
6080 *num_insns_ptr = 1;
6082 else
6083 *num_insns_ptr = 2;
6085 *constant_ptr = (int) value;
6086 return true;
6089 const char *
6090 output_vec_const_move (rtx *operands)
6092 int shift;
6093 machine_mode mode;
6094 rtx dest, vec;
6096 dest = operands[0];
6097 vec = operands[1];
6098 mode = GET_MODE (dest);
6100 if (TARGET_VSX)
6102 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6103 int xxspltib_value = 256;
6104 int num_insns = -1;
6106 if (zero_constant (vec, mode))
6108 if (TARGET_P9_VECTOR)
6109 return "xxspltib %x0,0";
6111 else if (dest_vmx_p)
6112 return "vspltisw %0,0";
6114 else
6115 return "xxlxor %x0,%x0,%x0";
6118 if (all_ones_constant (vec, mode))
6120 if (TARGET_P9_VECTOR)
6121 return "xxspltib %x0,255";
6123 else if (dest_vmx_p)
6124 return "vspltisw %0,-1";
6126 else if (TARGET_P8_VECTOR)
6127 return "xxlorc %x0,%x0,%x0";
6129 else
6130 gcc_unreachable ();
6133 if (TARGET_P9_VECTOR
6134 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6136 if (num_insns == 1)
6138 operands[2] = GEN_INT (xxspltib_value & 0xff);
6139 return "xxspltib %x0,%2";
6142 return "#";
6146 if (TARGET_ALTIVEC)
6148 rtx splat_vec;
6150 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6151 if (zero_constant (vec, mode))
6152 return "vspltisw %0,0";
6154 if (all_ones_constant (vec, mode))
6155 return "vspltisw %0,-1";
6157 /* Do we need to construct a value using VSLDOI? */
6158 shift = vspltis_shifted (vec);
6159 if (shift != 0)
6160 return "#";
6162 splat_vec = gen_easy_altivec_constant (vec);
6163 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6164 operands[1] = XEXP (splat_vec, 0);
6165 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6166 return "#";
6168 switch (GET_MODE (splat_vec))
6170 case E_V4SImode:
6171 return "vspltisw %0,%1";
6173 case E_V8HImode:
6174 return "vspltish %0,%1";
6176 case E_V16QImode:
6177 return "vspltisb %0,%1";
6179 default:
6180 gcc_unreachable ();
6184 gcc_unreachable ();
6187 /* Initialize vector TARGET to VALS. */
6189 void
6190 rs6000_expand_vector_init (rtx target, rtx vals)
6192 machine_mode mode = GET_MODE (target);
6193 machine_mode inner_mode = GET_MODE_INNER (mode);
6194 int n_elts = GET_MODE_NUNITS (mode);
6195 int n_var = 0, one_var = -1;
6196 bool all_same = true, all_const_zero = true;
6197 rtx x, mem;
6198 int i;
6200 for (i = 0; i < n_elts; ++i)
6202 x = XVECEXP (vals, 0, i);
6203 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6204 ++n_var, one_var = i;
6205 else if (x != CONST0_RTX (inner_mode))
6206 all_const_zero = false;
6208 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6209 all_same = false;
6212 if (n_var == 0)
6214 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6215 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6216 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6218 /* Zero register. */
6219 emit_move_insn (target, CONST0_RTX (mode));
6220 return;
6222 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6224 /* Splat immediate. */
6225 emit_insn (gen_rtx_SET (target, const_vec));
6226 return;
6228 else
6230 /* Load from constant pool. */
6231 emit_move_insn (target, const_vec);
6232 return;
6236 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6237 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6239 rtx op[2];
6240 size_t i;
6241 size_t num_elements = all_same ? 1 : 2;
6242 for (i = 0; i < num_elements; i++)
6244 op[i] = XVECEXP (vals, 0, i);
6245 /* Just in case there is a SUBREG with a smaller mode, do a
6246 conversion. */
6247 if (GET_MODE (op[i]) != inner_mode)
6249 rtx tmp = gen_reg_rtx (inner_mode);
6250 convert_move (tmp, op[i], 0);
6251 op[i] = tmp;
6253 /* Allow load with splat double word. */
6254 else if (MEM_P (op[i]))
6256 if (!all_same)
6257 op[i] = force_reg (inner_mode, op[i]);
6259 else if (!REG_P (op[i]))
6260 op[i] = force_reg (inner_mode, op[i]);
6263 if (all_same)
6265 if (mode == V2DFmode)
6266 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6267 else
6268 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6270 else
6272 if (mode == V2DFmode)
6273 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6274 else
6275 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6277 return;
6280 /* Special case initializing vector int if we are on 64-bit systems with
6281 direct move or we have the ISA 3.0 instructions. */
6282 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6283 && TARGET_DIRECT_MOVE_64BIT)
6285 if (all_same)
6287 rtx element0 = XVECEXP (vals, 0, 0);
6288 if (MEM_P (element0))
6289 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6290 else
6291 element0 = force_reg (SImode, element0);
6293 if (TARGET_P9_VECTOR)
6294 emit_insn (gen_vsx_splat_v4si (target, element0));
6295 else
6297 rtx tmp = gen_reg_rtx (DImode);
6298 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6299 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6301 return;
6303 else
6305 rtx elements[4];
6306 size_t i;
6308 for (i = 0; i < 4; i++)
6309 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6311 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6312 elements[2], elements[3]));
6313 return;
6317 /* With single precision floating point on VSX, know that internally single
6318 precision is actually represented as a double, and either make 2 V2DF
6319 vectors, and convert these vectors to single precision, or do one
6320 conversion, and splat the result to the other elements. */
6321 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6323 if (all_same)
6325 rtx element0 = XVECEXP (vals, 0, 0);
6327 if (TARGET_P9_VECTOR)
6329 if (MEM_P (element0))
6330 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6332 emit_insn (gen_vsx_splat_v4sf (target, element0));
6335 else
6337 rtx freg = gen_reg_rtx (V4SFmode);
6338 rtx sreg = force_reg (SFmode, element0);
6339 rtx cvt = (TARGET_XSCVDPSPN
6340 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6341 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6343 emit_insn (cvt);
6344 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6345 const0_rtx));
6348 else
6350 rtx dbl_even = gen_reg_rtx (V2DFmode);
6351 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6352 rtx flt_even = gen_reg_rtx (V4SFmode);
6353 rtx flt_odd = gen_reg_rtx (V4SFmode);
6354 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6355 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6356 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6357 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6359 /* Use VMRGEW if we can instead of doing a permute. */
6360 if (TARGET_P8_VECTOR)
6362 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6363 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6364 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6365 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6366 if (BYTES_BIG_ENDIAN)
6367 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6368 else
6369 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6371 else
6373 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6374 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6375 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6376 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6377 rs6000_expand_extract_even (target, flt_even, flt_odd);
6380 return;
6383 /* Special case initializing vector short/char that are splats if we are on
6384 64-bit systems with direct move. */
6385 if (all_same && TARGET_DIRECT_MOVE_64BIT
6386 && (mode == V16QImode || mode == V8HImode))
6388 rtx op0 = XVECEXP (vals, 0, 0);
6389 rtx di_tmp = gen_reg_rtx (DImode);
6391 if (!REG_P (op0))
6392 op0 = force_reg (GET_MODE_INNER (mode), op0);
6394 if (mode == V16QImode)
6396 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6397 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6398 return;
6401 if (mode == V8HImode)
6403 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6404 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6405 return;
6409 /* Store value to stack temp. Load vector element. Splat. However, splat
6410 of 64-bit items is not supported on Altivec. */
6411 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6413 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6414 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6415 XVECEXP (vals, 0, 0));
6416 x = gen_rtx_UNSPEC (VOIDmode,
6417 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6418 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6419 gen_rtvec (2,
6420 gen_rtx_SET (target, mem),
6421 x)));
6422 x = gen_rtx_VEC_SELECT (inner_mode, target,
6423 gen_rtx_PARALLEL (VOIDmode,
6424 gen_rtvec (1, const0_rtx)));
6425 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6426 return;
6429 /* One field is non-constant. Load constant then overwrite
6430 varying field. */
6431 if (n_var == 1)
6433 rtx copy = copy_rtx (vals);
6435 /* Load constant part of vector, substitute neighboring value for
6436 varying element. */
6437 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6438 rs6000_expand_vector_init (target, copy);
6440 /* Insert variable. */
6441 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6442 return;
6445 /* Construct the vector in memory one field at a time
6446 and load the whole vector. */
6447 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6448 for (i = 0; i < n_elts; i++)
6449 emit_move_insn (adjust_address_nv (mem, inner_mode,
6450 i * GET_MODE_SIZE (inner_mode)),
6451 XVECEXP (vals, 0, i));
6452 emit_move_insn (target, mem);
6455 /* Set field ELT of TARGET to VAL. */
6457 void
6458 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6460 machine_mode mode = GET_MODE (target);
6461 machine_mode inner_mode = GET_MODE_INNER (mode);
6462 rtx reg = gen_reg_rtx (mode);
6463 rtx mask, mem, x;
6464 int width = GET_MODE_SIZE (inner_mode);
6465 int i;
6467 val = force_reg (GET_MODE (val), val);
6469 if (VECTOR_MEM_VSX_P (mode))
6471 rtx insn = NULL_RTX;
6472 rtx elt_rtx = GEN_INT (elt);
6474 if (mode == V2DFmode)
6475 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6477 else if (mode == V2DImode)
6478 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6480 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6482 if (mode == V4SImode)
6483 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6484 else if (mode == V8HImode)
6485 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6486 else if (mode == V16QImode)
6487 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6488 else if (mode == V4SFmode)
6489 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6492 if (insn)
6494 emit_insn (insn);
6495 return;
6499 /* Simplify setting single element vectors like V1TImode. */
6500 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6502 emit_move_insn (target, gen_lowpart (mode, val));
6503 return;
6506 /* Load single variable value. */
6507 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6508 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6509 x = gen_rtx_UNSPEC (VOIDmode,
6510 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6511 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6512 gen_rtvec (2,
6513 gen_rtx_SET (reg, mem),
6514 x)));
6516 /* Linear sequence. */
6517 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6518 for (i = 0; i < 16; ++i)
6519 XVECEXP (mask, 0, i) = GEN_INT (i);
6521 /* Set permute mask to insert element into target. */
6522 for (i = 0; i < width; ++i)
6523 XVECEXP (mask, 0, elt*width + i)
6524 = GEN_INT (i + 0x10);
6525 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6527 if (BYTES_BIG_ENDIAN)
6528 x = gen_rtx_UNSPEC (mode,
6529 gen_rtvec (3, target, reg,
6530 force_reg (V16QImode, x)),
6531 UNSPEC_VPERM);
6532 else
6534 if (TARGET_P9_VECTOR)
6535 x = gen_rtx_UNSPEC (mode,
6536 gen_rtvec (3, reg, target,
6537 force_reg (V16QImode, x)),
6538 UNSPEC_VPERMR);
6539 else
6541 /* Invert selector. We prefer to generate VNAND on P8 so
6542 that future fusion opportunities can kick in, but must
6543 generate VNOR elsewhere. */
6544 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6545 rtx iorx = (TARGET_P8_VECTOR
6546 ? gen_rtx_IOR (V16QImode, notx, notx)
6547 : gen_rtx_AND (V16QImode, notx, notx));
6548 rtx tmp = gen_reg_rtx (V16QImode);
6549 emit_insn (gen_rtx_SET (tmp, iorx));
6551 /* Permute with operands reversed and adjusted selector. */
6552 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6553 UNSPEC_VPERM);
6557 emit_insn (gen_rtx_SET (target, x));
6560 /* Extract field ELT from VEC into TARGET. */
6562 void
6563 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6565 machine_mode mode = GET_MODE (vec);
6566 machine_mode inner_mode = GET_MODE_INNER (mode);
6567 rtx mem;
6569 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6571 switch (mode)
6573 default:
6574 break;
6575 case E_V1TImode:
6576 emit_move_insn (target, gen_lowpart (TImode, vec));
6577 break;
6578 case E_V2DFmode:
6579 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6580 return;
6581 case E_V2DImode:
6582 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6583 return;
6584 case E_V4SFmode:
6585 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6586 return;
6587 case E_V16QImode:
6588 if (TARGET_DIRECT_MOVE_64BIT)
6590 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6591 return;
6593 else
6594 break;
6595 case E_V8HImode:
6596 if (TARGET_DIRECT_MOVE_64BIT)
6598 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6599 return;
6601 else
6602 break;
6603 case E_V4SImode:
6604 if (TARGET_DIRECT_MOVE_64BIT)
6606 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6607 return;
6609 break;
6612 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6613 && TARGET_DIRECT_MOVE_64BIT)
6615 if (GET_MODE (elt) != DImode)
6617 rtx tmp = gen_reg_rtx (DImode);
6618 convert_move (tmp, elt, 0);
6619 elt = tmp;
6621 else if (!REG_P (elt))
6622 elt = force_reg (DImode, elt);
6624 switch (mode)
6626 case E_V1TImode:
6627 emit_move_insn (target, gen_lowpart (TImode, vec));
6628 return;
6630 case E_V2DFmode:
6631 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6632 return;
6634 case E_V2DImode:
6635 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6636 return;
6638 case E_V4SFmode:
6639 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6640 return;
6642 case E_V4SImode:
6643 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6644 return;
6646 case E_V8HImode:
6647 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6648 return;
6650 case E_V16QImode:
6651 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6652 return;
6654 default:
6655 gcc_unreachable ();
6659 /* Allocate mode-sized buffer. */
6660 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6662 emit_move_insn (mem, vec);
6663 if (CONST_INT_P (elt))
6665 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6667 /* Add offset to field within buffer matching vector element. */
6668 mem = adjust_address_nv (mem, inner_mode,
6669 modulo_elt * GET_MODE_SIZE (inner_mode));
6670 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6672 else
6674 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6675 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6676 rtx new_addr = gen_reg_rtx (Pmode);
6678 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6679 if (ele_size > 1)
6680 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6681 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6682 new_addr = change_address (mem, inner_mode, new_addr);
6683 emit_move_insn (target, new_addr);
6687 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6688 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6689 temporary (BASE_TMP) to fixup the address. Return the new memory address
6690 that is valid for reads or writes to a given register (SCALAR_REG). */
6693 rs6000_adjust_vec_address (rtx scalar_reg,
6694 rtx mem,
6695 rtx element,
6696 rtx base_tmp,
6697 machine_mode scalar_mode)
6699 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6700 rtx addr = XEXP (mem, 0);
6701 rtx element_offset;
6702 rtx new_addr;
6703 bool valid_addr_p;
6705 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6706 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6708 /* Calculate what we need to add to the address to get the element
6709 address. */
6710 if (CONST_INT_P (element))
6711 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6712 else
6714 int byte_shift = exact_log2 (scalar_size);
6715 gcc_assert (byte_shift >= 0);
6717 if (byte_shift == 0)
6718 element_offset = element;
6720 else
6722 if (TARGET_POWERPC64)
6723 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6724 else
6725 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6727 element_offset = base_tmp;
6731 /* Create the new address pointing to the element within the vector. If we
6732 are adding 0, we don't have to change the address. */
6733 if (element_offset == const0_rtx)
6734 new_addr = addr;
6736 /* A simple indirect address can be converted into a reg + offset
6737 address. */
6738 else if (REG_P (addr) || SUBREG_P (addr))
6739 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6741 /* Optimize D-FORM addresses with constant offset with a constant element, to
6742 include the element offset in the address directly. */
6743 else if (GET_CODE (addr) == PLUS)
6745 rtx op0 = XEXP (addr, 0);
6746 rtx op1 = XEXP (addr, 1);
6747 rtx insn;
6749 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6750 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6752 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6753 rtx offset_rtx = GEN_INT (offset);
6755 if (IN_RANGE (offset, -32768, 32767)
6756 && (scalar_size < 8 || (offset & 0x3) == 0))
6757 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6758 else
6760 emit_move_insn (base_tmp, offset_rtx);
6761 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6764 else
6766 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6767 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6769 /* Note, ADDI requires the register being added to be a base
6770 register. If the register was R0, load it up into the temporary
6771 and do the add. */
6772 if (op1_reg_p
6773 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6775 insn = gen_add3_insn (base_tmp, op1, element_offset);
6776 gcc_assert (insn != NULL_RTX);
6777 emit_insn (insn);
6780 else if (ele_reg_p
6781 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6783 insn = gen_add3_insn (base_tmp, element_offset, op1);
6784 gcc_assert (insn != NULL_RTX);
6785 emit_insn (insn);
6788 else
6790 emit_move_insn (base_tmp, op1);
6791 emit_insn (gen_add2_insn (base_tmp, element_offset));
6794 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6798 else
6800 emit_move_insn (base_tmp, addr);
6801 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6804 /* If we have a PLUS, we need to see whether the particular register class
6805 allows for D-FORM or X-FORM addressing. */
6806 if (GET_CODE (new_addr) == PLUS)
6808 rtx op1 = XEXP (new_addr, 1);
6809 addr_mask_type addr_mask;
6810 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6812 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6813 if (INT_REGNO_P (scalar_regno))
6814 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6816 else if (FP_REGNO_P (scalar_regno))
6817 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6819 else if (ALTIVEC_REGNO_P (scalar_regno))
6820 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6822 else
6823 gcc_unreachable ();
6825 if (REG_P (op1) || SUBREG_P (op1))
6826 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6827 else
6828 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6831 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6832 valid_addr_p = true;
6834 else
6835 valid_addr_p = false;
6837 if (!valid_addr_p)
6839 emit_move_insn (base_tmp, new_addr);
6840 new_addr = base_tmp;
6843 return change_address (mem, scalar_mode, new_addr);
6846 /* Split a variable vec_extract operation into the component instructions. */
6848 void
6849 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6850 rtx tmp_altivec)
6852 machine_mode mode = GET_MODE (src);
6853 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6854 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6855 int byte_shift = exact_log2 (scalar_size);
6857 gcc_assert (byte_shift >= 0);
6859 /* If we are given a memory address, optimize to load just the element. We
6860 don't have to adjust the vector element number on little endian
6861 systems. */
6862 if (MEM_P (src))
6864 int num_elements = GET_MODE_NUNITS (mode);
6865 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6867 emit_insn (gen_anddi3 (element, element, num_ele_m1));
6868 gcc_assert (REG_P (tmp_gpr));
6869 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
6870 tmp_gpr, scalar_mode));
6871 return;
6874 else if (REG_P (src) || SUBREG_P (src))
6876 int num_elements = GET_MODE_NUNITS (mode);
6877 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6878 int bit_shift = 7 - exact_log2 (num_elements);
6879 rtx element2;
6880 unsigned int dest_regno = reg_or_subregno (dest);
6881 unsigned int src_regno = reg_or_subregno (src);
6882 unsigned int element_regno = reg_or_subregno (element);
6884 gcc_assert (REG_P (tmp_gpr));
6886 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6887 a general purpose register. */
6888 if (TARGET_P9_VECTOR
6889 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6890 && INT_REGNO_P (dest_regno)
6891 && ALTIVEC_REGNO_P (src_regno)
6892 && INT_REGNO_P (element_regno))
6894 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6895 rtx element_si = gen_rtx_REG (SImode, element_regno);
6897 if (mode == V16QImode)
6898 emit_insn (BYTES_BIG_ENDIAN
6899 ? gen_vextublx (dest_si, element_si, src)
6900 : gen_vextubrx (dest_si, element_si, src));
6902 else if (mode == V8HImode)
6904 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6905 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6906 emit_insn (BYTES_BIG_ENDIAN
6907 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6908 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6912 else
6914 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6915 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
6916 emit_insn (BYTES_BIG_ENDIAN
6917 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
6918 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
6921 return;
6925 gcc_assert (REG_P (tmp_altivec));
6927 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
6928 an XOR, otherwise we need to subtract. The shift amount is so VSLO
6929 will shift the element into the upper position (adding 3 to convert a
6930 byte shift into a bit shift). */
6931 if (scalar_size == 8)
6933 if (!BYTES_BIG_ENDIAN)
6935 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
6936 element2 = tmp_gpr;
6938 else
6939 element2 = element;
6941 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
6942 bit. */
6943 emit_insn (gen_rtx_SET (tmp_gpr,
6944 gen_rtx_AND (DImode,
6945 gen_rtx_ASHIFT (DImode,
6946 element2,
6947 GEN_INT (6)),
6948 GEN_INT (64))));
6950 else
6952 if (!BYTES_BIG_ENDIAN)
6954 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6956 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
6957 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
6958 element2 = tmp_gpr;
6960 else
6961 element2 = element;
6963 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
6966 /* Get the value into the lower byte of the Altivec register where VSLO
6967 expects it. */
6968 if (TARGET_P9_VECTOR)
6969 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
6970 else if (can_create_pseudo_p ())
6971 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
6972 else
6974 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
6975 emit_move_insn (tmp_di, tmp_gpr);
6976 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
6979 /* Do the VSLO to get the value into the final location. */
6980 switch (mode)
6982 case E_V2DFmode:
6983 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
6984 return;
6986 case E_V2DImode:
6987 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
6988 return;
6990 case E_V4SFmode:
6992 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
6993 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
6994 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
6995 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
6996 tmp_altivec));
6998 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
6999 return;
7002 case E_V4SImode:
7003 case E_V8HImode:
7004 case E_V16QImode:
7006 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7007 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7008 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7009 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7010 tmp_altivec));
7011 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7012 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7013 GEN_INT (64 - bits_in_element)));
7014 return;
7017 default:
7018 gcc_unreachable ();
7021 return;
7023 else
7024 gcc_unreachable ();
7027 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7028 selects whether the alignment is abi mandated, optional, or
7029 both abi and optional alignment. */
7031 unsigned int
7032 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7034 if (how != align_opt)
7036 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7037 align = 128;
7040 if (how != align_abi)
7042 if (TREE_CODE (type) == ARRAY_TYPE
7043 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7045 if (align < BITS_PER_WORD)
7046 align = BITS_PER_WORD;
7050 return align;
7053 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7054 instructions simply ignore the low bits; VSX memory instructions
7055 are aligned to 4 or 8 bytes. */
7057 static bool
7058 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7060 return (STRICT_ALIGNMENT
7061 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7062 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7063 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7064 && (int) align < VECTOR_ALIGN (mode)))));
7067 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7069 bool
7070 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7072 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7074 if (computed != 128)
7076 static bool warned;
7077 if (!warned && warn_psabi)
7079 warned = true;
7080 inform (input_location,
7081 "the layout of aggregates containing vectors with"
7082 " %d-byte alignment has changed in GCC 5",
7083 computed / BITS_PER_UNIT);
7086 /* In current GCC there is no special case. */
7087 return false;
7090 return false;
7093 /* AIX increases natural record alignment to doubleword if the first
7094 field is an FP double while the FP fields remain word aligned. */
7096 unsigned int
7097 rs6000_special_round_type_align (tree type, unsigned int computed,
7098 unsigned int specified)
7100 unsigned int align = MAX (computed, specified);
7101 tree field = TYPE_FIELDS (type);
7103 /* Skip all non field decls */
7104 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7105 field = DECL_CHAIN (field);
7107 if (field != NULL && field != type)
7109 type = TREE_TYPE (field);
7110 while (TREE_CODE (type) == ARRAY_TYPE)
7111 type = TREE_TYPE (type);
7113 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7114 align = MAX (align, 64);
7117 return align;
7120 /* Darwin increases record alignment to the natural alignment of
7121 the first field. */
7123 unsigned int
7124 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7125 unsigned int specified)
7127 unsigned int align = MAX (computed, specified);
7129 if (TYPE_PACKED (type))
7130 return align;
7132 /* Find the first field, looking down into aggregates. */
7133 do {
7134 tree field = TYPE_FIELDS (type);
7135 /* Skip all non field decls */
7136 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7137 field = DECL_CHAIN (field);
7138 if (! field)
7139 break;
7140 /* A packed field does not contribute any extra alignment. */
7141 if (DECL_PACKED (field))
7142 return align;
7143 type = TREE_TYPE (field);
7144 while (TREE_CODE (type) == ARRAY_TYPE)
7145 type = TREE_TYPE (type);
7146 } while (AGGREGATE_TYPE_P (type));
7148 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7149 align = MAX (align, TYPE_ALIGN (type));
7151 return align;
7154 /* Return 1 for an operand in small memory on V.4/eabi. */
7157 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7158 machine_mode mode ATTRIBUTE_UNUSED)
7160 #if TARGET_ELF
7161 rtx sym_ref;
7163 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7164 return 0;
7166 if (DEFAULT_ABI != ABI_V4)
7167 return 0;
7169 if (SYMBOL_REF_P (op))
7170 sym_ref = op;
7172 else if (GET_CODE (op) != CONST
7173 || GET_CODE (XEXP (op, 0)) != PLUS
7174 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7175 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7176 return 0;
7178 else
7180 rtx sum = XEXP (op, 0);
7181 HOST_WIDE_INT summand;
7183 /* We have to be careful here, because it is the referenced address
7184 that must be 32k from _SDA_BASE_, not just the symbol. */
7185 summand = INTVAL (XEXP (sum, 1));
7186 if (summand < 0 || summand > g_switch_value)
7187 return 0;
7189 sym_ref = XEXP (sum, 0);
7192 return SYMBOL_REF_SMALL_P (sym_ref);
7193 #else
7194 return 0;
7195 #endif
7198 /* Return true if either operand is a general purpose register. */
7200 bool
7201 gpr_or_gpr_p (rtx op0, rtx op1)
7203 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7204 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7207 /* Return true if this is a move direct operation between GPR registers and
7208 floating point/VSX registers. */
7210 bool
7211 direct_move_p (rtx op0, rtx op1)
7213 if (!REG_P (op0) || !REG_P (op1))
7214 return false;
7216 if (!TARGET_DIRECT_MOVE)
7217 return false;
7219 int regno0 = REGNO (op0);
7220 int regno1 = REGNO (op1);
7221 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7222 return false;
7224 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7225 return true;
7227 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7228 return true;
7230 return false;
7233 /* Return true if the ADDR is an acceptable address for a quad memory
7234 operation of mode MODE (either LQ/STQ for general purpose registers, or
7235 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
7236 is intended for LQ/STQ. If it is false, the address is intended for the ISA
7237 3.0 LXV/STXV instruction. */
7239 bool
7240 quad_address_p (rtx addr, machine_mode mode, bool strict)
7242 rtx op0, op1;
7244 if (GET_MODE_SIZE (mode) != 16)
7245 return false;
7247 if (legitimate_indirect_address_p (addr, strict))
7248 return true;
7250 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7251 return false;
7253 if (GET_CODE (addr) != PLUS)
7254 return false;
7256 op0 = XEXP (addr, 0);
7257 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7258 return false;
7260 op1 = XEXP (addr, 1);
7261 if (!CONST_INT_P (op1))
7262 return false;
7264 return quad_address_offset_p (INTVAL (op1));
7267 /* Return true if this is a load or store quad operation. This function does
7268 not handle the atomic quad memory instructions. */
7270 bool
7271 quad_load_store_p (rtx op0, rtx op1)
7273 bool ret;
7275 if (!TARGET_QUAD_MEMORY)
7276 ret = false;
7278 else if (REG_P (op0) && MEM_P (op1))
7279 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7280 && quad_memory_operand (op1, GET_MODE (op1))
7281 && !reg_overlap_mentioned_p (op0, op1));
7283 else if (MEM_P (op0) && REG_P (op1))
7284 ret = (quad_memory_operand (op0, GET_MODE (op0))
7285 && quad_int_reg_operand (op1, GET_MODE (op1)));
7287 else
7288 ret = false;
7290 if (TARGET_DEBUG_ADDR)
7292 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7293 ret ? "true" : "false");
7294 debug_rtx (gen_rtx_SET (op0, op1));
7297 return ret;
7300 /* Given an address, return a constant offset term if one exists. */
7302 static rtx
7303 address_offset (rtx op)
7305 if (GET_CODE (op) == PRE_INC
7306 || GET_CODE (op) == PRE_DEC)
7307 op = XEXP (op, 0);
7308 else if (GET_CODE (op) == PRE_MODIFY
7309 || GET_CODE (op) == LO_SUM)
7310 op = XEXP (op, 1);
7312 if (GET_CODE (op) == CONST)
7313 op = XEXP (op, 0);
7315 if (GET_CODE (op) == PLUS)
7316 op = XEXP (op, 1);
7318 if (CONST_INT_P (op))
7319 return op;
7321 return NULL_RTX;
7324 /* Return true if the MEM operand is a memory operand suitable for use
7325 with a (full width, possibly multiple) gpr load/store. On
7326 powerpc64 this means the offset must be divisible by 4.
7327 Implements 'Y' constraint.
7329 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7330 a constraint function we know the operand has satisfied a suitable
7331 memory predicate.
7333 Offsetting a lo_sum should not be allowed, except where we know by
7334 alignment that a 32k boundary is not crossed. Note that by
7335 "offsetting" here we mean a further offset to access parts of the
7336 MEM. It's fine to have a lo_sum where the inner address is offset
7337 from a sym, since the same sym+offset will appear in the high part
7338 of the address calculation. */
7340 bool
7341 mem_operand_gpr (rtx op, machine_mode mode)
7343 unsigned HOST_WIDE_INT offset;
7344 int extra;
7345 rtx addr = XEXP (op, 0);
7347 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7348 if (TARGET_UPDATE
7349 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7350 && mode_supports_pre_incdec_p (mode)
7351 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7352 return true;
7354 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
7355 if (!rs6000_offsettable_memref_p (op, mode, false))
7356 return false;
7358 op = address_offset (addr);
7359 if (op == NULL_RTX)
7360 return true;
7362 offset = INTVAL (op);
7363 if (TARGET_POWERPC64 && (offset & 3) != 0)
7364 return false;
7366 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7367 if (extra < 0)
7368 extra = 0;
7370 if (GET_CODE (addr) == LO_SUM)
7371 /* For lo_sum addresses, we must allow any offset except one that
7372 causes a wrap, so test only the low 16 bits. */
7373 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7375 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7378 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7379 enforce an offset divisible by 4 even for 32-bit. */
7381 bool
7382 mem_operand_ds_form (rtx op, machine_mode mode)
7384 unsigned HOST_WIDE_INT offset;
7385 int extra;
7386 rtx addr = XEXP (op, 0);
7388 if (!offsettable_address_p (false, mode, addr))
7389 return false;
7391 op = address_offset (addr);
7392 if (op == NULL_RTX)
7393 return true;
7395 offset = INTVAL (op);
7396 if ((offset & 3) != 0)
7397 return false;
7399 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7400 if (extra < 0)
7401 extra = 0;
7403 if (GET_CODE (addr) == LO_SUM)
7404 /* For lo_sum addresses, we must allow any offset except one that
7405 causes a wrap, so test only the low 16 bits. */
7406 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7408 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7411 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7413 static bool
7414 reg_offset_addressing_ok_p (machine_mode mode)
7416 switch (mode)
7418 case E_V16QImode:
7419 case E_V8HImode:
7420 case E_V4SFmode:
7421 case E_V4SImode:
7422 case E_V2DFmode:
7423 case E_V2DImode:
7424 case E_V1TImode:
7425 case E_TImode:
7426 case E_TFmode:
7427 case E_KFmode:
7428 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7429 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7430 a vector mode, if we want to use the VSX registers to move it around,
7431 we need to restrict ourselves to reg+reg addressing. Similarly for
7432 IEEE 128-bit floating point that is passed in a single vector
7433 register. */
7434 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7435 return mode_supports_dq_form (mode);
7436 break;
7438 case E_SDmode:
7439 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7440 addressing for the LFIWZX and STFIWX instructions. */
7441 if (TARGET_NO_SDMODE_STACK)
7442 return false;
7443 break;
7445 default:
7446 break;
7449 return true;
7452 static bool
7453 virtual_stack_registers_memory_p (rtx op)
7455 int regnum;
7457 if (REG_P (op))
7458 regnum = REGNO (op);
7460 else if (GET_CODE (op) == PLUS
7461 && REG_P (XEXP (op, 0))
7462 && CONST_INT_P (XEXP (op, 1)))
7463 regnum = REGNO (XEXP (op, 0));
7465 else
7466 return false;
7468 return (regnum >= FIRST_VIRTUAL_REGISTER
7469 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7472 /* Return true if a MODE sized memory accesses to OP plus OFFSET
7473 is known to not straddle a 32k boundary. This function is used
7474 to determine whether -mcmodel=medium code can use TOC pointer
7475 relative addressing for OP. This means the alignment of the TOC
7476 pointer must also be taken into account, and unfortunately that is
7477 only 8 bytes. */
7479 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7480 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7481 #endif
7483 static bool
7484 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7485 machine_mode mode)
7487 tree decl;
7488 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7490 if (!SYMBOL_REF_P (op))
7491 return false;
7493 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7494 SYMBOL_REF. */
7495 if (mode_supports_dq_form (mode))
7496 return false;
7498 dsize = GET_MODE_SIZE (mode);
7499 decl = SYMBOL_REF_DECL (op);
7500 if (!decl)
7502 if (dsize == 0)
7503 return false;
7505 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7506 replacing memory addresses with an anchor plus offset. We
7507 could find the decl by rummaging around in the block->objects
7508 VEC for the given offset but that seems like too much work. */
7509 dalign = BITS_PER_UNIT;
7510 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7511 && SYMBOL_REF_ANCHOR_P (op)
7512 && SYMBOL_REF_BLOCK (op) != NULL)
7514 struct object_block *block = SYMBOL_REF_BLOCK (op);
7516 dalign = block->alignment;
7517 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7519 else if (CONSTANT_POOL_ADDRESS_P (op))
7521 /* It would be nice to have get_pool_align().. */
7522 machine_mode cmode = get_pool_mode (op);
7524 dalign = GET_MODE_ALIGNMENT (cmode);
7527 else if (DECL_P (decl))
7529 dalign = DECL_ALIGN (decl);
7531 if (dsize == 0)
7533 /* Allow BLKmode when the entire object is known to not
7534 cross a 32k boundary. */
7535 if (!DECL_SIZE_UNIT (decl))
7536 return false;
7538 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7539 return false;
7541 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7542 if (dsize > 32768)
7543 return false;
7545 dalign /= BITS_PER_UNIT;
7546 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7547 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7548 return dalign >= dsize;
7551 else
7552 gcc_unreachable ();
7554 /* Find how many bits of the alignment we know for this access. */
7555 dalign /= BITS_PER_UNIT;
7556 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7557 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7558 mask = dalign - 1;
7559 lsb = offset & -offset;
7560 mask &= lsb - 1;
7561 dalign = mask + 1;
7563 return dalign >= dsize;
7566 static bool
7567 constant_pool_expr_p (rtx op)
7569 rtx base, offset;
7571 split_const (op, &base, &offset);
7572 return (SYMBOL_REF_P (base)
7573 && CONSTANT_POOL_ADDRESS_P (base)
7574 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7577 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7578 use that as the register to put the HIGH value into if register allocation
7579 is already done. */
7582 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7584 rtx tocrel, tocreg, hi;
7586 gcc_assert (TARGET_TOC);
7588 if (TARGET_DEBUG_ADDR)
7590 if (SYMBOL_REF_P (symbol))
7591 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7592 XSTR (symbol, 0));
7593 else
7595 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7596 GET_RTX_NAME (GET_CODE (symbol)));
7597 debug_rtx (symbol);
7601 if (!can_create_pseudo_p ())
7602 df_set_regs_ever_live (TOC_REGISTER, true);
7604 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7605 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7606 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7607 return tocrel;
7609 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7610 if (largetoc_reg != NULL)
7612 emit_move_insn (largetoc_reg, hi);
7613 hi = largetoc_reg;
7615 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
7618 /* These are only used to pass through from print_operand/print_operand_address
7619 to rs6000_output_addr_const_extra over the intervening function
7620 output_addr_const which is not target code. */
7621 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7623 /* Return true if OP is a toc pointer relative address (the output
7624 of create_TOC_reference). If STRICT, do not match non-split
7625 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7626 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7627 TOCREL_OFFSET_RET respectively. */
7629 bool
7630 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7631 const_rtx *tocrel_offset_ret)
7633 if (!TARGET_TOC)
7634 return false;
7636 if (TARGET_CMODEL != CMODEL_SMALL)
7638 /* When strict ensure we have everything tidy. */
7639 if (strict
7640 && !(GET_CODE (op) == LO_SUM
7641 && REG_P (XEXP (op, 0))
7642 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7643 return false;
7645 /* When not strict, allow non-split TOC addresses and also allow
7646 (lo_sum (high ..)) TOC addresses created during reload. */
7647 if (GET_CODE (op) == LO_SUM)
7648 op = XEXP (op, 1);
7651 const_rtx tocrel_base = op;
7652 const_rtx tocrel_offset = const0_rtx;
7654 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7656 tocrel_base = XEXP (op, 0);
7657 tocrel_offset = XEXP (op, 1);
7660 if (tocrel_base_ret)
7661 *tocrel_base_ret = tocrel_base;
7662 if (tocrel_offset_ret)
7663 *tocrel_offset_ret = tocrel_offset;
7665 return (GET_CODE (tocrel_base) == UNSPEC
7666 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7667 && REG_P (XVECEXP (tocrel_base, 0, 1))
7668 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7671 /* Return true if X is a constant pool address, and also for cmodel=medium
7672 if X is a toc-relative address known to be offsettable within MODE. */
7674 bool
7675 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7676 bool strict)
7678 const_rtx tocrel_base, tocrel_offset;
7679 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7680 && (TARGET_CMODEL != CMODEL_MEDIUM
7681 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7682 || mode == QImode
7683 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7684 INTVAL (tocrel_offset), mode)));
7687 static bool
7688 legitimate_small_data_p (machine_mode mode, rtx x)
7690 return (DEFAULT_ABI == ABI_V4
7691 && !flag_pic && !TARGET_TOC
7692 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7693 && small_data_operand (x, mode));
7696 bool
7697 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7698 bool strict, bool worst_case)
7700 unsigned HOST_WIDE_INT offset;
7701 unsigned int extra;
7703 if (GET_CODE (x) != PLUS)
7704 return false;
7705 if (!REG_P (XEXP (x, 0)))
7706 return false;
7707 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7708 return false;
7709 if (mode_supports_dq_form (mode))
7710 return quad_address_p (x, mode, strict);
7711 if (!reg_offset_addressing_ok_p (mode))
7712 return virtual_stack_registers_memory_p (x);
7713 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7714 return true;
7715 if (!CONST_INT_P (XEXP (x, 1)))
7716 return false;
7718 offset = INTVAL (XEXP (x, 1));
7719 extra = 0;
7720 switch (mode)
7722 case E_DFmode:
7723 case E_DDmode:
7724 case E_DImode:
7725 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7726 addressing. */
7727 if (VECTOR_MEM_VSX_P (mode))
7728 return false;
7730 if (!worst_case)
7731 break;
7732 if (!TARGET_POWERPC64)
7733 extra = 4;
7734 else if (offset & 3)
7735 return false;
7736 break;
7738 case E_TFmode:
7739 case E_IFmode:
7740 case E_KFmode:
7741 case E_TDmode:
7742 case E_TImode:
7743 case E_PTImode:
7744 extra = 8;
7745 if (!worst_case)
7746 break;
7747 if (!TARGET_POWERPC64)
7748 extra = 12;
7749 else if (offset & 3)
7750 return false;
7751 break;
7753 default:
7754 break;
7757 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7760 bool
7761 legitimate_indexed_address_p (rtx x, int strict)
7763 rtx op0, op1;
7765 if (GET_CODE (x) != PLUS)
7766 return false;
7768 op0 = XEXP (x, 0);
7769 op1 = XEXP (x, 1);
7771 return (REG_P (op0) && REG_P (op1)
7772 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7773 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7774 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7775 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7778 bool
7779 avoiding_indexed_address_p (machine_mode mode)
7781 /* Avoid indexed addressing for modes that have non-indexed
7782 load/store instruction forms. */
7783 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7786 bool
7787 legitimate_indirect_address_p (rtx x, int strict)
7789 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7792 bool
7793 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7795 if (!TARGET_MACHO || !flag_pic
7796 || mode != SImode || !MEM_P (x))
7797 return false;
7798 x = XEXP (x, 0);
7800 if (GET_CODE (x) != LO_SUM)
7801 return false;
7802 if (!REG_P (XEXP (x, 0)))
7803 return false;
7804 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7805 return false;
7806 x = XEXP (x, 1);
7808 return CONSTANT_P (x);
7811 static bool
7812 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7814 if (GET_CODE (x) != LO_SUM)
7815 return false;
7816 if (!REG_P (XEXP (x, 0)))
7817 return false;
7818 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7819 return false;
7820 /* quad word addresses are restricted, and we can't use LO_SUM. */
7821 if (mode_supports_dq_form (mode))
7822 return false;
7823 x = XEXP (x, 1);
7825 if (TARGET_ELF || TARGET_MACHO)
7827 bool large_toc_ok;
7829 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7830 return false;
7831 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
7832 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
7833 recognizes some LO_SUM addresses as valid although this
7834 function says opposite. In most cases, LRA through different
7835 transformations can generate correct code for address reloads.
7836 It cannot manage only some LO_SUM cases. So we need to add
7837 code here saying that some addresses are still valid. */
7838 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7839 && small_toc_ref (x, VOIDmode));
7840 if (TARGET_TOC && ! large_toc_ok)
7841 return false;
7842 if (GET_MODE_NUNITS (mode) != 1)
7843 return false;
7844 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7845 && !(/* ??? Assume floating point reg based on mode? */
7846 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
7847 return false;
7849 return CONSTANT_P (x) || large_toc_ok;
7852 return false;
7856 /* Try machine-dependent ways of modifying an illegitimate address
7857 to be legitimate. If we find one, return the new, valid address.
7858 This is used from only one place: `memory_address' in explow.c.
7860 OLDX is the address as it was before break_out_memory_refs was
7861 called. In some cases it is useful to look at this to decide what
7862 needs to be done.
7864 It is always safe for this function to do nothing. It exists to
7865 recognize opportunities to optimize the output.
7867 On RS/6000, first check for the sum of a register with a constant
7868 integer that is out of range. If so, generate code to add the
7869 constant with the low-order 16 bits masked to the register and force
7870 this result into another register (this can be done with `cau').
7871 Then generate an address of REG+(CONST&0xffff), allowing for the
7872 possibility of bit 16 being a one.
7874 Then check for the sum of a register and something not constant, try to
7875 load the other things into a register and return the sum. */
7877 static rtx
7878 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7879 machine_mode mode)
7881 unsigned int extra;
7883 if (!reg_offset_addressing_ok_p (mode)
7884 || mode_supports_dq_form (mode))
7886 if (virtual_stack_registers_memory_p (x))
7887 return x;
7889 /* In theory we should not be seeing addresses of the form reg+0,
7890 but just in case it is generated, optimize it away. */
7891 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7892 return force_reg (Pmode, XEXP (x, 0));
7894 /* For TImode with load/store quad, restrict addresses to just a single
7895 pointer, so it works with both GPRs and VSX registers. */
7896 /* Make sure both operands are registers. */
7897 else if (GET_CODE (x) == PLUS
7898 && (mode != TImode || !TARGET_VSX))
7899 return gen_rtx_PLUS (Pmode,
7900 force_reg (Pmode, XEXP (x, 0)),
7901 force_reg (Pmode, XEXP (x, 1)));
7902 else
7903 return force_reg (Pmode, x);
7905 if (SYMBOL_REF_P (x))
7907 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7908 if (model != 0)
7909 return rs6000_legitimize_tls_address (x, model);
7912 extra = 0;
7913 switch (mode)
7915 case E_TFmode:
7916 case E_TDmode:
7917 case E_TImode:
7918 case E_PTImode:
7919 case E_IFmode:
7920 case E_KFmode:
7921 /* As in legitimate_offset_address_p we do not assume
7922 worst-case. The mode here is just a hint as to the registers
7923 used. A TImode is usually in gprs, but may actually be in
7924 fprs. Leave worst-case scenario for reload to handle via
7925 insn constraints. PTImode is only GPRs. */
7926 extra = 8;
7927 break;
7928 default:
7929 break;
7932 if (GET_CODE (x) == PLUS
7933 && REG_P (XEXP (x, 0))
7934 && CONST_INT_P (XEXP (x, 1))
7935 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7936 >= 0x10000 - extra))
7938 HOST_WIDE_INT high_int, low_int;
7939 rtx sum;
7940 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7941 if (low_int >= 0x8000 - extra)
7942 low_int = 0;
7943 high_int = INTVAL (XEXP (x, 1)) - low_int;
7944 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7945 GEN_INT (high_int)), 0);
7946 return plus_constant (Pmode, sum, low_int);
7948 else if (GET_CODE (x) == PLUS
7949 && REG_P (XEXP (x, 0))
7950 && !CONST_INT_P (XEXP (x, 1))
7951 && GET_MODE_NUNITS (mode) == 1
7952 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7953 || (/* ??? Assume floating point reg based on mode? */
7954 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
7955 && !avoiding_indexed_address_p (mode))
7957 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7958 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7960 else if ((TARGET_ELF
7961 #if TARGET_MACHO
7962 || !MACHO_DYNAMIC_NO_PIC_P
7963 #endif
7965 && TARGET_32BIT
7966 && TARGET_NO_TOC_OR_PCREL
7967 && !flag_pic
7968 && !CONST_INT_P (x)
7969 && !CONST_WIDE_INT_P (x)
7970 && !CONST_DOUBLE_P (x)
7971 && CONSTANT_P (x)
7972 && GET_MODE_NUNITS (mode) == 1
7973 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7974 || (/* ??? Assume floating point reg based on mode? */
7975 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
7977 rtx reg = gen_reg_rtx (Pmode);
7978 if (TARGET_ELF)
7979 emit_insn (gen_elf_high (reg, x));
7980 else
7981 emit_insn (gen_macho_high (Pmode, reg, x));
7982 return gen_rtx_LO_SUM (Pmode, reg, x);
7984 else if (TARGET_TOC
7985 && SYMBOL_REF_P (x)
7986 && constant_pool_expr_p (x)
7987 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7988 return create_TOC_reference (x, NULL_RTX);
7989 else
7990 return x;
7993 /* Debug version of rs6000_legitimize_address. */
7994 static rtx
7995 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7997 rtx ret;
7998 rtx_insn *insns;
8000 start_sequence ();
8001 ret = rs6000_legitimize_address (x, oldx, mode);
8002 insns = get_insns ();
8003 end_sequence ();
8005 if (ret != x)
8007 fprintf (stderr,
8008 "\nrs6000_legitimize_address: mode %s, old code %s, "
8009 "new code %s, modified\n",
8010 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8011 GET_RTX_NAME (GET_CODE (ret)));
8013 fprintf (stderr, "Original address:\n");
8014 debug_rtx (x);
8016 fprintf (stderr, "oldx:\n");
8017 debug_rtx (oldx);
8019 fprintf (stderr, "New address:\n");
8020 debug_rtx (ret);
8022 if (insns)
8024 fprintf (stderr, "Insns added:\n");
8025 debug_rtx_list (insns, 20);
8028 else
8030 fprintf (stderr,
8031 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8032 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8034 debug_rtx (x);
8037 if (insns)
8038 emit_insn (insns);
8040 return ret;
8043 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8044 We need to emit DTP-relative relocations. */
8046 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8047 static void
8048 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8050 switch (size)
8052 case 4:
8053 fputs ("\t.long\t", file);
8054 break;
8055 case 8:
8056 fputs (DOUBLE_INT_ASM_OP, file);
8057 break;
8058 default:
8059 gcc_unreachable ();
8061 output_addr_const (file, x);
8062 if (TARGET_ELF)
8063 fputs ("@dtprel+0x8000", file);
8064 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8066 switch (SYMBOL_REF_TLS_MODEL (x))
8068 case 0:
8069 break;
8070 case TLS_MODEL_LOCAL_EXEC:
8071 fputs ("@le", file);
8072 break;
8073 case TLS_MODEL_INITIAL_EXEC:
8074 fputs ("@ie", file);
8075 break;
8076 case TLS_MODEL_GLOBAL_DYNAMIC:
8077 case TLS_MODEL_LOCAL_DYNAMIC:
8078 fputs ("@m", file);
8079 break;
8080 default:
8081 gcc_unreachable ();
8086 /* Return true if X is a symbol that refers to real (rather than emulated)
8087 TLS. */
8089 static bool
8090 rs6000_real_tls_symbol_ref_p (rtx x)
8092 return (SYMBOL_REF_P (x)
8093 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8096 /* In the name of slightly smaller debug output, and to cater to
8097 general assembler lossage, recognize various UNSPEC sequences
8098 and turn them back into a direct symbol reference. */
8100 static rtx
8101 rs6000_delegitimize_address (rtx orig_x)
8103 rtx x, y, offset;
8105 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8106 orig_x = XVECEXP (orig_x, 0, 0);
8108 orig_x = delegitimize_mem_from_attrs (orig_x);
8110 x = orig_x;
8111 if (MEM_P (x))
8112 x = XEXP (x, 0);
8114 y = x;
8115 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8116 y = XEXP (y, 1);
8118 offset = NULL_RTX;
8119 if (GET_CODE (y) == PLUS
8120 && GET_MODE (y) == Pmode
8121 && CONST_INT_P (XEXP (y, 1)))
8123 offset = XEXP (y, 1);
8124 y = XEXP (y, 0);
8127 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8129 y = XVECEXP (y, 0, 0);
8131 #ifdef HAVE_AS_TLS
8132 /* Do not associate thread-local symbols with the original
8133 constant pool symbol. */
8134 if (TARGET_XCOFF
8135 && SYMBOL_REF_P (y)
8136 && CONSTANT_POOL_ADDRESS_P (y)
8137 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8138 return orig_x;
8139 #endif
8141 if (offset != NULL_RTX)
8142 y = gen_rtx_PLUS (Pmode, y, offset);
8143 if (!MEM_P (orig_x))
8144 return y;
8145 else
8146 return replace_equiv_address_nv (orig_x, y);
8149 if (TARGET_MACHO
8150 && GET_CODE (orig_x) == LO_SUM
8151 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8153 y = XEXP (XEXP (orig_x, 1), 0);
8154 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8155 return XVECEXP (y, 0, 0);
8158 return orig_x;
8161 /* Return true if X shouldn't be emitted into the debug info.
8162 The linker doesn't like .toc section references from
8163 .debug_* sections, so reject .toc section symbols. */
8165 static bool
8166 rs6000_const_not_ok_for_debug_p (rtx x)
8168 if (GET_CODE (x) == UNSPEC)
8169 return true;
8170 if (SYMBOL_REF_P (x)
8171 && CONSTANT_POOL_ADDRESS_P (x))
8173 rtx c = get_pool_constant (x);
8174 machine_mode cmode = get_pool_mode (x);
8175 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8176 return true;
8179 return false;
8182 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8184 static bool
8185 rs6000_legitimate_combined_insn (rtx_insn *insn)
8187 int icode = INSN_CODE (insn);
8189 /* Reject creating doloop insns. Combine should not be allowed
8190 to create these for a number of reasons:
8191 1) In a nested loop, if combine creates one of these in an
8192 outer loop and the register allocator happens to allocate ctr
8193 to the outer loop insn, then the inner loop can't use ctr.
8194 Inner loops ought to be more highly optimized.
8195 2) Combine often wants to create one of these from what was
8196 originally a three insn sequence, first combining the three
8197 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8198 allocated ctr, the splitter takes use back to the three insn
8199 sequence. It's better to stop combine at the two insn
8200 sequence.
8201 3) Faced with not being able to allocate ctr for ctrsi/crtdi
8202 insns, the register allocator sometimes uses floating point
8203 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8204 jump insn and output reloads are not implemented for jumps,
8205 the ctrsi/ctrdi splitters need to handle all possible cases.
8206 That's a pain, and it gets to be seriously difficult when a
8207 splitter that runs after reload needs memory to transfer from
8208 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8209 for the difficult case. It's better to not create problems
8210 in the first place. */
8211 if (icode != CODE_FOR_nothing
8212 && (icode == CODE_FOR_bdz_si
8213 || icode == CODE_FOR_bdz_di
8214 || icode == CODE_FOR_bdnz_si
8215 || icode == CODE_FOR_bdnz_di
8216 || icode == CODE_FOR_bdztf_si
8217 || icode == CODE_FOR_bdztf_di
8218 || icode == CODE_FOR_bdnztf_si
8219 || icode == CODE_FOR_bdnztf_di))
8220 return false;
8222 return true;
8225 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8227 static GTY(()) rtx rs6000_tls_symbol;
8228 static rtx
8229 rs6000_tls_get_addr (void)
8231 if (!rs6000_tls_symbol)
8232 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8234 return rs6000_tls_symbol;
8237 /* Construct the SYMBOL_REF for TLS GOT references. */
8239 static GTY(()) rtx rs6000_got_symbol;
8241 rs6000_got_sym (void)
8243 if (!rs6000_got_symbol)
8245 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8246 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8247 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8250 return rs6000_got_symbol;
/* AIX Thread-Local Address support.  */

/* Legitimize the thread-local SYMBOL_REF ADDR under MODEL for XCOFF.
   Returns an rtx holding the (thread-local) address, emitting the
   necessary insns as a side effect.  */
static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
  const char *name;
  char *tlsname;

  name = XSTR (addr, 0);
  /* Append TLS CSECT qualifier, unless the symbol already is qualified
     or the symbol will be in TLS private data section.  */
  if (name[strlen (name) - 1] != ']'
      && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
	  || bss_initializer_p (SYMBOL_REF_DECL (addr))))
    {
      /* "[UL]" for uninitialized, "[TL]" for initialized TLS data.  */
      tlsname = XALLOCAVEC (char, strlen (name) + 4);
      strcpy (tlsname, name);
      strcat (tlsname,
	      bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
      tlsaddr = copy_rtx (addr);
      XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
    }
  else
    tlsaddr = addr;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 1);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      /* Load the module handle and the symbol offset, then call
	 __tls_get_addr via the appropriate pattern.  */
      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    tlsreg = gen_rtx_REG (DImode, 13);

  /* Load the TOC value into temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}
/* Output arg setup instructions for a !TARGET_TLS_MARKERS
   __tls_get_addr call.

   OPERANDS is the operand array of the call insn; operand 2 is the
   UNSPEC (UNSPEC_TLSGD or UNSPEC_TLSLD) stashed by global_tlsarg.  */

void
rs6000_output_tlsargs (rtx *operands)
{
  /* Set up operands for output_asm_insn, without modifying OPERANDS.  */
  rtx op[3];
  /* The set dest of the call, ie. r3, which is also the first arg reg.  */
  op[0] = operands[0];
  /* The TLS symbol from global_tlsarg stashed as CALL operand 2.  */
  op[1] = XVECEXP (operands[2], 0, 0);
  if (XINT (operands[2], 1) == UNSPEC_TLSGD)
    {
      /* The GOT register.  */
      op[2] = XVECEXP (operands[2], 0, 1);
      /* Medium/large code model needs the high/low addis+addi pair.  */
      if (TARGET_CMODEL != CMODEL_SMALL)
	output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
			 "addi %0,%0,%1@got@tlsgd@l", op);
      else
	output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
    }
  else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
    {
      if (TARGET_CMODEL != CMODEL_SMALL)
	output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
			 "addi %0,%0,%&@got@tlsld@l", op);
      else
	output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
    }
  else
    gcc_unreachable ();
}
/* Passes the tls arg value for global dynamic and local dynamic
   emit_library_call_value in rs6000_legitimize_tls_address to
   rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
   marker relocs put on __tls_get_addr calls.  Non-NULL only for the
   duration of the library-call expansion.  */
static rtx global_tlsarg;
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address according to MODEL, returning a pseudo
   holding the result.  */

static rtx
rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
{
  rtx dest, insn;

  /* AIX/XCOFF has its own TOC-based TLS scheme.  */
  if (TARGET_XCOFF)
    return rs6000_legitimize_tls_address_aix (addr, model);

  dest = gen_reg_rtx (Pmode);
  if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
    {
      rtx tlsreg;

      /* Thread pointer is r13 on 64-bit, r2 on 32-bit.  */
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
	}
      emit_insn (insn);
    }
  else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
    {
      rtx tlsreg, tmp;

      /* 32-bit offsets: emit the high-adjusted part, then the low part.  */
      tmp = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
	}
      emit_insn (insn);
      if (TARGET_64BIT)
	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
      else
	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
      emit_insn (insn);
    }
  else
    {
      rtx got, tga, tmp1, tmp2;

      /* We currently use relocations like @got@tlsgd for tls, which
	 means the linker will handle allocation of tls entries, placing
	 them in the .got section.  So use a pointer to the .got section,
	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
	 or to secondary GOT sections used by 32-bit -fPIC.  */
      if (TARGET_64BIT)
	got = gen_rtx_REG (Pmode, 2);
      else
	{
	  if (flag_pic == 1)
	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  else
	    {
	      rtx gsym = rs6000_got_sym ();
	      got = gen_reg_rtx (Pmode);
	      if (flag_pic == 0)
		rs6000_emit_move (got, gsym, Pmode);
	      else
		{
		  rtx mem, lab;

		  /* Compute the GOT address via the link register
		     (load_toc_v4_PIC_1b + LR readback).  */
		  tmp1 = gen_reg_rtx (Pmode);
		  tmp2 = gen_reg_rtx (Pmode);
		  mem = gen_const_mem (Pmode, tmp1);
		  lab = gen_label_rtx ();
		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
		  if (TARGET_LINK_STACK)
		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
		  emit_move_insn (tmp2, mem);
		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
		  set_unique_reg_note (last, REG_EQUAL, gsym);
		}
	    }
	}

      if (model == TLS_MODEL_GLOBAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
				    UNSPEC_TLSGD);
	  tga = rs6000_tls_get_addr ();
	  /* Stash the arg so the call expanders can emit marker relocs.  */
	  global_tlsarg = arg;
	  if (TARGET_TLS_MARKERS)
	    {
	      rtx argreg = gen_rtx_REG (Pmode, 3);
	      emit_insn (gen_rtx_SET (argreg, arg));
	      emit_library_call_value (tga, dest, LCT_CONST, Pmode,
				       argreg, Pmode);
	    }
	  else
	    emit_library_call_value (tga, dest, LCT_CONST, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
	}
      else if (model == TLS_MODEL_LOCAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
	  tga = rs6000_tls_get_addr ();
	  tmp1 = gen_reg_rtx (Pmode);
	  global_tlsarg = arg;
	  if (TARGET_TLS_MARKERS)
	    {
	      rtx argreg = gen_rtx_REG (Pmode, 3);
	      emit_insn (gen_rtx_SET (argreg, arg));
	      emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
				       argreg, Pmode);
	    }
	  else
	    emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);

	  /* Add the dtprel offset of ADDR to the module base in TMP1.  */
	  if (rs6000_tls_size == 16)
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
	      else
		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
	    }
	  else if (rs6000_tls_size == 32)
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
	      else
		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
	      emit_insn (insn);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
	    }
	  else
	    {
	      /* Offset too large for an immediate: load it from the GOT.  */
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
	      else
		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
	      emit_insn (insn);
	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
	    }
	  emit_insn (insn);
	}
      else
	{
	  /* IE, or 64-bit offset LE.  */
	  tmp2 = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
	  else
	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
	  emit_insn (insn);
	  if (TARGET_64BIT)
	    insn = gen_tls_tls_64 (dest, tmp2, addr);
	  else
	    insn = gen_tls_tls_32 (dest, tmp2, addr);
	  emit_insn (insn);
	}
    }

  return dest;
}
8569 /* Only create the global variable for the stack protect guard if we are using
8570 the global flavor of that guard. */
8571 static tree
8572 rs6000_init_stack_protect_guard (void)
8574 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8575 return default_stack_protect_guard ();
8577 return NULL_TREE;
8580 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8582 static bool
8583 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8585 if (GET_CODE (x) == HIGH
8586 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8587 return true;
8589 /* A TLS symbol in the TOC cannot contain a sum. */
8590 if (GET_CODE (x) == CONST
8591 && GET_CODE (XEXP (x, 0)) == PLUS
8592 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8593 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8594 return true;
8596 /* Do not place an ELF TLS symbol in the constant pool. */
8597 return TARGET_ELF && tls_referenced_p (x);
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.

   SYM is the SYMBOL_REF; MODE is the mode of the access (only consulted
   for the cmodel=medium alignment check).  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  /* cmodel=medium: local symbols small enough that the whole
	     access stays within TOC-pointer-relative range.  */
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
   that is a valid memory address for an instruction.
   The MODE argument is the machine mode for the MEM expression
   that wants to use this address.

   On the RS/6000, there are four valid address: a SYMBOL_REF that
   refers to a constant pool entry of an address (or the sum of it
   plus a constant), a short (16-bit signed) constant plus a register,
   the sum of two registers, or a register indirect, possibly with an
   auto-increment.  For DFmode, DDmode and DImode with a constant plus
   register, we must ensure that both words are addressable or PowerPC64
   with offset word aligned.

   For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
   32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
   because adjacent memory cells are accessed by adding word-sized offsets
   during assembly output.  */
static bool
rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
{
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_dq_form (mode);

  /* If this is an unaligned stvx/ldvx type address, discard the outer AND.  */
  if (VECTOR_MEM_ALTIVEC_P (mode)
      && GET_CODE (x) == AND
      && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) == -16)
    x = XEXP (x, 0);

  /* ELF TLS symbols must go through rs6000_legitimize_tls_address.  */
  if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
    return 0;
  if (legitimate_indirect_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE
      && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
    return 1;
  /* Handle restricted vector d-form offsets in ISA 3.0.  */
  if (quad_offset_p)
    {
      if (quad_address_p (x, mode, reg_ok_strict))
	return 1;
    }
  else if (virtual_stack_registers_memory_p (x))
    return 1;

  else if (reg_offset_p)
    {
      if (legitimate_small_data_p (mode, x))
	return 1;
      if (legitimate_constant_pool_address_p (x, mode,
					      reg_ok_strict || lra_in_progress))
	return 1;
    }

  /* For TImode, if we have TImode in VSX registers, only allow register
     indirect addresses.  This will allow the values to go in either GPRs
     or VSX registers without reloading.  The vector types would tend to
     go into VSX registers, so we allow REG+REG, while TImode seems
     somewhat split, in that some uses are GPR based, and some VSX based.  */
  /* FIXME: We could loosen this by changing the following to
       if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
     but currently we cannot allow REG+REG addressing for TImode.  See
     PR72827 for complete details on how this ends up hoodwinking DSE.  */
  if (mode == TImode && TARGET_VSX)
    return 0;
  /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
  if (! reg_ok_strict
      && reg_offset_p
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && (XEXP (x, 0) == virtual_stack_vars_rtx
	  || XEXP (x, 0) == arg_pointer_rtx)
      && CONST_INT_P (XEXP (x, 1)))
    return 1;
  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
    return 1;
  /* Indexed (REG+REG) addressing, for modes where it is permitted.  */
  if (!FLOAT128_2REG_P (mode)
      && (TARGET_HARD_FLOAT
	  || TARGET_POWERPC64
	  || (mode != DFmode && mode != DDmode))
      && (TARGET_POWERPC64 || mode != DImode)
      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
      && mode != PTImode
      && !avoiding_indexed_address_p (mode)
      && legitimate_indexed_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
      && mode_supports_pre_modify_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
					      reg_ok_strict, false)
	  || (!avoiding_indexed_address_p (mode)
	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    return 1;
  if (reg_offset_p && !quad_offset_p
      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
    return 1;
  return 0;
}
8719 /* Debug version of rs6000_legitimate_address_p. */
8720 static bool
8721 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8722 bool reg_ok_strict)
8724 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8725 fprintf (stderr,
8726 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8727 "strict = %d, reload = %s, code = %s\n",
8728 ret ? "true" : "false",
8729 GET_MODE_NAME (mode),
8730 reg_ok_strict,
8731 (reload_completed ? "after" : "before"),
8732 GET_RTX_NAME (GET_CODE (x)));
8733 debug_rtx (x);
8735 return ret;
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  Forwards to the
   rs6000_mode_dependent_address_ptr hook (debug or normal variant);
   the address space is ignored on this target.  */

static bool
rs6000_mode_dependent_address_p (const_rtx addr,
				 addr_space_t as ATTRIBUTE_UNUSED)
{
  return rs6000_mode_dependent_address_ptr (addr);
}
/* Go to LABEL if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   On the RS/6000 this is true of all integral offsets (since AltiVec
   and VSX modes don't allow them) or is a pre-increment or decrement.

   ??? Except that due to conceptual problems in offsettable_address_p
   we can't really report the problems of integral offsets.  So leave
   this assuming that the adjustable offset must be valid for the
   sub-words of a TFmode operand, which is what we had before.  */

static bool
rs6000_mode_dependent_address (const_rtx addr)
{
  switch (GET_CODE (addr))
    {
    case PLUS:
      /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
	 is considered a legitimate address before reload, so there
	 are no offset restrictions in that case.  Note that this
	 condition is safe in strict mode because any address involving
	 virtual_stack_vars_rtx or arg_pointer_rtx would already have
	 been rejected as illegitimate.  */
      if (XEXP (addr, 0) != virtual_stack_vars_rtx
	  && XEXP (addr, 0) != arg_pointer_rtx
	  && CONST_INT_P (XEXP (addr, 1)))
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
	  /* EXTRA covers the remaining sub-word accesses: 8 bytes when
	     PowerPC64, else 12 (worst case: 16-byte mode in GPRs).  */
	  HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
	  return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
	}
      break;

    case LO_SUM:
      /* Anything in the constant pool is sufficiently aligned that
	 all bytes have the same high part address.  */
      return !legitimate_constant_pool_address_p (addr, QImode, false);

    /* Auto-increment cases are now treated generically in recog.c.  */
    case PRE_MODIFY:
      return TARGET_UPDATE;

    /* AND is only allowed in Altivec loads.  */
    case AND:
      return true;

    default:
      break;
    }

  return false;
}
8800 /* Debug version of rs6000_mode_dependent_address. */
8801 static bool
8802 rs6000_debug_mode_dependent_address (const_rtx addr)
8804 bool ret = rs6000_mode_dependent_address (addr);
8806 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8807 ret ? "true" : "false");
8808 debug_rtx (addr);
8810 return ret;
/* Implement FIND_BASE_TERM.  Strip CONST/PLUS wrappers from OP and, for
   TOC-relative or Mach-O PIC unspecs, return the underlying symbol as
   the base term for alias analysis; otherwise return OP unchanged.  */

rtx
rs6000_find_base_term (rtx op)
{
  rtx base;

  base = op;
  if (GET_CODE (base) == CONST)
    base = XEXP (base, 0);
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  if (GET_CODE (base) == UNSPEC)
    switch (XINT (base, 1))
      {
      case UNSPEC_TOCREL:
      case UNSPEC_MACHOPIC_OFFSET:
	/* OP represents SYM [+ OFFSET] - ANCHOR.  SYM is the base term
	   for aliasing purposes.  */
	return XVECEXP (base, 0, 0);
      }

  return op;
}
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.

   OP is the candidate MEM; REG_MODE is the mode of the registers the
   value will occupy; STRICT selects strict register checking.  */

static bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
		|| GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
					     strict, worst_case);
}
/* Determine the reassociation width to be used in reassociate_bb.
   This takes into account how many parallel operations we
   can actually do of a given type, and also the latency.

   int add/sub 6/cycle
   mul 2/cycle
   vect add/sub/mul 2/cycle
   fp add/sub/mul 2/cycle
   dfp 1/cycle

   OPC (the operation code) is currently unused; only the mode class
   and the tuned-for processor pick the width.  */

static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			    machine_mode mode)
{
  switch (rs6000_tune)
    {
    case PROCESSOR_POWER8:
    case PROCESSOR_POWER9:
    case PROCESSOR_FUTURE:
      /* Check DFP before FLOAT_MODE_P: decimal modes are also float.  */
      if (DECIMAL_FLOAT_MODE_P (mode))
	return 1;
      if (VECTOR_MODE_P (mode))
	return 4;
      if (INTEGRAL_MODE_P (mode))
	return 1;
      if (FLOAT_MODE_P (mode))
	return 4;
      break;
    default:
      break;
    }
  /* Conservative default: no reassociation widening.  */
  return 1;
}
/* Change register usage conditional on target flags.  Adjusts
   fixed_regs/call_used_regs/global_regs for the selected ABI and ISA
   options; called via TARGET_CONDITIONAL_REGISTER_USAGE.  */
static void
rs6000_conditional_register_usage (void)
{
  int i;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_conditional_register_usage called\n");

  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
  if (TARGET_64BIT)
    fixed_regs[13] = call_used_regs[13] = 1;

  /* Conditionally disable FPRs.  */
  if (TARGET_SOFT_FLOAT)
    for (i = 32; i < 64; i++)
      fixed_regs[i] = call_used_regs[i] = 1;

  /* The TOC register is not killed across calls in a way that is
     visible to the compiler.  */
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    call_used_regs[2] = 0;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_TOC && TARGET_MINIMAL_TOC)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  /* No AltiVec/VSX: take the whole vector register file out of play.  */
  if (!TARGET_ALTIVEC && !TARGET_VSX)
    {
      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;
      call_used_regs[VRSAVE_REGNO] = 1;
    }

  if (TARGET_ALTIVEC || TARGET_VSX)
    global_regs[VSCR_REGNO] = 1;

  if (TARGET_ALTIVEC_ABI)
    {
      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
	call_used_regs[i] = 1;

      /* AIX reserves VR20:31 in non-extended ABI mode.  */
      if (TARGET_XCOFF)
	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
	  fixed_regs[i] = call_used_regs[i] = 1;
    }
}
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.

   SOURCE must be a CONST_INT; DEST's mode selects the strategy
   (QI/HI: direct move; SI: lis+ori pair; DI: split words or delegate
   to rs6000_emit_set_long_const).  */

bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
  rtx temp, set;
  rtx_insn *insn;
  HOST_WIDE_INT c;

  gcc_checking_assert (CONST_INT_P (source));
  c = INTVAL (source);
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      emit_insn (gen_rtx_SET (dest, source));
      return true;

    case E_SImode:
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);

      /* lis of the high 16 bits, then ori in the low 16 bits.  */
      emit_insn (gen_rtx_SET (copy_rtx (temp),
			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IOR (SImode, copy_rtx (temp),
					   GEN_INT (c & 0xffff))));
      break;

    case E_DImode:
      if (!TARGET_POWERPC64)
	{
	  rtx hi, lo;

	  /* 32-bit: set each 32-bit half independently.  */
	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
				      DImode);
	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
				      DImode);
	  emit_move_insn (hi, GEN_INT (c >> 32));
	  /* Sign-extend the low half from 32 bits.  */
	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
	  emit_move_insn (lo, GEN_INT (c));
	}
      else
	rs6000_emit_set_long_const (dest, c);
      break;

    default:
      gcc_unreachable ();
    }

  /* Record the full constant on the last insn so it can be CSEd.  */
  insn = get_last_insn ();
  set = single_set (insn);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));

  return true;
}
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.

   The constant is decomposed into four 16-bit chunks ud1 (lowest)
   through ud4 (highest); each case below handles the widest prefix of
   all-zero/all-one chunks with the fewest instructions.  */

static void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
  rtx temp;
  HOST_WIDE_INT ud1, ud2, ud3, ud4;

  ud1 = c & 0xffff;
  c = c >> 16;
  ud2 = c & 0xffff;
  c = c >> 16;
  ud3 = c & 0xffff;
  c = c >> 16;
  ud4 = c & 0xffff;

  /* Fits in a sign-extended 16-bit immediate: single li.  */
  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));

  /* Fits in a sign-extended 32-bit value: lis [+ ori].  */
  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  /* Upper 32 bits zero but ud2 has the sign bit set: build the 32-bit
     value then zero-extend to clear the sign-extended high half.  */
  else if (ud3 == 0 && ud4 == 0)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      gcc_assert (ud2 & 0x8000);
      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
      emit_move_insn (dest,
		      gen_rtx_ZERO_EXTEND (DImode,
					   gen_lowpart (SImode,
							copy_rtx (temp))));
    }
  /* 48-bit significant value: build ud3:ud2 then shift left 16, or in ud1.  */
  else if ((ud4 == 0xffff && (ud3 & 0x8000))
	   || (ud4 == 0 && ! (ud3 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
      if (ud2 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2)));
      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  /* Full 64-bit case: build ud4:ud3, shift left 32, or in ud2:ud1.  */
  else
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
      if (ud3 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud3)));

      emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (32)));
      if (ud2 != 0)
	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2 << 16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
}
9120 /* Helper for the following. Get rid of [r+r] memory refs
9121 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9123 static void
9124 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9126 if (MEM_P (operands[0])
9127 && !REG_P (XEXP (operands[0], 0))
9128 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9129 GET_MODE (operands[0]), false))
9130 operands[0]
9131 = replace_equiv_address (operands[0],
9132 copy_addr_to_reg (XEXP (operands[0], 0)));
9134 if (MEM_P (operands[1])
9135 && !REG_P (XEXP (operands[1], 0))
9136 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9137 GET_MODE (operands[1]), false))
9138 operands[1]
9139 = replace_equiv_address (operands[1],
9140 copy_addr_to_reg (XEXP (operands[1], 0)));
9143 /* Generate a vector of constants to permute MODE for a little-endian
9144 storage operation by swapping the two halves of a vector. */
9145 static rtvec
9146 rs6000_const_vec (machine_mode mode)
9148 int i, subparts;
9149 rtvec v;
9151 switch (mode)
9153 case E_V1TImode:
9154 subparts = 1;
9155 break;
9156 case E_V2DFmode:
9157 case E_V2DImode:
9158 subparts = 2;
9159 break;
9160 case E_V4SFmode:
9161 case E_V4SImode:
9162 subparts = 4;
9163 break;
9164 case E_V8HImode:
9165 subparts = 8;
9166 break;
9167 case E_V16QImode:
9168 subparts = 16;
9169 break;
9170 default:
9171 gcc_unreachable();
9174 v = rtvec_alloc (subparts);
9176 for (i = 0; i < subparts / 2; ++i)
9177 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9178 for (i = subparts / 2; i < subparts; ++i)
9179 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9181 return v;
/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
   store operation.  DEST and SOURCE are the two operands; MODE is the
   vector mode being permuted.  */
void
rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
  /* Scalar permutations are easier to express in integer modes rather than
     floating-point modes, so cast them here.  We use V1TImode instead
     of TImode to ensure that the values don't go through GPRs.  */
  if (FLOAT128_VECTOR_P (mode))
    {
      dest = gen_lowpart (V1TImode, dest);
      source = gen_lowpart (V1TImode, source);
      mode = V1TImode;
    }

  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
     scalar.  */
  if (mode == TImode || mode == V1TImode)
    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
						  GEN_INT (64))));
  else
    {
      /* Swap the two double-word halves via a VEC_SELECT permute.  */
      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
    }
}
/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insn's that represent an lxvd2x and xxpermdi.  */
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = gen_lowpart (V2DImode, dest);
      source = adjust_address (source, V2DImode, 0);
    }

  /* Two back-to-back permutes: load-and-swap, then swap back into DEST.
     Before RA we use a fresh pseudo for the intermediate value.  */
  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insn's that represent an xxpermdi and an stxvd2x.  */
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
  /* This should never be called during or after LRA, because it does
     not re-permute the source register.  It is intended only for use
     during expand.  */
  gcc_assert (!lra_in_progress && !reload_completed);

  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
  if (mode == TImode || mode == V1TImode)
    {
      mode = V2DImode;
      dest = adjust_address (dest, V2DImode, 0);
      source = gen_lowpart (V2DImode, source);
    }

  /* Permute into a fresh pseudo, then emit the swapping store.  */
  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
  rs6000_emit_le_vsx_permute (tmp, source, mode);
  rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
   moving data from SOURCE to DEST in mode MODE.  This is done
   separately from rs6000_emit_move to ensure it is called only
   during expand.  LE VSX loads and stores introduced later are
   handled with a split.  The expand-time RTL generation allows
   us to optimize away redundant pairs of register-permutes.  */
void
rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
  /* Exactly one side must be memory, and this path is only for
     pre-P9 little-endian VSX moves not involving GPRs.  */
  gcc_assert (!BYTES_BIG_ENDIAN
	      && VECTOR_MEM_VSX_P (mode)
	      && !TARGET_P9_VECTOR
	      && !gpr_or_gpr_p (dest, source)
	      && (MEM_P (source) ^ MEM_P (dest)));

  if (MEM_P (source))
    {
      gcc_assert (REG_P (dest) || SUBREG_P (dest));
      rs6000_emit_le_vsx_load (dest, source, mode);
    }
  else
    {
      if (!REG_P (source))
	source = force_reg (mode, source);
      rs6000_emit_le_vsx_store (dest, source, mode);
    }
}
/* Return whether a SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:

	(SUBREG:SF (REG:SI ...))
	(SUBREG:SI (REG:SF ...))

   and one of the values is in a floating point/vector register, where SFmode
   scalars are stored in DFmode format.  */

bool
valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
{
  /* -mallow-sf-subreg: the user asked for these moves to be allowed.  */
  if (TARGET_ALLOW_SF_SUBREG)
    return true;

  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
    return true;

  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
    return true;

  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
  if (SUBREG_P (dest))
    {
      rtx dest_subreg = SUBREG_REG (dest);
      rtx src_subreg = SUBREG_REG (src);
      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
    }

  return false;
}
/* Helper function to change moves with:

	(SUBREG:SF (REG:SI)) and
	(SUBREG:SI (REG:SF))

   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
   values are stored as DFmode values in the VSX registers.  We need to convert
   the bits before we can use a direct move or operate on the bits in the
   vector register as an integer type.

   Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)).

   Returns true if a conversion insn was emitted, false if the caller
   should fall through to normal move handling.  */

static bool
rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
{
  if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
      && SUBREG_P (source) && sf_subreg_operand (source, mode))
    {
      rtx inner_source = SUBREG_REG (source);
      machine_mode inner_mode = GET_MODE (inner_source);

      if (mode == SImode && inner_mode == SFmode)
	{
	  emit_insn (gen_movsi_from_sf (dest, inner_source));
	  return true;
	}

      if (mode == SFmode && inner_mode == SImode)
	{
	  emit_insn (gen_movsf_from_si (dest, inner_source));
	  return true;
	}
    }

  return false;
}
/* Emit a move from SOURCE to DEST in mode MODE.  This is the main move
   expander; it legitimizes constants and addresses (TLS, TOC, Mach-O PIC,
   small data), splits certain moves that the hardware cannot do directly,
   and finally emits a plain SET.  */
void
rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
{
  rtx operands[2];
  operands[0] = dest;
  operands[1] = source;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
	       GET_MODE_NAME (mode),
	       lra_in_progress,
	       reload_completed,
	       can_create_pseudo_p ());
      debug_rtx (dest);
      fprintf (stderr, "source:\n");
      debug_rtx (source);
    }

  /* Check that we get CONST_WIDE_INT only when we should.  */
  if (CONST_WIDE_INT_P (operands[1])
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    gcc_unreachable ();

#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* If we use a long double type, set the flags in .gnu_attribute that say
     what the long double type is.  This is to allow the linker's warning
     message for the wrong long double to be useful, even if the function does
     not do a call (for example, doing a 128-bit add on power9 if the long
     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
     are used if they aren't the default long double type.  */
  if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
    {
      if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
	rs6000_passes_float = rs6000_passes_long_double = true;

      else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
	rs6000_passes_float = rs6000_passes_long_double = true;
    }
#endif

  /* See if we need to special case SImode/SFmode SUBREG moves.  */
  if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
      && rs6000_emit_move_si_sf_subreg (dest, source, mode))
    return;

  /* Check if GCC is setting up a block move that will end up using FP
     registers as temporaries.  We must make sure this is acceptable.
     If the DImode access would be slow but two SImode accesses would not,
     split the move into two SImode moves.  */
  if (MEM_P (operands[0])
      && MEM_P (operands[1])
      && mode == DImode
      && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
      && ! (rs6000_slow_unaligned_access (SImode,
					  (MEM_ALIGN (operands[0]) > 32
					   ? 32 : MEM_ALIGN (operands[0])))
	    || rs6000_slow_unaligned_access (SImode,
					     (MEM_ALIGN (operands[1]) > 32
					      ? 32 : MEM_ALIGN (operands[1]))))
      && ! MEM_VOLATILE_P (operands [0])
      && ! MEM_VOLATILE_P (operands [1]))
    {
      emit_move_insn (adjust_address (operands[0], SImode, 0),
		      adjust_address (operands[1], SImode, 0));
      emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
		      adjust_address (copy_rtx (operands[1]), SImode, 4));
      return;
    }

  if (can_create_pseudo_p () && MEM_P (operands[0])
      && !gpc_reg_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);

  /* Recognize the case where operand[1] is a reference to thread-local
     data and load its address to a register.  */
  if (tls_referenced_p (operands[1]))
    {
      enum tls_model model;
      rtx tmp = operands[1];
      rtx addend = NULL;

      /* Peel off a (const (plus SYMBOL addend)) wrapper if present.  */
      if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (tmp, 0), 1);
	  tmp = XEXP (XEXP (tmp, 0), 0);
	}

      gcc_assert (SYMBOL_REF_P (tmp));
      model = SYMBOL_REF_TLS_MODEL (tmp);
      gcc_assert (model != 0);

      tmp = rs6000_legitimize_tls_address (tmp, model);
      if (addend)
	{
	  tmp = gen_rtx_PLUS (mode, tmp, addend);
	  tmp = force_operand (tmp, operands[0]);
	}
      operands[1] = tmp;
    }

  /* 128-bit constant floating-point values on Darwin should really be loaded
     as two parts.  However, this premature splitting is a problem when DFmode
     values can go into Altivec registers.  */
  if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
      && !reg_addr[DFmode].scalar_in_vmx_p)
    {
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
			simplify_gen_subreg (DFmode, operands[1], mode, 0),
			DFmode);
      rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
					     GET_MODE_SIZE (DFmode)),
			simplify_gen_subreg (DFmode, operands[1], mode,
					     GET_MODE_SIZE (DFmode)),
			DFmode);
      return;
    }

  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
     p1:SD) if p1 is not of floating point class and p0 is spilled as
     we can have no analogous movsd_store for this.  */
  if (lra_in_progress && mode == DDmode
      && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
      && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
      && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
    {
      enum reg_class cl;
      int regno = REGNO (SUBREG_REG (operands[1]));

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  /* Resolve the pseudo to the hard register it was (or would be)
	     assigned, to learn which register file it lives in.  */
	  cl = reg_preferred_class (regno);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
	}
      if (regno >= 0 && ! FP_REGNO_P (regno))
	{
	  mode = SDmode;
	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
	  operands[1] = SUBREG_REG (operands[1]);
	}
    }
  if (lra_in_progress
      && mode == SDmode
      && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
      && (REG_P (operands[1])
	  || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
    {
      int regno = reg_or_subregno (operands[1]);
      enum reg_class cl;

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  gcc_assert (cl != NO_REGS);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = ira_class_hard_regs[cl][0];
	}
      if (FP_REGNO_P (regno))
	{
	  /* SDmode stores from FP registers go through movsd_store.  */
	  if (GET_MODE (operands[0]) != DDmode)
	    operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
	  emit_insn (gen_movsd_store (operands[0], operands[1]));
	}
      else if (INT_REGNO_P (regno))
	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
      else
	gcc_unreachable();
      return;
    }
  /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
     p:DD)) if p0 is not of floating point class and p1 is spilled as
     we can have no analogous movsd_load for this.  */
  if (lra_in_progress && mode == DDmode
      && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
      && GET_MODE (SUBREG_REG (operands[0])) == SDmode
      && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
    {
      enum reg_class cl;
      int regno = REGNO (SUBREG_REG (operands[0]));

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
	}
      if (regno >= 0 && ! FP_REGNO_P (regno))
	{
	  mode = SDmode;
	  operands[0] = SUBREG_REG (operands[0]);
	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
	}
    }
  if (lra_in_progress
      && mode == SDmode
      && (REG_P (operands[0])
	  || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
      && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
    {
      int regno = reg_or_subregno (operands[0]);
      enum reg_class cl;

      if (!HARD_REGISTER_NUM_P (regno))
	{
	  cl = reg_preferred_class (regno);
	  gcc_assert (cl != NO_REGS);
	  regno = reg_renumber[regno];
	  if (regno < 0)
	    regno = ira_class_hard_regs[cl][0];
	}
      if (FP_REGNO_P (regno))
	{
	  /* SDmode loads into FP registers go through movsd_load.  */
	  if (GET_MODE (operands[1]) != DDmode)
	    operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
	  emit_insn (gen_movsd_load (operands[0], operands[1]));
	}
      else if (INT_REGNO_P (regno))
	emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
      else
	gcc_unreachable();
      return;
    }

  /* FIXME:  In the long term, this switch statement should go away
     and be replaced by a sequence of tests based on things like
     mode == Pmode.  */
  switch (mode)
    {
    case E_HImode:
    case E_QImode:
      if (CONSTANT_P (operands[1])
	  && !CONST_INT_P (operands[1]))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case E_TFmode:
    case E_TDmode:
    case E_IFmode:
    case E_KFmode:
      if (FLOAT128_2REG_P (mode))
	rs6000_eliminate_indexed_memrefs (operands);
      /* fall through */

    case E_DFmode:
    case E_DDmode:
    case E_SFmode:
    case E_SDmode:
      if (CONSTANT_P (operands[1])
	  && ! easy_fp_constant (operands[1], mode))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
      if (CONSTANT_P (operands[1])
	  && !easy_vector_constant (operands[1], mode))
	operands[1] = force_const_mem (mode, operands[1]);
      break;

    case E_SImode:
    case E_DImode:
      /* Use default pattern for address of ELF small data */
      if (TARGET_ELF
	  && mode == Pmode
	  && DEFAULT_ABI == ABI_V4
	  && (SYMBOL_REF_P (operands[1])
	      || GET_CODE (operands[1]) == CONST)
	  && small_data_operand (operands[1], mode))
	{
	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
	  return;
	}

      /* Use the default pattern for loading up PC-relative addresses.  */
      if (TARGET_PCREL && mode == Pmode
	  && pcrel_local_or_external_address (operands[1], Pmode))
	{
	  emit_insn (gen_rtx_SET (operands[0], operands[1]));
	  return;
	}

      if (DEFAULT_ABI == ABI_V4
	  && mode == Pmode && mode == SImode
	  && flag_pic == 1 && got_operand (operands[1], mode))
	{
	  emit_insn (gen_movsi_got (operands[0], operands[1]));
	  return;
	}

      if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
	  && TARGET_NO_TOC_OR_PCREL
	  && ! flag_pic
	  && mode == Pmode
	  && CONSTANT_P (operands[1])
	  && GET_CODE (operands[1]) != HIGH
	  && !CONST_INT_P (operands[1]))
	{
	  rtx target = (!can_create_pseudo_p ()
			? operands[0]
			: gen_reg_rtx (mode));

	  /* If this is a function address on -mcall-aixdesc,
	     convert it to the address of the descriptor.  */
	  if (DEFAULT_ABI == ABI_AIX
	      && SYMBOL_REF_P (operands[1])
	      && XSTR (operands[1], 0)[0] == '.')
	    {
	      const char *name = XSTR (operands[1], 0);
	      rtx new_ref;
	      while (*name == '.')
		name++;
	      new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
	      CONSTANT_POOL_ADDRESS_P (new_ref)
		= CONSTANT_POOL_ADDRESS_P (operands[1]);
	      SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
	      SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
	      SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
	      operands[1] = new_ref;
	    }

	  if (DEFAULT_ABI == ABI_DARWIN)
	    {
#if TARGET_MACHO
	      /* This is not PIC code, but could require the subset of
		 indirections used by mdynamic-no-pic.  */
	      if (MACHO_DYNAMIC_NO_PIC_P)
		{
		  /* Take care of any required data indirection.  */
		  operands[1] = rs6000_machopic_legitimize_pic_address (
				  operands[1], mode, operands[0]);
		  if (operands[0] != operands[1])
		    emit_insn (gen_rtx_SET (operands[0], operands[1]));
		  return;
		}
#endif
	      emit_insn (gen_macho_high (Pmode, target, operands[1]));
	      emit_insn (gen_macho_low (Pmode, operands[0],
					target, operands[1]));
	      return;
	    }

	  emit_insn (gen_elf_high (target, operands[1]));
	  emit_insn (gen_elf_low (operands[0], target, operands[1]));
	  return;
	}

      /* If this is a SYMBOL_REF that refers to a constant pool entry,
	 and we have put it in the TOC, we just need to make a TOC-relative
	 reference to it.  */
      if (TARGET_TOC
	  && SYMBOL_REF_P (operands[1])
	  && use_toc_relative_ref (operands[1], mode))
	operands[1] = create_TOC_reference (operands[1], operands[0]);
      else if (mode == Pmode
	       && CONSTANT_P (operands[1])
	       && GET_CODE (operands[1]) != HIGH
	       && ((REG_P (operands[0])
		    && FP_REGNO_P (REGNO (operands[0])))
		   || !CONST_INT_P (operands[1])
		   || (num_insns_constant (operands[1], mode)
		       > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
	       && !toc_relative_expr_p (operands[1], false, NULL, NULL)
	       && (TARGET_CMODEL == CMODEL_SMALL
		   || can_create_pseudo_p ()
		   || (REG_P (operands[0])
		       && INT_REG_OK_FOR_BASE_P (operands[0], true))))
	{

#if TARGET_MACHO
	  /* Darwin uses a special PIC legitimizer.  */
	  if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
	    {
	      operands[1] =
		rs6000_machopic_legitimize_pic_address (operands[1], mode,
							operands[0]);
	      if (operands[0] != operands[1])
		emit_insn (gen_rtx_SET (operands[0], operands[1]));
	      return;
	    }
#endif

	  /* If we are to limit the number of things we put in the TOC and
	     this is a symbol plus a constant we can add in one insn,
	     just put the symbol in the TOC and add the constant.  */
	  if (GET_CODE (operands[1]) == CONST
	      && TARGET_NO_SUM_IN_TOC
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
		  || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
	      && ! side_effects_p (operands[0]))
	    {
	      rtx sym =
		force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
	      rtx other = XEXP (XEXP (operands[1], 0), 1);

	      sym = force_reg (mode, sym);
	      emit_insn (gen_add3_insn (operands[0], sym, other));
	      return;
	    }

	  operands[1] = force_const_mem (mode, operands[1]);

	  if (TARGET_TOC
	      && SYMBOL_REF_P (XEXP (operands[1], 0))
	      && use_toc_relative_ref (XEXP (operands[1], 0), mode))
	    {
	      rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
						 operands[0]);
	      operands[1] = gen_const_mem (mode, tocref);
	      set_mem_alias_set (operands[1], get_TOC_alias_set ());
	    }
	}
      break;

    case E_TImode:
      if (!VECTOR_MEM_VSX_P (TImode))
	rs6000_eliminate_indexed_memrefs (operands);
      break;

    case E_PTImode:
      rs6000_eliminate_indexed_memrefs (operands);
      break;

    default:
      fatal_insn ("bad move", gen_rtx_SET (dest, source));
    }

  /* Above, we may have called force_const_mem which may have returned
     an invalid address.  If we can, fix this up; otherwise, reload will
     have to deal with it.  */
  if (MEM_P (operands[1]))
    operands[1] = validize_mem (operands[1]);

  emit_insn (gen_rtx_SET (operands[0], operands[1]));
}
/* Set up AIX/Darwin/64-bit Linux quad floating point routines.
   MODE is the IBM extended-double mode being configured (IFmode, or
   TFmode when long double is IBM format).  */
static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      /* GCC's own soft-fp128 arithmetic helpers.  */
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      if (!TARGET_HARD_FLOAT)
	{
	  /* Without hardware FP, negation, comparisons, and conversions
	     also go through libgcc.  */
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");
	  set_optab_libfunc (unord_optab, mode, "__gcc_qunord");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}
    }
  else
    {
      /* -mxl-compat: use the IBM XL compiler's runtime names.  */
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}
9869 /* Create a decl for either complex long double multiply or complex long double
9870 divide when long double is IEEE 128-bit floating point. We can't use
9871 __multc3 and __divtc3 because the original long double using IBM extended
9872 double used those names. The complex multiply/divide functions are encoded
9873 as builtin functions with a complex result and 4 scalar inputs. */
9875 static void
9876 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
9878 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
9879 name, NULL_TREE);
9881 set_builtin_decl (fncode, fndecl, true);
9883 if (TARGET_DEBUG_BUILTIN)
9884 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
9886 return;
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names,
   so continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  MODE is KFmode, or TFmode when long double is
   IEEE 128-bit.  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      static bool complex_muldiv_init_p = false;

      /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble.  If
	 we have clone or target attributes, this will be called a second
	 time.  We want to create the built-in function only once.  */
      if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
	{
	  complex_muldiv_init_p = true;
	  /* Compute the builtin codes for the TCmode complex multiply and
	     divide from the per-mode builtin ranges.  */
	  built_in_function fncode_mul =
	    (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
				 - MIN_MODE_COMPLEX_FLOAT);
	  built_in_function fncode_div =
	    (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
				 - MIN_MODE_COMPLEX_FLOAT);

	  /* _Complex long double (*) (long double, long double,
	     long double, long double).  */
	  tree fntype = build_function_type_list (complex_long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  long_double_type_node,
						  NULL_TREE);

	  create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
	  create_complex_muldiv ("__divkc3", fncode_div, fntype);
	}

      /* Arithmetic and comparison helpers using the "kf" names.  */
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      set_optab_libfunc (abs_optab, mode, "__abskf2");
      set_optab_libfunc (powi_optab, mode, "__powikf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      /* Conversions between the IEEE and IBM 128-bit formats.  Note that
	 IBM -> IEEE is registered as an extension and IEEE -> IBM as a
	 truncation, even though neither format is a subset of the other.  */
      set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");

      /* Decimal floating point conversions.  */
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");

      /* Integer <-> float conversions.  */
      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
	}
    }
  else
    {
      /* Historical _q_<op> names, used when IEEE 128-bit values are not
	 passed in vector registers.  */
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}
10010 static void
10011 rs6000_init_libfuncs (void)
10013 /* __float128 support. */
10014 if (TARGET_FLOAT128_TYPE)
10016 init_float128_ibm (IFmode);
10017 init_float128_ieee (KFmode);
10020 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10021 if (TARGET_LONG_DOUBLE_128)
10023 if (!TARGET_IEEEQUAD)
10024 init_float128_ibm (TFmode);
10026 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10027 else
10028 init_float128_ieee (TFmode);
10032 /* Emit a potentially record-form instruction, setting DST from SRC.
10033 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10034 signed comparison of DST with zero. If DOT is 1, the generated RTL
10035 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10036 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
10037 a separate COMPARE. */
10039 void
10040 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10042 if (dot == 0)
10044 emit_move_insn (dst, src);
10045 return;
10048 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10050 emit_move_insn (dst, src);
10051 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10052 return;
10055 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10056 if (dot == 1)
10058 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10059 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10061 else
10063 rtx set = gen_rtx_SET (dst, src);
10064 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */

void
validate_condition_mode (enum rtx_code code, machine_mode mode)
{
  /* CODE must be a comparison and MODE a condition-code mode.  */
  gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
	       || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
	      && GET_MODE_CLASS (mode) == MODE_CC);

  /* These don't make sense: signed orderings on an unsigned compare
     result, and unsigned orderings on anything else.  */
  gcc_assert ((code != GT && code != LT && code != GE && code != LE)
	      || mode != CCUNSmode);

  gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
	      || mode == CCUNSmode);

  /* Unordered comparisons only exist for floating point compares.  */
  gcc_assert (mode == CCFPmode
	      || (code != ORDERED && code != UNORDERED
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT
		  && code != UNGE && code != UNLE));

  /* These should never be generated except for
     flag_finite_math_only.  */
  gcc_assert (mode != CCFPmode
	      || flag_finite_math_only
	      || (code != LE && code != GE
		  && code != UNEQ && code != LTGT
		  && code != UNGT && code != UNLT));

  /* These are invalid; the information is not there.  */
  gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  A valid mask is a single contiguous run of 1
   bits, possibly wrapping around from the most significant bit to the
   least significant bit.  */

bool
rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
{
  unsigned HOST_WIDE_INT val = INTVAL (mask);
  unsigned HOST_WIDE_INT bit;
  int nb, ne;
  int n = GET_MODE_PRECISION (mode);

  if (mode != DImode && mode != SImode)
    return false;

  if (INTVAL (mask) >= 0)
    {
      /* Non-negative: the run of 1s (if any) does not touch the sign bit.
	 "val & -val" isolates the lowest set bit; adding it to VAL must
	 yield a power of two for the 1s to be contiguous.  */
      bit = val & -val;
      ne = exact_log2 (bit);
      nb = exact_log2 (val + bit);
    }
  else if (val + 1 == 0)
    {
      /* All bits set.  */
      nb = n;
      ne = 0;
    }
  else if (val & 1)
    {
      /* Negative with bit 0 set: a wrap-around mask.  Check that the
	 complement (the run of 0s) is contiguous instead.  */
      val = ~val;
      bit = val & -val;
      nb = exact_log2 (bit);
      ne = exact_log2 (val + bit);
    }
  else
    {
      /* Negative with bit 0 clear: the 1s must run from some bit up to
	 and including the sign bit.  */
      bit = val & -val;
      ne = exact_log2 (bit);
      if (val + bit == 0)
	nb = n;
      else
	nb = 0;
    }

  /* NB was computed one past the top of the run; make it inclusive.  */
  nb--;

  if (nb < 0 || ne < 0 || nb >= n || ne >= n)
    return false;

  if (b)
    *b = nb;
  if (e)
    *e = ne;

  return true;
}
10164 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10165 or rldicr instruction, to implement an AND with it in mode MODE. */
10167 bool
10168 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10170 int nb, ne;
10172 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10173 return false;
10175 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10176 does not wrap. */
10177 if (mode == DImode)
10178 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10180 /* For SImode, rlwinm can do everything. */
10181 if (mode == SImode)
10182 return (nb < 32 && ne < 32);
10184 return false;
10187 /* Return the instruction template for an AND with mask in mode MODE, with
10188 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10190 const char *
10191 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10193 int nb, ne;
10195 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10196 gcc_unreachable ();
10198 if (mode == DImode && ne == 0)
10200 operands[3] = GEN_INT (63 - nb);
10201 if (dot)
10202 return "rldicl. %0,%1,0,%3";
10203 return "rldicl %0,%1,0,%3";
10206 if (mode == DImode && nb == 63)
10208 operands[3] = GEN_INT (63 - ne);
10209 if (dot)
10210 return "rldicr. %0,%1,0,%3";
10211 return "rldicr %0,%1,0,%3";
10214 if (nb < 32 && ne < 32)
10216 operands[3] = GEN_INT (31 - nb);
10217 operands[4] = GEN_INT (31 - ne);
10218 if (dot)
10219 return "rlwinm. %0,%1,0,%3,%4";
10220 return "rlwinm %0,%1,0,%3,%4";
10223 gcc_unreachable ();
/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */

bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  /* SH stays -1 for a variable (register) shift amount.  */
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.
     A rotate whose mask discards the bits that would have wrapped is
     equivalent to a plain left or right shift with the same mask.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.
   operands[2] is the shift amount (rewritten in place to the equivalent
   rotate count when the shift is an LSHIFTRT), operands[3] the mask, and
   operands[4] the shift rtx code; operands[3]/[4] are overwritten with the
   encoded mask begin/end positions.  */

const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Mask runs down to bit 0: rld(i)cl.  */
  if (mode == DImode && ne == 0)
    {
      /* A right shift by SH is a rotate left by 64-SH.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  /* Mask runs up to bit 63: rld(i)cr.  */
  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  /* Mask ends exactly at the shift count: rld(i)c.  */
  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  /* Mask fits in the low word: rlw(i)nm.  */
  if (nb < 32 && ne < 32)
    {
      /* A right shift by SH is a rotate left by 32-SH.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}
10356 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10357 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10358 ASHIFT, or LSHIFTRT) in mode MODE. */
10360 bool
10361 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10363 int nb, ne;
10365 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10366 return false;
10368 int n = GET_MODE_PRECISION (mode);
10370 int sh = INTVAL (XEXP (shift, 1));
10371 if (sh < 0 || sh >= n)
10372 return false;
10374 rtx_code code = GET_CODE (shift);
10376 /* Convert any shift by 0 to a rotate, to simplify below code. */
10377 if (sh == 0)
10378 code = ROTATE;
10380 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10381 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10382 code = ASHIFT;
10383 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10385 code = LSHIFTRT;
10386 sh = n - sh;
10389 /* DImode rotates need rldimi. */
10390 if (mode == DImode && code == ROTATE)
10391 return (ne == sh);
10393 /* SImode rotates need rlwimi. */
10394 if (mode == SImode && code == ROTATE)
10395 return (nb < 32 && ne < 32 && sh < 32);
10397 /* Wrap-around masks are only okay for rotates. */
10398 if (ne > nb)
10399 return false;
10401 /* Don't allow ASHIFT if the mask is wrong for that. */
10402 if (code == ASHIFT && ne < sh)
10403 return false;
10405 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10406 if the mask is wrong for that. */
10407 if (nb < 32 && ne < 32 && sh < 32
10408 && !(code == LSHIFTRT && nb >= 32 - sh))
10409 return true;
10411 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10412 if the mask is wrong for that. */
10413 if (code == LSHIFTRT)
10414 sh = 64 - sh;
10415 if (ne == sh)
10416 return !(code == LSHIFTRT && nb >= sh);
10418 return false;
10421 /* Return the instruction template for an insert with mask in mode MODE, with
10422 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10424 const char *
10425 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10427 int nb, ne;
10429 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10430 gcc_unreachable ();
10432 /* Prefer rldimi because rlwimi is cracked. */
10433 if (TARGET_POWERPC64
10434 && (!dot || mode == DImode)
10435 && GET_CODE (operands[4]) != LSHIFTRT
10436 && ne == INTVAL (operands[2]))
10438 operands[3] = GEN_INT (63 - nb);
10439 if (dot)
10440 return "rldimi. %0,%1,%2,%3";
10441 return "rldimi %0,%1,%2,%3";
10444 if (nb < 32 && ne < 32)
10446 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10447 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10448 operands[3] = GEN_INT (31 - nb);
10449 operands[4] = GEN_INT (31 - ne);
10450 if (dot)
10451 return "rlwimi. %0,%1,%2,%3,%4";
10452 return "rlwimi %0,%1,%2,%3,%4";
10455 gcc_unreachable ();
10458 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10459 using two machine instructions. */
10461 bool
10462 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10464 /* There are two kinds of AND we can handle with two insns:
10465 1) those we can do with two rl* insn;
10466 2) ori[s];xori[s].
10468 We do not handle that last case yet. */
10470 /* If there is just one stretch of ones, we can do it. */
10471 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10472 return true;
10474 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10475 one insn, we can do the whole thing with two. */
10476 unsigned HOST_WIDE_INT val = INTVAL (c);
10477 unsigned HOST_WIDE_INT bit1 = val & -val;
10478 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10479 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10480 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10481 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
10484 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10485 If EXPAND is true, split rotate-and-mask instructions we generate to
10486 their constituent parts as well (this is used during expand); if DOT
10487 is 1, make the last insn a record-form instruction clobbering the
10488 destination GPR and setting the CC reg (from operands[3]); if 2, set
10489 that GPR as well as the CC reg. */
10491 void
10492 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10494 gcc_assert (!(expand && dot));
10496 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10498 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10499 shift right. This generates better code than doing the masks without
10500 shifts, or shifting first right and then left. */
10501 int nb, ne;
10502 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10504 gcc_assert (mode == DImode);
10506 int shift = 63 - nb;
10507 if (expand)
10509 rtx tmp1 = gen_reg_rtx (DImode);
10510 rtx tmp2 = gen_reg_rtx (DImode);
10511 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10512 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10513 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10515 else
10517 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10518 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10519 emit_move_insn (operands[0], tmp);
10520 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10521 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10523 return;
10526 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10527 that does the rest. */
10528 unsigned HOST_WIDE_INT bit1 = val & -val;
10529 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10530 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10531 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10533 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10534 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10536 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10538 /* Two "no-rotate"-and-mask instructions, for SImode. */
10539 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10541 gcc_assert (mode == SImode);
10543 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10544 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10545 emit_move_insn (reg, tmp);
10546 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10547 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10548 return;
10551 gcc_assert (mode == DImode);
10553 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10554 insns; we have to do the first in SImode, because it wraps. */
10555 if (mask2 <= 0xffffffff
10556 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10558 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10559 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10560 GEN_INT (mask1));
10561 rtx reg_low = gen_lowpart (SImode, reg);
10562 emit_move_insn (reg_low, tmp);
10563 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10564 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10565 return;
10568 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10569 at the top end), rotate back and clear the other hole. */
10570 int right = exact_log2 (bit3);
10571 int left = 64 - right;
10573 /* Rotate the mask too. */
10574 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10576 if (expand)
10578 rtx tmp1 = gen_reg_rtx (DImode);
10579 rtx tmp2 = gen_reg_rtx (DImode);
10580 rtx tmp3 = gen_reg_rtx (DImode);
10581 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10582 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10583 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10584 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10586 else
10588 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10589 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10590 emit_move_insn (operands[0], tmp);
10591 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10592 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10593 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10597 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
10598 for lfq and stfq insns iff the registers are hard registers. */
10601 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10603 /* We might have been passed a SUBREG. */
10604 if (!REG_P (reg1) || !REG_P (reg2))
10605 return 0;
10607 /* We might have been passed non floating point registers. */
10608 if (!FP_REGNO_P (REGNO (reg1))
10609 || !FP_REGNO_P (REGNO (reg2)))
10610 return 0;
10612 return (REGNO (reg1) == REGNO (reg2) - 1);
10615 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
10616 addr1 and addr2 must be in consecutive memory locations
10617 (addr2 == addr1 + 8). */
10620 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10622 rtx addr1, addr2;
10623 unsigned int reg1, reg2;
10624 int offset1, offset2;
10626 /* The mems cannot be volatile. */
10627 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10628 return 0;
10630 addr1 = XEXP (mem1, 0);
10631 addr2 = XEXP (mem2, 0);
10633 /* Extract an offset (if used) from the first addr. */
10634 if (GET_CODE (addr1) == PLUS)
10636 /* If not a REG, return zero. */
10637 if (!REG_P (XEXP (addr1, 0)))
10638 return 0;
10639 else
10641 reg1 = REGNO (XEXP (addr1, 0));
10642 /* The offset must be constant! */
10643 if (!CONST_INT_P (XEXP (addr1, 1)))
10644 return 0;
10645 offset1 = INTVAL (XEXP (addr1, 1));
10648 else if (!REG_P (addr1))
10649 return 0;
10650 else
10652 reg1 = REGNO (addr1);
10653 /* This was a simple (mem (reg)) expression. Offset is 0. */
10654 offset1 = 0;
10657 /* And now for the second addr. */
10658 if (GET_CODE (addr2) == PLUS)
10660 /* If not a REG, return zero. */
10661 if (!REG_P (XEXP (addr2, 0)))
10662 return 0;
10663 else
10665 reg2 = REGNO (XEXP (addr2, 0));
10666 /* The offset must be constant. */
10667 if (!CONST_INT_P (XEXP (addr2, 1)))
10668 return 0;
10669 offset2 = INTVAL (XEXP (addr2, 1));
10672 else if (!REG_P (addr2))
10673 return 0;
10674 else
10676 reg2 = REGNO (addr2);
10677 /* This was a simple (mem (reg)) expression. Offset is 0. */
10678 offset2 = 0;
10681 /* Both of these must have the same base register. */
10682 if (reg1 != reg2)
10683 return 0;
10685 /* The offset for the second addr must be 8 more than the first addr. */
10686 if (offset2 != offset1 + 8)
10687 return 0;
10689 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10690 instructions. */
10691 return 1;
10694 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
10695 need to use DDmode, in all other cases we can use the same mode. */
10696 static machine_mode
10697 rs6000_secondary_memory_needed_mode (machine_mode mode)
10699 if (lra_in_progress && mode == SDmode)
10700 return DDmode;
10701 return mode;
10704 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10705 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10706 only work on the traditional altivec registers, note if an altivec register
10707 was chosen. */
10709 static enum rs6000_reg_type
10710 register_to_reg_type (rtx reg, bool *is_altivec)
10712 HOST_WIDE_INT regno;
10713 enum reg_class rclass;
10715 if (SUBREG_P (reg))
10716 reg = SUBREG_REG (reg);
10718 if (!REG_P (reg))
10719 return NO_REG_TYPE;
10721 regno = REGNO (reg);
10722 if (!HARD_REGISTER_NUM_P (regno))
10724 if (!lra_in_progress && !reload_completed)
10725 return PSEUDO_REG_TYPE;
10727 regno = true_regnum (reg);
10728 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10729 return PSEUDO_REG_TYPE;
10732 gcc_assert (regno >= 0);
10734 if (is_altivec && ALTIVEC_REGNO_P (regno))
10735 *is_altivec = true;
10737 rclass = rs6000_regno_regclass[regno];
10738 return reg_class_to_reg_type[(int)rclass];
10741 /* Helper function to return the cost of adding a TOC entry address. */
10743 static inline int
10744 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10746 int ret;
10748 if (TARGET_CMODEL != CMODEL_SMALL)
10749 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10751 else
10752 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10754 return ret;
10757 /* Helper function for rs6000_secondary_reload to determine whether the memory
10758 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10759 needs reloading. Return negative if the memory is not handled by the memory
10760 helper functions and to try a different reload method, 0 if no additional
10761 instructions are need, and positive to give the extra cost for the
10762 memory. */
10764 static int
10765 rs6000_secondary_reload_memory (rtx addr,
10766 enum reg_class rclass,
10767 machine_mode mode)
10769 int extra_cost = 0;
10770 rtx reg, and_arg, plus_arg0, plus_arg1;
10771 addr_mask_type addr_mask;
10772 const char *type = NULL;
10773 const char *fail_msg = NULL;
10775 if (GPR_REG_CLASS_P (rclass))
10776 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10778 else if (rclass == FLOAT_REGS)
10779 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10781 else if (rclass == ALTIVEC_REGS)
10782 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10784 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10785 else if (rclass == VSX_REGS)
10786 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10787 & ~RELOAD_REG_AND_M16);
10789 /* If the register allocator hasn't made up its mind yet on the register
10790 class to use, settle on defaults to use. */
10791 else if (rclass == NO_REGS)
10793 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10794 & ~RELOAD_REG_AND_M16);
10796 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10797 addr_mask &= ~(RELOAD_REG_INDEXED
10798 | RELOAD_REG_PRE_INCDEC
10799 | RELOAD_REG_PRE_MODIFY);
10802 else
10803 addr_mask = 0;
10805 /* If the register isn't valid in this register class, just return now. */
10806 if ((addr_mask & RELOAD_REG_VALID) == 0)
10808 if (TARGET_DEBUG_ADDR)
10810 fprintf (stderr,
10811 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10812 "not valid in class\n",
10813 GET_MODE_NAME (mode), reg_class_names[rclass]);
10814 debug_rtx (addr);
10817 return -1;
10820 switch (GET_CODE (addr))
10822 /* Does the register class supports auto update forms for this mode? We
10823 don't need a scratch register, since the powerpc only supports
10824 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10825 case PRE_INC:
10826 case PRE_DEC:
10827 reg = XEXP (addr, 0);
10828 if (!base_reg_operand (addr, GET_MODE (reg)))
10830 fail_msg = "no base register #1";
10831 extra_cost = -1;
10834 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
10836 extra_cost = 1;
10837 type = "update";
10839 break;
10841 case PRE_MODIFY:
10842 reg = XEXP (addr, 0);
10843 plus_arg1 = XEXP (addr, 1);
10844 if (!base_reg_operand (reg, GET_MODE (reg))
10845 || GET_CODE (plus_arg1) != PLUS
10846 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
10848 fail_msg = "bad PRE_MODIFY";
10849 extra_cost = -1;
10852 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
10854 extra_cost = 1;
10855 type = "update";
10857 break;
10859 /* Do we need to simulate AND -16 to clear the bottom address bits used
10860 in VMX load/stores? Only allow the AND for vector sizes. */
10861 case AND:
10862 and_arg = XEXP (addr, 0);
10863 if (GET_MODE_SIZE (mode) != 16
10864 || !CONST_INT_P (XEXP (addr, 1))
10865 || INTVAL (XEXP (addr, 1)) != -16)
10867 fail_msg = "bad Altivec AND #1";
10868 extra_cost = -1;
10871 if (rclass != ALTIVEC_REGS)
10873 if (legitimate_indirect_address_p (and_arg, false))
10874 extra_cost = 1;
10876 else if (legitimate_indexed_address_p (and_arg, false))
10877 extra_cost = 2;
10879 else
10881 fail_msg = "bad Altivec AND #2";
10882 extra_cost = -1;
10885 type = "and";
10887 break;
10889 /* If this is an indirect address, make sure it is a base register. */
10890 case REG:
10891 case SUBREG:
10892 if (!legitimate_indirect_address_p (addr, false))
10894 extra_cost = 1;
10895 type = "move";
10897 break;
10899 /* If this is an indexed address, make sure the register class can handle
10900 indexed addresses for this mode. */
10901 case PLUS:
10902 plus_arg0 = XEXP (addr, 0);
10903 plus_arg1 = XEXP (addr, 1);
10905 /* (plus (plus (reg) (constant)) (constant)) is generated during
10906 push_reload processing, so handle it now. */
10907 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
10909 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
10911 extra_cost = 1;
10912 type = "offset";
10916 /* (plus (plus (reg) (constant)) (reg)) is also generated during
10917 push_reload processing, so handle it now. */
10918 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
10920 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
10922 extra_cost = 1;
10923 type = "indexed #2";
10927 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
10929 fail_msg = "no base register #2";
10930 extra_cost = -1;
10933 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
10935 if ((addr_mask & RELOAD_REG_INDEXED) == 0
10936 || !legitimate_indexed_address_p (addr, false))
10938 extra_cost = 1;
10939 type = "indexed";
10943 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
10944 && CONST_INT_P (plus_arg1))
10946 if (!quad_address_offset_p (INTVAL (plus_arg1)))
10948 extra_cost = 1;
10949 type = "vector d-form offset";
10953 /* Make sure the register class can handle offset addresses. */
10954 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
10956 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
10958 extra_cost = 1;
10959 type = "offset #2";
10963 else
10965 fail_msg = "bad PLUS";
10966 extra_cost = -1;
10969 break;
10971 case LO_SUM:
10972 /* Quad offsets are restricted and can't handle normal addresses. */
10973 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
10975 extra_cost = -1;
10976 type = "vector d-form lo_sum";
10979 else if (!legitimate_lo_sum_address_p (mode, addr, false))
10981 fail_msg = "bad LO_SUM";
10982 extra_cost = -1;
10985 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
10987 extra_cost = 1;
10988 type = "lo_sum";
10990 break;
10992 /* Static addresses need to create a TOC entry. */
10993 case CONST:
10994 case SYMBOL_REF:
10995 case LABEL_REF:
10996 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
10998 extra_cost = -1;
10999 type = "vector d-form lo_sum #2";
11002 else
11004 type = "address";
11005 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11007 break;
11009 /* TOC references look like offsetable memory. */
11010 case UNSPEC:
11011 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11013 fail_msg = "bad UNSPEC";
11014 extra_cost = -1;
11017 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11019 extra_cost = -1;
11020 type = "vector d-form lo_sum #3";
11023 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11025 extra_cost = 1;
11026 type = "toc reference";
11028 break;
11030 default:
11032 fail_msg = "bad address";
11033 extra_cost = -1;
11037 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11039 if (extra_cost < 0)
11040 fprintf (stderr,
11041 "rs6000_secondary_reload_memory error: mode = %s, "
11042 "class = %s, addr_mask = '%s', %s\n",
11043 GET_MODE_NAME (mode),
11044 reg_class_names[rclass],
11045 rs6000_debug_addr_mask (addr_mask, false),
11046 (fail_msg != NULL) ? fail_msg : "<bad address>");
11048 else
11049 fprintf (stderr,
11050 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11051 "addr_mask = '%s', extra cost = %d, %s\n",
11052 GET_MODE_NAME (mode),
11053 reg_class_names[rclass],
11054 rs6000_debug_addr_mask (addr_mask, false),
11055 extra_cost,
11056 (type) ? type : "<none>");
11058 debug_rtx (addr);
11061 return extra_cost;
11064 /* Helper function for rs6000_secondary_reload to return true if a move to a
11065 different register classe is really a simple move. */
11067 static bool
11068 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11069 enum rs6000_reg_type from_type,
11070 machine_mode mode)
11072 int size = GET_MODE_SIZE (mode);
11074 /* Add support for various direct moves available. In this function, we only
11075 look at cases where we don't need any extra registers, and one or more
11076 simple move insns are issued. Originally small integers are not allowed
11077 in FPR/VSX registers. Single precision binary floating is not a simple
11078 move because we need to convert to the single precision memory layout.
11079 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11080 need special direct move handling, which we do not support yet. */
11081 if (TARGET_DIRECT_MOVE
11082 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11083 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11085 if (TARGET_POWERPC64)
11087 /* ISA 2.07: MTVSRD or MVFVSRD. */
11088 if (size == 8)
11089 return true;
11091 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11092 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11093 return true;
11096 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11097 if (TARGET_P8_VECTOR)
11099 if (mode == SImode)
11100 return true;
11102 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11103 return true;
11106 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11107 if (mode == SDmode)
11108 return true;
11111 /* Move to/from SPR. */
11112 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11113 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11114 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11115 return true;
11117 return false;
11120 /* Direct move helper function for rs6000_secondary_reload, handle all of the
11121 special direct moves that involve allocating an extra register, return the
11122 insn code of the helper function if there is such a function or
11123 CODE_FOR_nothing if not. */
11125 static bool
11126 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11127 enum rs6000_reg_type from_type,
11128 machine_mode mode,
11129 secondary_reload_info *sri,
11130 bool altivec_p)
11132 bool ret = false;
11133 enum insn_code icode = CODE_FOR_nothing;
11134 int cost = 0;
11135 int size = GET_MODE_SIZE (mode);
11137 if (TARGET_POWERPC64 && size == 16)
11139 /* Handle moving 128-bit values from GPRs to VSX point registers on
11140 ISA 2.07 (power8, power9) when running in 64-bit mode using
11141 XXPERMDI to glue the two 64-bit values back together. */
11142 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11144 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11145 icode = reg_addr[mode].reload_vsx_gpr;
11148 /* Handle moving 128-bit values from VSX point registers to GPRs on
11149 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11150 bottom 64-bit value. */
11151 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11153 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11154 icode = reg_addr[mode].reload_gpr_vsx;
11158 else if (TARGET_POWERPC64 && mode == SFmode)
11160 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11162 cost = 3; /* xscvdpspn, mfvsrd, and. */
11163 icode = reg_addr[mode].reload_gpr_vsx;
11166 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11168 cost = 2; /* mtvsrz, xscvspdpn. */
11169 icode = reg_addr[mode].reload_vsx_gpr;
11173 else if (!TARGET_POWERPC64 && size == 8)
11175 /* Handle moving 64-bit values from GPRs to floating point registers on
11176 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11177 32-bit values back together. Altivec register classes must be handled
11178 specially since a different instruction is used, and the secondary
11179 reload support requires a single instruction class in the scratch
11180 register constraint. However, right now TFmode is not allowed in
11181 Altivec registers, so the pattern will never match. */
11182 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11184 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11185 icode = reg_addr[mode].reload_fpr_gpr;
11189 if (icode != CODE_FOR_nothing)
11191 ret = true;
11192 if (sri)
11194 sri->icode = icode;
11195 sri->extra_cost = cost;
11199 return ret;
11202 /* Return whether a move between two register classes can be done either
11203 directly (simple move) or via a pattern that uses a single extra temporary
11204 (using ISA 2.07's direct move in this case. */
11206 static bool
11207 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11208 enum rs6000_reg_type from_type,
11209 machine_mode mode,
11210 secondary_reload_info *sri,
11211 bool altivec_p)
11213 /* Fall back to load/store reloads if either type is not a register. */
11214 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11215 return false;
11217 /* If we haven't allocated registers yet, assume the move can be done for the
11218 standard register types. */
11219 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11220 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11221 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11222 return true;
11224 /* Moves to the same set of registers is a simple move for non-specialized
11225 registers. */
11226 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11227 return true;
11229 /* Check whether a simple move can be done directly. */
11230 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11232 if (sri)
11234 sri->icode = CODE_FOR_nothing;
11235 sri->extra_cost = 0;
11237 return true;
11240 /* Now check if we can do it in a few steps. */
11241 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11242 altivec_p);
11245 /* Inform reload about cases where moving X with a mode MODE to a register in
11246 RCLASS requires an extra scratch or immediate register. Return the class
11247 needed for the immediate register.
11249 For VSX and Altivec, we may need a register to convert sp+offset into
11250 reg+sp.
11252 For misaligned 64-bit gpr loads and stores we need a register to
11253 convert an offset address to indirect. */
11255 static reg_class_t
11256 rs6000_secondary_reload (bool in_p,
11257 rtx x,
11258 reg_class_t rclass_i,
11259 machine_mode mode,
11260 secondary_reload_info *sri)
11262 enum reg_class rclass = (enum reg_class) rclass_i;
11263 reg_class_t ret = ALL_REGS;
11264 enum insn_code icode;
11265 bool default_p = false;
11266 bool done_p = false;
11268 /* Allow subreg of memory before/during reload. */
11269 bool memory_p = (MEM_P (x)
11270 || (!reload_completed && SUBREG_P (x)
11271 && MEM_P (SUBREG_REG (x))));
11273 sri->icode = CODE_FOR_nothing;
11274 sri->t_icode = CODE_FOR_nothing;
11275 sri->extra_cost = 0;
11276 icode = ((in_p)
11277 ? reg_addr[mode].reload_load
11278 : reg_addr[mode].reload_store);
11280 if (REG_P (x) || register_operand (x, mode))
11282 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11283 bool altivec_p = (rclass == ALTIVEC_REGS);
11284 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11286 if (!in_p)
11287 std::swap (to_type, from_type);
11289 /* Can we do a direct move of some sort? */
11290 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11291 altivec_p))
11293 icode = (enum insn_code)sri->icode;
11294 default_p = false;
11295 done_p = true;
11296 ret = NO_REGS;
11300 /* Make sure 0.0 is not reloaded or forced into memory. */
11301 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11303 ret = NO_REGS;
11304 default_p = false;
11305 done_p = true;
11308 /* If this is a scalar floating point value and we want to load it into the
11309 traditional Altivec registers, do it via a move via a traditional floating
11310 point register, unless we have D-form addressing. Also make sure that
11311 non-zero constants use a FPR. */
11312 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11313 && !mode_supports_vmx_dform (mode)
11314 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11315 && (memory_p || CONST_DOUBLE_P (x)))
11317 ret = FLOAT_REGS;
11318 default_p = false;
11319 done_p = true;
11322 /* Handle reload of load/stores if we have reload helper functions. */
11323 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11325 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11326 mode);
11328 if (extra_cost >= 0)
11330 done_p = true;
11331 ret = NO_REGS;
11332 if (extra_cost > 0)
11334 sri->extra_cost = extra_cost;
11335 sri->icode = icode;
11340 /* Handle unaligned loads and stores of integer registers. */
11341 if (!done_p && TARGET_POWERPC64
11342 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11343 && memory_p
11344 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11346 rtx addr = XEXP (x, 0);
11347 rtx off = address_offset (addr);
11349 if (off != NULL_RTX)
11351 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11352 unsigned HOST_WIDE_INT offset = INTVAL (off);
11354 /* We need a secondary reload when our legitimate_address_p
11355 says the address is good (as otherwise the entire address
11356 will be reloaded), and the offset is not a multiple of
11357 four or we have an address wrap. Address wrap will only
11358 occur for LO_SUMs since legitimate_offset_address_p
11359 rejects addresses for 16-byte mems that will wrap. */
11360 if (GET_CODE (addr) == LO_SUM
11361 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11362 && ((offset & 3) != 0
11363 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11364 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11365 && (offset & 3) != 0))
11367 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11368 if (in_p)
11369 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11370 : CODE_FOR_reload_di_load);
11371 else
11372 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11373 : CODE_FOR_reload_di_store);
11374 sri->extra_cost = 2;
11375 ret = NO_REGS;
11376 done_p = true;
11378 else
11379 default_p = true;
11381 else
11382 default_p = true;
11385 if (!done_p && !TARGET_POWERPC64
11386 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11387 && memory_p
11388 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11390 rtx addr = XEXP (x, 0);
11391 rtx off = address_offset (addr);
11393 if (off != NULL_RTX)
11395 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11396 unsigned HOST_WIDE_INT offset = INTVAL (off);
11398 /* We need a secondary reload when our legitimate_address_p
11399 says the address is good (as otherwise the entire address
11400 will be reloaded), and we have a wrap.
11402 legitimate_lo_sum_address_p allows LO_SUM addresses to
11403 have any offset so test for wrap in the low 16 bits.
11405 legitimate_offset_address_p checks for the range
11406 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11407 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11408 [0x7ff4,0x7fff] respectively, so test for the
11409 intersection of these ranges, [0x7ffc,0x7fff] and
11410 [0x7ff4,0x7ff7] respectively.
11412 Note that the address we see here may have been
11413 manipulated by legitimize_reload_address. */
11414 if (GET_CODE (addr) == LO_SUM
11415 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11416 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11418 if (in_p)
11419 sri->icode = CODE_FOR_reload_si_load;
11420 else
11421 sri->icode = CODE_FOR_reload_si_store;
11422 sri->extra_cost = 2;
11423 ret = NO_REGS;
11424 done_p = true;
11426 else
11427 default_p = true;
11429 else
11430 default_p = true;
11433 if (!done_p)
11434 default_p = true;
11436 if (default_p)
11437 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11439 gcc_assert (ret != ALL_REGS);
11441 if (TARGET_DEBUG_ADDR)
11443 fprintf (stderr,
11444 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11445 "mode = %s",
11446 reg_class_names[ret],
11447 in_p ? "true" : "false",
11448 reg_class_names[rclass],
11449 GET_MODE_NAME (mode));
11451 if (reload_completed)
11452 fputs (", after reload", stderr);
11454 if (!done_p)
11455 fputs (", done_p not set", stderr);
11457 if (default_p)
11458 fputs (", default secondary reload", stderr);
11460 if (sri->icode != CODE_FOR_nothing)
11461 fprintf (stderr, ", reload func = %s, extra cost = %d",
11462 insn_data[sri->icode].name, sri->extra_cost);
11464 else if (sri->extra_cost > 0)
11465 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11467 fputs ("\n", stderr);
11468 debug_rtx (x);
11471 return ret;
/* Better tracing for rs6000_secondary_reload_inner.

   Print (to stderr) the caller's LINE number, whether this is a load or a
   store, and the would-be reload insn built from REG, MEM and SCRATCH, as a
   PARALLEL of a SET plus a CLOBBER of the scratch register.  Used both for
   -mdebug=addr tracing and just before aborting in
   rs6000_secondary_reload_fail.  */

static void
rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
			       bool store_p)
{
  rtx set, clobber;

  gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);

  fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
	   store_p ? "store" : "load");

  /* Direction of the move determines which side of the SET each rtx is on.  */
  if (store_p)
    set = gen_rtx_SET (mem, reg);
  else
    set = gen_rtx_SET (reg, mem);

  clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
  debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}
/* Abort compilation when the secondary reload helpers are handed an operand
   combination they cannot fix up, after dumping the offending insn.  */
static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  /* LINE is the caller's __LINE__, so the trace identifies which check
     failed.  */
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.

   REG is the register being loaded/stored, MEM the memory operand, SCRATCH a
   base register the expander allocated for address fixups, and STORE_P says
   which direction the move goes.  Any operand combination this function
   cannot handle is a bug in rs6000_secondary_reload_memory, hence the calls
   to rs6000_secondary_reload_fail rather than graceful fallbacks.  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  /* The reload patterns only pass hard registers and real MEMs here.  */
  if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Pick the addressing-capability mask for the register bank REG lives
     in; the legal address forms differ between GPRs, FPRs and VMX regs.  */
  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  /* Materialize the side effect as an explicit add, then address
	     through the updated base register.  */
	  int delta = GET_MODE_SIZE (mode);
	  if (GET_CODE (addr) == PRE_DEC)
	    delta = -delta;
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  emit_insn (gen_rtx_SET (op0, op1));
	  /* NOTE(review): this uses REG (the value register) as the new
	     address; the updated base OP0 looks like the intended address
	     here — confirm whether this path can trigger and whether this
	     should be op0.  */
	  new_addr = reg;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || SUBREG_P (op0))
	    op_reg = op0;

	  else if (GET_CODE (op1) == PLUS)
	    {
	      /* Compute the inner PLUS into the scratch first.  */
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  /* Emit an explicit AND (which clobbers CC0) into the scratch and
	     address through that.  */
	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets (lxv/stxv style) must be a multiple of 16; force the
	 address into the scratch if it doesn't qualify.  */
      else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_dq_form (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

      /* Symbolic addresses always need the full move machinery (TOC, PIC,
	 small data, ...) to land in the scratch register.  */
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.

   REG is the GPR being moved, MEM the offending memory operand, SCRATCH a
   register to compute the address into, and STORE_P the direction.  The
   address is expected to be PLUS or LO_SUM (possibly wrapped in PRE_MODIFY);
   it is computed into a base register and the access rewritten as a plain
   indirect load/store.  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
  gcc_assert (MEM_P (mem));
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      /* For PRE_MODIFY the side-effect register itself receives the
	 computed address, so it doubles as our scratch.  */
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_dq_form (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  /* Constants and addresses reload best into (base) GPRs; pick the most
     general subclass callers allow.  */
  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* Keep integer values out of the FP side of GEN_OR_FLOAT_REGS.  */
  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
    return GENERAL_REGS;

  return rclass;
}
11904 /* Debug version of rs6000_preferred_reload_class. */
11905 static enum reg_class
11906 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
11908 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
11910 fprintf (stderr,
11911 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
11912 "mode = %s, x:\n",
11913 reg_class_names[ret], reg_class_names[rclass],
11914 GET_MODE_NAME (GET_MODE (x)));
11915 debug_rtx (x);
11917 return ret;
11920 /* If we are copying between FP or AltiVec registers and anything else, we need
11921 a memory location. The exception is when we are targeting ppc64 and the
11922 move to/from fpr to gpr instructions are available. Also, under VSX, you
11923 can copy vector registers from the FP register set to the Altivec register
11924 set and vice versa. */
11926 static bool
11927 rs6000_secondary_memory_needed (machine_mode mode,
11928 reg_class_t from_class,
11929 reg_class_t to_class)
11931 enum rs6000_reg_type from_type, to_type;
11932 bool altivec_p = ((from_class == ALTIVEC_REGS)
11933 || (to_class == ALTIVEC_REGS));
11935 /* If a simple/direct move is available, we don't need secondary memory */
11936 from_type = reg_class_to_reg_type[(int)from_class];
11937 to_type = reg_class_to_reg_type[(int)to_class];
11939 if (rs6000_secondary_reload_move (to_type, from_type, mode,
11940 (secondary_reload_info *)0, altivec_p))
11941 return false;
11943 /* If we have a floating point or vector register class, we need to use
11944 memory to transfer the data. */
11945 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
11946 return true;
11948 return false;
11951 /* Debug version of rs6000_secondary_memory_needed. */
11952 static bool
11953 rs6000_debug_secondary_memory_needed (machine_mode mode,
11954 reg_class_t from_class,
11955 reg_class_t to_class)
11957 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
11959 fprintf (stderr,
11960 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
11961 "to_class = %s, mode = %s\n",
11962 ret ? "true" : "false",
11963 reg_class_names[from_class],
11964 reg_class_names[to_class],
11965 GET_MODE_NAME (mode));
11967 return ret;
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (SYMBOL_REF_P (in)
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  /* Resolve IN to a hard register number, or -1 for pseudos/constants/memory
     where the register bank is unknown.  */
  if (REG_P (in))
    {
      regno = REGNO (in);
      if (!HARD_REGISTER_NUM_P (regno))
	{
	  regno = true_regnum (in);
	  if (!HARD_REGISTER_NUM_P (regno))
	    regno = -1;
	}
    }
  else if (SUBREG_P (in))
    {
      regno = true_regnum (in);
      if (!HARD_REGISTER_NUM_P (regno))
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.
     SDmode is special under classic reload and needs a GPR intermediary.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}
12063 /* Debug version of rs6000_secondary_reload_class. */
12064 static enum reg_class
12065 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12066 machine_mode mode, rtx in)
12068 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12069 fprintf (stderr,
12070 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12071 "mode = %s, input rtx:\n",
12072 reg_class_names[ret], reg_class_names[rclass],
12073 GET_MODE_NAME (mode));
12074 debug_rtx (in);
12076 return ret;
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   Return true if a subreg-based mode change from FROM to TO is safe for
   registers in RCLASS, i.e. the bits land where subreg semantics say they
   should.  */

static bool
rs6000_can_change_mode_class (machine_mode from,
			      machine_mode to,
			      reg_class_t rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return true;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return false;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return false;

	  if (from_size < 8 || to_size < 8)
	    return false;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return false;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return false;

	  return true;
	}
      else
	return true;
    }

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
	return false;

      return (from_size == 8 || from_size == 16);
    }

  /* Exactly one of FROM/TO being an Altivec vector mode is not allowed in
     Altivec registers.  */
  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return false;

  return true;
}
12158 /* Debug version of rs6000_can_change_mode_class. */
12159 static bool
12160 rs6000_debug_can_change_mode_class (machine_mode from,
12161 machine_mode to,
12162 reg_class_t rclass)
12164 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12166 fprintf (stderr,
12167 "rs6000_can_change_mode_class, return %s, from = %s, "
12168 "to = %s, rclass = %s\n",
12169 ret ? "true" : "false",
12170 GET_MODE_NAME (from), GET_MODE_NAME (to),
12171 reg_class_names[rclass]);
12173 return ret;
/* Return a string to do a move operation of 128 bits of data.

   OPERANDS[0] is the destination, OPERANDS[1] the source.  Returns the asm
   template for a single machine insn when one exists for this operand
   combination, "#" when the move must be split into word-sized pieces by the
   splitters, or aborts via fatal_insn for combinations the movti/mov<mode>
   patterns should never have accepted.  */

const char *
rs6000_output_move_128bit (rtx operands[])
{
  rtx dest = operands[0];
  rtx src = operands[1];
  machine_mode mode = GET_MODE (dest);
  int dest_regno;
  int src_regno;
  bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
  bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;

  /* Classify the destination: which register bank, if any.  */
  if (REG_P (dest))
    {
      dest_regno = REGNO (dest);
      dest_gpr_p = INT_REGNO_P (dest_regno);
      dest_fp_p = FP_REGNO_P (dest_regno);
      dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
      dest_vsx_p = dest_fp_p | dest_vmx_p;
    }
  else
    {
      dest_regno = -1;
      dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
    }

  /* Likewise for the source.  */
  if (REG_P (src))
    {
      src_regno = REGNO (src);
      src_gpr_p = INT_REGNO_P (src_regno);
      src_fp_p = FP_REGNO_P (src_regno);
      src_vmx_p = ALTIVEC_REGNO_P (src_regno);
      src_vsx_p = src_fp_p | src_vmx_p;
    }
  else
    {
      src_regno = -1;
      src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
    }

  /* Register moves.  */
  if (dest_regno >= 0 && src_regno >= 0)
    {
      if (dest_gpr_p)
	{
	  if (src_gpr_p)
	    return "#";

	  if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
		    : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");

	  else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
	    return "#";
	}

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (src_vsx_p)
	    return "xxlor %x0,%x1,%x1";

	  else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
	    return (WORDS_BIG_ENDIAN
		    ? "mtvsrdd %x0,%1,%L1"
		    : "mtvsrdd %x0,%L1,%1");

	  else if (TARGET_DIRECT_MOVE && src_gpr_p)
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
	return "vor %0,%1,%1";

      else if (dest_fp_p && src_fp_p)
	return "#";
    }

  /* Loads.  */
  else if (dest_regno >= 0 && MEM_P (src))
    {
      if (dest_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "lq %0,%1";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p
	       && altivec_indexed_or_indirect_operand (src, mode))
	return "lvx %0,%y1";

      else if (TARGET_VSX && dest_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (src, 0), mode, true))
	    return "lxv %x0,%1";

	  else if (TARGET_P9_VECTOR)
	    return "lxvx %x0,%y1";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "lxvw4x %x0,%y1";

	  else
	    return "lxvd2x %x0,%y1";
	}

      else if (TARGET_ALTIVEC && dest_vmx_p)
	return "lvx %0,%y1";

      else if (dest_fp_p)
	return "#";
    }

  /* Stores.  */
  else if (src_regno >= 0 && MEM_P (dest))
    {
      if (src_gpr_p)
	{
	  if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
	    return "stq %1,%0";
	  else
	    return "#";
	}

      else if (TARGET_ALTIVEC && src_vmx_p
	       && altivec_indexed_or_indirect_operand (dest, mode))
	return "stvx %1,%y0";

      else if (TARGET_VSX && src_vsx_p)
	{
	  if (mode_supports_dq_form (mode)
	      && quad_address_p (XEXP (dest, 0), mode, true))
	    return "stxv %x1,%0";

	  else if (TARGET_P9_VECTOR)
	    return "stxvx %x1,%y0";

	  else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	    return "stxvw4x %x1,%y0";

	  else
	    return "stxvd2x %x1,%y0";
	}

      else if (TARGET_ALTIVEC && src_vmx_p)
	return "stvx %1,%y0";

      else if (src_fp_p)
	return "#";
    }

  /* Constants.  */
  else if (dest_regno >= 0
	   && (CONST_INT_P (src)
	       || CONST_WIDE_INT_P (src)
	       || CONST_DOUBLE_P (src)
	       || GET_CODE (src) == CONST_VECTOR))
    {
      if (dest_gpr_p)
	return "#";

      else if ((dest_vmx_p && TARGET_ALTIVEC)
	       || (dest_vsx_p && TARGET_VSX))
	return output_vec_const_move (operands);
    }

  /* Anything else should have been rejected by the move predicates.  */
  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
12349 /* Validate a 128-bit move. */
12350 bool
12351 rs6000_move_128bit_ok_p (rtx operands[])
12353 machine_mode mode = GET_MODE (operands[0]);
12354 return (gpc_reg_operand (operands[0], mode)
12355 || gpc_reg_operand (operands[1], mode));
12358 /* Return true if a 128-bit move needs to be split. */
12359 bool
12360 rs6000_split_128bit_ok_p (rtx operands[])
12362 if (!reload_completed)
12363 return false;
12365 if (!gpr_or_gpr_p (operands[0], operands[1]))
12366 return false;
12368 if (quad_load_store_p (operands[0], operands[1]))
12369 return false;
12371 return true;
/* Given a comparison operation, return the bit number in CCR to test.  We
   know this is a valid comparison.

   SCC_P is 1 if this is for an scc.  That means that %D will have been
   used instead of %C, so the bits will be in different places.

   Return -1 if OP isn't a valid comparison for some reason.  */

int
ccr_bit (rtx op, int scc_p)
{
  enum rtx_code code = GET_CODE (op);
  machine_mode cc_mode;
  int cc_regnum;
  int base_bit;
  rtx reg;

  if (!COMPARISON_P (op))
    return -1;

  reg = XEXP (op, 0);

  if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
    return -1;

  cc_mode = GET_MODE (reg);
  cc_regnum = REGNO (reg);
  /* Each CR field holds 4 bits (LT, GT, EQ, SO/UN).  */
  base_bit = 4 * (cc_regnum - CR0_REGNO);

  validate_condition_mode (code, cc_mode);

  /* When generating a sCOND operation, only positive conditions are
     allowed.  */
  if (scc_p)
    switch (code)
      {
      case EQ:
      case GT:
      case LT:
      case UNORDERED:
      case GTU:
      case LTU:
	break;
      default:
	return -1;
      }

  /* Bit offsets within a CR field: 0 = LT, 1 = GT, 2 = EQ, 3 = SO/UN.  */
  switch (code)
    {
    case NE:
      return scc_p ? base_bit + 3 : base_bit + 2;
    case EQ:
      return base_bit + 2;
    case GT:  case GTU:  case UNLE:
      return base_bit + 1;
    case LT:  case LTU:  case UNGE:
      return base_bit;
    case ORDERED:  case UNORDERED:
      return base_bit + 3;

    case GE:  case GEU:
      /* If scc, we will have done a cror to put the bit in the
	 unordered position.  So test that bit.  For integer, this is ! LT
	 unless this is an scc insn.  */
      return scc_p ? base_bit + 3 : base_bit;

    case LE: case LEU:
      return scc_p ? base_bit + 3 : base_bit + 1;

    default:
      return -1;
    }
}
/* Return the GOT register (the PIC offset table rtx), marking it live for
   the prologue/epilogue code.  VALUE is unused.  */

rtx
rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
{
  /* The second flow pass currently (June 1999) can't update
     regs_ever_live without disturbing other parts of the compiler, so
     update it here to make the prolog/epilogue code happy.  */
  if (!can_create_pseudo_p ()
      && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
    df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);

  /* Record that this function addresses data through the GOT/TOC.  */
  crtl->uses_pic_offset_table = 1;

  return pic_offset_table_rtx;
}
/* True iff X is a plain integer constant (a CONST_INT, which always has
   VOIDmode), as opposed to other constant rtxes.  */
#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12468 /* Write out a function code label. */
12470 void
12471 rs6000_output_function_entry (FILE *file, const char *fname)
12473 if (fname[0] != '.')
12475 switch (DEFAULT_ABI)
12477 default:
12478 gcc_unreachable ();
12480 case ABI_AIX:
12481 if (DOT_SYMBOLS)
12482 putc ('.', file);
12483 else
12484 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12485 break;
12487 case ABI_ELFv2:
12488 case ABI_V4:
12489 case ABI_DARWIN:
12490 break;
12494 RS6000_OUTPUT_BASENAME (file, fname);
/* Print an operand.  Recognize special options, documented below.  */

#if TARGET_ELF
/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
   only introduced by the linker, when applying the sda21
   relocation.  */
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
/* Non-ELF targets always use the sda21 relocation with register 0.  */
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif
/* NOTE(review): garbled dump -- each line carries its original file line
   number fused onto the front and brace-only lines are dropped by the
   extraction; code text is kept byte-identical, only comments added.  */
/* Print operand X to FILE according to the single-letter output-modifier
   CODE (the letter after '%' in an insn output template), or print the
   operand plainly when CODE is 0.  Each case writes one assembly fragment
   and returns; unrecognized operands are reported through
   output_operand_lossage so an ICE becomes a diagnosable error.  */
12510 void
12511 print_operand (FILE *file, rtx x, int code)
12513 int i;
12514 unsigned HOST_WIDE_INT uval;
12516 switch (code)
12518 /* %a is output_address. */
12520 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12521 output_operand. */
12523 case 'D':
12524 /* Like 'J' but get to the GT bit only. */
12525 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12527 output_operand_lossage ("invalid %%D value");
12528 return;
12531 /* Bit 1 is GT bit. */
12532 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12534 /* Add one for shift count in rlinm for scc. */
12535 fprintf (file, "%d", i + 1);
12536 return;
12538 case 'e':
12539 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12540 if (! INT_P (x))
12542 output_operand_lossage ("invalid %%e value");
12543 return;
12546 uval = INTVAL (x);
12547 if ((uval & 0xffff) == 0 && uval != 0)
12548 putc ('s', file);
12549 return;
12551 case 'E':
12552 /* X is a CR register. Print the number of the EQ bit of the CR */
12553 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12554 output_operand_lossage ("invalid %%E value");
12555 else
12556 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12557 return;
12559 case 'f':
12560 /* X is a CR register. Print the shift count needed to move it
12561 to the high-order four bits. */
12562 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12563 output_operand_lossage ("invalid %%f value");
12564 else
12565 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12566 return;
12568 case 'F':
12569 /* Similar, but print the count for the rotate in the opposite
12570 direction. */
12571 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12572 output_operand_lossage ("invalid %%F value");
12573 else
12574 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12575 return;
12577 case 'G':
12578 /* X is a constant integer. If it is negative, print "m",
12579 otherwise print "z". This is to make an aze or ame insn. */
12580 if (!CONST_INT_P (x))
12581 output_operand_lossage ("invalid %%G value");
12582 else if (INTVAL (x) >= 0)
12583 putc ('z', file);
12584 else
12585 putc ('m', file);
12586 return;
12588 case 'h':
12589 /* If constant, output low-order five bits. Otherwise, write
12590 normally. */
12591 if (INT_P (x))
12592 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12593 else
12594 print_operand (file, x, 0);
12595 return;
12597 case 'H':
12598 /* If constant, output low-order six bits. Otherwise, write
12599 normally. */
12600 if (INT_P (x))
12601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12602 else
12603 print_operand (file, x, 0);
12604 return;
12606 case 'I':
12607 /* Print `i' if this is a constant, else nothing. */
12608 if (INT_P (x))
12609 putc ('i', file);
12610 return;
12612 case 'j':
12613 /* Write the bit number in CCR for jump. */
12614 i = ccr_bit (x, 0);
12615 if (i == -1)
12616 output_operand_lossage ("invalid %%j code");
12617 else
12618 fprintf (file, "%d", i);
12619 return;
12621 case 'J':
12622 /* Similar, but add one for shift count in rlinm for scc and pass
12623 scc flag to `ccr_bit'. */
12624 i = ccr_bit (x, 1);
12625 if (i == -1)
12626 output_operand_lossage ("invalid %%J code");
12627 else
12628 /* If we want bit 31, write a shift count of zero, not 32. */
12629 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12630 return;
12632 case 'k':
12633 /* X must be a constant. Write the 1's complement of the
12634 constant. */
12635 if (! INT_P (x))
12636 output_operand_lossage ("invalid %%k value");
12637 else
12638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12639 return;
12641 case 'K':
12642 /* X must be a symbolic constant on ELF. Write an
12643 expression suitable for an 'addi' that adds in the low 16
12644 bits of the MEM. */
12645 if (GET_CODE (x) == CONST)
12647 if (GET_CODE (XEXP (x, 0)) != PLUS
12648 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12649 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12650 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12651 output_operand_lossage ("invalid %%K value");
12653 print_operand_address (file, x);
12654 fputs ("@l", file);
12655 return;
12657 /* %l is output_asm_label. */
12659 case 'L':
12660 /* Write second word of DImode or DFmode reference. Works on register
12661 or non-indexed memory only. */
12662 if (REG_P (x))
12663 fputs (reg_names[REGNO (x) + 1], file);
12664 else if (MEM_P (x))
12666 machine_mode mode = GET_MODE (x);
12667 /* Handle possible auto-increment. Since it is pre-increment and
12668 we have already done it, we can just use an offset of word. */
12669 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12670 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12671 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12672 UNITS_PER_WORD));
12673 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12674 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12675 UNITS_PER_WORD));
12676 else
12677 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12678 UNITS_PER_WORD),
12679 0));
12681 if (small_data_operand (x, GET_MODE (x)))
12682 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12683 reg_names[SMALL_DATA_REG]);
12685 return;
12687 case 'N': /* Unused */
12688 /* Write the number of elements in the vector times 4. */
12689 if (GET_CODE (x) != PARALLEL)
12690 output_operand_lossage ("invalid %%N value");
12691 else
12692 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12693 return;
12695 case 'O': /* Unused */
12696 /* Similar, but subtract 1 first. */
12697 if (GET_CODE (x) != PARALLEL)
12698 output_operand_lossage ("invalid %%O value");
12699 else
12700 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12701 return;
12703 case 'p':
12704 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12705 if (! INT_P (x)
12706 || INTVAL (x) < 0
12707 || (i = exact_log2 (INTVAL (x))) < 0)
12708 output_operand_lossage ("invalid %%p value");
12709 else
12710 fprintf (file, "%d", i);
12711 return;
12713 case 'P':
12714 /* The operand must be an indirect memory reference. The result
12715 is the register name. */
12716 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12717 || REGNO (XEXP (x, 0)) >= 32)
12718 output_operand_lossage ("invalid %%P value");
12719 else
12720 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12721 return;
12723 case 'q':
12724 /* This outputs the logical code corresponding to a boolean
12725 expression. The expression may have one or both operands
12726 negated (if one, only the first one). For condition register
12727 logical operations, it will also treat the negated
12728 CR codes as NOTs, but not handle NOTs of them. */
12730 const char *const *t = 0;
12731 const char *s;
12732 enum rtx_code code = GET_CODE (x);
/* Row = base op (AND/IOR/XOR); column = which operands are negated:
   none, first only, or both.  */
12733 static const char * const tbl[3][3] = {
12734 { "and", "andc", "nor" },
12735 { "or", "orc", "nand" },
12736 { "xor", "eqv", "xor" } };
12738 if (code == AND)
12739 t = tbl[0];
12740 else if (code == IOR)
12741 t = tbl[1];
12742 else if (code == XOR)
12743 t = tbl[2];
12744 else
12745 output_operand_lossage ("invalid %%q value");
12747 if (GET_CODE (XEXP (x, 0)) != NOT)
12748 s = t[0];
12749 else
12751 if (GET_CODE (XEXP (x, 1)) == NOT)
12752 s = t[2];
12753 else
12754 s = t[1];
12757 fputs (s, file);
12759 return;
12761 case 'Q':
/* Without mfcrf, print nothing; with it, emit "," then fall through
   to print the 'R' field mask as an extra operand.  */
12762 if (! TARGET_MFCRF)
12763 return;
12764 fputc (',', file);
12765 /* FALLTHRU */
12767 case 'R':
12768 /* X is a CR register. Print the mask for `mtcrf'. */
12769 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12770 output_operand_lossage ("invalid %%R value");
12771 else
12772 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12773 return;
12775 case 's':
12776 /* Low 5 bits of 32 - value */
12777 if (! INT_P (x))
12778 output_operand_lossage ("invalid %%s value");
12779 else
12780 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12781 return;
12783 case 't':
12784 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12785 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12787 output_operand_lossage ("invalid %%t value");
12788 return;
12791 /* Bit 3 is OV bit. */
12792 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12794 /* If we want bit 31, write a shift count of zero, not 32. */
12795 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12796 return;
12798 case 'T':
12799 /* Print the symbolic name of a branch target register. */
12800 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12801 x = XVECEXP (x, 0, 0);
12802 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12803 && REGNO (x) != CTR_REGNO))
12804 output_operand_lossage ("invalid %%T value");
12805 else if (REGNO (x) == LR_REGNO)
12806 fputs ("lr", file);
12807 else
12808 fputs ("ctr", file);
12809 return;
12811 case 'u':
12812 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12813 for use in unsigned operand. */
12814 if (! INT_P (x))
12816 output_operand_lossage ("invalid %%u value");
12817 return;
12820 uval = INTVAL (x);
12821 if ((uval & 0xffff) == 0)
12822 uval >>= 16;
12824 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
12825 return;
12827 case 'v':
12828 /* High-order 16 bits of constant for use in signed operand. */
12829 if (! INT_P (x))
12830 output_operand_lossage ("invalid %%v value");
12831 else
12832 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
12833 (INTVAL (x) >> 16) & 0xffff);
12834 return;
12836 case 'U':
12837 /* Print `u' if this has an auto-increment or auto-decrement. */
12838 if (MEM_P (x)
12839 && (GET_CODE (XEXP (x, 0)) == PRE_INC
12840 || GET_CODE (XEXP (x, 0)) == PRE_DEC
12841 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
12842 putc ('u', file);
12843 return;
12845 case 'V':
12846 /* Print the trap code for this operand. */
12847 switch (GET_CODE (x))
12849 case EQ:
12850 fputs ("eq", file); /* 4 */
12851 break;
12852 case NE:
12853 fputs ("ne", file); /* 24 */
12854 break;
12855 case LT:
12856 fputs ("lt", file); /* 16 */
12857 break;
12858 case LE:
12859 fputs ("le", file); /* 20 */
12860 break;
12861 case GT:
12862 fputs ("gt", file); /* 8 */
12863 break;
12864 case GE:
12865 fputs ("ge", file); /* 12 */
12866 break;
12867 case LTU:
12868 fputs ("llt", file); /* 2 */
12869 break;
12870 case LEU:
12871 fputs ("lle", file); /* 6 */
12872 break;
12873 case GTU:
12874 fputs ("lgt", file); /* 1 */
12875 break;
12876 case GEU:
12877 fputs ("lge", file); /* 5 */
12878 break;
12879 default:
12880 output_operand_lossage ("invalid %%V value");
12882 break;
12884 case 'w':
12885 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
12886 normally. */
12887 if (INT_P (x))
/* The xor/sub pair sign-extends the low 16 bits.  */
12888 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
12889 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
12890 else
12891 print_operand (file, x, 0);
12892 return;
12894 case 'x':
12895 /* X is a FPR or Altivec register used in a VSX context. */
12896 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
12897 output_operand_lossage ("invalid %%x value");
12898 else
12900 int reg = REGNO (x);
12901 int vsx_reg = (FP_REGNO_P (reg)
12902 ? reg - 32
12903 : reg - FIRST_ALTIVEC_REGNO + 32);
12905 #ifdef TARGET_REGNAMES
12906 if (TARGET_REGNAMES)
12907 fprintf (file, "%%vs%d", vsx_reg);
12908 else
12909 #endif
12910 fprintf (file, "%d", vsx_reg);
12912 return;
12914 case 'X':
12915 if (MEM_P (x)
12916 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
12917 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
12918 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
12919 putc ('x', file);
12920 return;
12922 case 'Y':
12923 /* Like 'L', for third word of TImode/PTImode */
12924 if (REG_P (x))
12925 fputs (reg_names[REGNO (x) + 2], file);
12926 else if (MEM_P (x))
12928 machine_mode mode = GET_MODE (x);
12929 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12930 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12931 output_address (mode, plus_constant (Pmode,
12932 XEXP (XEXP (x, 0), 0), 8));
12933 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12934 output_address (mode, plus_constant (Pmode,
12935 XEXP (XEXP (x, 0), 0), 8));
12936 else
12937 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
12938 if (small_data_operand (x, GET_MODE (x)))
12939 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12940 reg_names[SMALL_DATA_REG]);
12942 return;
12944 case 'z':
12945 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12946 x = XVECEXP (x, 0, 1);
12947 /* X is a SYMBOL_REF. Write out the name preceded by a
12948 period and without any trailing data in brackets. Used for function
12949 names. If we are configured for System V (or the embedded ABI) on
12950 the PowerPC, do not emit the period, since those systems do not use
12951 TOCs and the like. */
12952 if (!SYMBOL_REF_P (x))
12954 output_operand_lossage ("invalid %%z value");
12955 return;
12958 /* For macho, check to see if we need a stub. */
12959 if (TARGET_MACHO)
12961 const char *name = XSTR (x, 0);
12962 #if TARGET_MACHO
12963 if (darwin_symbol_stubs
12964 && MACHOPIC_INDIRECT
12965 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12966 name = machopic_indirection_name (x, /*stub_p=*/true);
12967 #endif
12968 assemble_name (file, name);
12970 else if (!DOT_SYMBOLS)
12971 assemble_name (file, XSTR (x, 0));
12972 else
12973 rs6000_output_function_entry (file, XSTR (x, 0));
12974 return;
12976 case 'Z':
12977 /* Like 'L', for last word of TImode/PTImode. */
12978 if (REG_P (x))
12979 fputs (reg_names[REGNO (x) + 3], file);
12980 else if (MEM_P (x))
12982 machine_mode mode = GET_MODE (x);
12983 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12984 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12985 output_address (mode, plus_constant (Pmode,
12986 XEXP (XEXP (x, 0), 0), 12));
12987 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12988 output_address (mode, plus_constant (Pmode,
12989 XEXP (XEXP (x, 0), 0), 12));
12990 else
12991 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
12992 if (small_data_operand (x, GET_MODE (x)))
12993 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12994 reg_names[SMALL_DATA_REG]);
12996 return;
12998 /* Print AltiVec memory operand. */
12999 case 'y':
13001 rtx tmp;
13003 gcc_assert (MEM_P (x));
13005 tmp = XEXP (x, 0);
/* Strip the AND -16 alignment mask (AltiVec) or the PRE_MODIFY
   wrapper (VSX) to get at the underlying address.  */
13007 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13008 && GET_CODE (tmp) == AND
13009 && CONST_INT_P (XEXP (tmp, 1))
13010 && INTVAL (XEXP (tmp, 1)) == -16)
13011 tmp = XEXP (tmp, 0);
13012 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13013 && GET_CODE (tmp) == PRE_MODIFY)
13014 tmp = XEXP (tmp, 1);
13015 if (REG_P (tmp))
13016 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13017 else
13019 if (GET_CODE (tmp) != PLUS
13020 || !REG_P (XEXP (tmp, 0))
13021 || !REG_P (XEXP (tmp, 1)))
13023 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13024 break;
/* r0 as the first operand of an indexed form reads as literal 0,
   so put it second.  */
13027 if (REGNO (XEXP (tmp, 0)) == 0)
13028 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13029 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13030 else
13031 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13032 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13034 break;
13037 case 0:
/* Plain operand: register name, memory address, or constant.  */
13038 if (REG_P (x))
13039 fprintf (file, "%s", reg_names[REGNO (x)]);
13040 else if (MEM_P (x))
13042 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13043 know the width from the mode. */
13044 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13045 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13046 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13047 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13048 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13049 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13050 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13051 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13052 else
13053 output_address (GET_MODE (x), XEXP (x, 0));
13055 else if (toc_relative_expr_p (x, false,
13056 &tocrel_base_oac, &tocrel_offset_oac))
13057 /* This hack along with a corresponding hack in
13058 rs6000_output_addr_const_extra arranges to output addends
13059 where the assembler expects to find them. eg.
13060 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13061 without this hack would be output as "x@toc+4". We
13062 want "x+4@toc". */
13063 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13064 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13065 output_addr_const (file, XVECEXP (x, 0, 0));
13066 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13067 output_addr_const (file, XVECEXP (x, 0, 1));
13068 else
13069 output_addr_const (file, x);
13070 return;
13072 case '&':
/* Name of the module's local-dynamic TLS base symbol.  */
13073 if (const char *name = get_some_local_dynamic_name ())
13074 assemble_name (file, name);
13075 else
13076 output_operand_lossage ("'%%&' used without any "
13077 "local dynamic TLS references");
13078 return;
13080 default:
13081 output_operand_lossage ("invalid %%xn code");
13085 /* Print the address of an operand. */
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
/* Print a memory address operand X to FILE in assembler syntax:
   plain register, pcrel symbol (+offset, @got for non-local, @pcrel),
   symbolic constant (with small-data annotation), reg+reg indexed,
   reg+const offset, lo_sum forms (Mach-O lo16() / ELF @l), and
   TOC-relative expressions.  */
13087 void
13088 print_operand_address (FILE *file, rtx x)
13090 if (REG_P (x))
13091 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13093 /* Is it a PC-relative address? */
13094 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13096 HOST_WIDE_INT offset;
13098 if (GET_CODE (x) == CONST)
13099 x = XEXP (x, 0);
13101 if (GET_CODE (x) == PLUS)
13103 offset = INTVAL (XEXP (x, 1));
13104 x = XEXP (x, 0);
13106 else
13107 offset = 0;
13109 output_addr_const (file, x);
13111 if (offset)
13112 fprintf (file, "%+" PRId64, offset);
/* Non-local symbols go through the GOT; everything here is pcrel.  */
13114 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13115 fprintf (file, "@got");
13117 fprintf (file, "@pcrel");
13119 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13120 || GET_CODE (x) == LABEL_REF)
13122 output_addr_const (file, x);
13123 if (small_data_operand (x, GET_MODE (x)))
13124 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13125 reg_names[SMALL_DATA_REG]);
13126 else
13127 gcc_assert (!TARGET_TOC);
13129 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13130 && REG_P (XEXP (x, 1)))
/* Indexed form: r0 first would read as literal 0, so order the
   registers to avoid that.  */
13132 if (REGNO (XEXP (x, 0)) == 0)
13133 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13134 reg_names[ REGNO (XEXP (x, 0)) ]);
13135 else
13136 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13137 reg_names[ REGNO (XEXP (x, 1)) ]);
13139 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13140 && CONST_INT_P (XEXP (x, 1)))
13141 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13142 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13143 #if TARGET_MACHO
13144 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13145 && CONSTANT_P (XEXP (x, 1)))
13147 fprintf (file, "lo16(");
13148 output_addr_const (file, XEXP (x, 1));
13149 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13151 #endif
13152 #if TARGET_ELF
13153 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13154 && CONSTANT_P (XEXP (x, 1)))
13156 output_addr_const (file, XEXP (x, 1));
13157 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13159 #endif
13160 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13162 /* This hack along with a corresponding hack in
13163 rs6000_output_addr_const_extra arranges to output addends
13164 where the assembler expects to find them. eg.
13165 (lo_sum (reg 9)
13166 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13167 without this hack would be output as "x@toc+8@l(9)". We
13168 want "x+8@toc@l(9)". */
13169 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13170 if (GET_CODE (x) == LO_SUM)
13171 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13172 else
13173 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13175 else
13176 output_addr_const (file, x);
13179 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA: print target-specific
   UNSPEC constants (TOC-relative references, and on Mach-O the picbase
   offset).  Returns true if X was handled, false to fall back to the
   generic printer.  */
13181 bool
13182 rs6000_output_addr_const_extra (FILE *file, rtx x)
13184 if (GET_CODE (x) == UNSPEC)
13185 switch (XINT (x, 1))
13187 case UNSPEC_TOCREL:
13188 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13189 && REG_P (XVECEXP (x, 0, 1))
13190 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13191 output_addr_const (file, XVECEXP (x, 0, 0));
/* Emit the addend recorded by print_operand/print_operand_address
   so it lands before the @toc suffix (see the "hack" comments
   there).  */
13192 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13194 if (INTVAL (tocrel_offset_oac) >= 0)
13195 fprintf (file, "+");
13196 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13198 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13200 putc ('-', file);
13201 assemble_name (file, toc_label_name);
/* Ensure the TOC base label is actually emitted later.  */
13202 need_toc_init = 1;
13204 else if (TARGET_ELF)
13205 fputs ("@toc", file);
13206 return true;
13208 #if TARGET_MACHO
13209 case UNSPEC_MACHOPIC_OFFSET:
13210 output_addr_const (file, XVECEXP (x, 0, 0));
13211 putc ('-', file);
13212 machopic_output_function_base_name (file);
13213 return true;
13214 #endif
13216 return false;
13219 /* Target hook for assembling integer objects. The PowerPC version has
13220 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13221 is defined. It also needs to handle DI-mode objects on 64-bit
13222 targets. */
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
/* Target hook for assembling an integer object X of SIZE bytes.
   Handles the -mrelocatable .fixup machinery for aligned SImode
   constants and the -mcall-aixdesc dot-symbol stripping; everything
   else defers to default_assemble_integer.  Returns true when the
   value was emitted here.  */
13224 static bool
13225 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13227 #ifdef RELOCATABLE_NEEDS_FIXUP
13228 /* Special handling for SI values. */
13229 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
/* Guards against re-entry via output_addr_const below.  */
13231 static int recurse = 0;
13233 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13234 the .fixup section. Since the TOC section is already relocated, we
13235 don't need to mark it here. We used to skip the text section, but it
13236 should never be valid for relocated addresses to be placed in the text
13237 section. */
13238 if (DEFAULT_ABI == ABI_V4
13239 && (TARGET_RELOCATABLE || flag_pic > 1)
13240 && in_section != toc_section
13241 && !recurse
13242 && !CONST_SCALAR_INT_P (x)
13243 && CONSTANT_P (x))
13245 char buf[256];
13247 recurse = 1;
13248 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13249 fixuplabelno++;
13250 ASM_OUTPUT_LABEL (asm_out_file, buf);
13251 fprintf (asm_out_file, "\t.long\t(");
13252 output_addr_const (asm_out_file, x);
13253 fprintf (asm_out_file, ")@fixup\n");
/* Record the label's address in the .fixup section, then switch
   back to the previous section.  */
13254 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13255 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13256 fprintf (asm_out_file, "\t.long\t");
13257 assemble_name (asm_out_file, buf);
13258 fprintf (asm_out_file, "\n\t.previous\n");
13259 recurse = 0;
13260 return true;
13262 /* Remove initial .'s to turn a -mcall-aixdesc function
13263 address into the address of the descriptor, not the function
13264 itself. */
13265 else if (SYMBOL_REF_P (x)
13266 && XSTR (x, 0)[0] == '.'
13267 && DEFAULT_ABI == ABI_AIX)
13269 const char *name = XSTR (x, 0);
13270 while (*name == '.')
13271 name++;
13273 fprintf (asm_out_file, "\t.long\t%s\n", name);
13274 return true;
13277 #endif /* RELOCATABLE_NEEDS_FIXUP */
13278 return default_assemble_integer (x, size, aligned_p);
13281 /* Return a template string for assembly to emit when making an
13282 external call. FUNOP is the call mem argument operand number. */
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
/* Build the assembler template for a direct call (SIBCALL false) or
   sibling call (SIBCALL true) whose call-mem operand is %FUNOP.
   Appends TLS marker annotations when present, and varies the branch
   suffix/relocations by ABI (pcrel @notoc, AIX/ELFv2 with trailing nop,
   V4 @plt, Darwin jbsr long-call).  Returns a pointer to a static
   buffer, so the result must be consumed before the next call.  */
13284 static const char *
13285 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13287 /* -Wformat-overflow workaround, without which gcc thinks that %u
13288 might produce 10 digits. */
13289 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13291 char arg[12];
13292 arg[0] = 0;
13293 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
13295 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13296 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13297 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13298 sprintf (arg, "(%%&@tlsld)");
13299 else
13300 gcc_unreachable ();
13303 /* The magic 32768 offset here corresponds to the offset of
13304 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13305 char z[11];
13306 sprintf (z, "%%z%u%s", funop,
13307 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13308 ? "+32768" : ""));
13310 static char str[32]; /* 1 spare */
13311 if (rs6000_pcrel_p (cfun))
13312 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13313 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13314 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13315 sibcall ? "" : "\n\tnop");
13316 else if (DEFAULT_ABI == ABI_V4)
13317 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13318 flag_pic ? "@plt" : "");
13319 #if TARGET_MACHO
13320 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
13321 else if (DEFAULT_ABI == ABI_DARWIN)
13323 /* The cookie is in operand func+2. */
13324 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13325 int cookie = INTVAL (operands[funop + 2]);
13326 if (cookie & CALL_LONG)
13328 tree funname = get_identifier (XSTR (operands[funop], 0));
13329 tree labelname = get_prev_label (funname);
13330 gcc_checking_assert (labelname && !sibcall);
13332 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13333 instruction will reach 'foo', otherwise link as 'bl L42'".
13334 "L42" should be a 'branch island', that will do a far jump to
13335 'foo'. Branch islands are generated in
13336 macho_branch_islands(). */
13337 sprintf (str, "jbsr %%z%u,%.10s", funop,
13338 IDENTIFIER_POINTER (labelname));
13340 else
13341 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13342 after the call. */
13343 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13345 #endif
13346 else
13347 gcc_unreachable ();
13348 return str;
/* Assembler template for a normal (non-sibling) direct call; see
   rs6000_call_template_1.  */
13351 const char *
13352 rs6000_call_template (rtx *operands, unsigned int funop)
13354 return rs6000_call_template_1 (operands, funop, false);
/* Assembler template for a sibling (tail) direct call; see
   rs6000_call_template_1.  */
13357 const char *
13358 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13360 return rs6000_call_template_1 (operands, funop, true);
13363 /* As above, for indirect calls. */
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
/* Build the assembler template for an indirect call through the register
   or UNSPEC in operand FUNOP (sibling call when SIBCALL).  Emits the
   PLTSEQ/PLTCALL and TLS marker relocations when applicable, the
   "crset 2" speculation barrier when indirect-branch speculation is
   disabled, and the ABI-appropriate TOC save/restore loads.  Returns a
   pointer to a static buffer.  */
13365 static const char *
13366 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13367 bool sibcall)
13369 /* -Wformat-overflow workaround, without which gcc thinks that %u
13370 might produce 10 digits. Note that -Wformat-overflow will not
13371 currently warn here for str[], so do not rely on a warning to
13372 ensure str[] is correctly sized. */
13373 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13375 /* Currently, funop is either 0 or 1. The maximum string is always
13376 a !speculate 64-bit __tls_get_addr call.
13378 ABI_ELFv2, pcrel:
13379 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13380 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13381 . 9 crset 2\n\t
13382 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13383 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13384 . 8 beq%T1l-
13385 .---
13386 .142
13388 ABI_AIX:
13389 . 9 ld 2,%3\n\t
13390 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13391 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13392 . 9 crset 2\n\t
13393 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13394 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13395 . 10 beq%T1l-\n\t
13396 . 10 ld 2,%4(1)
13397 .---
13398 .151
13400 ABI_ELFv2:
13401 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13402 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13403 . 9 crset 2\n\t
13404 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13405 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13406 . 10 beq%T1l-\n\t
13407 . 10 ld 2,%3(1)
13408 .---
13409 .142
13411 ABI_V4:
13412 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13413 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13414 . 9 crset 2\n\t
13415 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13416 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13417 . 8 beq%T1l-
13418 .---
13419 .141 */
13420 static char str[160]; /* 8 spare */
13421 char *s = str;
13422 const char *ptrload = TARGET_64BIT ? "d" : "wz";
/* AIX: load the callee's TOC pointer from the function descriptor.  */
13424 if (DEFAULT_ABI == ABI_AIX)
13425 s += sprintf (s,
13426 "l%s 2,%%%u\n\t",
13427 ptrload, funop + 2);
13429 /* We don't need the extra code to stop indirect call speculation if
13430 calling via LR. */
13431 bool speculate = (TARGET_MACHO
13432 || rs6000_speculate_indirect_jumps
13433 || (REG_P (operands[funop])
13434 && REGNO (operands[funop]) == LR_REGNO));
13436 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13438 const char *rel64 = TARGET_64BIT ? "64" : "";
13439 char tls[29];
13440 tls[0] = 0;
13441 if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
13443 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13444 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13445 rel64, funop + 1);
13446 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13447 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13448 rel64);
13449 else
13450 gcc_unreachable ();
13453 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13454 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13455 && flag_pic == 2 ? "+32768" : "");
13456 if (!speculate)
13458 s += sprintf (s,
13459 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13460 tls, rel64, notoc, funop, addend);
13461 s += sprintf (s, "crset 2\n\t");
13463 s += sprintf (s,
13464 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13465 tls, rel64, notoc, funop, addend);
13467 else if (!speculate)
13468 s += sprintf (s, "crset 2\n\t");
/* Finally the branch itself: plain b%T when speculation is allowed,
   otherwise the beq%T- form predicated on the crset above.  */
13470 if (rs6000_pcrel_p (cfun))
13472 if (speculate)
13473 sprintf (s, "b%%T%ul", funop);
13474 else
13475 sprintf (s, "beq%%T%ul-", funop);
13477 else if (DEFAULT_ABI == ABI_AIX)
13479 if (speculate)
13480 sprintf (s,
13481 "b%%T%ul\n\t"
13482 "l%s 2,%%%u(1)",
13483 funop, ptrload, funop + 3);
13484 else
13485 sprintf (s,
13486 "beq%%T%ul-\n\t"
13487 "l%s 2,%%%u(1)",
13488 funop, ptrload, funop + 3);
13490 else if (DEFAULT_ABI == ABI_ELFv2)
13492 if (speculate)
13493 sprintf (s,
13494 "b%%T%ul\n\t"
13495 "l%s 2,%%%u(1)",
13496 funop, ptrload, funop + 2);
13497 else
13498 sprintf (s,
13499 "beq%%T%ul-\n\t"
13500 "l%s 2,%%%u(1)",
13501 funop, ptrload, funop + 2);
13503 else
13505 if (speculate)
13506 sprintf (s,
13507 "b%%T%u%s",
13508 funop, sibcall ? "" : "l");
13509 else
13510 sprintf (s,
13511 "beq%%T%u%s-%s",
13512 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13514 return str;
/* Assembler template for a normal indirect call; see
   rs6000_indirect_call_template_1.  */
13517 const char *
13518 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13520 return rs6000_indirect_call_template_1 (operands, funop, false);
/* Assembler template for an indirect sibling (tail) call; see
   rs6000_indirect_call_template_1.  */
13523 const char *
13524 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13526 return rs6000_indirect_call_template_1 (operands, funop, true);
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
13529 #if HAVE_AS_PLTSEQ
13530 /* Output indirect call insns. WHICH identifies the type of sequence. */
/* Each case emits one instruction of the inline PLT call sequence plus
   the matching .reloc marker (and optional TLS marker from operand 3).
   Returns a pointer to a static buffer.  */
13531 const char *
13532 rs6000_pltseq_template (rtx *operands, int which)
13534 const char *rel64 = TARGET_64BIT ? "64" : "";
13535 char tls[30];
13536 tls[0] = 0;
13537 if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
/* The prefixed pld used for PLT_PCREL34 is 8 bytes; every other
   insn marked here is 4.  */
13539 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13540 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13541 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13542 off, rel64);
13543 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13544 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13545 off, rel64);
13546 else
13547 gcc_unreachable ();
13550 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13551 static char str[96]; /* 10 spare */
/* Offset of the 16-bit immediate field within the insn depends on
   endianness.  */
13552 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13553 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13554 && flag_pic == 2 ? "+32768" : "");
13555 switch (which)
13557 case RS6000_PLTSEQ_TOCSAVE:
13558 sprintf (str,
13559 "st%s\n\t"
13560 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13561 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13562 tls, rel64);
13563 break;
13564 case RS6000_PLTSEQ_PLT16_HA:
13565 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13566 sprintf (str,
13567 "lis %%0,0\n\t"
13568 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13569 tls, off, rel64);
13570 else
13571 sprintf (str,
13572 "addis %%0,%%1,0\n\t"
13573 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13574 tls, off, rel64, addend);
13575 break;
13576 case RS6000_PLTSEQ_PLT16_LO:
13577 sprintf (str,
13578 "l%s %%0,0(%%1)\n\t"
13579 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13580 TARGET_64BIT ? "d" : "wz",
13581 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13582 break;
13583 case RS6000_PLTSEQ_MTCTR:
13584 sprintf (str,
13585 "mtctr %%1\n\t"
13586 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13587 tls, rel64, addend);
13588 break;
13589 case RS6000_PLTSEQ_PLT_PCREL34:
13590 sprintf (str,
13591 "pl%s %%0,0(0),1\n\t"
13592 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13593 TARGET_64BIT ? "d" : "wz",
13594 tls, rel64);
13595 break;
13596 default:
13597 gcc_unreachable ();
13599 return str;
13601 #endif
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
13603 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13604 /* Emit an assembler directive to set symbol visibility for DECL to
13605 VISIBILITY_TYPE. */
/* On XCOFF this is a no-op; on AIX with dot-symbols, function decls get
   the directive on both the descriptor and the '.' entry-point symbol;
   otherwise fall back to the default hook.  */
13607 static void
13608 rs6000_assemble_visibility (tree decl, int vis)
13610 if (TARGET_XCOFF)
13611 return;
13613 /* Functions need to have their entry point symbol visibility set as
13614 well as their descriptor symbol visibility. */
13615 if (DEFAULT_ABI == ABI_AIX
13616 && DOT_SYMBOLS
13617 && TREE_CODE (decl) == FUNCTION_DECL)
/* Indexed by VIS; slot 0 (default visibility) needs no directive.  */
13619 static const char * const visibility_types[] = {
13620 NULL, "protected", "hidden", "internal"
13623 const char *name, *type;
13625 name = ((* targetm.strip_name_encoding)
13626 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13627 type = visibility_types[vis];
13629 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13630 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13632 else
13633 default_assemble_visibility (decl, vis);
13635 #endif
/* NOTE(review): garbled dump -- lines carry fused original line numbers
   and brace-only lines are missing; code kept byte-identical.  */
/* Return the comparison code that is the logical inverse of CODE for a
   compare in MODE, using the ordered/unordered-aware reversal for FP
   condition codes where NaNs matter.  */
13637 enum rtx_code
13638 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13640 /* Reversal of FP compares takes care -- an ordered compare
13641 becomes an unordered compare and vice versa. */
13642 if (mode == CCFPmode
13643 && (!flag_finite_math_only
13644 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13645 || code == UNEQ || code == LTGT))
13646 return reverse_condition_maybe_unordered (code);
13647 else
13648 return reverse_condition (code);
13651 /* Generate a compare for CODE. Return a brand-new rtx that
13652 represents the result of the compare. */
13654 static rtx
13655 rs6000_generate_compare (rtx cmp, machine_mode mode)
13657 machine_mode comp_mode;
13658 rtx compare_result;
13659 enum rtx_code code = GET_CODE (cmp);
13660 rtx op0 = XEXP (cmp, 0);
13661 rtx op1 = XEXP (cmp, 1);
13663 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13664 comp_mode = CCmode;
13665 else if (FLOAT_MODE_P (mode))
13666 comp_mode = CCFPmode;
13667 else if (code == GTU || code == LTU
13668 || code == GEU || code == LEU)
13669 comp_mode = CCUNSmode;
13670 else if ((code == EQ || code == NE)
13671 && unsigned_reg_p (op0)
13672 && (unsigned_reg_p (op1)
13673 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13674 /* These are unsigned values, perhaps there will be a later
13675 ordering compare that can be shared with this one. */
13676 comp_mode = CCUNSmode;
13677 else
13678 comp_mode = CCmode;
13680 /* If we have an unsigned compare, make sure we don't have a signed value as
13681 an immediate. */
13682 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13683 && INTVAL (op1) < 0)
13685 op0 = copy_rtx_if_shared (op0);
13686 op1 = force_reg (GET_MODE (op0), op1);
13687 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13690 /* First, the compare. */
13691 compare_result = gen_reg_rtx (comp_mode);
13693 /* IEEE 128-bit support in VSX registers when we do not have hardware
13694 support. */
13695 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13697 rtx libfunc = NULL_RTX;
13698 bool check_nan = false;
13699 rtx dest;
13701 switch (code)
13703 case EQ:
13704 case NE:
13705 libfunc = optab_libfunc (eq_optab, mode);
13706 break;
13708 case GT:
13709 case GE:
13710 libfunc = optab_libfunc (ge_optab, mode);
13711 break;
13713 case LT:
13714 case LE:
13715 libfunc = optab_libfunc (le_optab, mode);
13716 break;
13718 case UNORDERED:
13719 case ORDERED:
13720 libfunc = optab_libfunc (unord_optab, mode);
13721 code = (code == UNORDERED) ? NE : EQ;
13722 break;
13724 case UNGE:
13725 case UNGT:
13726 check_nan = true;
13727 libfunc = optab_libfunc (ge_optab, mode);
13728 code = (code == UNGE) ? GE : GT;
13729 break;
13731 case UNLE:
13732 case UNLT:
13733 check_nan = true;
13734 libfunc = optab_libfunc (le_optab, mode);
13735 code = (code == UNLE) ? LE : LT;
13736 break;
13738 case UNEQ:
13739 case LTGT:
13740 check_nan = true;
13741 libfunc = optab_libfunc (eq_optab, mode);
13742 code = (code = UNEQ) ? EQ : NE;
13743 break;
13745 default:
13746 gcc_unreachable ();
13749 gcc_assert (libfunc);
13751 if (!check_nan)
13752 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13753 SImode, op0, mode, op1, mode);
13755 /* The library signals an exception for signalling NaNs, so we need to
13756 handle isgreater, etc. by first checking isordered. */
13757 else
13759 rtx ne_rtx, normal_dest, unord_dest;
13760 rtx unord_func = optab_libfunc (unord_optab, mode);
13761 rtx join_label = gen_label_rtx ();
13762 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13763 rtx unord_cmp = gen_reg_rtx (comp_mode);
13766 /* Test for either value being a NaN. */
13767 gcc_assert (unord_func);
13768 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13769 SImode, op0, mode, op1, mode);
13771 /* Set value (0) if either value is a NaN, and jump to the join
13772 label. */
13773 dest = gen_reg_rtx (SImode);
13774 emit_move_insn (dest, const1_rtx);
13775 emit_insn (gen_rtx_SET (unord_cmp,
13776 gen_rtx_COMPARE (comp_mode, unord_dest,
13777 const0_rtx)));
13779 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13780 emit_jump_insn (gen_rtx_SET (pc_rtx,
13781 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13782 join_ref,
13783 pc_rtx)));
13785 /* Do the normal comparison, knowing that the values are not
13786 NaNs. */
13787 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13788 SImode, op0, mode, op1, mode);
13790 emit_insn (gen_cstoresi4 (dest,
13791 gen_rtx_fmt_ee (code, SImode, normal_dest,
13792 const0_rtx),
13793 normal_dest, const0_rtx));
13795 /* Join NaN and non-Nan paths. Compare dest against 0. */
13796 emit_label (join_label);
13797 code = NE;
13800 emit_insn (gen_rtx_SET (compare_result,
13801 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13804 else
13806 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13807 CLOBBERs to match cmptf_internal2 pattern. */
13808 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13809 && FLOAT128_IBM_P (GET_MODE (op0))
13810 && TARGET_HARD_FLOAT)
13811 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13812 gen_rtvec (10,
13813 gen_rtx_SET (compare_result,
13814 gen_rtx_COMPARE (comp_mode, op0, op1)),
13815 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13816 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13817 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13818 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13819 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13820 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13821 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13822 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13823 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
13824 else if (GET_CODE (op1) == UNSPEC
13825 && XINT (op1, 1) == UNSPEC_SP_TEST)
13827 rtx op1b = XVECEXP (op1, 0, 0);
13828 comp_mode = CCEQmode;
13829 compare_result = gen_reg_rtx (CCEQmode);
13830 if (TARGET_64BIT)
13831 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
13832 else
13833 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
13835 else
13836 emit_insn (gen_rtx_SET (compare_result,
13837 gen_rtx_COMPARE (comp_mode, op0, op1)));
13840 /* Some kinds of FP comparisons need an OR operation;
13841 under flag_finite_math_only we don't bother. */
13842 if (FLOAT_MODE_P (mode)
13843 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
13844 && !flag_finite_math_only
13845 && (code == LE || code == GE
13846 || code == UNEQ || code == LTGT
13847 || code == UNGT || code == UNLT))
13849 enum rtx_code or1, or2;
13850 rtx or1_rtx, or2_rtx, compare2_rtx;
13851 rtx or_result = gen_reg_rtx (CCEQmode);
13853 switch (code)
13855 case LE: or1 = LT; or2 = EQ; break;
13856 case GE: or1 = GT; or2 = EQ; break;
13857 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
13858 case LTGT: or1 = LT; or2 = GT; break;
13859 case UNGT: or1 = UNORDERED; or2 = GT; break;
13860 case UNLT: or1 = UNORDERED; or2 = LT; break;
13861 default: gcc_unreachable ();
13863 validate_condition_mode (or1, comp_mode);
13864 validate_condition_mode (or2, comp_mode);
13865 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
13866 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
13867 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
13868 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
13869 const_true_rtx);
13870 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
13872 compare_result = or_result;
13873 code = EQ;
13876 validate_condition_mode (code, GET_MODE (compare_result));
13878 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
13882 /* Return the diagnostic message string if the binary operation OP is
13883 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13885 static const char*
13886 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
13887 const_tree type1,
13888 const_tree type2)
13890 machine_mode mode1 = TYPE_MODE (type1);
13891 machine_mode mode2 = TYPE_MODE (type2);
13893 /* For complex modes, use the inner type. */
13894 if (COMPLEX_MODE_P (mode1))
13895 mode1 = GET_MODE_INNER (mode1);
13897 if (COMPLEX_MODE_P (mode2))
13898 mode2 = GET_MODE_INNER (mode2);
13900 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
13901 double to intermix unless -mfloat128-convert. */
13902 if (mode1 == mode2)
13903 return NULL;
13905 if (!TARGET_FLOAT128_CVT)
13907 if ((mode1 == KFmode && mode2 == IFmode)
13908 || (mode1 == IFmode && mode2 == KFmode))
13909 return N_("__float128 and __ibm128 cannot be used in the same "
13910 "expression");
13912 if (TARGET_IEEEQUAD
13913 && ((mode1 == IFmode && mode2 == TFmode)
13914 || (mode1 == TFmode && mode2 == IFmode)))
13915 return N_("__ibm128 and long double cannot be used in the same "
13916 "expression");
13918 if (!TARGET_IEEEQUAD
13919 && ((mode1 == KFmode && mode2 == TFmode)
13920 || (mode1 == TFmode && mode2 == KFmode)))
13921 return N_("__float128 and long double cannot be used in the same "
13922 "expression");
13925 return NULL;
/* Expand floating point conversion to/from __float128 and __ibm128.
   DEST receives SRC converted to DEST's mode; UNSIGNED_P selects the
   unsigned variant of integer <-> float conversions.  Depending on
   target support, the conversion is emitted as a FLOAT_EXTEND move,
   a hardware conversion insn, or a libcall.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;
  size_t kf_or_tf;		/* Index into hw_conversions: 0 = KF, 1 = TF.  */

  /* Generators for the hardware conversion insns, one row per IEEE
     128-bit mode (KFmode / TFmode).  */
  struct hw_conv_t {
    rtx_2func_t	from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
      gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      /* Convert into a pseudo first, then store; recursion terminates
	 because the temporary is a REG, not a MEM.  */
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
	kf_or_tf = 0;
      else if (dest_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (src_mode)
	{
	case E_DFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_df;
	  break;

	case E_SFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  /* IBM 128-bit source needs a real conversion; IEEE-to-IEEE is
	     just a move between representations of the same format.  */
	  if (FLOAT128_IBM_P (src_mode))
	    cvt = sext_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
	kf_or_tf = 0;
      else if (src_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (dest_mode)
	{
	case E_DFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_df;
	  break;

	case E_SFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  /* As above: real conversion only when the destination is IBM
	     format.  */
	  if (FLOAT128_IBM_P (dest_mode))
	    cvt = trunc_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode/IFmode.  */
  if (do_move)
    emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
				       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
	emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();

  return;
}
14166 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14167 can be used as that dest register. Return the dest register. */
14170 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14172 if (op2 == const0_rtx)
14173 return op1;
14175 if (GET_CODE (scratch) == SCRATCH)
14176 scratch = gen_reg_rtx (mode);
14178 if (logical_operand (op2, mode))
14179 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14180 else
14181 emit_insn (gen_rtx_SET (scratch,
14182 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14184 return scratch;
/* Expand a store-flag (sCOND) pattern: set operands[0] from the
   comparison operands[1], whose compared values have mode MODE.  */

void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx;
  machine_mode op_mode;
  enum rtx_code cond_code;
  rtx result = operands[0];

  condition_rtx = rs6000_generate_compare (operands[1], mode);
  cond_code = GET_CODE (condition_rtx);

  if (cond_code == NE
      || cond_code == GE || cond_code == LE
      || cond_code == GEU || cond_code == LEU
      || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      /* For these codes, compute the reversed condition into a fresh
	 CCEQ register and replace the condition with an EQ test of
	 that register against zero.  */
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
				     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  /* Determine the mode of the compared operands; a VOIDmode first
     operand (e.g. a constant) falls back to the second operand.  */
  op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (result, condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (result, condition_rtx));
    }
}
14232 /* Emit a branch of kind CODE to location LOC. */
14234 void
14235 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14237 rtx condition_rtx, loc_ref;
14239 condition_rtx = rs6000_generate_compare (operands[0], mode);
14240 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14241 emit_jump_insn (gen_rtx_SET (pc_rtx,
14242 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
14243 loc_ref, pc_rtx)));
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.

   The returned string lives in a static buffer and is overwritten by
   the next call.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  /* A length attribute of 8 means the short-form branch cannot reach
     the target, so a long-branch sequence is used.  */
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  /* A long branch inverts the sense of the hint, hence the XOR
	     with need_longbranch.  */
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  /* NULL label means a conditional return (blr form).  */
  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
14364 /* Return insn for VSX or Altivec comparisons. */
14366 static rtx
14367 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14369 rtx mask;
14370 machine_mode mode = GET_MODE (op0);
14372 switch (code)
14374 default:
14375 break;
14377 case GE:
14378 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14379 return NULL_RTX;
14380 /* FALLTHRU */
14382 case EQ:
14383 case GT:
14384 case GTU:
14385 case ORDERED:
14386 case UNORDERED:
14387 case UNEQ:
14388 case LTGT:
14389 mask = gen_reg_rtx (mode);
14390 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14391 return mask;
14394 return NULL_RTX;
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.
   Returns the mask register, or NULL_RTX if the comparison cannot be
   synthesized.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    /* LT/LTU become GT/GTU with the operands swapped.  */
    case LT:
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	/* Recurse on the reversed code, then complement the result.  */
	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
      break;
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	/* Recurse for the strict comparison and for EQ, then OR the
	   two masks together.  */
	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
      break;
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  Returns 1 on success, 0 if
   the target cannot do the operation in hardware.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
	 specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
	{
	  /* Invert condition to avoid compound test.  */
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Mark unsigned tests with CCUNSmode.  */
      cc_mode = CCUNSmode;

      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  /* If the condition was inverted above, exchange the move arms to
     compensate.  */
  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      /* (cond ? -1 : 0) is exactly the mask itself.  */
      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      /* (cond ? 0 : -1) is the complement of the mask.  */
      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  /* Select between the arms with an IF_THEN_ELSE on mask != 0.  */
  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
			  CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (dest_mode,
						cond2,
						op_true,
						op_false)));
  return 1;
}
14648 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
14649 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last
14650 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
14651 hardware has no such operation. */
14653 static int
14654 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14656 enum rtx_code code = GET_CODE (op);
14657 rtx op0 = XEXP (op, 0);
14658 rtx op1 = XEXP (op, 1);
14659 machine_mode compare_mode = GET_MODE (op0);
14660 machine_mode result_mode = GET_MODE (dest);
14661 bool max_p = false;
14663 if (result_mode != compare_mode)
14664 return 0;
14666 if (code == GE || code == GT)
14667 max_p = true;
14668 else if (code == LE || code == LT)
14669 max_p = false;
14670 else
14671 return 0;
14673 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14676 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
14677 max_p = !max_p;
14679 else
14680 return 0;
14682 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14683 return 1;
/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
   XXSEL instructions for SF/DF scalars.  Move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it is
   zero/false.  Return 0 if the hardware has no such operation.  */

static int
rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode result_mode = GET_MODE (dest);
  rtx compare_rtx;
  rtx cmove_rtx;
  rtx clobber_rtx;

  /* A scratch pseudo is clobbered below, so this only works before
     register allocation.  */
  if (!can_create_pseudo_p ())
    return 0;

  switch (code)
    {
    case EQ:
    case GE:
    case GT:
      break;

    /* Canonicalize NE/LT/LE to EQ/GT/GE by swapping the compare
       operands.  */
    case NE:
    case LT:
    case LE:
      code = swap_condition (code);
      std::swap (op0, op1);
      break;

    default:
      return 0;
    }

  /* Generate:	[(parallel [(set (dest)
				 (if_then_else (op (cmp1) (cmp2))
					       (true)
					       (false)))
		 (clobber (scratch))])].  */

  compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
  cmove_rtx = gen_rtx_SET (dest,
			   gen_rtx_IF_THEN_ELSE (result_mode,
						 compare_rtx,
						 true_cond,
						 false_cond));

  clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2, cmove_rtx, clobber_rtx)));

  return 1;
}
/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.

   For floating-point compares the strategy is to reduce the compare
   to a GE-against-zero and emit a single IF_THEN_ELSE (fsel-style)
   pattern; integer compares are delegated to the isel expander.  */

int
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
	 op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return 0;
  if (GET_MODE (true_cond) != result_mode)
    return 0;
  if (GET_MODE (false_cond) != result_mode)
    return 0;

  /* See if we can use the ISA 3.0 (power9) min/max/compare functions.  */
  if (TARGET_P9_MINMAX
      && (compare_mode == SFmode || compare_mode == DFmode)
      && (result_mode == SFmode || result_mode == DFmode))
    {
      if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
	return 1;

      if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
	return 1;
    }

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return 0;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return 0;
    }

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return 0;

  /* Eliminate half of the comparisons by switching operands, this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero,
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return 0;

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (!CONST_DOUBLE_P (op1)
	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
	  || (! rtx_equal_p (op0, true_cond)
	      && ! rtx_equal_p (op1, true_cond))))
    return 0;

  /* At this point we know we can use fsel.  */

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
	/* a > 0 is !(a <= 0) when NaNs are ignored; swap the arms.  */
	code = LE;
	temp = true_cond;
	true_cond = false_cond;
	false_cond = temp;
	break;
      case UNGE:
	code = GE;
	break;
      case UNEQ:
	code = EQ;
	break;
      default:
	break;
      }

  /* Now, reduce everything down to a GE.  */
  switch (code)
    {
    case GE:
      break;

    case LE:
      /* a <= 0 becomes -a >= 0.  */
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      /* |a| >= 0 iff a is not a NaN.  */
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      /* a == 0 iff -|a| >= 0.  */
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_NEG (compare_mode,
					   gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  /* Emit the final GE-against-zero conditional move.  */
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (result_mode,
						gen_rtx_GE (VOIDmode,
							    op0, op1),
						true_cond, false_cond)));
  return 1;
}
/* Same as above, but for ints (isel).  Move TRUE_COND to DEST if OP
   holds, FALSE_COND otherwise.  Return 0 if isel cannot be used for
   this mode.  */

static int
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  rtx condition_rtx, cr;
  machine_mode mode = GET_MODE (dest);
  enum rtx_code cond_code;
  rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
  bool signedp;

  /* isel only works on SImode, and DImode on 64-bit targets.  */
  if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
    return 0;

  /* We still have to do the compare, because isel doesn't do a
     compare, it just looks at the CRx bits set by a previous compare
     instruction.  */
  condition_rtx = rs6000_generate_compare (op, mode);
  cond_code = GET_CODE (condition_rtx);
  cr = XEXP (condition_rtx, 0);
  /* CCmode indicates a signed compare was generated; other CC modes
     (e.g. unsigned) select the unsigned isel pattern.  */
  signedp = GET_MODE (cr) == CCmode;

  isel_func = (mode == SImode
	       ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
	       : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));

  switch (cond_code)
    {
    case LT: case GT: case LTU: case GTU: case EQ:
      /* isel handles these directly.  */
      break;

    default:
      /* We need to swap the sense of the comparison.  */
      {
	std::swap (false_cond, true_cond);
	PUT_CODE (condition_rtx, reverse_condition (cond_code));
      }
      break;
    }

  false_cond = force_reg (mode, false_cond);
  if (true_cond != const0_rtx)
    true_cond = force_reg (mode, true_cond);

  emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));

  return 1;
}
14989 void
14990 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
14992 machine_mode mode = GET_MODE (op0);
14993 enum rtx_code c;
14994 rtx target;
14996 /* VSX/altivec have direct min/max insns. */
14997 if ((code == SMAX || code == SMIN)
14998 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
14999 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15001 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15002 return;
15005 if (code == SMAX || code == SMIN)
15006 c = GE;
15007 else
15008 c = GEU;
15010 if (code == SMAX || code == UMAX)
15011 target = emit_conditional_move (dest, c, op0, op1, mode,
15012 op0, op1, mode, 0);
15013 else
15014 target = emit_conditional_move (dest, c, op0, op1, mode,
15015 op1, op0, mode, 0);
15016 gcc_assert (target);
15017 if (target != dest)
15018 emit_move_insn (dest, target);
15021 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15022 COND is true. Mark the jump as unlikely to be taken. */
15024 static void
15025 emit_unlikely_jump (rtx cond, rtx label)
15027 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15028 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15029 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15032 /* A subroutine of the atomic operation splitters. Emit a load-locked
15033 instruction in MODE. For QI/HImode, possibly use a pattern than includes
15034 the zero_extend operation. */
15036 static void
15037 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15039 rtx (*fn) (rtx, rtx) = NULL;
15041 switch (mode)
15043 case E_QImode:
15044 fn = gen_load_lockedqi;
15045 break;
15046 case E_HImode:
15047 fn = gen_load_lockedhi;
15048 break;
15049 case E_SImode:
15050 if (GET_MODE (mem) == QImode)
15051 fn = gen_load_lockedqi_si;
15052 else if (GET_MODE (mem) == HImode)
15053 fn = gen_load_lockedhi_si;
15054 else
15055 fn = gen_load_lockedsi;
15056 break;
15057 case E_DImode:
15058 fn = gen_load_lockeddi;
15059 break;
15060 case E_TImode:
15061 fn = gen_load_lockedti;
15062 break;
15063 default:
15064 gcc_unreachable ();
15066 emit_insn (fn (reg, mem));
15069 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15070 instruction in MODE. */
15072 static void
15073 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15075 rtx (*fn) (rtx, rtx, rtx) = NULL;
15077 switch (mode)
15079 case E_QImode:
15080 fn = gen_store_conditionalqi;
15081 break;
15082 case E_HImode:
15083 fn = gen_store_conditionalhi;
15084 break;
15085 case E_SImode:
15086 fn = gen_store_conditionalsi;
15087 break;
15088 case E_DImode:
15089 fn = gen_store_conditionaldi;
15090 break;
15091 case E_TImode:
15092 fn = gen_store_conditionalti;
15093 break;
15094 default:
15095 gcc_unreachable ();
15098 /* Emit sync before stwcx. to address PPC405 Erratum. */
15099 if (PPC405_ERRATUM77)
15100 emit_insn (gen_hwsync ());
15102 emit_insn (fn (res, mem, val));
15105 /* Expand barriers before and after a load_locked/store_cond sequence. */
15107 static rtx
15108 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15110 rtx addr = XEXP (mem, 0);
15112 if (!legitimate_indirect_address_p (addr, reload_completed)
15113 && !legitimate_indexed_address_p (addr, reload_completed))
15115 addr = force_reg (Pmode, addr);
15116 mem = replace_equiv_address_nv (mem, addr);
15119 switch (model)
15121 case MEMMODEL_RELAXED:
15122 case MEMMODEL_CONSUME:
15123 case MEMMODEL_ACQUIRE:
15124 break;
15125 case MEMMODEL_RELEASE:
15126 case MEMMODEL_ACQ_REL:
15127 emit_insn (gen_lwsync ());
15128 break;
15129 case MEMMODEL_SEQ_CST:
15130 emit_insn (gen_hwsync ());
15131 break;
15132 default:
15133 gcc_unreachable ();
15135 return mem;
15138 static void
15139 rs6000_post_atomic_barrier (enum memmodel model)
15141 switch (model)
15143 case MEMMODEL_RELAXED:
15144 case MEMMODEL_CONSUME:
15145 case MEMMODEL_RELEASE:
15146 break;
15147 case MEMMODEL_ACQUIRE:
15148 case MEMMODEL_ACQ_REL:
15149 case MEMMODEL_SEQ_CST:
15150 emit_insn (gen_isync ());
15151 break;
15152 default:
15153 gcc_unreachable ();
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.
     0x18/0x10 are the maximum bit offsets of a byte/halfword within
     an aligned word.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word: low address bits
     scaled to a bit count (<< 3), masked to the valid range.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  /* On big-endian targets the subword sits at the opposite end of the
     word, so invert the shift within the valid range.  */
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
15204 /* A subroutine of the various atomic expanders. For sub-word operands,
15205 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
15207 static rtx
15208 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15210 rtx x;
15212 x = gen_reg_rtx (SImode);
15213 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15214 gen_rtx_NOT (SImode, mask),
15215 oldval)));
15217 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15219 return x;
15222 /* A subroutine of the various atomic expanders. For sub-word operands,
15223 extract WIDE to NARROW via SHIFT. */
15225 static void
15226 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15228 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15229 wide, 1, OPTAB_LIB_WIDEN);
15230 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
/* Expand an atomic compare and swap operation.

   OPERANDS are: bool result, output value, memory, expected value,
   new value, weak flag, success memory model, failure memory model.
   Expands to a load-locked/store-conditional retry loop.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  /* LABEL1 is the retry point of a strong CAS; a weak CAS does not loop.  */
  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  /* LABEL2 is the failure exit.  */
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      /* XOR each 64-bit half with the expected half, OR the results,
	 and compare that against zero.  */
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      /* Retry if the store-conditional lost the reservation.  */
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  /* Place the failure label before or after the trailing barrier
     depending on whether the failure model needs that barrier.  */
  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
/* Expand an atomic exchange operation.

   OPERANDS are: output (old value), memory, new value, memory model.
   Expands to a load-locked/store-conditional retry loop.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  /* Without lbarx/lharx, sub-word exchange is done as a masked SImode
     exchange on the containing aligned word.  */
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  /* Retry loop: load-locked, merge, store-conditional.  */
  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  /* Extract the sub-word old value from the containing word.  */
  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before of after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Work out whether the bits outside the subword need to be
	     protected via MASK inside the retry loop.  */
	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  /* Retry loop head.  */
  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      /* NOT here means NAND: ~(before & val).  */
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    {
      after = expand_simple_binop (mode, code, before, val,
				   after, 1, OPTAB_LIB_WIDEN);
    }

  x = after;
  if (mask)
    {
      /* Keep only the subword bits of the result, then merge them back
	 into the untouched bits of the original word.  */
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs (reg, mode);
  /* Pick the per-register move mode based on which register file the
     hard register lives in.  */
  if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	(TARGET_HARD_FLOAT ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      for (i = nregs - 1; i >= 0; i--)
	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						     i * reg_mode_size),
				simplify_gen_subreg (reg_mode, src, mode,
						     i * reg_mode_size)));
    }
  else
    {
      int i;
      /* J is the subword index to move first (see below); -1 so the
	 first ++j yields 0.  */
      int j = -1;
      bool used_update = false;
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  /* Rewrite auto-increment/non-offsettable source addresses into
	     a plain base register so each subword can be loaded with a
	     simple offset.  */
	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      /* Load the first word with a load-with-update, which
			 also performs the address modification.  */
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  /* Compute the address into the first destination
		     register, then load relative to it.  */
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx,
						       nsrc)
				: gen_movsi_si_update (breg, breg, delta_rtx,
						       nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx,
						    nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  /* Indexed address: fold the offset into the base register
		     for the duration of the move and undo it afterwards.  */
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  /* r0 reads as zero in address arithmetic; make sure it is
		     not the base.  */
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg,
						   offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
	}

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						       j * reg_mode_size),
				  simplify_gen_subreg (reg_mode, src, mode,
						       j * reg_mode_size)));
	}
      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
/* Alias set used for TOC references; -1 until lazily allocated.  */
static GTY(()) alias_set_type TOC_alias_set = -1;

/* Return the alias set for the TOC, allocating it on first use.  */

alias_set_type
get_TOC_alias_set (void)
{
  if (TOC_alias_set == -1)
    TOC_alias_set = new_alias_set ();
  return TOC_alias_set;
}
15796 /* The mode the ABI uses for a word. This is not the same as word_mode
15797 for -m32 -mpowerpc64. This is used to implement various target hooks. */
15799 static scalar_int_mode
15800 rs6000_abi_word_mode (void)
15802 return TARGET_32BIT ? SImode : DImode;
15805 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15806 static char *
15807 rs6000_offload_options (void)
15809 if (TARGET_64BIT)
15810 return xstrdup ("-foffload-abi=lp64");
15811 else
15812 return xstrdup ("-foffload-abi=ilp32");
15816 /* A quick summary of the various types of 'constant-pool tables'
15817 under PowerPC:
15819 Target Flags Name One table per
15820 AIX (none) AIX TOC object file
15821 AIX -mfull-toc AIX TOC object file
15822 AIX -mminimal-toc AIX minimal TOC translation unit
15823 SVR4/EABI (none) SVR4 SDATA object file
15824 SVR4/EABI -fpic SVR4 pic object file
15825 SVR4/EABI -fPIC SVR4 PIC translation unit
15826 SVR4/EABI -mrelocatable EABI TOC function
15827 SVR4/EABI -maix AIX TOC object file
15828 SVR4/EABI -maix -mminimal-toc
15829 AIX minimal TOC translation unit
15831 Name Reg. Set by entries contains:
15832 made by addrs? fp? sum?
15834 AIX TOC 2 crt0 as Y option option
15835 AIX minimal TOC 30 prolog gcc Y Y option
15836 SVR4 SDATA 13 crt0 gcc N Y N
15837 SVR4 pic 30 prolog ld Y not yet N
15838 SVR4 PIC 30 prolog gcc Y option option
15839 EABI TOC 30 prolog gcc Y option option
/* Hash functions for the hash table.  */

/* Return a hash value for constant K, recursing into its operands.
   The multipliers 613 and 1231 are the mixing primes used throughout;
   the top-level seed combines the rtx code and mode.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  /* A few codes get special-cased before the generic format walk.  */
  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;

    case CODE_LABEL:
      /* Skip the first operands of a CODE_LABEL; hash from index 3 on.  */
      fidx = 3;
      break;

    default:
      break;
    }

  /* Generic walk over the rtx's operands driven by its format string.  */
  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    /* Fold a wide int into the hash one 'unsigned' chunk
	       at a time.  */
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
15924 hashval_t
15925 toc_hasher::hash (toc_hash_struct *thc)
15927 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
15930 /* Compare H1 and H2 for equivalence. */
15932 bool
15933 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
15935 rtx r1 = h1->key;
15936 rtx r2 = h2->key;
15938 if (h1->key_mode != h2->key_mode)
15939 return 0;
15941 return rtx_equal_p (r1, r2);
/* These are the names given by the C++ front-end to vtables, and
   vtable-like objects.  Ideally, this logic should not be here;
   instead, there should be some programmatic way of inquiring as
   to whether or not an object is a vtable.

   NAME is the symbol name to test; it may be evaluated more than
   once, so pass a side-effect-free expression.  (The previous
   definition ignored its parameter and instead captured whatever
   variable happened to be spelled `name' at the expansion site.)  */

#define VTABLE_NAME_P(NAME) \
  (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
   || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
   || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
   || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
   || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
15956 #ifdef NO_DOLLAR_IN_LABEL
15957 /* Return a GGC-allocated character string translating dollar signs in
15958 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
15960 const char *
15961 rs6000_xcoff_strip_dollar (const char *name)
15963 char *strip, *p;
15964 const char *q;
15965 size_t len;
15967 q = (const char *) strchr (name, '$');
15969 if (q == 0 || q == name)
15970 return name;
15972 len = strlen (name);
15973 strip = XALLOCAVEC (char, len + 1);
15974 strcpy (strip, name);
15975 p = strip + (q - name);
15976 while (p)
15978 *p = '_';
15979 p = strchr (p + 1, '$');
15982 return ggc_alloc_string (strip, len);
15984 #endif
15986 void
15987 rs6000_output_symbol_ref (FILE *file, rtx x)
15989 const char *name = XSTR (x, 0);
15991 /* Currently C++ toc references to vtables can be emitted before it
15992 is decided whether the vtable is public or private. If this is
15993 the case, then the linker will eventually complain that there is
15994 a reference to an unknown section. Thus, for vtables only,
15995 we emit the TOC reference to reference the identifier and not the
15996 symbol. */
15997 if (VTABLE_NAME_P (name))
15999 RS6000_OUTPUT_BASENAME (file, name);
16001 else
16002 assemble_name (file, name);
16005 /* Output a TOC entry. We derive the entry name from what is being
16006 written. */
16008 void
16009 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16011 char buf[256];
16012 const char *name = buf;
16013 rtx base = x;
16014 HOST_WIDE_INT offset = 0;
16016 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16018 /* When the linker won't eliminate them, don't output duplicate
16019 TOC entries (this happens on AIX if there is any kind of TOC,
16020 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16021 CODE_LABELs. */
16022 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16024 struct toc_hash_struct *h;
16026 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16027 time because GGC is not initialized at that point. */
16028 if (toc_hash_table == NULL)
16029 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16031 h = ggc_alloc<toc_hash_struct> ();
16032 h->key = x;
16033 h->key_mode = mode;
16034 h->labelno = labelno;
16036 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16037 if (*found == NULL)
16038 *found = h;
16039 else /* This is indeed a duplicate.
16040 Set this label equal to that label. */
16042 fputs ("\t.set ", file);
16043 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16044 fprintf (file, "%d,", labelno);
16045 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16046 fprintf (file, "%d\n", ((*found)->labelno));
16048 #ifdef HAVE_AS_TLS
16049 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16050 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16051 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16053 fputs ("\t.set ", file);
16054 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16055 fprintf (file, "%d,", labelno);
16056 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16057 fprintf (file, "%d\n", ((*found)->labelno));
16059 #endif
16060 return;
16064 /* If we're going to put a double constant in the TOC, make sure it's
16065 aligned properly when strict alignment is on. */
16066 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16067 && STRICT_ALIGNMENT
16068 && GET_MODE_BITSIZE (mode) >= 64
16069 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
16070 ASM_OUTPUT_ALIGN (file, 3);
16073 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16075 /* Handle FP constants specially. Note that if we have a minimal
16076 TOC, things we put here aren't actually in the TOC, so we can allow
16077 FP constants. */
16078 if (CONST_DOUBLE_P (x)
16079 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16080 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16082 long k[4];
16084 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16085 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16086 else
16087 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16089 if (TARGET_64BIT)
16091 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16092 fputs (DOUBLE_INT_ASM_OP, file);
16093 else
16094 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16095 k[0] & 0xffffffff, k[1] & 0xffffffff,
16096 k[2] & 0xffffffff, k[3] & 0xffffffff);
16097 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16098 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16099 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16100 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16101 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16102 return;
16104 else
16106 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16107 fputs ("\t.long ", file);
16108 else
16109 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16110 k[0] & 0xffffffff, k[1] & 0xffffffff,
16111 k[2] & 0xffffffff, k[3] & 0xffffffff);
16112 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16113 k[0] & 0xffffffff, k[1] & 0xffffffff,
16114 k[2] & 0xffffffff, k[3] & 0xffffffff);
16115 return;
16118 else if (CONST_DOUBLE_P (x)
16119 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16121 long k[2];
16123 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16124 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16125 else
16126 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16128 if (TARGET_64BIT)
16130 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16131 fputs (DOUBLE_INT_ASM_OP, file);
16132 else
16133 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16134 k[0] & 0xffffffff, k[1] & 0xffffffff);
16135 fprintf (file, "0x%lx%08lx\n",
16136 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16137 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16138 return;
16140 else
16142 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16143 fputs ("\t.long ", file);
16144 else
16145 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16146 k[0] & 0xffffffff, k[1] & 0xffffffff);
16147 fprintf (file, "0x%lx,0x%lx\n",
16148 k[0] & 0xffffffff, k[1] & 0xffffffff);
16149 return;
16152 else if (CONST_DOUBLE_P (x)
16153 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16155 long l;
16157 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16158 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16159 else
16160 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16162 if (TARGET_64BIT)
16164 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16165 fputs (DOUBLE_INT_ASM_OP, file);
16166 else
16167 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16168 if (WORDS_BIG_ENDIAN)
16169 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16170 else
16171 fprintf (file, "0x%lx\n", l & 0xffffffff);
16172 return;
16174 else
16176 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16177 fputs ("\t.long ", file);
16178 else
16179 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16180 fprintf (file, "0x%lx\n", l & 0xffffffff);
16181 return;
16184 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16186 unsigned HOST_WIDE_INT low;
16187 HOST_WIDE_INT high;
16189 low = INTVAL (x) & 0xffffffff;
16190 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16192 /* TOC entries are always Pmode-sized, so when big-endian
16193 smaller integer constants in the TOC need to be padded.
16194 (This is still a win over putting the constants in
16195 a separate constant pool, because then we'd have
16196 to have both a TOC entry _and_ the actual constant.)
16198 For a 32-bit target, CONST_INT values are loaded and shifted
16199 entirely within `low' and can be stored in one TOC entry. */
16201 /* It would be easy to make this work, but it doesn't now. */
16202 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16204 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16206 low |= high << 32;
16207 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16208 high = (HOST_WIDE_INT) low >> 32;
16209 low &= 0xffffffff;
16212 if (TARGET_64BIT)
16214 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16215 fputs (DOUBLE_INT_ASM_OP, file);
16216 else
16217 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16218 (long) high & 0xffffffff, (long) low & 0xffffffff);
16219 fprintf (file, "0x%lx%08lx\n",
16220 (long) high & 0xffffffff, (long) low & 0xffffffff);
16221 return;
16223 else
16225 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16227 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16228 fputs ("\t.long ", file);
16229 else
16230 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16231 (long) high & 0xffffffff, (long) low & 0xffffffff);
16232 fprintf (file, "0x%lx,0x%lx\n",
16233 (long) high & 0xffffffff, (long) low & 0xffffffff);
16235 else
16237 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16238 fputs ("\t.long ", file);
16239 else
16240 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16241 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16243 return;
16247 if (GET_CODE (x) == CONST)
16249 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16250 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16252 base = XEXP (XEXP (x, 0), 0);
16253 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16256 switch (GET_CODE (base))
16258 case SYMBOL_REF:
16259 name = XSTR (base, 0);
16260 break;
16262 case LABEL_REF:
16263 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16264 CODE_LABEL_NUMBER (XEXP (base, 0)));
16265 break;
16267 case CODE_LABEL:
16268 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16269 break;
16271 default:
16272 gcc_unreachable ();
16275 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16276 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16277 else
16279 fputs ("\t.tc ", file);
16280 RS6000_OUTPUT_BASENAME (file, name);
16282 if (offset < 0)
16283 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16284 else if (offset)
16285 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16287 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16288 after other TOC symbols, reducing overflow of small TOC access
16289 to [TC] symbols. */
16290 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16291 ? "[TE]," : "[TC],", file);
16294 /* Currently C++ toc references to vtables can be emitted before it
16295 is decided whether the vtable is public or private. If this is
16296 the case, then the linker will eventually complain that there is
16297 a TOC reference to an unknown section. Thus, for vtables only,
16298 we emit the TOC reference to reference the symbol and not the
16299 section. */
16300 if (VTABLE_NAME_P (name))
16302 RS6000_OUTPUT_BASENAME (file, name);
16303 if (offset < 0)
16304 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16305 else if (offset > 0)
16306 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16308 else
16309 output_addr_const (file, x);
16311 #if HAVE_AS_TLS
16312 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16314 switch (SYMBOL_REF_TLS_MODEL (base))
16316 case 0:
16317 break;
16318 case TLS_MODEL_LOCAL_EXEC:
16319 fputs ("@le", file);
16320 break;
16321 case TLS_MODEL_INITIAL_EXEC:
16322 fputs ("@ie", file);
16323 break;
16324 /* Use global-dynamic for local-dynamic. */
16325 case TLS_MODEL_GLOBAL_DYNAMIC:
16326 case TLS_MODEL_LOCAL_DYNAMIC:
16327 putc ('\n', file);
16328 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16329 fputs ("\t.tc .", file);
16330 RS6000_OUTPUT_BASENAME (file, name);
16331 fputs ("[TC],", file);
16332 output_addr_const (file, x);
16333 fputs ("@m", file);
16334 break;
16335 default:
16336 gcc_unreachable ();
16339 #endif
16341 putc ('\n', file);
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  /* State machine: OPEN_STRING is pending text that starts a quoted
     run, OPEN_DECIMAL is pending text before a numeric byte, and
     PENDING_CLOSE is what must be written to finish the current
     line, if anything.  QUOTED_LEN counts characters in the current
     quoted run so it can be broken before the assembler truncates.  */
  const char *open_string = "\t.byte \"";
  const char *open_decimal = "\t.byte ";
  const char *pending_close = NULL;
  int quoted_len = 0;

  for (int idx = 0; idx < n; idx++)
    {
      char ch = *p++;

      if (ch >= ' ' && ch < 0177)
        {
          /* Printable character: emit it inside a quoted run.  */
          if (open_string)
            fputs (open_string, file);
          putc (ch, file);

          /* Write two quotes to get one.  */
          if (ch == '"')
            {
              putc (ch, file);
              ++quoted_len;
            }

          open_string = NULL;
          open_decimal = "\"\n\t.byte ";
          pending_close = "\"\n";
          ++quoted_len;

          /* Break long quoted runs before the assembler truncates.  */
          if (quoted_len >= 512)
            {
              fputs (pending_close, file);

              open_string = "\t.byte \"";
              open_decimal = "\t.byte ";
              pending_close = NULL;
              quoted_len = 0;
            }
        }
      else
        {
          /* Unprintable character: emit its decimal value.  */
          if (open_decimal)
            fputs (open_decimal, file);
          fprintf (file, "%d", ch);

          open_string = "\n\t.byte \"";
          open_decimal = ", ";
          pending_close = "\n";
          quoted_len = 0;
        }
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (pending_close)
    fputs (pending_close, file);
}
16411 /* Generate a unique section name for FILENAME for a section type
16412 represented by SECTION_DESC. Output goes into BUF.
16414 SECTION_DESC can be any string, as long as it is different for each
16415 possible section type.
16417 We name the section in the same manner as xlc. The name begins with an
16418 underscore followed by the filename (after stripping any leading directory
16419 names) with the last period replaced by the string SECTION_DESC. If
16420 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16421 the name. */
16423 void
16424 rs6000_gen_section_name (char **buf, const char *filename,
16425 const char *section_desc)
16427 const char *q, *after_last_slash, *last_period = 0;
16428 char *p;
16429 int len;
16431 after_last_slash = filename;
16432 for (q = filename; *q; q++)
16434 if (*q == '/')
16435 after_last_slash = q + 1;
16436 else if (*q == '.')
16437 last_period = q;
16440 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16441 *buf = (char *) xmalloc (len);
16443 p = *buf;
16444 *p++ = '_';
16446 for (q = after_last_slash; *q; q++)
16448 if (q == last_period)
16450 strcpy (p, section_desc);
16451 p += strlen (section_desc);
16452 break;
16455 else if (ISALNUM (*q))
16456 *p++ = *q;
16459 if (last_period == 0)
16460 strcpy (p, section_desc);
16461 else
16462 *p = '\0';
16465 /* Emit profile function. */
16467 void
16468 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16470 /* Non-standard profiling for kernels, which just saves LR then calls
16471 _mcount without worrying about arg saves. The idea is to change
16472 the function prologue as little as possible as it isn't easy to
16473 account for arg save/restore code added just for _mcount. */
16474 if (TARGET_PROFILE_KERNEL)
16475 return;
16477 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16479 #ifndef NO_PROFILE_COUNTERS
16480 # define NO_PROFILE_COUNTERS 0
16481 #endif
16482 if (NO_PROFILE_COUNTERS)
16483 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16484 LCT_NORMAL, VOIDmode);
16485 else
16487 char buf[30];
16488 const char *label_name;
16489 rtx fun;
16491 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16492 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16493 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16495 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16496 LCT_NORMAL, VOIDmode, fun, Pmode);
16499 else if (DEFAULT_ABI == ABI_DARWIN)
16501 const char *mcount_name = RS6000_MCOUNT;
16502 int caller_addr_regno = LR_REGNO;
16504 /* Be conservative and always set this, at least for now. */
16505 crtl->uses_pic_offset_table = 1;
16507 #if TARGET_MACHO
16508 /* For PIC code, set up a stub and collect the caller's address
16509 from r0, which is where the prologue puts it. */
16510 if (MACHOPIC_INDIRECT
16511 && crtl->uses_pic_offset_table)
16512 caller_addr_regno = 0;
16513 #endif
16514 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16515 LCT_NORMAL, VOIDmode,
16516 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16520 /* Write function profiler code. */
16522 void
16523 output_function_profiler (FILE *file, int labelno)
16525 char buf[100];
16527 switch (DEFAULT_ABI)
16529 default:
16530 gcc_unreachable ();
16532 case ABI_V4:
16533 if (!TARGET_32BIT)
16535 warning (0, "no profiling of 64-bit code for this ABI");
16536 return;
16538 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16539 fprintf (file, "\tmflr %s\n", reg_names[0]);
16540 if (NO_PROFILE_COUNTERS)
16542 asm_fprintf (file, "\tstw %s,4(%s)\n",
16543 reg_names[0], reg_names[1]);
16545 else if (TARGET_SECURE_PLT && flag_pic)
16547 if (TARGET_LINK_STACK)
16549 char name[32];
16550 get_ppc476_thunk_name (name);
16551 asm_fprintf (file, "\tbl %s\n", name);
16553 else
16554 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16555 asm_fprintf (file, "\tstw %s,4(%s)\n",
16556 reg_names[0], reg_names[1]);
16557 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16558 asm_fprintf (file, "\taddis %s,%s,",
16559 reg_names[12], reg_names[12]);
16560 assemble_name (file, buf);
16561 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16562 assemble_name (file, buf);
16563 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16565 else if (flag_pic == 1)
16567 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16568 asm_fprintf (file, "\tstw %s,4(%s)\n",
16569 reg_names[0], reg_names[1]);
16570 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16571 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16572 assemble_name (file, buf);
16573 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16575 else if (flag_pic > 1)
16577 asm_fprintf (file, "\tstw %s,4(%s)\n",
16578 reg_names[0], reg_names[1]);
16579 /* Now, we need to get the address of the label. */
16580 if (TARGET_LINK_STACK)
16582 char name[32];
16583 get_ppc476_thunk_name (name);
16584 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16585 assemble_name (file, buf);
16586 fputs ("-.\n1:", file);
16587 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16588 asm_fprintf (file, "\taddi %s,%s,4\n",
16589 reg_names[11], reg_names[11]);
16591 else
16593 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16594 assemble_name (file, buf);
16595 fputs ("-.\n1:", file);
16596 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16598 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16599 reg_names[0], reg_names[11]);
16600 asm_fprintf (file, "\tadd %s,%s,%s\n",
16601 reg_names[0], reg_names[0], reg_names[11]);
16603 else
16605 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16606 assemble_name (file, buf);
16607 fputs ("@ha\n", file);
16608 asm_fprintf (file, "\tstw %s,4(%s)\n",
16609 reg_names[0], reg_names[1]);
16610 asm_fprintf (file, "\tla %s,", reg_names[0]);
16611 assemble_name (file, buf);
16612 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16615 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16616 fprintf (file, "\tbl %s%s\n",
16617 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16618 break;
16620 case ABI_AIX:
16621 case ABI_ELFv2:
16622 case ABI_DARWIN:
16623 /* Don't do anything, done in output_profile_hook (). */
16624 break;
16630 /* The following variable value is the last issued insn.  */
16632 static rtx_insn *last_scheduled_insn;
16634 /* The following variable helps to balance issuing of load and
16635 store instructions.  */
16637 static int load_store_pendulum;
16639 /* The following variable helps pair divide insns during scheduling.  */
16640 static int divide_cnt;
16641 /* The following variable helps pair and alternate vector and vector load
16642 insns during scheduling.  */
16643 static int vec_pairing;
16646 /* Power4 load update and store update instructions are cracked into a
16647 load or store and an integer insn which are executed in the same cycle.
16648 Branches have their own dispatch slot which does not count against the
16649 GCC issue rate, but it changes the program flow so there are no other
16650 instructions to issue in this cycle. */
16652 static int
16653 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16655 last_scheduled_insn = insn;
16656 if (GET_CODE (PATTERN (insn)) == USE
16657 || GET_CODE (PATTERN (insn)) == CLOBBER)
16659 cached_can_issue_more = more;
16660 return cached_can_issue_more;
16663 if (insn_terminates_group_p (insn, current_group))
16665 cached_can_issue_more = 0;
16666 return cached_can_issue_more;
16669 /* If no reservation, but reach here */
16670 if (recog_memoized (insn) < 0)
16671 return more;
16673 if (rs6000_sched_groups)
16675 if (is_microcoded_insn (insn))
16676 cached_can_issue_more = 0;
16677 else if (is_cracked_insn (insn))
16678 cached_can_issue_more = more > 2 ? more - 2 : 0;
16679 else
16680 cached_can_issue_more = more - 1;
16682 return cached_can_issue_more;
16685 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16686 return 0;
16688 cached_can_issue_more = more - 1;
16689 return cached_can_issue_more;
16692 static int
16693 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16695 int r = rs6000_variable_issue_1 (insn, more);
16696 if (verbose)
16697 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16698 return r;
16701 /* Adjust the cost of a scheduling dependency. Return the new cost of
16702 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16704 static int
16705 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16706 unsigned int)
16708 enum attr_type attr_type;
16710 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16711 return cost;
16713 switch (dep_type)
16715 case REG_DEP_TRUE:
16717 /* Data dependency; DEP_INSN writes a register that INSN reads
16718 some cycles later. */
16720 /* Separate a load from a narrower, dependent store. */
16721 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16722 || rs6000_tune == PROCESSOR_FUTURE)
16723 && GET_CODE (PATTERN (insn)) == SET
16724 && GET_CODE (PATTERN (dep_insn)) == SET
16725 && MEM_P (XEXP (PATTERN (insn), 1))
16726 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16727 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16728 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16729 return cost + 14;
16731 attr_type = get_attr_type (insn);
16733 switch (attr_type)
16735 case TYPE_JMPREG:
16736 /* Tell the first scheduling pass about the latency between
16737 a mtctr and bctr (and mtlr and br/blr). The first
16738 scheduling pass will not know about this latency since
16739 the mtctr instruction, which has the latency associated
16740 to it, will be generated by reload. */
16741 return 4;
16742 case TYPE_BRANCH:
16743 /* Leave some extra cycles between a compare and its
16744 dependent branch, to inhibit expensive mispredicts. */
16745 if ((rs6000_tune == PROCESSOR_PPC603
16746 || rs6000_tune == PROCESSOR_PPC604
16747 || rs6000_tune == PROCESSOR_PPC604e
16748 || rs6000_tune == PROCESSOR_PPC620
16749 || rs6000_tune == PROCESSOR_PPC630
16750 || rs6000_tune == PROCESSOR_PPC750
16751 || rs6000_tune == PROCESSOR_PPC7400
16752 || rs6000_tune == PROCESSOR_PPC7450
16753 || rs6000_tune == PROCESSOR_PPCE5500
16754 || rs6000_tune == PROCESSOR_PPCE6500
16755 || rs6000_tune == PROCESSOR_POWER4
16756 || rs6000_tune == PROCESSOR_POWER5
16757 || rs6000_tune == PROCESSOR_POWER7
16758 || rs6000_tune == PROCESSOR_POWER8
16759 || rs6000_tune == PROCESSOR_POWER9
16760 || rs6000_tune == PROCESSOR_FUTURE
16761 || rs6000_tune == PROCESSOR_CELL)
16762 && recog_memoized (dep_insn)
16763 && (INSN_CODE (dep_insn) >= 0))
16765 switch (get_attr_type (dep_insn))
16767 case TYPE_CMP:
16768 case TYPE_FPCOMPARE:
16769 case TYPE_CR_LOGICAL:
16770 return cost + 2;
16771 case TYPE_EXTS:
16772 case TYPE_MUL:
16773 if (get_attr_dot (dep_insn) == DOT_YES)
16774 return cost + 2;
16775 else
16776 break;
16777 case TYPE_SHIFT:
16778 if (get_attr_dot (dep_insn) == DOT_YES
16779 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16780 return cost + 2;
16781 else
16782 break;
16783 default:
16784 break;
16786 break;
16788 case TYPE_STORE:
16789 case TYPE_FPSTORE:
16790 if ((rs6000_tune == PROCESSOR_POWER6)
16791 && recog_memoized (dep_insn)
16792 && (INSN_CODE (dep_insn) >= 0))
16795 if (GET_CODE (PATTERN (insn)) != SET)
16796 /* If this happens, we have to extend this to schedule
16797 optimally. Return default for now. */
16798 return cost;
16800 /* Adjust the cost for the case where the value written
16801 by a fixed point operation is used as the address
16802 gen value on a store. */
16803 switch (get_attr_type (dep_insn))
16805 case TYPE_LOAD:
16806 case TYPE_CNTLZ:
16808 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16809 return get_attr_sign_extend (dep_insn)
16810 == SIGN_EXTEND_YES ? 6 : 4;
16811 break;
16813 case TYPE_SHIFT:
16815 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16816 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16817 6 : 3;
16818 break;
16820 case TYPE_INTEGER:
16821 case TYPE_ADD:
16822 case TYPE_LOGICAL:
16823 case TYPE_EXTS:
16824 case TYPE_INSERT:
16826 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16827 return 3;
16828 break;
16830 case TYPE_STORE:
16831 case TYPE_FPLOAD:
16832 case TYPE_FPSTORE:
16834 if (get_attr_update (dep_insn) == UPDATE_YES
16835 && ! rs6000_store_data_bypass_p (dep_insn, insn))
16836 return 3;
16837 break;
16839 case TYPE_MUL:
16841 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16842 return 17;
16843 break;
16845 case TYPE_DIV:
16847 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16848 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
16849 break;
16851 default:
16852 break;
16855 break;
16857 case TYPE_LOAD:
16858 if ((rs6000_tune == PROCESSOR_POWER6)
16859 && recog_memoized (dep_insn)
16860 && (INSN_CODE (dep_insn) >= 0))
16863 /* Adjust the cost for the case where the value written
16864 by a fixed point instruction is used within the address
16865 gen portion of a subsequent load(u)(x) */
16866 switch (get_attr_type (dep_insn))
16868 case TYPE_LOAD:
16869 case TYPE_CNTLZ:
16871 if (set_to_load_agen (dep_insn, insn))
16872 return get_attr_sign_extend (dep_insn)
16873 == SIGN_EXTEND_YES ? 6 : 4;
16874 break;
16876 case TYPE_SHIFT:
16878 if (set_to_load_agen (dep_insn, insn))
16879 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16880 6 : 3;
16881 break;
16883 case TYPE_INTEGER:
16884 case TYPE_ADD:
16885 case TYPE_LOGICAL:
16886 case TYPE_EXTS:
16887 case TYPE_INSERT:
16889 if (set_to_load_agen (dep_insn, insn))
16890 return 3;
16891 break;
16893 case TYPE_STORE:
16894 case TYPE_FPLOAD:
16895 case TYPE_FPSTORE:
16897 if (get_attr_update (dep_insn) == UPDATE_YES
16898 && set_to_load_agen (dep_insn, insn))
16899 return 3;
16900 break;
16902 case TYPE_MUL:
16904 if (set_to_load_agen (dep_insn, insn))
16905 return 17;
16906 break;
16908 case TYPE_DIV:
16910 if (set_to_load_agen (dep_insn, insn))
16911 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
16912 break;
16914 default:
16915 break;
16918 break;
16920 case TYPE_FPLOAD:
16921 if ((rs6000_tune == PROCESSOR_POWER6)
16922 && get_attr_update (insn) == UPDATE_NO
16923 && recog_memoized (dep_insn)
16924 && (INSN_CODE (dep_insn) >= 0)
16925 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
16926 return 2;
16928 default:
16929 break;
16932 /* Fall out to return default cost. */
16934 break;
16936 case REG_DEP_OUTPUT:
16937 /* Output dependency; DEP_INSN writes a register that INSN writes some
16938 cycles later. */
16939 if ((rs6000_tune == PROCESSOR_POWER6)
16940 && recog_memoized (dep_insn)
16941 && (INSN_CODE (dep_insn) >= 0))
16943 attr_type = get_attr_type (insn);
16945 switch (attr_type)
16947 case TYPE_FP:
16948 case TYPE_FPSIMPLE:
16949 if (get_attr_type (dep_insn) == TYPE_FP
16950 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
16951 return 1;
16952 break;
16953 case TYPE_FPLOAD:
16954 if (get_attr_update (insn) == UPDATE_NO
16955 && get_attr_type (dep_insn) == TYPE_MFFGPR)
16956 return 2;
16957 break;
16958 default:
16959 break;
16962 /* Fall through, no cost for output dependency. */
16963 /* FALLTHRU */
16965 case REG_DEP_ANTI:
16966 /* Anti dependency; DEP_INSN reads a register that INSN writes some
16967 cycles later. */
16968 return 0;
16970 default:
16971 gcc_unreachable ();
16974 return cost;
16977 /* Debug version of rs6000_adjust_cost. */
16979 static int
16980 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
16981 int cost, unsigned int dw)
16983 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
16985 if (ret != cost)
16987 const char *dep;
16989 switch (dep_type)
16991 default: dep = "unknown depencency"; break;
16992 case REG_DEP_TRUE: dep = "data dependency"; break;
16993 case REG_DEP_OUTPUT: dep = "output dependency"; break;
16994 case REG_DEP_ANTI: dep = "anti depencency"; break;
16997 fprintf (stderr,
16998 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
16999 "%s, insn:\n", ret, cost, dep);
17001 debug_rtx (insn);
17004 return ret;
17007 /* The function returns a true if INSN is microcoded.
17008 Return false otherwise. */
17010 static bool
17011 is_microcoded_insn (rtx_insn *insn)
17013 if (!insn || !NONDEBUG_INSN_P (insn)
17014 || GET_CODE (PATTERN (insn)) == USE
17015 || GET_CODE (PATTERN (insn)) == CLOBBER)
17016 return false;
17018 if (rs6000_tune == PROCESSOR_CELL)
17019 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17021 if (rs6000_sched_groups
17022 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17024 enum attr_type type = get_attr_type (insn);
17025 if ((type == TYPE_LOAD
17026 && get_attr_update (insn) == UPDATE_YES
17027 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17028 || ((type == TYPE_LOAD || type == TYPE_STORE)
17029 && get_attr_update (insn) == UPDATE_YES
17030 && get_attr_indexed (insn) == INDEXED_YES)
17031 || type == TYPE_MFCR)
17032 return true;
17035 return false;
/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  /* Absent insns, debug insns and USE/CLOBBER patterns never occupy
     issue slots at all.  */
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  /* Cracking only applies to the dispatch-group processors
     (POWER4/POWER5).  The condition below enumerates the insn forms
     those processors split into two internal operations: algebraic
     (sign-extending) loads, non-indexed update-form loads and stores,
     update-form FP loads/stores, three-operand CR-logical ops, recording
     (dot-form) sign-extends / non-variable shifts / multiplies, divides,
     and 32-bit inserts.  */
  if (rs6000_sched_groups
      && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || (type == TYPE_CR_LOGICAL
	      && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}
17083 /* The function returns true if INSN can be issued only from
17084 the branch slot. */
17086 static bool
17087 is_branch_slot_insn (rtx_insn *insn)
17089 if (!insn || !NONDEBUG_INSN_P (insn)
17090 || GET_CODE (PATTERN (insn)) == USE
17091 || GET_CODE (PATTERN (insn)) == CLOBBER)
17092 return false;
17094 if (rs6000_sched_groups)
17096 enum attr_type type = get_attr_type (insn);
17097 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17098 return true;
17099 return false;
17102 return false;
17105 /* The function returns true if out_inst sets a value that is
17106 used in the address generation computation of in_insn */
17107 static bool
17108 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17110 rtx out_set, in_set;
17112 /* For performance reasons, only handle the simple case where
17113 both loads are a single_set. */
17114 out_set = single_set (out_insn);
17115 if (out_set)
17117 in_set = single_set (in_insn);
17118 if (in_set)
17119 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17122 return false;
17125 /* Try to determine base/offset/size parts of the given MEM.
17126 Return true if successful, false if all the values couldn't
17127 be determined.
17129 This function only looks for REG or REG+CONST address forms.
17130 REG+REG address form will return false. */
17132 static bool
17133 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17134 HOST_WIDE_INT *size)
17136 rtx addr_rtx;
17137 if MEM_SIZE_KNOWN_P (mem)
17138 *size = MEM_SIZE (mem);
17139 else
17140 return false;
17142 addr_rtx = (XEXP (mem, 0));
17143 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17144 addr_rtx = XEXP (addr_rtx, 1);
17146 *offset = 0;
17147 while (GET_CODE (addr_rtx) == PLUS
17148 && CONST_INT_P (XEXP (addr_rtx, 1)))
17150 *offset += INTVAL (XEXP (addr_rtx, 1));
17151 addr_rtx = XEXP (addr_rtx, 0);
17153 if (!REG_P (addr_rtx))
17154 return false;
17156 *base = addr_rtx;
17157 return true;
17160 /* The function returns true if the target storage location of
17161 mem1 is adjacent to the target storage location of mem2 */
17162 /* Return 1 if memory locations are adjacent. */
17164 static bool
17165 adjacent_mem_locations (rtx mem1, rtx mem2)
17167 rtx reg1, reg2;
17168 HOST_WIDE_INT off1, size1, off2, size2;
17170 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17171 && get_memref_parts (mem2, &reg2, &off2, &size2))
17172 return ((REGNO (reg1) == REGNO (reg2))
17173 && ((off1 + size1 == off2)
17174 || (off2 + size2 == off1)));
17176 return false;
17179 /* This function returns true if it can be determined that the two MEM
17180 locations overlap by at least 1 byte based on base reg/offset/size. */
17182 static bool
17183 mem_locations_overlap (rtx mem1, rtx mem2)
17185 rtx reg1, reg2;
17186 HOST_WIDE_INT off1, size1, off2, size2;
17188 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17189 && get_memref_parts (mem2, &reg2, &off2, &size2))
17190 return ((REGNO (reg1) == REGNO (reg2))
17191 && (((off1 <= off2) && (off1 + size1 > off2))
17192 || ((off2 <= off1) && (off2 + size2 > off1))));
17194 return false;
/* A C statement (sans semicolon) to update the integer scheduling
   priority INSN_PRIORITY (INSN).  Increase the priority to execute the
   INSN earlier, reduce the priority to execute INSN later.  Do not
   define this macro if you do not need to adjust the scheduling
   priorities of insns.  */

static int
rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
  rtx load_mem, str_mem;
  /* On machines (like the 750) which have asymmetric integer units,
     where one integer unit can do multiply and divides and the other
     can't, reduce the priority of multiply/divide so it is scheduled
     before other integer operations.  */

  /* Disabled experimental code, kept for reference.  */
#if 0
  if (! INSN_P (insn))
    return priority;

  if (GET_CODE (PATTERN (insn)) == USE)
    return priority;

  switch (rs6000_tune) {
  case PROCESSOR_PPC750:
    switch (get_attr_type (insn))
      {
      default:
	break;

      case TYPE_MUL:
      case TYPE_DIV:
	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
		 priority, priority);
	if (priority >= 0 && priority < 0x01000000)
	  priority >>= 3;
	break;
      }
  }
#endif

  if (insn_must_be_first_in_group (insn)
      && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
    {

      /* Prioritize insns that can be dispatched only in the first
	 dispatch slot.  */
      if (rs6000_sched_restricted_insns_priority == 1)
	/* Attach highest priority to insn.  This means that in
	   haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
	   precede 'priority' (critical path) considerations.  */
	return current_sched_info->sched_max_insns_priority;
      else if (rs6000_sched_restricted_insns_priority == 2)
	/* Increase priority of insn by a minimal amount.  This means that in
	   haifa-sched.c:ready_sort(), only 'priority' (critical path)
	   considerations precede dispatch-slot restriction considerations.  */
	return (priority + 1);
    }

  if (rs6000_tune == PROCESSOR_POWER6
      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
	  || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this instruction
       is a store.  Power6 wants loads and stores scheduled alternately
       when possible */
    return current_sched_info->sched_max_insns_priority;

  return priority;
}
17269 /* Return true if the instruction is nonpipelined on the Cell. */
17270 static bool
17271 is_nonpipeline_insn (rtx_insn *insn)
17273 enum attr_type type;
17274 if (!insn || !NONDEBUG_INSN_P (insn)
17275 || GET_CODE (PATTERN (insn)) == USE
17276 || GET_CODE (PATTERN (insn)) == CLOBBER)
17277 return false;
17279 type = get_attr_type (insn);
17280 if (type == TYPE_MUL
17281 || type == TYPE_DIV
17282 || type == TYPE_SDIV
17283 || type == TYPE_DDIV
17284 || type == TYPE_SSQRT
17285 || type == TYPE_DSQRT
17286 || type == TYPE_MFCR
17287 || type == TYPE_MFCRF
17288 || type == TYPE_MFJMPR)
17290 return true;
17292 return false;
17296 /* Return how many instructions the machine can issue per cycle. */
17298 static int
17299 rs6000_issue_rate (void)
17301 /* Unless scheduling for register pressure, use issue rate of 1 for
17302 first scheduling pass to decrease degradation. */
17303 if (!reload_completed && !flag_sched_pressure)
17304 return 1;
17306 switch (rs6000_tune) {
17307 case PROCESSOR_RS64A:
17308 case PROCESSOR_PPC601: /* ? */
17309 case PROCESSOR_PPC7450:
17310 return 3;
17311 case PROCESSOR_PPC440:
17312 case PROCESSOR_PPC603:
17313 case PROCESSOR_PPC750:
17314 case PROCESSOR_PPC7400:
17315 case PROCESSOR_PPC8540:
17316 case PROCESSOR_PPC8548:
17317 case PROCESSOR_CELL:
17318 case PROCESSOR_PPCE300C2:
17319 case PROCESSOR_PPCE300C3:
17320 case PROCESSOR_PPCE500MC:
17321 case PROCESSOR_PPCE500MC64:
17322 case PROCESSOR_PPCE5500:
17323 case PROCESSOR_PPCE6500:
17324 case PROCESSOR_TITAN:
17325 return 2;
17326 case PROCESSOR_PPC476:
17327 case PROCESSOR_PPC604:
17328 case PROCESSOR_PPC604e:
17329 case PROCESSOR_PPC620:
17330 case PROCESSOR_PPC630:
17331 return 4;
17332 case PROCESSOR_POWER4:
17333 case PROCESSOR_POWER5:
17334 case PROCESSOR_POWER6:
17335 case PROCESSOR_POWER7:
17336 return 5;
17337 case PROCESSOR_POWER8:
17338 return 7;
17339 case PROCESSOR_POWER9:
17340 case PROCESSOR_FUTURE:
17341 return 6;
17342 default:
17343 return 1;
17347 /* Return how many instructions to look ahead for better insn
17348 scheduling. */
17350 static int
17351 rs6000_use_sched_lookahead (void)
17353 switch (rs6000_tune)
17355 case PROCESSOR_PPC8540:
17356 case PROCESSOR_PPC8548:
17357 return 4;
17359 case PROCESSOR_CELL:
17360 return (reload_completed ? 8 : 0);
17362 default:
17363 return 0;
17367 /* We are choosing insn from the ready queue. Return zero if INSN can be
17368 chosen. */
17369 static int
17370 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17372 if (ready_index == 0)
17373 return 0;
17375 if (rs6000_tune != PROCESSOR_CELL)
17376 return 0;
17378 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17380 if (!reload_completed
17381 || is_nonpipeline_insn (insn)
17382 || is_microcoded_insn (insn))
17383 return 1;
17385 return 0;
17388 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17389 and return true. */
17391 static bool
17392 find_mem_ref (rtx pat, rtx *mem_ref)
17394 const char * fmt;
17395 int i, j;
17397 /* stack_tie does not produce any real memory traffic. */
17398 if (tie_operand (pat, VOIDmode))
17399 return false;
17401 if (MEM_P (pat))
17403 *mem_ref = pat;
17404 return true;
17407 /* Recursively process the pattern. */
17408 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17410 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17412 if (fmt[i] == 'e')
17414 if (find_mem_ref (XEXP (pat, i), mem_ref))
17415 return true;
17417 else if (fmt[i] == 'E')
17418 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17420 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17421 return true;
17425 return false;
17428 /* Determine if PAT is a PATTERN of a load insn. */
17430 static bool
17431 is_load_insn1 (rtx pat, rtx *load_mem)
17433 if (!pat || pat == NULL_RTX)
17434 return false;
17436 if (GET_CODE (pat) == SET)
17437 return find_mem_ref (SET_SRC (pat), load_mem);
17439 if (GET_CODE (pat) == PARALLEL)
17441 int i;
17443 for (i = 0; i < XVECLEN (pat, 0); i++)
17444 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17445 return true;
17448 return false;
17451 /* Determine if INSN loads from memory. */
17453 static bool
17454 is_load_insn (rtx insn, rtx *load_mem)
17456 if (!insn || !INSN_P (insn))
17457 return false;
17459 if (CALL_P (insn))
17460 return false;
17462 return is_load_insn1 (PATTERN (insn), load_mem);
17465 /* Determine if PAT is a PATTERN of a store insn. */
17467 static bool
17468 is_store_insn1 (rtx pat, rtx *str_mem)
17470 if (!pat || pat == NULL_RTX)
17471 return false;
17473 if (GET_CODE (pat) == SET)
17474 return find_mem_ref (SET_DEST (pat), str_mem);
17476 if (GET_CODE (pat) == PARALLEL)
17478 int i;
17480 for (i = 0; i < XVECLEN (pat, 0); i++)
17481 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17482 return true;
17485 return false;
17488 /* Determine if INSN stores to memory. */
17490 static bool
17491 is_store_insn (rtx insn, rtx *str_mem)
17493 if (!insn || !INSN_P (insn))
17494 return false;
17496 return is_store_insn1 (PATTERN (insn), str_mem);
17499 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17501 static bool
17502 is_power9_pairable_vec_type (enum attr_type type)
17504 switch (type)
17506 case TYPE_VECSIMPLE:
17507 case TYPE_VECCOMPLEX:
17508 case TYPE_VECDIV:
17509 case TYPE_VECCMP:
17510 case TYPE_VECPERM:
17511 case TYPE_VECFLOAT:
17512 case TYPE_VECFDIV:
17513 case TYPE_VECDOUBLE:
17514 return true;
17515 default:
17516 break;
17518 return false;
/* Returns whether the dependence between INSN and NEXT is considered
   costly by the given target.  COST is the dependence latency; DISTANCE
   is presumably the number of cycles already separating the two insns
   (NOTE(review): confirm against callers).  */

static bool
rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
  rtx insn;
  rtx next;
  rtx load_mem, str_mem;

  /* If the flag is not enabled - no dependence is considered costly;
     allow all dependent insns in the same group.
     This is the most aggressive option.  */
  if (rs6000_sched_costly_dep == no_dep_costly)
    return false;

  /* If the flag is set to 1 - a dependence is always considered costly;
     do not allow dependent instructions in the same group.
     This is the most conservative option.  */
  if (rs6000_sched_costly_dep == all_deps_costly)
    return true;

  insn = DEP_PRO (dep);
  next = DEP_CON (dep);

  if (rs6000_sched_costly_dep == store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem))
    /* Prevent load after store in the same group.  */
    return true;

  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem)
      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap(str_mem, load_mem))
    /* Prevent load after store in the same group if it is a true
       dependence.  */
    return true;

  /* The flag is set to X; dependences with latency >= X are considered costly,
     and will not be scheduled in the same group.  */
  if (rs6000_sched_costly_dep <= max_dep_latency
      && ((cost - distance) >= (int)rs6000_sched_costly_dep))
    return true;

  return false;
}
17570 /* Return the next insn after INSN that is found before TAIL is reached,
17571 skipping any "non-active" insns - insns that will not actually occupy
17572 an issue slot. Return NULL_RTX if such an insn is not found. */
17574 static rtx_insn *
17575 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17577 if (insn == NULL_RTX || insn == tail)
17578 return NULL;
17580 while (1)
17582 insn = NEXT_INSN (insn);
17583 if (insn == NULL_RTX || insn == tail)
17584 return NULL;
17586 if (CALL_P (insn)
17587 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17588 || (NONJUMP_INSN_P (insn)
17589 && GET_CODE (PATTERN (insn)) != USE
17590 && GET_CODE (PATTERN (insn)) != CLOBBER
17591 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17592 break;
17594 return insn;
17597 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17599 static int
17600 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
17602 int pos;
17603 int i;
17604 rtx_insn *tmp;
17605 enum attr_type type, type2;
17607 type = get_attr_type (last_scheduled_insn);
17609 /* Try to issue fixed point divides back-to-back in pairs so they will be
17610 routed to separate execution units and execute in parallel. */
17611 if (type == TYPE_DIV && divide_cnt == 0)
17613 /* First divide has been scheduled. */
17614 divide_cnt = 1;
17616 /* Scan the ready list looking for another divide, if found move it
17617 to the end of the list so it is chosen next. */
17618 pos = lastpos;
17619 while (pos >= 0)
17621 if (recog_memoized (ready[pos]) >= 0
17622 && get_attr_type (ready[pos]) == TYPE_DIV)
17624 tmp = ready[pos];
17625 for (i = pos; i < lastpos; i++)
17626 ready[i] = ready[i + 1];
17627 ready[lastpos] = tmp;
17628 break;
17630 pos--;
17633 else
17635 /* Last insn was the 2nd divide or not a divide, reset the counter. */
17636 divide_cnt = 0;
17638 /* The best dispatch throughput for vector and vector load insns can be
17639 achieved by interleaving a vector and vector load such that they'll
17640 dispatch to the same superslice. If this pairing cannot be achieved
17641 then it is best to pair vector insns together and vector load insns
17642 together.
17644 To aid in this pairing, vec_pairing maintains the current state with
17645 the following values:
17647 0 : Initial state, no vecload/vector pairing has been started.
17649 1 : A vecload or vector insn has been issued and a candidate for
17650 pairing has been found and moved to the end of the ready
17651 list. */
17652 if (type == TYPE_VECLOAD)
17654 /* Issued a vecload. */
17655 if (vec_pairing == 0)
17657 int vecload_pos = -1;
17658 /* We issued a single vecload, look for a vector insn to pair it
17659 with. If one isn't found, try to pair another vecload. */
17660 pos = lastpos;
17661 while (pos >= 0)
17663 if (recog_memoized (ready[pos]) >= 0)
17665 type2 = get_attr_type (ready[pos]);
17666 if (is_power9_pairable_vec_type (type2))
17668 /* Found a vector insn to pair with, move it to the
17669 end of the ready list so it is scheduled next. */
17670 tmp = ready[pos];
17671 for (i = pos; i < lastpos; i++)
17672 ready[i] = ready[i + 1];
17673 ready[lastpos] = tmp;
17674 vec_pairing = 1;
17675 return cached_can_issue_more;
17677 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
17678 /* Remember position of first vecload seen. */
17679 vecload_pos = pos;
17681 pos--;
17683 if (vecload_pos >= 0)
17685 /* Didn't find a vector to pair with but did find a vecload,
17686 move it to the end of the ready list. */
17687 tmp = ready[vecload_pos];
17688 for (i = vecload_pos; i < lastpos; i++)
17689 ready[i] = ready[i + 1];
17690 ready[lastpos] = tmp;
17691 vec_pairing = 1;
17692 return cached_can_issue_more;
17696 else if (is_power9_pairable_vec_type (type))
17698 /* Issued a vector operation. */
17699 if (vec_pairing == 0)
17701 int vec_pos = -1;
17702 /* We issued a single vector insn, look for a vecload to pair it
17703 with. If one isn't found, try to pair another vector. */
17704 pos = lastpos;
17705 while (pos >= 0)
17707 if (recog_memoized (ready[pos]) >= 0)
17709 type2 = get_attr_type (ready[pos]);
17710 if (type2 == TYPE_VECLOAD)
17712 /* Found a vecload insn to pair with, move it to the
17713 end of the ready list so it is scheduled next. */
17714 tmp = ready[pos];
17715 for (i = pos; i < lastpos; i++)
17716 ready[i] = ready[i + 1];
17717 ready[lastpos] = tmp;
17718 vec_pairing = 1;
17719 return cached_can_issue_more;
17721 else if (is_power9_pairable_vec_type (type2)
17722 && vec_pos == -1)
17723 /* Remember position of first vector insn seen. */
17724 vec_pos = pos;
17726 pos--;
17728 if (vec_pos >= 0)
17730 /* Didn't find a vecload to pair with but did find a vector
17731 insn, move it to the end of the ready list. */
17732 tmp = ready[vec_pos];
17733 for (i = vec_pos; i < lastpos; i++)
17734 ready[i] = ready[i + 1];
17735 ready[lastpos] = tmp;
17736 vec_pairing = 1;
17737 return cached_can_issue_more;
17742 /* We've either finished a vec/vecload pair, couldn't find an insn to
17743 continue the current pair, or the last insn had nothing to do with
17744 with pairing. In any case, reset the state. */
17745 vec_pairing = 0;
17748 return cached_can_issue_more;
17751 /* We are about to begin issuing insns for this clock cycle. */
17753 static int
17754 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
17755 rtx_insn **ready ATTRIBUTE_UNUSED,
17756 int *pn_ready ATTRIBUTE_UNUSED,
17757 int clock_var ATTRIBUTE_UNUSED)
17759 int n_ready = *pn_ready;
17761 if (sched_verbose)
17762 fprintf (dump, "// rs6000_sched_reorder :\n");
17764 /* Reorder the ready list, if the second to last ready insn
17765 is a nonepipeline insn. */
17766 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
17768 if (is_nonpipeline_insn (ready[n_ready - 1])
17769 && (recog_memoized (ready[n_ready - 2]) > 0))
17770 /* Simply swap first two insns. */
17771 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
17774 if (rs6000_tune == PROCESSOR_POWER6)
17775 load_store_pendulum = 0;
17777 return rs6000_issue_rate ();
/* Like rs6000_sched_reorder, but called after issuing each insn.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
		       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
	 issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
	 cycle and we attempt to locate another load in the ready list to
	 issue with it.

       - If the pendulum is -2, then two stores have already been
	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
	 cycle and we attempt to locate another store in the ready list to
	 issue with it, preferring a store to an adjacent memory location to
	 facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
	   load/store instructions which make use of the LSU and which
	   would need to be accounted for to strictly model the behavior
	   of the machine.  Those instructions are currently unaccounted
	   for to help minimize compile time overhead of this code.  */
  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
    {
      int pos;
      int i;
      rtx_insn *tmp;
      rtx load_mem, str_mem;

      if (is_store_insn (last_scheduled_insn, &str_mem))
	/* Issuing a store, swing the load_store_pendulum to the left */
	load_store_pendulum--;
      else if (is_load_insn (last_scheduled_insn, &load_mem))
	/* Issuing a load, swing the load_store_pendulum to the right */
	load_store_pendulum++;
      else
	return cached_can_issue_more;

      /* If the pendulum is balanced, or there is only one instruction on
	 the ready list, then all is well, so return. */
      if ((load_store_pendulum == 0) || (*pn_ready <= 1))
	return cached_can_issue_more;

      if (load_store_pendulum == 1)
	{
	  /* A load has been issued in this cycle.  Scan the ready list
	     for another load to issue with it */
	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_load_insn (ready[pos], &load_mem))
		{
		  /* Found a load.  Move it to the head of the ready list,
		     and adjust its priority so that it is more likely to
		     stay there */
		  tmp = ready[pos];
		  for (i=pos; i<*pn_ready-1; i++)
		    ready[i] = ready[i + 1];
		  ready[*pn_ready-1] = tmp;

		  if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
		    INSN_PRIORITY (tmp)++;
		  break;
		}
	      pos--;
	    }
	}
      else if (load_store_pendulum == -2)
	{
	  /* Two stores have been issued in this cycle.  Increase the
	     priority of the first load in the ready list to favor it for
	     issuing in the next cycle. */
	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_load_insn (ready[pos], &load_mem)
		  && !sel_sched_p ()
		  && INSN_PRIORITY_KNOWN (ready[pos]))
		{
		  INSN_PRIORITY (ready[pos])++;

		  /* Adjust the pendulum to account for the fact that a load
		     was found and increased in priority.  This is to prevent
		     increasing the priority of multiple loads */
		  load_store_pendulum--;

		  break;
		}
	      pos--;
	    }
	}
      else if (load_store_pendulum == -1)
	{
	  /* A store has been issued in this cycle.  Scan the ready list for
	     another store to issue with it, preferring a store to an adjacent
	     memory location */
	  int first_store_pos = -1;

	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_store_insn (ready[pos], &str_mem))
		{
		  rtx str_mem2;
		  /* Maintain the index of the first store found on the
		     list */
		  if (first_store_pos == -1)
		    first_store_pos = pos;

		  if (is_store_insn (last_scheduled_insn, &str_mem2)
		      && adjacent_mem_locations (str_mem, str_mem2))
		    {
		      /* Found an adjacent store.  Move it to the head of the
			 ready list, and adjust its priority so that it is
			 more likely to stay there */
		      tmp = ready[pos];
		      for (i=pos; i<*pn_ready-1; i++)
			ready[i] = ready[i + 1];
		      ready[*pn_ready-1] = tmp;

		      if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
			INSN_PRIORITY (tmp)++;

		      first_store_pos = -1;

		      break;
		    }
		}
	      pos--;
	    }

	  if (first_store_pos >= 0)
	    {
	      /* An adjacent store wasn't found, but a non-adjacent store was,
		 so move the non-adjacent store to the front of the ready
		 list, and adjust its priority so that it is more likely to
		 stay there. */
	      tmp = ready[first_store_pos];
	      for (i=first_store_pos; i<*pn_ready-1; i++)
		ready[i] = ready[i + 1];
	      ready[*pn_ready-1] = tmp;
	      if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
		INSN_PRIORITY (tmp)++;
	    }
	}
      else if (load_store_pendulum == 2)
	{
	  /* Two loads have been issued in this cycle.  Increase the priority
	     of the first store in the ready list to favor it for issuing in
	     the next cycle. */
	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_store_insn (ready[pos], &str_mem)
		  && !sel_sched_p ()
		  && INSN_PRIORITY_KNOWN (ready[pos]))
		{
		  INSN_PRIORITY (ready[pos])++;

		  /* Adjust the pendulum to account for the fact that a store
		     was found and increased in priority.  This is to prevent
		     increasing the priority of multiple stores */
		  load_store_pendulum++;

		  break;
		}
	      pos--;
	    }
	}
    }

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
17990 /* Return whether the presence of INSN causes a dispatch group termination
17991 of group WHICH_GROUP.
17993 If WHICH_GROUP == current_group, this function will return true if INSN
17994 causes the termination of the current group (i.e, the dispatch group to
17995 which INSN belongs). This means that INSN will be the last insn in the
17996 group it belongs to.
17998 If WHICH_GROUP == previous_group, this function will return true if INSN
17999 causes the termination of the previous group (i.e, the dispatch group that
18000 precedes the group to which INSN belongs). This means that INSN will be
18001 the first insn in the group it belongs to). */
18003 static bool
18004 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18006 bool first, last;
18008 if (! insn)
18009 return false;
18011 first = insn_must_be_first_in_group (insn);
18012 last = insn_must_be_last_in_group (insn);
18014 if (first && last)
18015 return true;
18017 if (which_group == current_group)
18018 return last;
18019 else if (which_group == previous_group)
18020 return first;
18022 return false;
/* Return true if INSN must be the first insn in its dispatch group on
   the current tuning target.  */

static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  /* Notes, debug insns and USE/CLOBBER patterns never constrain
     dispatch grouping.  */
  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
	return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
	return true;

      if (!rs6000_sched_groups)
	return false;

      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_CR_LOGICAL:
	case TYPE_MTJMPR:
	case TYPE_MFJMPR:
	case TYPE_DIV:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	  return true;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_EXTS:
	case TYPE_CNTLZ:
	case TYPE_TRAP:
	case TYPE_MUL:
	case TYPE_INSERT:
	case TYPE_FPCOMPARE:
	case TYPE_MFCR:
	case TYPE_MTCR:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_SHIFT:
	  if (get_attr_dot (insn) == DOT_NO
	      || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	    return true;
	  else
	    break;
	case TYPE_DIV:
	  if (get_attr_size (insn) == SIZE_32)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_DIV:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_MUL:
	case TYPE_SHIFT:
	case TYPE_EXTS:
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_SYNC:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_VECSTORE:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_SHIFT:
	case TYPE_EXTS:
	case TYPE_MUL:
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
/* Return true if INSN must be the last insn in its dispatch group on
   the current tuning target.  */

static bool
insn_must_be_last_in_group (rtx_insn *insn)
{
  enum attr_type type;

  /* Notes, debug insns and USE/CLOBBER patterns never constrain
     dispatch grouping.  */
  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_tune) {
  case PROCESSOR_POWER4:
  case PROCESSOR_POWER5:
    if (is_microcoded_insn (insn))
      return true;

    if (is_branch_slot_insn (insn))
      return true;

    break;
  case PROCESSOR_POWER6:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_EXTS:
      case TYPE_CNTLZ:
      case TYPE_TRAP:
      case TYPE_MUL:
      case TYPE_FPCOMPARE:
      case TYPE_MFCR:
      case TYPE_MTCR:
      case TYPE_MFJMPR:
      case TYPE_MTJMPR:
      case TYPE_ISYNC:
      case TYPE_SYNC:
      case TYPE_LOAD_L:
      case TYPE_STORE_C:
	return true;
      case TYPE_SHIFT:
	if (get_attr_dot (insn) == DOT_NO
	    || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  return true;
	else
	  break;
      case TYPE_DIV:
	if (get_attr_size (insn) == SIZE_32)
	  return true;
	else
	  break;
      default:
	break;
      }
    break;
  case PROCESSOR_POWER7:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_ISYNC:
      case TYPE_SYNC:
      case TYPE_LOAD_L:
      case TYPE_STORE_C:
	return true;
      case TYPE_LOAD:
	if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	    && get_attr_update (insn) == UPDATE_YES)
	  return true;
	else
	  break;
      case TYPE_STORE:
	if (get_attr_update (insn) == UPDATE_YES
	    && get_attr_indexed (insn) == INDEXED_YES)
	  return true;
	else
	  break;
      default:
	break;
      }
    break;
  case PROCESSOR_POWER8:
    type = get_attr_type (insn);

    switch (type)
      {
      case TYPE_MFCR:
      case TYPE_MTCR:
      case TYPE_ISYNC:
      case TYPE_SYNC:
      case TYPE_LOAD_L:
      case TYPE_STORE_C:
	return true;
      case TYPE_LOAD:
	if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	    && get_attr_update (insn) == UPDATE_YES)
	  return true;
	else
	  break;
      case TYPE_STORE:
	if (get_attr_update (insn) == UPDATE_YES
	    && get_attr_indexed (insn) == INDEXED_YES)
	  return true;
	else
	  break;
      default:
	break;
      }
    break;
  default:
    break;
  }

  return false;
}
18318 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18319 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18321 static bool
18322 is_costly_group (rtx *group_insns, rtx next_insn)
18324 int i;
18325 int issue_rate = rs6000_issue_rate ();
18327 for (i = 0; i < issue_rate; i++)
18329 sd_iterator_def sd_it;
18330 dep_t dep;
18331 rtx insn = group_insns[i];
18333 if (!insn)
18334 continue;
18336 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18338 rtx next = DEP_CON (dep);
18340 if (next == next_insn
18341 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18342 return true;
18346 return false;
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert_sched_nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).  */

static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
		 rtx_insn *next_insn, bool *group_end, int can_issue_more,
		 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
	     *group_count ,can_issue_more);

  /* Scheme (1): insert exactly as many nops as needed to force NEXT_INSN
     into a fresh dispatch group.  */
  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
	can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
	 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
	 a branch.  If next_insn is a branch, we insert 'can_issue_more' nops;
	 in this case the last nop will start a new group and the branch
	 will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
	can_issue_more--;

      /* Do we have a special group ending nop?  */
      if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
	  || rs6000_tune == PROCESSOR_POWER8)
	{
	  nop = gen_group_ending_nop ();
	  emit_insn_before (nop, next_insn);
	  can_issue_more = 0;
	}
      else
	while (can_issue_more > 0)
	  {
	    nop = gen_nop ();
	    emit_insn_before (nop, next_insn);
	    can_issue_more--;
	  }

      *group_end = true;
      return 0;
    }

  /* Scheme (2): insert exactly rs6000_sched_insert_nops nops, tracking
     group boundaries as they fill up.  */
  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
	 issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
	{
	  can_issue_more = issue_rate - 1;
	  (*group_count)++;
	  end = true;
	  for (i = 0; i < issue_rate; i++)
	    {
	      group_insns[i] = 0;
	    }
	}

      while (n_nops > 0)
	{
	  nop = gen_nop ();
	  emit_insn_before (nop, next_insn);
	  if (can_issue_more == issue_rate - 1) /* new group begins */
	    end = false;
	  can_issue_more--;
	  if (can_issue_more == 0)
	    {
	      can_issue_more = issue_rate - 1;
	      (*group_count)++;
	      end = true;
	      for (i = 0; i < issue_rate; i++)
		{
		  group_insns[i] = 0;
		}
	    }

	  n_nops--;
	}

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
	= (end
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate &&
	       insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
	(*group_count)--;

      if (sched_verbose > 6)
	fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
		 *group_count, can_issue_more);
      return can_issue_more;
    }

  return can_issue_more;
}
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_goups' which tries to force the scheduler's grouping on the
   processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form
   as follows:  It keeps track of how many vacant issue slots are available
   after each insn.  A subsequent insn will start a new group if one of the
   following 4 cases applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or less issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - less than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.  */

static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
		 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    {
      group_insns[i] = 0;
    }
  can_issue_more = issue_rate;
  slot = 0;
  insn = get_next_active_insn (prev_head_insn, tail);
  group_end = false;

  while (insn != NULL_RTX)
    {
      /* Record INSN in the slot it occupies within the current group.  */
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
	can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
	return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
	= (can_issue_more == 0
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate &&
	       insn_terminates_group_p (next_insn, previous_group)));

      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
					next_insn, &group_end, can_issue_more,
					&group_count);

      if (group_end)
	{
	  group_count++;
	  can_issue_more = 0;
	  for (i = 0; i < issue_rate; i++)
	    {
	      group_insns[i] = 0;
	    }
	}

      /* Re-mark the boundary on NEXT_INSN: TImode flags an insn that
	 starts a new dispatch group, VOIDmode one that continues the
	 current group.  */
      if (GET_MODE (next_insn) == TImode && can_issue_more)
	PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
	PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
    } /* while */

  return group_count;
}
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
	    rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      /* A TImode NEXT_INSN marks a scheduler-chosen group boundary.  */
      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
	break;

      if (group_end)
	{
	  /* If the scheduler had marked group termination at this location
	     (between insn and next_insn), and neither insn nor next_insn will
	     force group termination, pad the group with nops to force group
	     termination.  */
	  if (can_issue_more
	      && (rs6000_sched_insert_nops == sched_finish_pad_groups)
	      && !insn_terminates_group_p (insn, current_group)
	      && !insn_terminates_group_p (next_insn, previous_group))
	    {
	      /* Leave the branch slot free when NEXT_INSN is a branch.  */
	      if (!is_branch_slot_insn (next_insn))
		can_issue_more--;

	      while (can_issue_more)
		{
		  nop = gen_nop ();
		  emit_insn_before (nop, next_insn);
		  can_issue_more--;
		}
	    }

	  can_issue_more = issue_rate;
	  group_count++;
	}

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
18649 /* We're beginning a new block. Initialize data structures as necessary. */
18651 static void
18652 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18653 int sched_verbose ATTRIBUTE_UNUSED,
18654 int max_ready ATTRIBUTE_UNUSED)
18656 last_scheduled_insn = NULL;
18657 load_store_pendulum = 0;
18658 divide_cnt = 0;
18659 vec_pairing = 0;
18662 /* The following function is called at the end of scheduling BB.
18663 After reload, it inserts nops at insn group bundling. */
18665 static void
18666 rs6000_sched_finish (FILE *dump, int sched_verbose)
18668 int n_groups;
18670 if (sched_verbose)
18671 fprintf (dump, "=== Finishing schedule.\n");
18673 if (reload_completed && rs6000_sched_groups)
18675 /* Do not run sched_finish hook when selective scheduling enabled. */
18676 if (sel_sched_p ())
18677 return;
18679 if (rs6000_sched_insert_nops == sched_finish_none)
18680 return;
18682 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18683 n_groups = pad_groups (dump, sched_verbose,
18684 current_sched_info->prev_head,
18685 current_sched_info->next_tail);
18686 else
18687 n_groups = redefine_groups (dump, sched_verbose,
18688 current_sched_info->prev_head,
18689 current_sched_info->next_tail);
18691 if (sched_verbose >= 6)
18693 fprintf (dump, "ngroups = %d\n", n_groups);
18694 print_rtl (dump, current_sched_info->prev_head);
18695 fprintf (dump, "Done finish_sched\n");
/* Snapshot of the scheduler's global state, saved and restored by the
   *_sched_context hooks below (used by the selective scheduler).  Each
   field mirrors the file-scope variable of the same name.  */
struct rs6000_sched_context
{
  short cached_can_issue_more;	  /* Mirrors global cached_can_issue_more.  */
  rtx_insn *last_scheduled_insn;  /* Mirrors global last_scheduled_insn.  */
  int load_store_pendulum;	  /* Mirrors global load_store_pendulum.  */
  int divide_cnt;		  /* Mirrors global divide_cnt.  */
  int vec_pairing;		  /* Mirrors global vec_pairing.  */
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;
18712 /* Allocate store for new scheduling context. */
18713 static void *
18714 rs6000_alloc_sched_context (void)
18716 return xmalloc (sizeof (rs6000_sched_context_def));
18719 /* If CLEAN_P is true then initializes _SC with clean data,
18720 and from the global context otherwise. */
18721 static void
18722 rs6000_init_sched_context (void *_sc, bool clean_p)
18724 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18726 if (clean_p)
18728 sc->cached_can_issue_more = 0;
18729 sc->last_scheduled_insn = NULL;
18730 sc->load_store_pendulum = 0;
18731 sc->divide_cnt = 0;
18732 sc->vec_pairing = 0;
18734 else
18736 sc->cached_can_issue_more = cached_can_issue_more;
18737 sc->last_scheduled_insn = last_scheduled_insn;
18738 sc->load_store_pendulum = load_store_pendulum;
18739 sc->divide_cnt = divide_cnt;
18740 sc->vec_pairing = vec_pairing;
18744 /* Sets the global scheduling context to the one pointed to by _SC. */
18745 static void
18746 rs6000_set_sched_context (void *_sc)
18748 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18750 gcc_assert (sc != NULL);
18752 cached_can_issue_more = sc->cached_can_issue_more;
18753 last_scheduled_insn = sc->last_scheduled_insn;
18754 load_store_pendulum = sc->load_store_pendulum;
18755 divide_cnt = sc->divide_cnt;
18756 vec_pairing = sc->vec_pairing;
/* Free the scheduling context _SC allocated by
   rs6000_alloc_sched_context.  */
static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
18768 static bool
18769 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18771 switch (get_attr_type (insn))
18773 case TYPE_DIV:
18774 case TYPE_SDIV:
18775 case TYPE_DDIV:
18776 case TYPE_VECDIV:
18777 case TYPE_SSQRT:
18778 case TYPE_DSQRT:
18779 return false;
18781 default:
18782 return true;
18786 /* Length in units of the trampoline for entering a nested function. */
18789 rs6000_trampoline_size (void)
18791 int ret = 0;
18793 switch (DEFAULT_ABI)
18795 default:
18796 gcc_unreachable ();
18798 case ABI_AIX:
18799 ret = (TARGET_32BIT) ? 12 : 24;
18800 break;
18802 case ABI_ELFv2:
18803 gcc_assert (!TARGET_32BIT);
18804 ret = 32;
18805 break;
18807 case ABI_DARWIN:
18808 case ABI_V4:
18809 ret = (TARGET_32BIT) ? 40 : 48;
18810 break;
18813 return ret;
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  int regsize = (TARGET_32BIT) ? 4 : 8;	/* Bytes per pointer word.  */
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx ctx_reg = force_reg (Pmode, cxt);
  rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    /* Under AIX, just build the 3 word function descriptor */
    case ABI_AIX:
      {
	rtx fnmem, fn_reg, toc_reg;

	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
	  error ("you cannot take the address of a nested function if you use "
		 "the %qs option", "-mno-pointers-to-nested-functions");

	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
	fn_reg = gen_reg_rtx (Pmode);
	toc_reg = gen_reg_rtx (Pmode);

	/* Macro to shorten the code expansions below.  */
# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)

	m_tramp = replace_equiv_address (m_tramp, addr);

	/* Load the callee's entry point and TOC pointer out of its
	   descriptor, then store them plus the static chain into the
	   trampoline's own descriptor.  */
	emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
	emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
	emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
	emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
	emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);

# undef MEM_PLUS
      }
      break;

    /* Under V.4/eabi/darwin, __trampoline_setup does the real work.  */
    case ABI_ELFv2:
    case ABI_DARWIN:
    case ABI_V4:
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
			 LCT_NORMAL, VOIDmode,
			 addr, Pmode,
			 GEN_INT (rs6000_trampoline_size ()), SImode,
			 fnaddr, Pmode,
			 ctx_reg, Pmode);
      break;
    }
}
18876 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
18877 identifier as an argument, so the front end shouldn't look it up. */
18879 static bool
18880 rs6000_attribute_takes_identifier_p (const_tree attr_id)
18882 return is_attribute_p ("altivec", attr_id);
/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

	__attribute__((altivec(vector__)))
	__attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
	__attribute__((altivec(bool__)))	(always followed by 'unsigned')

   and may appear more than once (e.g., 'vector bool char') in a
   given declaration.  */

static tree
rs6000_handle_altivec_attribute (tree *node,
				 tree name ATTRIBUTE_UNUSED,
				 tree args,
				 int flags ATTRIBUTE_UNUSED,
				 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  /* First letter of the attribute argument: 'v' (vector__), 'b' (bool__)
     or 'p' (pixel__); '?' when absent or not an identifier.  */
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
	&& TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  /* Strip pointers, functions, methods and arrays to reach the
     underlying element type.  */
  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      if (type == long_unsigned_type_node || type == long_integer_type_node)
	{
	  if (TARGET_64BIT)
	    error ("use of %<long%> in AltiVec types is invalid for "
		   "64-bit code without %qs", "-mvsx");
	  else if (rs6000_warn_altivec_long)
	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
		     "use %<int%>");
	}
      else if (type == long_long_unsigned_type_node
	       || type == long_long_integer_type_node)
	error ("use of %<long long%> in AltiVec types is invalid without %qs",
	       "-mvsx");
      else if (type == double_type_node)
	error ("use of %<double%> in AltiVec types is invalid without %qs",
	       "-mvsx");
    }

  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
	{
	case E_TImode:
	  result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
	  break;
	case E_DImode:
	  result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
	  break;
	case E_SImode:
	  result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
	  break;
	case E_HImode:
	  result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
	  break;
	case E_QImode:
	  result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
	  break;
	case E_SFmode: result = V4SF_type_node; break;
	case E_DFmode: result = V2DF_type_node; break;
	  /* If the user says 'vector int bool', we may be handed the 'bool'
	     attribute _before_ the 'vector' attribute, and so select the
	     proper type in the 'b' case below.  */
	case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
	case E_V2DImode: case E_V2DFmode:
	  result = type;
	  /* FALLTHRU */
	default: break;
	}
      break;
    case 'b':
      switch (mode)
	{
	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
	  /* FALLTHRU */
	default: break;
	}
      break;
    case 'p':
      switch (mode)
	{
	case E_V8HImode: result = pixel_V8HI_type_node;
	  /* FALLTHRU */
	default: break;
	}
      /* FALLTHRU */
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
19012 /* AltiVec defines five built-in scalar types that serve as vector
19013 elements; we must teach the compiler how to mangle them. The 128-bit
19014 floating point mangling is target-specific as well. */
19016 static const char *
19017 rs6000_mangle_type (const_tree type)
19019 type = TYPE_MAIN_VARIANT (type);
19021 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19022 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19023 return NULL;
19025 if (type == bool_char_type_node) return "U6__boolc";
19026 if (type == bool_short_type_node) return "U6__bools";
19027 if (type == pixel_type_node) return "u7__pixel";
19028 if (type == bool_int_type_node) return "U6__booli";
19029 if (type == bool_long_long_type_node) return "U6__boolx";
19031 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19032 return "g";
19033 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19034 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19036 /* For all other types, use the default mangling. */
19037 return NULL;
19040 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19041 struct attribute_spec.handler. */
19043 static tree
19044 rs6000_handle_longcall_attribute (tree *node, tree name,
19045 tree args ATTRIBUTE_UNUSED,
19046 int flags ATTRIBUTE_UNUSED,
19047 bool *no_add_attrs)
19049 if (TREE_CODE (*node) != FUNCTION_TYPE
19050 && TREE_CODE (*node) != FIELD_DECL
19051 && TREE_CODE (*node) != TYPE_DECL)
19053 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19054 name);
19055 *no_add_attrs = true;
19058 return NULL_TREE;
19061 /* Set longcall attributes on all functions declared when
19062 rs6000_default_long_calls is true. */
19063 static void
19064 rs6000_set_default_type_attributes (tree type)
19066 if (rs6000_default_long_calls
19067 && (TREE_CODE (type) == FUNCTION_TYPE
19068 || TREE_CODE (type) == METHOD_TYPE))
19069 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19070 NULL_TREE,
19071 TYPE_ATTRIBUTES (type));
19073 #if TARGET_MACHO
19074 darwin_set_default_type_attributes (type);
19075 #endif
/* Return a reference suitable for calling a function with the
   longcall attribute.  */

static rtx
rs6000_longcall_ref (rtx call_ref, rtx arg)
{
  /* System V adds '.' to the internal name, so skip them.  */
  const char *call_name = XSTR (call_ref, 0);
  if (*call_name == '.')
    {
      while (*call_name == '.')
	call_name++;

      tree node = get_identifier (call_name);
      call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
    }

  if (TARGET_PLTSEQ)
    {
      rtx base = const0_rtx;
      int regno = 12;
      /* PC-relative addressing: a single UNSPEC_PLT_PCREL set of r12
	 materializes the call target.  */
      if (rs6000_pcrel_p (cfun))
	{
	  rtx reg = gen_rtx_REG (Pmode, regno);
	  rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
				  UNSPEC_PLT_PCREL);
	  emit_insn (gen_rtx_SET (reg, u));
	  return reg;
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	base = gen_rtx_REG (Pmode, TOC_REGISTER);
      else
	{
	  if (flag_pic)
	    base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  regno = 11;
	}

      /* Reg must match that used by linker PLT stubs.  For ELFv2, r12
	 may be used by a function global entry point.  For SysV4, r11
	 is used by __glink_PLTresolve lazy resolver entry.  */
      /* Emit the high-adjusted part then the low 16 bits of the PLT
	 entry address into the same register.  */
      rtx reg = gen_rtx_REG (Pmode, regno);
      rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
			       UNSPEC_PLT16_HA);
      rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
			       UNSPEC_PLT16_LO);
      emit_insn (gen_rtx_SET (reg, hi));
      emit_insn (gen_rtx_SET (reg, lo));
      return reg;
    }

  return force_reg (Pmode, call_ref);
}
19132 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19133 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19134 #endif
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
rs6000_handle_struct_attribute (tree *node, tree name,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      /* Of the decls, only TYPE_DECLs carry a type to attach to.  */
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  /* The attribute only makes sense on struct/union types.  */
  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  /* ms_struct and gcc_struct are mutually exclusive.  */
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
19172 static bool
19173 rs6000_ms_bitfield_layout_p (const_tree record_type)
19175 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
19176 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19177 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19180 #ifdef USING_ELFOS_H
/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      /* Minimal TOC on AIX-style ABIs: on first use, emit the real .toc
	 anchor (LCTOC0 label and an LCTOC1 TC entry) once, then place the
	 LCTOC1 base in the minimal-TOC section, biased by 32768
	 (presumably so signed 16-bit offsets span the section — confirm
	 against the TOC addressing patterns).  */
      if (!toc_initialized)
	{
	  fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
	  fprintf (asm_out_file, "\t.tc ");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, "\n");

	  fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
      else
	fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      /* Full TOC: switch to .toc; align it only the first time.  */
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  toc_initialized = 1;
	}
    }
  else
    {
      /* Other ABIs always use the minimal-TOC section; emit the biased
	 LCTOC1 anchor on first use.  */
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
    }
}
19231 /* Implement TARGET_ASM_INIT_SECTIONS. */
19233 static void
19234 rs6000_elf_asm_init_sections (void)
19236 toc_section
19237 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19239 sdata2_section
19240 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19241 SDATA2_SECTION_ASM_OP);
19244 /* Implement TARGET_SELECT_RTX_SECTION. */
19246 static section *
19247 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19248 unsigned HOST_WIDE_INT align)
19250 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19251 return toc_section;
19252 else
19253 return default_elf_select_rtx_section (mode, x, align);
/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */

static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      /* Build "." + name in a stack buffer (len + '.' + NUL = len + 2),
	 then intern the result in GC-managed storage.  */
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}
/* Return true if SECTION names the section TEMPL itself or a
   '.'-separated subsection of it (e.g. ".sdata" matches ".sdata"
   and ".sdata.foo" but not ".sdata2").  */
static inline bool
compare_section_name (const char *section, const char *templ)
{
  size_t n = strlen (templ);

  if (strncmp (section, templ, n) != 0)
    return false;

  return section[n] == '\0' || section[n] == '.';
}
/* Return true if DECL should be placed in one of the small data
   sections, per the rs6000_sdata setting and -G threshold.  */
bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      /* Honor an explicit section placement: anything placed in one of
	 the small-data sections counts as small data.  */
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
	  || compare_section_name (section, ".sdata2")
	  || compare_section_name (section, ".gnu.linkonce.s")
	  || compare_section_name (section, ".sbss")
	  || compare_section_name (section, ".sbss2")
	  || compare_section_name (section, ".gnu.linkonce.sb")
	  || strcmp (section, ".PPC.EMB.sdata0") == 0
	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
	return true;
    }
  else
    {
      /* If we are told not to put readonly data in sdata, then don't.  */
      if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
	  && !rs6000_readonly_in_sdata)
	return false;

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it there,
	     there's no need to put it in the small data section.  */
	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
	return true;
    }

  return false;
}
19341 #endif /* USING_ELFOS_H */
19343 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19345 static bool
19346 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19348 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19351 /* Do not place thread-local symbols refs in the object blocks. */
19353 static bool
19354 rs6000_use_blocks_for_decl_p (const_tree decl)
19356 return !DECL_THREAD_LOCAL_P (decl);
19359 /* Return a REG that occurs in ADDR with coefficient 1.
19360 ADDR can be effectively incremented by incrementing REG.
19362 r0 is special and we must not select it as an address
19363 register by this routine since our caller will try to
19364 increment the returned register via an "la" instruction. */
19367 find_addr_reg (rtx addr)
19369 while (GET_CODE (addr) == PLUS)
19371 if (REG_P (XEXP (addr, 0))
19372 && REGNO (XEXP (addr, 0)) != 0)
19373 addr = XEXP (addr, 0);
19374 else if (REG_P (XEXP (addr, 1))
19375 && REGNO (XEXP (addr, 1)) != 0)
19376 addr = XEXP (addr, 1);
19377 else if (CONSTANT_P (XEXP (addr, 0)))
19378 addr = XEXP (addr, 1);
19379 else if (CONSTANT_P (XEXP (addr, 1)))
19380 addr = XEXP (addr, 0);
19381 else
19382 gcc_unreachable ();
19384 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19385 return addr;
19388 void
19389 rs6000_fatal_bad_address (rtx op)
19391 fatal_insn ("bad address", op);
19394 #if TARGET_MACHO
19396 vec<branch_island, va_gc> *branch_islands;
19398 /* Remember to generate a branch island for far calls to the given
19399 function. */
19401 static void
19402 add_compiler_branch_island (tree label_name, tree function_name,
19403 int line_number)
19405 branch_island bi = {function_name, label_name, line_number};
19406 vec_safe_push (branch_islands, bi);
19409 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
19410 already there or not. */
19412 static int
19413 no_previous_def (tree function_name)
19415 branch_island *bi;
19416 unsigned ix;
19418 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19419 if (function_name == bi->function_name)
19420 return 0;
19421 return 1;
19424 /* GET_PREV_LABEL gets the label name from the previous definition of
19425 the function. */
19427 static tree
19428 get_prev_label (tree function_name)
19430 branch_island *bi;
19431 unsigned ix;
19433 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19434 if (function_name == bi->function_name)
19435 return bi->label_name;
19436 return NULL_TREE;
19439 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19441 void
19442 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19444 unsigned int length;
19445 char *symbol_name, *lazy_ptr_name;
19446 char *local_label_0;
19447 static unsigned label = 0;
19449 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19450 symb = (*targetm.strip_name_encoding) (symb);
19452 length = strlen (symb);
19453 symbol_name = XALLOCAVEC (char, length + 32);
19454 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19456 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19457 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19459 if (MACHOPIC_PURE)
19461 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19462 fprintf (file, "\t.align 5\n");
19464 fprintf (file, "%s:\n", stub);
19465 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19467 label++;
19468 local_label_0 = XALLOCAVEC (char, 16);
19469 sprintf (local_label_0, "L%u$spb", label);
19471 fprintf (file, "\tmflr r0\n");
19472 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19473 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19474 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19475 lazy_ptr_name, local_label_0);
19476 fprintf (file, "\tmtlr r0\n");
19477 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19478 (TARGET_64BIT ? "ldu" : "lwzu"),
19479 lazy_ptr_name, local_label_0);
19480 fprintf (file, "\tmtctr r12\n");
19481 fprintf (file, "\tbctr\n");
19483 else /* mdynamic-no-pic or mkernel. */
19485 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19486 fprintf (file, "\t.align 4\n");
19488 fprintf (file, "%s:\n", stub);
19489 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19491 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19492 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19493 (TARGET_64BIT ? "ldu" : "lwzu"),
19494 lazy_ptr_name);
19495 fprintf (file, "\tmtctr r12\n");
19496 fprintf (file, "\tbctr\n");
19499 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19500 fprintf (file, "%s:\n", lazy_ptr_name);
19501 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19502 fprintf (file, "%sdyld_stub_binding_helper\n",
19503 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19506 /* Legitimize PIC addresses. If the address is already
19507 position-independent, we return ORIG. Newly generated
19508 position-independent addresses go into a reg. This is REG if non
19509 zero, otherwise we allocate register(s) as necessary. */
19511 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
19514 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19515 rtx reg)
19517 rtx base, offset;
19519 if (reg == NULL && !reload_completed)
19520 reg = gen_reg_rtx (Pmode);
19522 if (GET_CODE (orig) == CONST)
19524 rtx reg_temp;
19526 if (GET_CODE (XEXP (orig, 0)) == PLUS
19527 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19528 return orig;
19530 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19532 /* Use a different reg for the intermediate value, as
19533 it will be marked UNCHANGING. */
19534 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19535 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19536 Pmode, reg_temp);
19537 offset =
19538 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19539 Pmode, reg);
19541 if (CONST_INT_P (offset))
19543 if (SMALL_INT (offset))
19544 return plus_constant (Pmode, base, INTVAL (offset));
19545 else if (!reload_completed)
19546 offset = force_reg (Pmode, offset);
19547 else
19549 rtx mem = force_const_mem (Pmode, orig);
19550 return machopic_legitimize_pic_address (mem, Pmode, reg);
19553 return gen_rtx_PLUS (Pmode, base, offset);
19556 /* Fall back on generic machopic code. */
19557 return machopic_legitimize_pic_address (orig, mode, reg);
19560 /* Output a .machine directive for the Darwin assembler, and call
19561 the generic start_file routine. */
19563 static void
19564 rs6000_darwin_file_start (void)
19566 static const struct
19568 const char *arg;
19569 const char *name;
19570 HOST_WIDE_INT if_set;
19571 } mapping[] = {
19572 { "ppc64", "ppc64", MASK_64BIT },
19573 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19574 { "power4", "ppc970", 0 },
19575 { "G5", "ppc970", 0 },
19576 { "7450", "ppc7450", 0 },
19577 { "7400", "ppc7400", MASK_ALTIVEC },
19578 { "G4", "ppc7400", 0 },
19579 { "750", "ppc750", 0 },
19580 { "740", "ppc750", 0 },
19581 { "G3", "ppc750", 0 },
19582 { "604e", "ppc604e", 0 },
19583 { "604", "ppc604", 0 },
19584 { "603e", "ppc603", 0 },
19585 { "603", "ppc603", 0 },
19586 { "601", "ppc601", 0 },
19587 { NULL, "ppc", 0 } };
19588 const char *cpu_id = "";
19589 size_t i;
19591 rs6000_file_start ();
19592 darwin_file_start ();
19594 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19596 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19597 cpu_id = rs6000_default_cpu;
19599 if (global_options_set.x_rs6000_cpu_index)
19600 cpu_id = processor_target_table[rs6000_cpu_index].name;
19602 /* Look through the mapping array. Pick the first name that either
19603 matches the argument, has a bit set in IF_SET that is also set
19604 in the target flags, or has a NULL name. */
19606 i = 0;
19607 while (mapping[i].arg != NULL
19608 && strcmp (mapping[i].arg, cpu_id) != 0
19609 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19610 i++;
19612 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
19615 #endif /* TARGET_MACHO */
19617 #if TARGET_ELF
19618 static int
19619 rs6000_elf_reloc_rw_mask (void)
19621 if (flag_pic)
19622 return 3;
19623 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19624 return 2;
19625 else
19626 return 0;
19629 /* Record an element in the table of global constructors. SYMBOL is
19630 a SYMBOL_REF of the function to be called; PRIORITY is a number
19631 between 0 and MAX_INIT_PRIORITY.
19633 This differs from default_named_section_asm_out_constructor in
19634 that we have special handling for -mrelocatable. */
19636 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19637 static void
19638 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19640 const char *section = ".ctors";
19641 char buf[18];
19643 if (priority != DEFAULT_INIT_PRIORITY)
19645 sprintf (buf, ".ctors.%.5u",
19646 /* Invert the numbering so the linker puts us in the proper
19647 order; constructors are run from right to left, and the
19648 linker sorts in increasing order. */
19649 MAX_INIT_PRIORITY - priority);
19650 section = buf;
19653 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19654 assemble_align (POINTER_SIZE);
19656 if (DEFAULT_ABI == ABI_V4
19657 && (TARGET_RELOCATABLE || flag_pic > 1))
19659 fputs ("\t.long (", asm_out_file);
19660 output_addr_const (asm_out_file, symbol);
19661 fputs (")@fixup\n", asm_out_file);
19663 else
19664 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19667 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19668 static void
19669 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19671 const char *section = ".dtors";
19672 char buf[18];
19674 if (priority != DEFAULT_INIT_PRIORITY)
19676 sprintf (buf, ".dtors.%.5u",
19677 /* Invert the numbering so the linker puts us in the proper
19678 order; constructors are run from right to left, and the
19679 linker sorts in increasing order. */
19680 MAX_INIT_PRIORITY - priority);
19681 section = buf;
19684 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19685 assemble_align (POINTER_SIZE);
19687 if (DEFAULT_ABI == ABI_V4
19688 && (TARGET_RELOCATABLE || flag_pic > 1))
19690 fputs ("\t.long (", asm_out_file);
19691 output_addr_const (asm_out_file, symbol);
19692 fputs (")@fixup\n", asm_out_file);
19694 else
19695 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19698 void
19699 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19701 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19703 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19704 ASM_OUTPUT_LABEL (file, name);
19705 fputs (DOUBLE_INT_ASM_OP, file);
19706 rs6000_output_function_entry (file, name);
19707 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19708 if (DOT_SYMBOLS)
19710 fputs ("\t.size\t", file);
19711 assemble_name (file, name);
19712 fputs (",24\n\t.type\t.", file);
19713 assemble_name (file, name);
19714 fputs (",@function\n", file);
19715 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19717 fputs ("\t.globl\t.", file);
19718 assemble_name (file, name);
19719 putc ('\n', file);
19722 else
19723 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19724 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19725 rs6000_output_function_entry (file, name);
19726 fputs (":\n", file);
19727 return;
19730 int uses_toc;
19731 if (DEFAULT_ABI == ABI_V4
19732 && (TARGET_RELOCATABLE || flag_pic > 1)
19733 && !TARGET_SECURE_PLT
19734 && (!constant_pool_empty_p () || crtl->profile)
19735 && (uses_toc = uses_TOC ()))
19737 char buf[256];
19739 if (uses_toc == 2)
19740 switch_to_other_text_partition ();
19741 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19743 fprintf (file, "\t.long ");
19744 assemble_name (file, toc_label_name);
19745 need_toc_init = 1;
19746 putc ('-', file);
19747 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19748 assemble_name (file, buf);
19749 putc ('\n', file);
19750 if (uses_toc == 2)
19751 switch_to_other_text_partition ();
19754 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19755 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19757 if (TARGET_CMODEL == CMODEL_LARGE
19758 && rs6000_global_entry_point_prologue_needed_p ())
19760 char buf[256];
19762 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19764 fprintf (file, "\t.quad .TOC.-");
19765 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19766 assemble_name (file, buf);
19767 putc ('\n', file);
19770 if (DEFAULT_ABI == ABI_AIX)
19772 const char *desc_name, *orig_name;
19774 orig_name = (*targetm.strip_name_encoding) (name);
19775 desc_name = orig_name;
19776 while (*desc_name == '.')
19777 desc_name++;
19779 if (TREE_PUBLIC (decl))
19780 fprintf (file, "\t.globl %s\n", desc_name);
19782 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19783 fprintf (file, "%s:\n", desc_name);
19784 fprintf (file, "\t.long %s\n", orig_name);
19785 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19786 fputs ("\t.long 0\n", file);
19787 fprintf (file, "\t.previous\n");
19789 ASM_OUTPUT_LABEL (file, name);
19792 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19793 static void
19794 rs6000_elf_file_end (void)
19796 #ifdef HAVE_AS_GNU_ATTRIBUTE
19797 /* ??? The value emitted depends on options active at file end.
19798 Assume anyone using #pragma or attributes that might change
19799 options knows what they are doing. */
19800 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
19801 && rs6000_passes_float)
19803 int fp;
19805 if (TARGET_HARD_FLOAT)
19806 fp = 1;
19807 else
19808 fp = 2;
19809 if (rs6000_passes_long_double)
19811 if (!TARGET_LONG_DOUBLE_128)
19812 fp |= 2 * 4;
19813 else if (TARGET_IEEEQUAD)
19814 fp |= 3 * 4;
19815 else
19816 fp |= 1 * 4;
19818 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
19820 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
19822 if (rs6000_passes_vector)
19823 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
19824 (TARGET_ALTIVEC_ABI ? 2 : 1));
19825 if (rs6000_returns_struct)
19826 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
19827 aix_struct_return ? 2 : 1);
19829 #endif
19830 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
19831 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
19832 file_end_indicate_exec_stack ();
19833 #endif
19835 if (flag_split_stack)
19836 file_end_indicate_split_stack ();
19838 if (cpu_builtin_p)
19840 /* We have expanded a CPU builtin, so we need to emit a reference to
19841 the special symbol that LIBC uses to declare it supports the
19842 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
19843 switch_to_section (data_section);
19844 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
19845 fprintf (asm_out_file, "\t%s %s\n",
19846 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
19849 #endif
19851 #if TARGET_XCOFF
19853 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19854 #define HAVE_XCOFF_DWARF_EXTRAS 0
19855 #endif
19857 static enum unwind_info_type
19858 rs6000_xcoff_debug_unwind_info (void)
19860 return UI_NONE;
19863 static void
19864 rs6000_xcoff_asm_output_anchor (rtx symbol)
19866 char buffer[100];
19868 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
19869 SYMBOL_REF_BLOCK_OFFSET (symbol));
19870 fprintf (asm_out_file, "%s", SET_ASM_OP);
19871 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
19872 fprintf (asm_out_file, ",");
19873 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
19874 fprintf (asm_out_file, "\n");
19877 static void
19878 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
19880 fputs (GLOBAL_ASM_OP, stream);
19881 RS6000_OUTPUT_BASENAME (stream, name);
19882 putc ('\n', stream);
19885 /* A get_unnamed_decl callback, used for read-only sections. PTR
19886 points to the section string variable. */
19888 static void
19889 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
19891 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
19892 *(const char *const *) directive,
19893 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
19896 /* Likewise for read-write sections. */
19898 static void
19899 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
19901 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
19902 *(const char *const *) directive,
19903 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
19906 static void
19907 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
19909 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
19910 *(const char *const *) directive,
19911 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
19914 /* A get_unnamed_section callback, used for switching to toc_section. */
19916 static void
19917 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19919 if (TARGET_MINIMAL_TOC)
19921 /* toc_section is always selected at least once from
19922 rs6000_xcoff_file_start, so this is guaranteed to
19923 always be defined once and only once in each file. */
19924 if (!toc_initialized)
19926 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
19927 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
19928 toc_initialized = 1;
19930 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
19931 (TARGET_32BIT ? "" : ",3"));
19933 else
19934 fputs ("\t.toc\n", asm_out_file);
19937 /* Implement TARGET_ASM_INIT_SECTIONS. */
19939 static void
19940 rs6000_xcoff_asm_init_sections (void)
19942 read_only_data_section
19943 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
19944 &xcoff_read_only_section_name);
19946 private_data_section
19947 = get_unnamed_section (SECTION_WRITE,
19948 rs6000_xcoff_output_readwrite_section_asm_op,
19949 &xcoff_private_data_section_name);
19951 read_only_private_data_section
19952 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
19953 &xcoff_private_rodata_section_name);
19955 tls_data_section
19956 = get_unnamed_section (SECTION_TLS,
19957 rs6000_xcoff_output_tls_section_asm_op,
19958 &xcoff_tls_data_section_name);
19960 tls_private_data_section
19961 = get_unnamed_section (SECTION_TLS,
19962 rs6000_xcoff_output_tls_section_asm_op,
19963 &xcoff_private_data_section_name);
19965 toc_section
19966 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
19968 readonly_data_section = read_only_data_section;
/* On XCOFF, relocations may appear in both read-only and read-write
   sections (AIX is always PIC), so return the full mask.  */

static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
19977 static void
19978 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
19979 tree decl ATTRIBUTE_UNUSED)
19981 int smclass;
19982 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
19984 if (flags & SECTION_EXCLUDE)
19985 smclass = 4;
19986 else if (flags & SECTION_DEBUG)
19988 fprintf (asm_out_file, "\t.dwsect %s\n", name);
19989 return;
19991 else if (flags & SECTION_CODE)
19992 smclass = 0;
19993 else if (flags & SECTION_TLS)
19994 smclass = 3;
19995 else if (flags & SECTION_WRITE)
19996 smclass = 2;
19997 else
19998 smclass = 1;
20000 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20001 (flags & SECTION_CODE) ? "." : "",
20002 name, suffix[smclass], flags & SECTION_ENTSIZE);
20005 #define IN_NAMED_SECTION(DECL) \
20006 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20007 && DECL_SECTION_NAME (DECL) != NULL)
20009 static section *
20010 rs6000_xcoff_select_section (tree decl, int reloc,
20011 unsigned HOST_WIDE_INT align)
20013 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20014 named section. */
20015 if (align > BIGGEST_ALIGNMENT)
20017 resolve_unique_section (decl, reloc, true);
20018 if (IN_NAMED_SECTION (decl))
20019 return get_named_section (decl, NULL, reloc);
20022 if (decl_readonly_section (decl, reloc))
20024 if (TREE_PUBLIC (decl))
20025 return read_only_data_section;
20026 else
20027 return read_only_private_data_section;
20029 else
20031 #if HAVE_AS_TLS
20032 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20034 if (TREE_PUBLIC (decl))
20035 return tls_data_section;
20036 else if (bss_initializer_p (decl))
20038 /* Convert to COMMON to emit in BSS. */
20039 DECL_COMMON (decl) = 1;
20040 return tls_comm_section;
20042 else
20043 return tls_private_data_section;
20045 else
20046 #endif
20047 if (TREE_PUBLIC (decl))
20048 return data_section;
20049 else
20050 return private_data_section;
20054 static void
20055 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20057 const char *name;
20059 /* Use select_section for private data and uninitialized data with
20060 alignment <= BIGGEST_ALIGNMENT. */
20061 if (!TREE_PUBLIC (decl)
20062 || DECL_COMMON (decl)
20063 || (DECL_INITIAL (decl) == NULL_TREE
20064 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20065 || DECL_INITIAL (decl) == error_mark_node
20066 || (flag_zero_initialized_in_bss
20067 && initializer_zerop (DECL_INITIAL (decl))))
20068 return;
20070 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20071 name = (*targetm.strip_name_encoding) (name);
20072 set_decl_section_name (decl, name);
20075 /* Select section for constant in constant pool.
20077 On RS/6000, all constants are in the private read-only data area.
20078 However, if this is being placed in the TOC it must be output as a
20079 toc entry. */
20081 static section *
20082 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20083 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20085 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20086 return toc_section;
20087 else
20088 return read_only_private_data_section;
20091 /* Remove any trailing [DS] or the like from the symbol name. */
20093 static const char *
20094 rs6000_xcoff_strip_name_encoding (const char *name)
20096 size_t len;
20097 if (*name == '*')
20098 name++;
20099 len = strlen (name);
20100 if (name[len - 1] == ']')
20101 return ggc_alloc_string (name, len - 4);
20102 else
20103 return name;
20106 /* Section attributes. AIX is always PIC. */
20108 static unsigned int
20109 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20111 unsigned int align;
20112 unsigned int flags = default_section_type_flags (decl, name, reloc);
20114 /* Align to at least UNIT size. */
20115 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20116 align = MIN_UNITS_PER_WORD;
20117 else
20118 /* Increase alignment of large objects if not already stricter. */
20119 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20120 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20121 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20123 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20126 /* Output at beginning of assembler file.
20128 Initialize the section names for the RS/6000 at this point.
20130 Specify filename, including full path, to assembler.
20132 We want to go into the TOC section so at least one .toc will be emitted.
20133 Also, in order to output proper .bs/.es pairs, we need at least one static
20134 [RW] section emitted.
20136 Finally, declare mcount when profiling to make the assembler happy. */
20138 static void
20139 rs6000_xcoff_file_start (void)
20141 rs6000_gen_section_name (&xcoff_bss_section_name,
20142 main_input_filename, ".bss_");
20143 rs6000_gen_section_name (&xcoff_private_data_section_name,
20144 main_input_filename, ".rw_");
20145 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20146 main_input_filename, ".rop_");
20147 rs6000_gen_section_name (&xcoff_read_only_section_name,
20148 main_input_filename, ".ro_");
20149 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20150 main_input_filename, ".tls_");
20151 rs6000_gen_section_name (&xcoff_tbss_section_name,
20152 main_input_filename, ".tbss_[UL]");
20154 fputs ("\t.file\t", asm_out_file);
20155 output_quoted_string (asm_out_file, main_input_filename);
20156 fputc ('\n', asm_out_file);
20157 if (write_symbols != NO_DEBUG)
20158 switch_to_section (private_data_section);
20159 switch_to_section (toc_section);
20160 switch_to_section (text_section);
20161 if (profile_flag)
20162 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20163 rs6000_file_start ();
20166 /* Output at end of assembler file.
20167 On the RS/6000, referencing data should automatically pull in text. */
20169 static void
20170 rs6000_xcoff_file_end (void)
20172 switch_to_section (text_section);
20173 fputs ("_section_.text:\n", asm_out_file);
20174 switch_to_section (data_section);
20175 fputs (TARGET_32BIT
20176 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20177 asm_out_file);
/* Context passed to rs6000_declare_alias: the output stream and
   whether we are currently emitting the '.'-prefixed function
   descriptor entry point.  */
struct declare_alias_data
{
  FILE *file;
  bool function_descriptor;
};
20188 static bool
20189 rs6000_declare_alias (struct symtab_node *n, void *d)
20191 struct declare_alias_data *data = (struct declare_alias_data *)d;
20192 /* Main symbol is output specially, because varasm machinery does part of
20193 the job for us - we do not need to declare .globl/lglobs and such. */
20194 if (!n->alias || n->weakref)
20195 return false;
20197 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20198 return false;
20200 /* Prevent assemble_alias from trying to use .set pseudo operation
20201 that does not behave as expected by the middle-end. */
20202 TREE_ASM_WRITTEN (n->decl) = true;
20204 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20205 char *buffer = (char *) alloca (strlen (name) + 2);
20206 char *p;
20207 int dollar_inside = 0;
20209 strcpy (buffer, name);
20210 p = strchr (buffer, '$');
20211 while (p) {
20212 *p = '_';
20213 dollar_inside++;
20214 p = strchr (p + 1, '$');
20216 if (TREE_PUBLIC (n->decl))
20218 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20220 if (dollar_inside) {
20221 if (data->function_descriptor)
20222 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20223 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20225 if (data->function_descriptor)
20227 fputs ("\t.globl .", data->file);
20228 RS6000_OUTPUT_BASENAME (data->file, buffer);
20229 putc ('\n', data->file);
20231 fputs ("\t.globl ", data->file);
20232 RS6000_OUTPUT_BASENAME (data->file, buffer);
20233 putc ('\n', data->file);
20235 #ifdef ASM_WEAKEN_DECL
20236 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20237 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20238 #endif
20240 else
20242 if (dollar_inside)
20244 if (data->function_descriptor)
20245 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20246 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20248 if (data->function_descriptor)
20250 fputs ("\t.lglobl .", data->file);
20251 RS6000_OUTPUT_BASENAME (data->file, buffer);
20252 putc ('\n', data->file);
20254 fputs ("\t.lglobl ", data->file);
20255 RS6000_OUTPUT_BASENAME (data->file, buffer);
20256 putc ('\n', data->file);
20258 if (data->function_descriptor)
20259 fputs (".", data->file);
20260 RS6000_OUTPUT_BASENAME (data->file, buffer);
20261 fputs (":\n", data->file);
20262 return false;
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  /* Indexed by enum symbol_visibility.  */
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
20283 /* This macro produces the initial definition of a function name.
20284 On the RS/6000, we need to place an extra '.' in the function name and
20285 output the function descriptor.
20286 Dollar signs are converted to underscores.
20288 The csect for the function will have already been created when
20289 text_section was selected. We do have to go back to that csect, however.
20291 The third and fourth parameters to the .function pseudo-op (16 and 044)
20292 are placeholders which no longer have any use.
20294 Because AIX assembler's .set command has unexpected semantics, we output
20295 all aliases as alternative labels in front of the definition. */
20297 void
20298 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20300 char *buffer = (char *) alloca (strlen (name) + 1);
20301 char *p;
20302 int dollar_inside = 0;
20303 struct declare_alias_data data = {file, false};
20305 strcpy (buffer, name);
20306 p = strchr (buffer, '$');
20307 while (p) {
20308 *p = '_';
20309 dollar_inside++;
20310 p = strchr (p + 1, '$');
20312 if (TREE_PUBLIC (decl))
20314 if (!RS6000_WEAK || !DECL_WEAK (decl))
20316 if (dollar_inside) {
20317 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20318 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20320 fputs ("\t.globl .", file);
20321 RS6000_OUTPUT_BASENAME (file, buffer);
20322 #ifdef HAVE_GAS_HIDDEN
20323 fputs (rs6000_xcoff_visibility (decl), file);
20324 #endif
20325 putc ('\n', file);
20328 else
20330 if (dollar_inside) {
20331 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20332 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20334 fputs ("\t.lglobl .", file);
20335 RS6000_OUTPUT_BASENAME (file, buffer);
20336 putc ('\n', file);
20338 fputs ("\t.csect ", file);
20339 RS6000_OUTPUT_BASENAME (file, buffer);
20340 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20341 RS6000_OUTPUT_BASENAME (file, buffer);
20342 fputs (":\n", file);
20343 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20344 &data, true);
20345 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20346 RS6000_OUTPUT_BASENAME (file, buffer);
20347 fputs (", TOC[tc0], 0\n", file);
20348 in_section = NULL;
20349 switch_to_section (function_section (decl));
20350 putc ('.', file);
20351 RS6000_OUTPUT_BASENAME (file, buffer);
20352 fputs (":\n", file);
20353 data.function_descriptor = true;
20354 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20355 &data, true);
20356 if (!DECL_IGNORED_P (decl))
20358 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20359 xcoffout_declare_function (file, decl, buffer);
20360 else if (write_symbols == DWARF2_DEBUG)
20362 name = (*targetm.strip_name_encoding) (name);
20363 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20366 return;
20370 /* Output assembly language to globalize a symbol from a DECL,
20371 possibly with visibility. */
20373 void
20374 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20376 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20377 fputs (GLOBAL_ASM_OP, stream);
20378 RS6000_OUTPUT_BASENAME (stream, name);
20379 #ifdef HAVE_GAS_HIDDEN
20380 fputs (rs6000_xcoff_visibility (decl), stream);
20381 #endif
20382 putc ('\n', stream);
20385 /* Output assembly language to define a symbol as COMMON from a DECL,
20386 possibly with visibility. */
20388 void
20389 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20390 tree decl ATTRIBUTE_UNUSED,
20391 const char *name,
20392 unsigned HOST_WIDE_INT size,
20393 unsigned HOST_WIDE_INT align)
20395 unsigned HOST_WIDE_INT align2 = 2;
20397 if (align > 32)
20398 align2 = floor_log2 (align / BITS_PER_UNIT);
20399 else if (size > 4)
20400 align2 = 3;
20402 fputs (COMMON_ASM_OP, stream);
20403 RS6000_OUTPUT_BASENAME (stream, name);
20405 fprintf (stream,
20406 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20407 size, align2);
20409 #ifdef HAVE_GAS_HIDDEN
20410 if (decl != NULL)
20411 fputs (rs6000_xcoff_visibility (decl), stream);
20412 #endif
20413 putc ('\n', stream);
/* This macro produces the initial definition of a object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.

   FILE is the assembly stream, NAME the object's symbol and DECL its
   declaration; rs6000_declare_alias is invoked for the symbol and every
   alias so each alias becomes a label at the same address.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  RS6000_OUTPUT_BASENAME (file, name);
  fputs (":\n", file);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}
/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.

   FILE is the output stream, SIZE the byte width of the reference and
   LABEL the symbol; emits e.g. ".long label-$" for a PC-relative value.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  /* '$' is the AIX assembler's spelling of the current location.  */
  fputs ("-$", file);
}
20440 /* Output a symbol offset relative to the dbase for the current object.
20441 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20442 signed offsets.
20444 __gcc_unwind_dbase is embedded in all executables/libraries through
20445 libgcc/config/rs6000/crtdbase.S. */
20447 void
20448 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20450 fputs (integer_asm_op (size, FALSE), file);
20451 assemble_name (file, label);
20452 fputs("-__gcc_unwind_dbase", file);
#ifdef HAVE_AS_TLS
/* Implement TARGET_ENCODE_SECTION_INFO for XCOFF with TLS support:
   after the default processing, strip block info from thread-local
   variables and append an XCOFF mapping class ([DS] for functions,
   [UA] for data) to the names of public extern decls.  */
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (!SYMBOL_REF_P (symbol))
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  /* Thread-local symbols must not be placed in named object blocks.  */
  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  /* Append mapping class to extern decls.  */
  symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      /* +5 covers the 4-character "[DS]"/"[UA]" suffix plus the NUL.  */
      char *newname = (char *) alloca (strlen (symname) + 5);
      strcpy (newname, symname);
      strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
			? "[DS]" : "[UA]"));
      XSTR (symbol, 0) = ggc_strdup (newname);
    }
}
#endif /* HAVE_AS_TLS */
#endif /* TARGET_XCOFF */
/* Emit a .weak directive for NAME, plus (on AIX-ABI targets with dot
   symbols) a matching weak "." function-entry symbol, and when VAL is
   non-null an alias definition NAME = VAL (with the corresponding
   ".fn = .val" set for the entry symbol).  STREAM is the assembly
   output stream and DECL, if given, the declaration being weakened.  */
void
rs6000_asm_weaken_decl (FILE *stream, tree decl,
			const char *name, const char *val)
{
  fputs ("\t.weak\t", stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  if (decl && TREE_CODE (decl) == FUNCTION_DECL
      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
    {
      if (TARGET_XCOFF)
	fputs ("[DS]", stream);
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
      if (TARGET_XCOFF)
	fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      /* Also weaken the traditional "dot" entry-point symbol.  */
      fputs ("\n\t.weak\t.", stream);
      RS6000_OUTPUT_BASENAME (stream, name);
    }
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
  if (TARGET_XCOFF)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  fputc ('\n', stream);
  if (val)
    {
#ifdef ASM_OUTPUT_DEF
      ASM_OUTPUT_DEF (stream, name, val);
#endif
      if (decl && TREE_CODE (decl) == FUNCTION_DECL
	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
	{
	  fputs ("\t.set\t.", stream);
	  RS6000_OUTPUT_BASENAME (stream, name);
	  fputs (",.", stream);
	  RS6000_OUTPUT_BASENAME (stream, val);
	  fputc ('\n', stream);
	}
    }
}
20538 /* Return true if INSN should not be copied. */
20540 static bool
20541 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20543 return recog_memoized (insn) >= 0
20544 && get_attr_cannot_copy (insn);
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.

   Implements TARGET_RTX_COSTS.  MODE is X's mode, OUTER_CODE the rtx
   code of X's parent (so constants can be costed in context), and
   SPEED selects speed (true) vs. size (false) costing.  */

static bool
rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
      /* On the RS/6000, if it is valid in the insn, it is free.  */
    case CONST_INT:
      /* Constraint key (see rs6000 constraints.md): I = signed 16-bit,
	 K = unsigned 16-bit, L = signed 16-bit shifted left 16,
	 J = unsigned 16-bit shifted left 16, P = constant whose
	 negation is a signed 16-bit constant.  */
      if (((outer_code == SET
	    || outer_code == PLUS
	    || outer_code == MINUS)
	   && (satisfies_constraint_I (x)
	       || satisfies_constraint_L (x)))
	  || (outer_code == AND
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || ((outer_code == IOR || outer_code == XOR)
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || outer_code == ASHIFT
	  || outer_code == ASHIFTRT
	  || outer_code == LSHIFTRT
	  || outer_code == ROTATE
	  || outer_code == ROTATERT
	  || outer_code == ZERO_EXTRACT
	  || (outer_code == MULT
	      && satisfies_constraint_I (x))
	  || ((outer_code == DIV || outer_code == UDIV
	       || outer_code == MOD || outer_code == UMOD)
	      && exact_log2 (INTVAL (x)) >= 0)
	  || (outer_code == COMPARE
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)))
	  || ((outer_code == EQ || outer_code == NE)
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || (outer_code == GTU
	      && satisfies_constraint_I (x))
	  || (outer_code == LTU
	      && satisfies_constraint_P (x)))
	{
	  *total = 0;
	  return true;
	}
      /* Constants that need one extra insn to synthesize in context.  */
      else if ((outer_code == PLUS
		&& reg_or_add_cint_operand (x, VOIDmode))
	       || (outer_code == MINUS
		   && reg_or_sub_cint_operand (x, VOIDmode))
	       || ((outer_code == SET
		    || outer_code == IOR
		    || outer_code == XOR)
		   && (INTVAL (x)
		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST:
    case HIGH:
    case SYMBOL_REF:
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      return true;

    case MEM:
      /* When optimizing for size, MEM should be slightly more expensive
	 than generating address, e.g., (plus (reg) (const)).
	 L1 cache latency is about two instructions.  */
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
	*total += COSTS_N_INSNS (100);
      return true;

    case LABEL_REF:
      *total = 0;
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->fp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      if (CONST_INT_P (XEXP (x, 1))
	  && satisfies_constraint_I (XEXP (x, 1)))
	{
	  /* Multiplies by small (9-bit) constants can be cheaper than by
	     general 16-bit immediates on some processors.  */
	  if (INTVAL (XEXP (x, 1)) >= -256
	      && INTVAL (XEXP (x, 1)) <= 255)
	    *total = rs6000_cost->mulsi_const9;
	  else
	    *total = rs6000_cost->mulsi_const;
	}
      else if (mode == SFmode)
	*total = rs6000_cost->fp;
      else if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->dmul;
      else if (mode == DImode)
	*total = rs6000_cost->muldi;
      else
	*total = rs6000_cost->mulsi;
      return false;

    case FMA:
      if (mode == SFmode)
	*total = rs6000_cost->fp;
      else
	*total = rs6000_cost->dmul;
      break;

    case DIV:
    case MOD:
      if (FLOAT_MODE_P (mode))
	{
	  *total = mode == DFmode ? rs6000_cost->ddiv
				  : rs6000_cost->sdiv;
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case UMOD:
      /* Division by a power of two reduces to shifts.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
	{
	  if (code == DIV || code == MOD)
	    /* Shift, addze */
	    *total = COSTS_N_INSNS (2);
	  else
	    /* Shift */
	    *total = COSTS_N_INSNS (1);
	}
      else
	{
	  if (GET_MODE (XEXP (x, 1)) == DImode)
	    *total = rs6000_cost->divdi;
	  else
	    *total = rs6000_cost->divsi;
	}
      /* Add in shift and subtract for MOD unless we have a mod instruction.  */
      if (!TARGET_MODULO && (code == MOD || code == UMOD))
	*total += COSTS_N_INSNS (2);
      return false;

    case CTZ:
      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
      return false;

    case FFS:
      *total = COSTS_N_INSNS (4);
      return false;

    case POPCOUNT:
      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
      return false;

    case PARITY:
      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
      return false;

    case NOT:
      /* NOT folds into the logical-op insn (nand/nor/eqv).  */
      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case AND:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  rtx left = XEXP (x, 0);
	  rtx_code left_code = GET_CODE (left);

	  /* rotate-and-mask: 1 insn.  */
	  if ((left_code == ROTATE
	       || left_code == ASHIFT
	       || left_code == LSHIFTRT)
	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
	    {
	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
	      if (!CONST_INT_P (XEXP (left, 1)))
		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
	      || (val & 0xffff) == val
	      || (val & 0xffff0000) == val
	      || ((val & 0xffff) == 0 && mode == SImode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* 2 insns.  */
	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (2);
	      return true;
	    }
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case IOR:
      /* FIXME */
      *total = COSTS_N_INSNS (1);
      return true;

    case CLZ:
    case XOR:
    case ZERO_EXTRACT:
      *total = COSTS_N_INSNS (1);
      return false;

    case ASHIFT:
      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
	 the sign extend and shift separately within the insn.  */
      if (TARGET_EXTSWSLI && mode == DImode
	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
	{
	  *total = 0;
	  return false;
	}
      /* fall through */

    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      /* Handle mul_highpart.  */
      if (outer_code == TRUNCATE
	  && GET_CODE (XEXP (x, 0)) == MULT)
	{
	  if (mode == DImode)
	    *total = rs6000_cost->muldi;
	  else
	    *total = rs6000_cost->mulsi;
	  return true;
	}
      else if (outer_code == AND)
	/* The shift folds into a rotate-and-mask insn.  */
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Extending loads are free (lbz/lhz/lha etc. extend as they load).  */
      if (MEM_P (XEXP (x, 0)))
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case COMPARE:
    case NEG:
    case ABS:
      if (!FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = rs6000_cost->fp;
      return false;

    case FLOAT_EXTEND:
      if (mode == DFmode)
	*total = rs6000_cost->sfdf_convert;
      else
	*total = rs6000_cost->fp;
      return false;

    case CALL:
    case IF_THEN_ELSE:
      if (!speed)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
	{
	  *total = rs6000_cost->fp;
	  return false;
	}
      break;

    case NE:
    case EQ:
    case GTU:
    case LTU:
      /* Carry bit requires mode == Pmode.
	 NEG or PLUS already counted so only add one.  */
      if (mode == Pmode
	  && (outer_code == NEG || outer_code == PLUS))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case GT:
    case LT:
    case UNORDERED:
      if (outer_code == SET)
	{
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      *total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else
	    {
	      *total = COSTS_N_INSNS (3);
	      return false;
	    }
	}
      /* CC COMPARE.  */
      if (outer_code == COMPARE)
	{
	  *total = 0;
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
20910 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
20912 static bool
20913 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
20914 int opno, int *total, bool speed)
20916 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
20918 fprintf (stderr,
20919 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
20920 "opno = %d, total = %d, speed = %s, x:\n",
20921 ret ? "complete" : "scan inner",
20922 GET_MODE_NAME (mode),
20923 GET_RTX_NAME (outer_code),
20924 opno,
20925 *total,
20926 speed ? "true" : "false");
20928 debug_rtx (x);
20930 return ret;
/* Implement TARGET_INSN_COST: return the cost of INSN.  When SPEED is
   false, the byte length is the cost; otherwise use the pattern's cost
   attribute if set, else scale per-insn costs by the number of machine
   insns the pattern expands to (length / 4, assuming 4-byte insns).  */
static int
rs6000_insn_cost (rtx_insn *insn, bool speed)
{
  /* Unrecognizable insns get no cost.  */
  if (recog_memoized (insn) < 0)
    return 0;

  if (!speed)
    return get_attr_length (insn);

  /* An explicit cost attribute on the pattern wins.  */
  int cost = get_attr_cost (insn);
  if (cost > 0)
    return cost;

  int n = get_attr_length (insn) / 4;
  enum attr_type type = get_attr_type (insn);

  switch (type)
    {
    case TYPE_LOAD:
    case TYPE_FPLOAD:
    case TYPE_VECLOAD:
      cost = COSTS_N_INSNS (n + 1);
      break;

    case TYPE_MUL:
      /* The size attribute distinguishes the operand width / immediate
	 size; charge the tuning-specific multiply cost once.  */
      switch (get_attr_size (insn))
	{
	case SIZE_8:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
	  break;
	case SIZE_16:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
	  break;
	case SIZE_32:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
	  break;
	case SIZE_64:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;
    case TYPE_DIV:
      switch (get_attr_size (insn))
	{
	case SIZE_32:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
	  break;
	case SIZE_64:
	  cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case TYPE_FP:
      cost = n * rs6000_cost->fp;
      break;
    case TYPE_DMUL:
      cost = n * rs6000_cost->dmul;
      break;
    case TYPE_SDIV:
      cost = n * rs6000_cost->sdiv;
      break;
    case TYPE_DDIV:
      cost = n * rs6000_cost->ddiv;
      break;

    case TYPE_SYNC:
    case TYPE_LOAD_L:
    case TYPE_MFCR:
    case TYPE_MFCRF:
      cost = COSTS_N_INSNS (n + 2);
      break;

    default:
      cost = COSTS_N_INSNS (n);
    }

  return cost;
}
21017 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21019 static int
21020 rs6000_debug_address_cost (rtx x, machine_mode mode,
21021 addr_space_t as, bool speed)
21023 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21025 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21026 ret, speed ? "true" : "false");
21027 debug_rtx (x);
21029 return ret;
/* A C expression returning the cost of moving data from a register of class
   CLASS1 to one of CLASS2.

   Implements TARGET_REGISTER_MOVE_COST for MODE.  Costs are scaled so
   that 2 corresponds to one move instruction per register moved (see
   the GPR fallback below).  */

static int
rs6000_register_move_cost (machine_mode mode,
			   reg_class_t from, reg_class_t to)
{
  int ret;
  reg_class_t rclass;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /* If we have VSX, we can easily move between FPR or Altivec registers,
     otherwise we can only easily move within classes.
     Do this first so we give best-case answers for union classes
     containing both gprs and vsx regs.  */
  HARD_REG_SET to_vsx, from_vsx;
  to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
  from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
  if (!hard_reg_set_empty_p (to_vsx)
      && !hard_reg_set_empty_p (from_vsx)
      && (TARGET_VSX
	  || hard_reg_set_intersect_p (to_vsx, from_vsx)))
    {
      /* Pick a representative regno so hard_regno_nregs reflects how
	 many registers MODE occupies in the chosen bank.  */
      int reg = FIRST_FPR_REGNO;
      if (TARGET_VSX
	  || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
	      && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
	reg = FIRST_ALTIVEC_REGNO;
      ret = 2 * hard_regno_nregs (reg, mode);
    }

  /* Moves from/to GENERAL_REGS.  */
  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
	   || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
    {
      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	{
	  if (TARGET_DIRECT_MOVE)
	    {
	      /* Keep the cost for direct moves above that for within
		 a register class even if the actual processor cost is
		 comparable.  We do this because a direct move insn
		 can't be a nop, whereas with ideal register
		 allocation a move within the same class might turn
		 out to be a nop.  */
	      if (rs6000_tune == PROCESSOR_POWER9
		  || rs6000_tune == PROCESSOR_FUTURE)
		ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
	      else
		ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
	      /* SFmode requires a conversion when moving between gprs
		 and vsx.  */
	      if (mode == SFmode)
		ret += 2;
	    }
	  else
	    /* Without direct moves the transfer goes through memory.  */
	    ret = (rs6000_memory_move_cost (mode, rclass, false)
		   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
	}

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
	 shift.  */
      else if (rclass == CR_REGS)
	ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory.  */
      else if ((rs6000_tune == PROCESSOR_POWER6
		|| rs6000_tune == PROCESSOR_POWER7
		|| rs6000_tune == PROCESSOR_POWER8
		|| rs6000_tune == PROCESSOR_POWER9)
	       && reg_class_subset_p (rclass, SPECIAL_REGS))
	ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);

      else
	/* A move will cost one instruction per GPR moved.  */
	ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
    }

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      /* Only the outermost (non-recursive) call prints.  */
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[from],
		 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}
21132 /* A C expressions returning the cost of moving data of MODE from a register to
21133 or from memory. */
21135 static int
21136 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21137 bool in ATTRIBUTE_UNUSED)
21139 int ret;
21141 if (TARGET_DEBUG_COST)
21142 dbg_cost_ctrl++;
21144 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21145 ret = 4 * hard_regno_nregs (0, mode);
21146 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21147 || reg_classes_intersect_p (rclass, VSX_REGS)))
21148 ret = 4 * hard_regno_nregs (32, mode);
21149 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21150 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21151 else
21152 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21154 if (TARGET_DEBUG_COST)
21156 if (dbg_cost_ctrl == 1)
21157 fprintf (stderr,
21158 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21159 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21160 dbg_cost_ctrl--;
21163 return ret;
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.

   The register allocator chooses GEN_OR_VSX_REGS for the allocno
   class if GENERAL_REGS and VSX_REGS cost is lower than the memory
   cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
   move cost between GENERAL_REGS and VSX_REGS low.

   It might seem reasonable to use a union class.  After all, if usage
   of vsr is low and gpr high, it might make sense to spill gpr to vsr
   rather than memory.  However, in cases where register pressure of
   both is high, like the cactus_adm spec test, allowing
   GEN_OR_VSX_REGS as the allocno class results in bad decisions in
   the first scheduling pass.  This is partly due to an allocno of
   GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
   class, which gives too high a pressure for GENERAL_REGS and too low
   for VSX_REGS.  So, force a choice of the subclass here.

   The best class is also the union if GENERAL_REGS and VSX_REGS have
   the same cost.  In that case we do use GEN_OR_VSX_REGS as the
   allocno class, since trying to narrow down the class by regno mode
   is prone to error.  For example, SImode is allowed in VSX regs and
   in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
   it would be wrong to choose an allocno of GENERAL_REGS based on
   SImode.  */

static reg_class_t
rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
					reg_class_t allocno_class,
					reg_class_t best_class)
{
  switch (allocno_class)
    {
    case GEN_OR_VSX_REGS:
      /* best_class must be a subset of allocno_class.  */
      gcc_checking_assert (best_class == GEN_OR_VSX_REGS
			   || best_class == GEN_OR_FLOAT_REGS
			   || best_class == VSX_REGS
			   || best_class == ALTIVEC_REGS
			   || best_class == FLOAT_REGS
			   || best_class == GENERAL_REGS
			   || best_class == BASE_REGS);
      /* Use best_class but choose wider classes when copying from the
	 wider class to best_class is cheap.  This mimics IRA choice
	 of allocno class.  */
      if (best_class == BASE_REGS)
	return GENERAL_REGS;
      if (TARGET_VSX
	  && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
	return VSX_REGS;
      return best_class;

    default:
      break;
    }

  /* Other allocno classes are left unchanged.  */
  return allocno_class;
}
21224 /* Returns a code for a target-specific builtin that implements
21225 reciprocal of the function, or NULL_TREE if not available. */
21227 static tree
21228 rs6000_builtin_reciprocal (tree fndecl)
21230 switch (DECL_MD_FUNCTION_CODE (fndecl))
21232 case VSX_BUILTIN_XVSQRTDP:
21233 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21234 return NULL_TREE;
21236 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21238 case VSX_BUILTIN_XVSQRTSP:
21239 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21240 return NULL_TREE;
21242 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21244 default:
21245 return NULL_TREE;
21249 /* Load up a constant. If the mode is a vector mode, splat the value across
21250 all of the vector elements. */
21252 static rtx
21253 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21255 rtx reg;
21257 if (mode == SFmode || mode == DFmode)
21259 rtx d = const_double_from_real_value (dconst, mode);
21260 reg = force_reg (mode, d);
21262 else if (mode == V4SFmode)
21264 rtx d = const_double_from_real_value (dconst, SFmode);
21265 rtvec v = gen_rtvec (4, d, d, d, d);
21266 reg = gen_reg_rtx (mode);
21267 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21269 else if (mode == V2DFmode)
21271 rtx d = const_double_from_real_value (dconst, DFmode);
21272 rtvec v = gen_rtvec (2, d, d);
21273 reg = gen_reg_rtx (mode);
21274 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21276 else
21277 gcc_unreachable ();
21279 return reg;
21282 /* Generate an FMA instruction. */
21284 static void
21285 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21287 machine_mode mode = GET_MODE (target);
21288 rtx dst;
21290 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21291 gcc_assert (dst != NULL);
21293 if (dst != target)
21294 emit_move_insn (target, dst);
21297 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21299 static void
21300 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21302 machine_mode mode = GET_MODE (dst);
21303 rtx r;
21305 /* This is a tad more complicated, since the fnma_optab is for
21306 a different expression: fma(-m1, m2, a), which is the same
21307 thing except in the case of signed zeros.
21309 Fortunately we know that if FMA is supported that FNMSUB is
21310 also supported in the ISA. Just expand it directly. */
21312 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21314 r = gen_rtx_NEG (mode, a);
21315 r = gen_rtx_FMA (mode, m1, m2, r);
21316 r = gen_rtx_NEG (mode, r);
21317 emit_insn (gen_rtx_SET (dst, r));
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate (hardware reciprocal-estimate insn).  */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    /* Remaining refinement passes: the error term squares each time,
       so accuracy at least doubles per iteration.  */
    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.

   DST receives sqrt(SRC) when RECIP is false, or 1/sqrt(SRC) when
   RECIP is true.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate (hardware estimate insn).  */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  /* Scalar: conditional move e = (src > 0) ? e : 0.  */
	  rtx target = emit_conditional_move (e, GT, src, zero, mode,
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  /* Vector: elementwise select via vector compare.  */
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  /* Goldschmidt iterations: refine g (sqrt) and h (1/(2*sqrt))
	     in lock step, then recover rsqrt as 2*h.  */
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }

  return;
}
21490 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21491 (Power7) targets. DST is the target, and SRC is the argument operand. */
21493 void
21494 rs6000_emit_popcount (rtx dst, rtx src)
21496 machine_mode mode = GET_MODE (dst);
21497 rtx tmp1, tmp2;
21499 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21500 if (TARGET_POPCNTD)
21502 if (mode == SImode)
21503 emit_insn (gen_popcntdsi2 (dst, src));
21504 else
21505 emit_insn (gen_popcntddi2 (dst, src));
21506 return;
21509 tmp1 = gen_reg_rtx (mode);
21511 if (mode == SImode)
21513 emit_insn (gen_popcntbsi2 (tmp1, src));
21514 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21515 NULL_RTX, 0);
21516 tmp2 = force_reg (SImode, tmp2);
21517 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21519 else
21521 emit_insn (gen_popcntbdi2 (tmp1, src));
21522 tmp2 = expand_mult (DImode, tmp1,
21523 GEN_INT ((HOST_WIDE_INT)
21524 0x01010101 << 32 | 0x01010101),
21525 NULL_RTX, 0);
21526 tmp2 = force_reg (DImode, tmp2);
21527 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.

   Uses prtyw/prtyd when available; otherwise reduces the popcntb
   byte counts either by multiply or by a shift/xor ladder, whichever
   the cost tables say is cheaper, and masks the low bit.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  rtx tmp1, tmp2, tmp3, tmp4;

	  /* Fold the per-byte counts together with xors; parity of the
	     whole word ends up in the low byte of tmp.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      /* Parity is the low bit of the (folded) population count.  */
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
21612 /* Expand an Altivec constant permutation for little endian mode.
21613 OP0 and OP1 are the input vectors and TARGET is the output vector.
21614 SEL specifies the constant permutation vector.
21616 There are two issues: First, the two input operands must be
21617 swapped so that together they form a double-wide array in LE
21618 order. Second, the vperm instruction has surprising behavior
21619 in LE mode: it interprets the elements of the source vectors
21620 in BE mode ("left to right") and interprets the elements of
21621 the destination vector in LE mode ("right to left"). To
21622 correct for this, we must subtract each element of the permute
21623 control vector from 31.
21625 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21626 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21627 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21628 serve as the permute control vector. Then, in BE mode,
21630 vperm 9,10,11,12
21632 places the desired result in vr9. However, in LE mode the
21633 vector contents will be
21635 vr10 = 00000003 00000002 00000001 00000000
21636 vr11 = 00000007 00000006 00000005 00000004
21638 The result of the vperm using the same permute control vector is
21640 vr9 = 05000000 07000000 01000000 03000000
21642 That is, the leftmost 4 bytes of vr10 are interpreted as the
21643 source for the rightmost 4 bytes of vr9, and so on.
21645 If we change the permute control vector to
21647 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21649 and issue
21651 vperm 9,11,10,12
21653 we get the desired
21655 vr9 = 00000006 00000004 00000002 00000000. */
21657 static void
21658 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21659 const vec_perm_indices &sel)
21661 unsigned int i;
21662 rtx perm[16];
21663 rtx constv, unspec;
21665 /* Unpack and adjust the constant selector. */
21666 for (i = 0; i < 16; ++i)
21668 unsigned int elt = 31 - (sel[i] & 31);
21669 perm[i] = GEN_INT (elt);
21672 /* Expand to a permute, swapping the inputs and using the
21673 adjusted selector. */
21674 if (!REG_P (op0))
21675 op0 = force_reg (V16QImode, op0);
21676 if (!REG_P (op1))
21677 op1 = force_reg (V16QImode, op1);
21679 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21680 constv = force_reg (V16QImode, constv);
21681 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21682 UNSPEC_VPERM);
21683 if (!REG_P (target))
21685 rtx tmp = gen_reg_rtx (V16QImode);
21686 emit_move_insn (tmp, unspec);
21687 unspec = tmp;
21690 emit_move_insn (target, unspec);
21693 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21694 permute control vector. But here it's not a constant, so we must
21695 generate a vector NAND or NOR to do the adjustment. */
21697 void
21698 altivec_expand_vec_perm_le (rtx operands[4])
21700 rtx notx, iorx, unspec;
21701 rtx target = operands[0];
21702 rtx op0 = operands[1];
21703 rtx op1 = operands[2];
21704 rtx sel = operands[3];
21705 rtx tmp = target;
21706 rtx norreg = gen_reg_rtx (V16QImode);
21707 machine_mode mode = GET_MODE (target);
21709 /* Get everything in regs so the pattern matches. */
21710 if (!REG_P (op0))
21711 op0 = force_reg (mode, op0);
21712 if (!REG_P (op1))
21713 op1 = force_reg (mode, op1);
21714 if (!REG_P (sel))
21715 sel = force_reg (V16QImode, sel);
21716 if (!REG_P (target))
21717 tmp = gen_reg_rtx (mode);
21719 if (TARGET_P9_VECTOR)
21721 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21722 UNSPEC_VPERMR);
21724 else
21726 /* Invert the selector with a VNAND if available, else a VNOR.
21727 The VNAND is preferred for future fusion opportunities. */
21728 notx = gen_rtx_NOT (V16QImode, sel);
21729 iorx = (TARGET_P8_VECTOR
21730 ? gen_rtx_IOR (V16QImode, notx, notx)
21731 : gen_rtx_AND (V16QImode, notx, notx));
21732 emit_insn (gen_rtx_SET (norreg, iorx));
21734 /* Permute with operands reversed and adjusted selector. */
21735 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21736 UNSPEC_VPERM);
21739 /* Copy into target, possibly by way of a register. */
21740 if (!REG_P (target))
21742 emit_move_insn (tmp, unspec);
21743 unspec = tmp;
21746 emit_move_insn (target, unspec);
21749 /* Expand an Altivec constant permutation. Return true if we match
21750 an efficient implementation; false to fall back to VPERM.
21752 OP0 and OP1 are the input vectors and TARGET is the output vector.
21753 SEL specifies the constant permutation vector. */
21755 static bool
21756 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21757 const vec_perm_indices &sel)
21759 struct altivec_perm_insn {
21760 HOST_WIDE_INT mask;
21761 enum insn_code impl;
21762 unsigned char perm[16];
21764 static const struct altivec_perm_insn patterns[] = {
21765 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21766 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21767 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21768 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21769 { OPTION_MASK_ALTIVEC,
21770 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21771 : CODE_FOR_altivec_vmrglb_direct),
21772 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21773 { OPTION_MASK_ALTIVEC,
21774 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21775 : CODE_FOR_altivec_vmrglh_direct),
21776 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21777 { OPTION_MASK_ALTIVEC,
21778 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
21779 : CODE_FOR_altivec_vmrglw_direct),
21780 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
21781 { OPTION_MASK_ALTIVEC,
21782 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
21783 : CODE_FOR_altivec_vmrghb_direct),
21784 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
21785 { OPTION_MASK_ALTIVEC,
21786 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
21787 : CODE_FOR_altivec_vmrghh_direct),
21788 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
21789 { OPTION_MASK_ALTIVEC,
21790 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
21791 : CODE_FOR_altivec_vmrghw_direct),
21792 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
21793 { OPTION_MASK_P8_VECTOR,
21794 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
21795 : CODE_FOR_p8_vmrgow_v4sf_direct),
21796 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
21797 { OPTION_MASK_P8_VECTOR,
21798 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
21799 : CODE_FOR_p8_vmrgew_v4sf_direct),
21800 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
21803 unsigned int i, j, elt, which;
21804 unsigned char perm[16];
21805 rtx x;
21806 bool one_vec;
21808 /* Unpack the constant selector. */
21809 for (i = which = 0; i < 16; ++i)
21811 elt = sel[i] & 31;
21812 which |= (elt < 16 ? 1 : 2);
21813 perm[i] = elt;
21816 /* Simplify the constant selector based on operands. */
21817 switch (which)
21819 default:
21820 gcc_unreachable ();
21822 case 3:
21823 one_vec = false;
21824 if (!rtx_equal_p (op0, op1))
21825 break;
21826 /* FALLTHRU */
21828 case 2:
21829 for (i = 0; i < 16; ++i)
21830 perm[i] &= 15;
21831 op0 = op1;
21832 one_vec = true;
21833 break;
21835 case 1:
21836 op1 = op0;
21837 one_vec = true;
21838 break;
21841 /* Look for splat patterns. */
21842 if (one_vec)
21844 elt = perm[0];
21846 for (i = 0; i < 16; ++i)
21847 if (perm[i] != elt)
21848 break;
21849 if (i == 16)
21851 if (!BYTES_BIG_ENDIAN)
21852 elt = 15 - elt;
21853 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
21854 return true;
21857 if (elt % 2 == 0)
21859 for (i = 0; i < 16; i += 2)
21860 if (perm[i] != elt || perm[i + 1] != elt + 1)
21861 break;
21862 if (i == 16)
21864 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
21865 x = gen_reg_rtx (V8HImode);
21866 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
21867 GEN_INT (field)));
21868 emit_move_insn (target, gen_lowpart (V16QImode, x));
21869 return true;
21873 if (elt % 4 == 0)
21875 for (i = 0; i < 16; i += 4)
21876 if (perm[i] != elt
21877 || perm[i + 1] != elt + 1
21878 || perm[i + 2] != elt + 2
21879 || perm[i + 3] != elt + 3)
21880 break;
21881 if (i == 16)
21883 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
21884 x = gen_reg_rtx (V4SImode);
21885 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
21886 GEN_INT (field)));
21887 emit_move_insn (target, gen_lowpart (V16QImode, x));
21888 return true;
21893 /* Look for merge and pack patterns. */
21894 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
21896 bool swapped;
21898 if ((patterns[j].mask & rs6000_isa_flags) == 0)
21899 continue;
21901 elt = patterns[j].perm[0];
21902 if (perm[0] == elt)
21903 swapped = false;
21904 else if (perm[0] == elt + 16)
21905 swapped = true;
21906 else
21907 continue;
21908 for (i = 1; i < 16; ++i)
21910 elt = patterns[j].perm[i];
21911 if (swapped)
21912 elt = (elt >= 16 ? elt - 16 : elt + 16);
21913 else if (one_vec && elt >= 16)
21914 elt -= 16;
21915 if (perm[i] != elt)
21916 break;
21918 if (i == 16)
21920 enum insn_code icode = patterns[j].impl;
21921 machine_mode omode = insn_data[icode].operand[0].mode;
21922 machine_mode imode = insn_data[icode].operand[1].mode;
21924 /* For little-endian, don't use vpkuwum and vpkuhum if the
21925 underlying vector type is not V4SI and V8HI, respectively.
21926 For example, using vpkuwum with a V8HI picks up the even
21927 halfwords (BE numbering) when the even halfwords (LE
21928 numbering) are what we need. */
21929 if (!BYTES_BIG_ENDIAN
21930 && icode == CODE_FOR_altivec_vpkuwum_direct
21931 && ((REG_P (op0)
21932 && GET_MODE (op0) != V4SImode)
21933 || (SUBREG_P (op0)
21934 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
21935 continue;
21936 if (!BYTES_BIG_ENDIAN
21937 && icode == CODE_FOR_altivec_vpkuhum_direct
21938 && ((REG_P (op0)
21939 && GET_MODE (op0) != V8HImode)
21940 || (SUBREG_P (op0)
21941 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
21942 continue;
21944 /* For little-endian, the two input operands must be swapped
21945 (or swapped back) to ensure proper right-to-left numbering
21946 from 0 to 2N-1. */
21947 if (swapped ^ !BYTES_BIG_ENDIAN)
21948 std::swap (op0, op1);
21949 if (imode != V16QImode)
21951 op0 = gen_lowpart (imode, op0);
21952 op1 = gen_lowpart (imode, op1);
21954 if (omode == V16QImode)
21955 x = target;
21956 else
21957 x = gen_reg_rtx (omode);
21958 emit_insn (GEN_FCN (icode) (x, op0, op1));
21959 if (omode != V16QImode)
21960 emit_move_insn (target, gen_lowpart (V16QImode, x));
21961 return true;
21965 if (!BYTES_BIG_ENDIAN)
21967 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
21968 return true;
21971 return false;
21974 /* Expand a VSX Permute Doubleword constant permutation.
21975 Return true if we match an efficient implementation. */
21977 static bool
21978 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
21979 unsigned char perm0, unsigned char perm1)
21981 rtx x;
21983 /* If both selectors come from the same operand, fold to single op. */
21984 if ((perm0 & 2) == (perm1 & 2))
21986 if (perm0 & 2)
21987 op0 = op1;
21988 else
21989 op1 = op0;
21991 /* If both operands are equal, fold to simpler permutation. */
21992 if (rtx_equal_p (op0, op1))
21994 perm0 = perm0 & 1;
21995 perm1 = (perm1 & 1) + 2;
21997 /* If the first selector comes from the second operand, swap. */
21998 else if (perm0 & 2)
22000 if (perm1 & 2)
22001 return false;
22002 perm0 -= 2;
22003 perm1 += 2;
22004 std::swap (op0, op1);
22006 /* If the second selector does not come from the second operand, fail. */
22007 else if ((perm1 & 2) == 0)
22008 return false;
22010 /* Success! */
22011 if (target != NULL)
22013 machine_mode vmode, dmode;
22014 rtvec v;
22016 vmode = GET_MODE (target);
22017 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22018 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22019 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22020 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22021 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22022 emit_insn (gen_rtx_SET (target, x));
22024 return true;
22027 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22029 static bool
22030 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22031 rtx op1, const vec_perm_indices &sel)
22033 bool testing_p = !target;
22035 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22036 if (TARGET_ALTIVEC && testing_p)
22037 return true;
22039 /* Check for ps_merge* or xxpermdi insns. */
22040 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22042 if (testing_p)
22044 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22045 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22047 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22048 return true;
22051 if (TARGET_ALTIVEC)
22053 /* Force the target-independent code to lower to V16QImode. */
22054 if (vmode != V16QImode)
22055 return false;
22056 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22057 return true;
22060 return false;
22063 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22064 OP0 and OP1 are the input vectors and TARGET is the output vector.
22065 PERM specifies the constant permutation vector. */
22067 static void
22068 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22069 machine_mode vmode, const vec_perm_builder &perm)
22071 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22072 if (x != target)
22073 emit_move_insn (target, x);
22076 /* Expand an extract even operation. */
22078 void
22079 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22081 machine_mode vmode = GET_MODE (target);
22082 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22083 vec_perm_builder perm (nelt, nelt, 1);
22085 for (i = 0; i < nelt; i++)
22086 perm.quick_push (i * 2);
22088 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22091 /* Expand a vector interleave operation. */
22093 void
22094 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22096 machine_mode vmode = GET_MODE (target);
22097 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22098 vec_perm_builder perm (nelt, nelt, 1);
22100 high = (highp ? 0 : nelt / 2);
22101 for (i = 0; i < nelt / 2; i++)
22103 perm.quick_push (i + high);
22104 perm.quick_push (i + nelt + high);
22107 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22110 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22111 void
22112 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22114 HOST_WIDE_INT hwi_scale (scale);
22115 REAL_VALUE_TYPE r_pow;
22116 rtvec v = rtvec_alloc (2);
22117 rtx elt;
22118 rtx scale_vec = gen_reg_rtx (V2DFmode);
22119 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22120 elt = const_double_from_real_value (r_pow, DFmode);
22121 RTVEC_ELT (v, 0) = elt;
22122 RTVEC_ELT (v, 1) = elt;
22123 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22124 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
22127 /* Return an RTX representing where to find the function value of a
22128 function returning MODE. */
22129 static rtx
22130 rs6000_complex_function_value (machine_mode mode)
22132 unsigned int regno;
22133 rtx r1, r2;
22134 machine_mode inner = GET_MODE_INNER (mode);
22135 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22137 if (TARGET_FLOAT128_TYPE
22138 && (mode == KCmode
22139 || (mode == TCmode && TARGET_IEEEQUAD)))
22140 regno = ALTIVEC_ARG_RETURN;
22142 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22143 regno = FP_ARG_RETURN;
22145 else
22147 regno = GP_ARG_RETURN;
22149 /* 32-bit is OK since it'll go in r3/r4. */
22150 if (TARGET_32BIT && inner_bytes >= 4)
22151 return gen_rtx_REG (mode, regno);
22154 if (inner_bytes >= 8)
22155 return gen_rtx_REG (mode, regno);
22157 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22158 const0_rtx);
22159 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22160 GEN_INT (inner_bytes));
22161 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22164 /* Return an rtx describing a return value of MODE as a PARALLEL
22165 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22166 stride REG_STRIDE. */
22168 static rtx
22169 rs6000_parallel_return (machine_mode mode,
22170 int n_elts, machine_mode elt_mode,
22171 unsigned int regno, unsigned int reg_stride)
22173 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22175 int i;
22176 for (i = 0; i < n_elts; i++)
22178 rtx r = gen_rtx_REG (elt_mode, regno);
22179 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22180 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22181 regno += reg_stride;
22184 return par;
22187 /* Target hook for TARGET_FUNCTION_VALUE.
22189 An integer value is in r3 and a floating-point value is in fp1,
22190 unless -msoft-float. */
22192 static rtx
22193 rs6000_function_value (const_tree valtype,
22194 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22195 bool outgoing ATTRIBUTE_UNUSED)
22197 machine_mode mode;
22198 unsigned int regno;
22199 machine_mode elt_mode;
22200 int n_elts;
22202 /* Special handling for structs in darwin64. */
22203 if (TARGET_MACHO
22204 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22206 CUMULATIVE_ARGS valcum;
22207 rtx valret;
22209 valcum.words = 0;
22210 valcum.fregno = FP_ARG_MIN_REG;
22211 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22212 /* Do a trial code generation as if this were going to be passed as
22213 an argument; if any part goes in memory, we return NULL. */
22214 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22215 if (valret)
22216 return valret;
22217 /* Otherwise fall through to standard ABI rules. */
22220 mode = TYPE_MODE (valtype);
22222 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22223 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22225 int first_reg, n_regs;
22227 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22229 /* _Decimal128 must use even/odd register pairs. */
22230 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22231 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22233 else
22235 first_reg = ALTIVEC_ARG_RETURN;
22236 n_regs = 1;
22239 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22242 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
22243 if (TARGET_32BIT && TARGET_POWERPC64)
22244 switch (mode)
22246 default:
22247 break;
22248 case E_DImode:
22249 case E_SCmode:
22250 case E_DCmode:
22251 case E_TCmode:
22252 int count = GET_MODE_SIZE (mode) / 4;
22253 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22256 if ((INTEGRAL_TYPE_P (valtype)
22257 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22258 || POINTER_TYPE_P (valtype))
22259 mode = TARGET_32BIT ? SImode : DImode;
22261 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22262 /* _Decimal128 must use an even/odd register pair. */
22263 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22264 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22265 && !FLOAT128_VECTOR_P (mode))
22266 regno = FP_ARG_RETURN;
22267 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22268 && targetm.calls.split_complex_arg)
22269 return rs6000_complex_function_value (mode);
22270 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22271 return register is used in both cases, and we won't see V2DImode/V2DFmode
22272 for pure altivec, combine the two cases. */
22273 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22274 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22275 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22276 regno = ALTIVEC_ARG_RETURN;
22277 else
22278 regno = GP_ARG_RETURN;
22280 return gen_rtx_REG (mode, regno);
22283 /* Define how to find the value returned by a library function
22284 assuming the value has mode MODE. */
22286 rs6000_libcall_value (machine_mode mode)
22288 unsigned int regno;
22290 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
22291 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22292 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22294 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22295 /* _Decimal128 must use an even/odd register pair. */
22296 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22297 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22298 regno = FP_ARG_RETURN;
22299 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22300 return register is used in both cases, and we won't see V2DImode/V2DFmode
22301 for pure altivec, combine the two cases. */
22302 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22303 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22304 regno = ALTIVEC_ARG_RETURN;
22305 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22306 return rs6000_complex_function_value (mode);
22307 else
22308 regno = GP_ARG_RETURN;
22310 return gen_rtx_REG (mode, regno);
22313 /* Compute register pressure classes. We implement the target hook to avoid
22314 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22315 lead to incorrect estimates of number of available registers and therefor
22316 increased register pressure/spill. */
22317 static int
22318 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22320 int n;
22322 n = 0;
22323 pressure_classes[n++] = GENERAL_REGS;
22324 if (TARGET_VSX)
22325 pressure_classes[n++] = VSX_REGS;
22326 else
22328 if (TARGET_ALTIVEC)
22329 pressure_classes[n++] = ALTIVEC_REGS;
22330 if (TARGET_HARD_FLOAT)
22331 pressure_classes[n++] = FLOAT_REGS;
22333 pressure_classes[n++] = CR_REGS;
22334 pressure_classes[n++] = SPECIAL_REGS;
22336 return n;
22339 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22340 Frame pointer elimination is automatically handled.
22342 For the RS/6000, if frame pointer elimination is being done, we would like
22343 to convert ap into fp, not sp.
22345 We need r30 if -mminimal-toc was specified, and there are constant pool
22346 references. */
22348 static bool
22349 rs6000_can_eliminate (const int from, const int to)
22351 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22352 ? ! frame_pointer_needed
22353 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22354 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22355 || constant_pool_empty_p ()
22356 : true);
22359 /* Define the offset between two registers, FROM to be eliminated and its
22360 replacement TO, at the start of a routine. */
22361 HOST_WIDE_INT
22362 rs6000_initial_elimination_offset (int from, int to)
22364 rs6000_stack_t *info = rs6000_stack_info ();
22365 HOST_WIDE_INT offset;
22367 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22368 offset = info->push_p ? 0 : -info->total_size;
22369 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22371 offset = info->push_p ? 0 : -info->total_size;
22372 if (FRAME_GROWS_DOWNWARD)
22373 offset += info->fixed_size + info->vars_size + info->parm_size;
22375 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22376 offset = FRAME_GROWS_DOWNWARD
22377 ? info->fixed_size + info->vars_size + info->parm_size
22378 : 0;
22379 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22380 offset = info->total_size;
22381 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22382 offset = info->push_p ? info->total_size : 0;
22383 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22384 offset = 0;
22385 else
22386 gcc_unreachable ();
22388 return offset;
22391 /* Fill in sizes of registers used by unwinder. */
22393 static void
22394 rs6000_init_dwarf_reg_sizes_extra (tree address)
22396 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22398 int i;
22399 machine_mode mode = TYPE_MODE (char_type_node);
22400 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22401 rtx mem = gen_rtx_MEM (BLKmode, addr);
22402 rtx value = gen_int_mode (16, mode);
22404 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22405 The unwinder still needs to know the size of Altivec registers. */
22407 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22409 int column = DWARF_REG_TO_UNWIND_COLUMN
22410 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22411 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22413 emit_move_insn (adjust_address (mem, mode, offset), value);
22418 /* Map internal gcc register numbers to debug format register numbers.
22419 FORMAT specifies the type of debug register number to use:
22420 0 -- debug information, except for frame-related sections
22421 1 -- DWARF .debug_frame section
22422 2 -- DWARF .eh_frame section */
22424 unsigned int
22425 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22427 /* On some platforms, we use the standard DWARF register
22428 numbering for .debug_info and .debug_frame. */
22429 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22431 #ifdef RS6000_USE_DWARF_NUMBERING
22432 if (regno <= 31)
22433 return regno;
22434 if (FP_REGNO_P (regno))
22435 return regno - FIRST_FPR_REGNO + 32;
22436 if (ALTIVEC_REGNO_P (regno))
22437 return regno - FIRST_ALTIVEC_REGNO + 1124;
22438 if (regno == LR_REGNO)
22439 return 108;
22440 if (regno == CTR_REGNO)
22441 return 109;
22442 if (regno == CA_REGNO)
22443 return 101; /* XER */
22444 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22445 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22446 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22447 to the DWARF reg for CR. */
22448 if (format == 1 && regno == CR2_REGNO)
22449 return 64;
22450 if (CR_REGNO_P (regno))
22451 return regno - CR0_REGNO + 86;
22452 if (regno == VRSAVE_REGNO)
22453 return 356;
22454 if (regno == VSCR_REGNO)
22455 return 67;
22457 /* These do not make much sense. */
22458 if (regno == FRAME_POINTER_REGNUM)
22459 return 111;
22460 if (regno == ARG_POINTER_REGNUM)
22461 return 67;
22462 if (regno == 64)
22463 return 100;
22465 gcc_unreachable ();
22466 #endif
22469 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22470 information, and also for .eh_frame. */
22471 /* Translate the regnos to their numbers in GCC 7 (and before). */
22472 if (regno <= 31)
22473 return regno;
22474 if (FP_REGNO_P (regno))
22475 return regno - FIRST_FPR_REGNO + 32;
22476 if (ALTIVEC_REGNO_P (regno))
22477 return regno - FIRST_ALTIVEC_REGNO + 77;
22478 if (regno == LR_REGNO)
22479 return 65;
22480 if (regno == CTR_REGNO)
22481 return 66;
22482 if (regno == CA_REGNO)
22483 return 76; /* XER */
22484 if (CR_REGNO_P (regno))
22485 return regno - CR0_REGNO + 68;
22486 if (regno == VRSAVE_REGNO)
22487 return 109;
22488 if (regno == VSCR_REGNO)
22489 return 110;
22491 if (regno == FRAME_POINTER_REGNUM)
22492 return 111;
22493 if (regno == ARG_POINTER_REGNUM)
22494 return 67;
22495 if (regno == 64)
22496 return 64;
22498 gcc_unreachable ();
22501 /* target hook eh_return_filter_mode */
22502 static scalar_int_mode
22503 rs6000_eh_return_filter_mode (void)
22505 return TARGET_32BIT ? SImode : word_mode;
22508 /* Target hook for translate_mode_attribute. */
22509 static machine_mode
22510 rs6000_translate_mode_attribute (machine_mode mode)
22512 if ((FLOAT128_IEEE_P (mode)
22513 && ieee128_float_type_node == long_double_type_node)
22514 || (FLOAT128_IBM_P (mode)
22515 && ibm128_float_type_node == long_double_type_node))
22516 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22517 return mode;
22520 /* Target hook for scalar_mode_supported_p. */
22521 static bool
22522 rs6000_scalar_mode_supported_p (scalar_mode mode)
22524 /* -m32 does not support TImode. This is the default, from
22525 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22526 same ABI as for -m32. But default_scalar_mode_supported_p allows
22527 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22528 for -mpowerpc64. */
22529 if (TARGET_32BIT && mode == TImode)
22530 return false;
22532 if (DECIMAL_FLOAT_MODE_P (mode))
22533 return default_decimal_float_supported_p ();
22534 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22535 return true;
22536 else
22537 return default_scalar_mode_supported_p (mode);
22540 /* Target hook for vector_mode_supported_p. */
22541 static bool
22542 rs6000_vector_mode_supported_p (machine_mode mode)
22544 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22545 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22546 double-double. */
22547 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22548 return true;
22550 else
22551 return false;
22554 /* Target hook for floatn_mode. */
22555 static opt_scalar_float_mode
22556 rs6000_floatn_mode (int n, bool extended)
22558 if (extended)
22560 switch (n)
22562 case 32:
22563 return DFmode;
22565 case 64:
22566 if (TARGET_FLOAT128_TYPE)
22567 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22568 else
22569 return opt_scalar_float_mode ();
22571 case 128:
22572 return opt_scalar_float_mode ();
22574 default:
22575 /* Those are the only valid _FloatNx types. */
22576 gcc_unreachable ();
22579 else
22581 switch (n)
22583 case 32:
22584 return SFmode;
22586 case 64:
22587 return DFmode;
22589 case 128:
22590 if (TARGET_FLOAT128_TYPE)
22591 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22592 else
22593 return opt_scalar_float_mode ();
22595 default:
22596 return opt_scalar_float_mode ();
22602 /* Target hook for c_mode_for_suffix. */
22603 static machine_mode
22604 rs6000_c_mode_for_suffix (char suffix)
22606 if (TARGET_FLOAT128_TYPE)
22608 if (suffix == 'q' || suffix == 'Q')
22609 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22611 /* At the moment, we are not defining a suffix for IBM extended double.
22612 If/when the default for -mabi=ieeelongdouble is changed, and we want
22613 to support __ibm128 constants in legacy library code, we may need to
22614 re-evalaute this decision. Currently, c-lex.c only supports 'w' and
22615 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22616 __float80 constants. */
22619 return VOIDmode;
22622 /* Target hook for invalid_arg_for_unprototyped_fn. */
22623 static const char *
22624 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22626 return (!rs6000_darwin64_abi
22627 && typelist == 0
22628 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22629 && (funcdecl == NULL_TREE
22630 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22631 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22632 ? N_("AltiVec argument passed to unprototyped function")
22633 : NULL;
22636 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22637 setup by using __stack_chk_fail_local hidden function instead of
22638 calling __stack_chk_fail directly. Otherwise it is better to call
22639 __stack_chk_fail directly. */
22641 static tree ATTRIBUTE_UNUSED
22642 rs6000_stack_protect_fail (void)
22644 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22645 ? default_hidden_stack_protect_fail ()
22646 : default_external_stack_protect_fail ();
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

#if TARGET_ELF
static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  /* 1 << 41 for 64-bit, 1 << 29 for 32-bit address spaces.  */
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
#endif
22659 /* Mask options that we want to support inside of attribute((target)) and
22660 #pragma GCC target operations. Note, we do not include things like
22661 64/32-bit, endianness, hard/soft floating point, etc. that would have
22662 different calling sequences. */
22664 struct rs6000_opt_mask {
22665 const char *name; /* option name */
22666 HOST_WIDE_INT mask; /* mask to set */
22667 bool invert; /* invert sense of mask */
22668 bool valid_target; /* option is a target option */
/* Table mapping -m<name> option names to ISA flag bits.  Entries with a
   zero mask (e.g. "mfpgpr", "string") are accepted for compatibility but
   set no flag.  Entries with valid_target == false are printed by the
   debug helpers but rejected inside attribute((target))/#pragma GCC
   target (see rs6000_inner_target_options).  */
22671 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22673 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22674 { "cmpb", OPTION_MASK_CMPB, false, true },
22675 { "crypto", OPTION_MASK_CRYPTO, false, true },
22676 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22677 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22678 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22679 false, true },
22680 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22681 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22682 { "fprnd", OPTION_MASK_FPRND, false, true },
22683 { "future", OPTION_MASK_FUTURE, false, true },
22684 { "hard-dfp", OPTION_MASK_DFP, false, true },
22685 { "htm", OPTION_MASK_HTM, false, true },
22686 { "isel", OPTION_MASK_ISEL, false, true },
22687 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22688 { "mfpgpr", 0, false, true },
22689 { "modulo", OPTION_MASK_MODULO, false, true },
22690 { "mulhw", OPTION_MASK_MULHW, false, true },
22691 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22692 { "pcrel", OPTION_MASK_PCREL, false, true },
22693 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22694 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22695 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22696 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22697 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22698 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22699 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22700 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22701 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22702 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22703 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
22704 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22705 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22706 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22707 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22708 { "string", 0, false, true },
22709 { "update", OPTION_MASK_NO_UPDATE, true , true },
22710 { "vsx", OPTION_MASK_VSX, false, true },
22711 #ifdef OPTION_MASK_64BIT
22712 #if TARGET_AIX_OS
22713 { "aix64", OPTION_MASK_64BIT, false, false },
22714 { "aix32", OPTION_MASK_64BIT, true, false },
22715 #else
22716 { "64", OPTION_MASK_64BIT, false, false },
22717 { "32", OPTION_MASK_64BIT, true, false },
22718 #endif
22719 #endif
22720 #ifdef OPTION_MASK_EABI
22721 { "eabi", OPTION_MASK_EABI, false, false },
22722 #endif
22723 #ifdef OPTION_MASK_LITTLE_ENDIAN
22724 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22725 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22726 #endif
22727 #ifdef OPTION_MASK_RELOCATABLE
22728 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22729 #endif
22730 #ifdef OPTION_MASK_STRICT_ALIGN
22731 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22732 #endif
22733 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
/* NOTE(review): "string" appears twice in this table (above with
   valid_target == true, here with valid_target == false).  Only the first
   match is found by the linear lookups, so the second entry looks
   unreachable -- confirm the duplicate is intentional.  */
22734 { "string", 0, false, false },
22737 /* Builtin mask mapping for printing the flags. */
/* Used only for -mdebug=target style dumps of RS6000_BTM_* builtin masks;
   none of these are settable via attribute((target)).  */
22738 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22740 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22741 { "vsx", RS6000_BTM_VSX, false, false },
22742 { "fre", RS6000_BTM_FRE, false, false },
22743 { "fres", RS6000_BTM_FRES, false, false },
22744 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22745 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22746 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22747 { "cell", RS6000_BTM_CELL, false, false },
22748 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22749 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22750 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22751 { "crypto", RS6000_BTM_CRYPTO, false, false },
22752 { "htm", RS6000_BTM_HTM, false, false },
22753 { "hard-dfp", RS6000_BTM_DFP, false, false },
22754 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22755 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22756 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22757 { "float128", RS6000_BTM_FLOAT128, false, false },
22758 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22761 /* Option variables that we want to support inside attribute((target)) and
22762 #pragma GCC target operations. */
22764 struct rs6000_opt_var {
22765 const char *name; /* option name */
22766 size_t global_offset; /* offset of the option in global_options. */
22767 size_t target_offset; /* offset of the option in target options. */
/* Boolean option variables (stored as int fields) that have no ISA flag
   bit.  rs6000_inner_target_options writes through global_offset; each
   field is assumed to be an int -- see the cast there.  */
22770 static struct rs6000_opt_var const rs6000_opt_vars[] =
22772 { "friz",
22773 offsetof (struct gcc_options, x_TARGET_FRIZ),
22774 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22775 { "avoid-indexed-addresses",
22776 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22777 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
22778 { "longcall",
22779 offsetof (struct gcc_options, x_rs6000_default_long_calls),
22780 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
22781 { "optimize-swaps",
22782 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
22783 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
22784 { "allow-movmisalign",
22785 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
22786 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
22787 { "sched-groups",
22788 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
22789 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
22790 { "always-hint",
22791 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
22792 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
22793 { "align-branch-targets",
22794 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
22795 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
22796 { "tls-markers",
22797 offsetof (struct gcc_options, x_tls_markers),
22798 offsetof (struct cl_target_option, x_tls_markers), },
/* NOTE(review): "sched-prolog" and "sched-epilog" both map onto the same
   variable (x_TARGET_SCHED_PROLOG), so toggling either name changes both
   behaviors -- confirm this sharing is intentional before altering.  */
22799 { "sched-prolog",
22800 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22801 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22802 { "sched-epilog",
22803 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22804 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22805 { "speculate-indirect-jumps",
22806 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
22807 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
22810 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
22811 parsing. Return true if there were no errors. */
/* ARGS is either a STRING_CST holding a comma-separated option list, or a
   TREE_LIST of such strings (handled by recursion below).  ATTR_P selects
   the wording of error messages: true for __attribute__((target(...))),
   false for #pragma GCC target.  Side effects: updates rs6000_isa_flags,
   rs6000_isa_flags_explicit, rs6000_cpu_index/rs6000_tune_index and the
   rs6000_opt_vars variables in global_options.  */
22813 static bool
22814 rs6000_inner_target_options (tree args, bool attr_p)
22816 bool ret = true;
22818 if (args == NULL_TREE)
22821 else if (TREE_CODE (args) == STRING_CST)
22823 char *p = ASTRDUP (TREE_STRING_POINTER (args));
22824 char *q;
22826 while ((q = strtok (p, ",")) != NULL)
22828 bool error_p = false;
22829 bool not_valid_p = false;
22830 const char *cpu_opt = NULL;
/* Pass NULL on subsequent iterations so strtok keeps scanning the same
   string.  */
22832 p = NULL;
22833 if (strncmp (q, "cpu=", 4) == 0)
22835 int cpu_index = rs6000_cpu_name_lookup (q+4);
22836 if (cpu_index >= 0)
22837 rs6000_cpu_index = cpu_index;
22838 else
22840 error_p = true;
22841 cpu_opt = q+4;
22844 else if (strncmp (q, "tune=", 5) == 0)
22846 int tune_index = rs6000_cpu_name_lookup (q+5);
22847 if (tune_index >= 0)
22848 rs6000_tune_index = tune_index;
22849 else
22851 error_p = true;
22852 cpu_opt = q+5;
22855 else
22857 size_t i;
22858 bool invert = false;
22859 char *r = q;
/* Assume failure until the name matches a mask or variable below.  */
22861 error_p = true;
22862 if (strncmp (r, "no-", 3) == 0)
22864 invert = true;
22865 r += 3;
22868 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
22869 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
22871 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
22873 if (!rs6000_opt_masks[i].valid_target)
22874 not_valid_p = true;
22875 else
22877 error_p = false;
22878 rs6000_isa_flags_explicit |= mask;
22880 /* VSX needs altivec, so -mvsx automagically sets
22881 altivec and disables -mavoid-indexed-addresses. */
22882 if (!invert)
22884 if (mask == OPTION_MASK_VSX)
22886 mask |= OPTION_MASK_ALTIVEC;
22887 TARGET_AVOID_XFORM = 0;
22891 if (rs6000_opt_masks[i].invert)
22892 invert = !invert;
22894 if (invert)
22895 rs6000_isa_flags &= ~mask;
22896 else
22897 rs6000_isa_flags |= mask;
22899 break;
/* Fall back to the flag-variable table; each entry's global_options
   field is written as an int through its recorded offset.  */
22902 if (error_p && !not_valid_p)
22904 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
22905 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
22907 size_t j = rs6000_opt_vars[i].global_offset;
22908 *((int *) ((char *)&global_options + j)) = !invert;
22909 error_p = false;
22910 not_valid_p = false;
22911 break;
22916 if (error_p)
22918 const char *eprefix, *esuffix;
22920 ret = false;
22921 if (attr_p)
22923 eprefix = "__attribute__((__target__(";
22924 esuffix = ")))";
22926 else
22928 eprefix = "#pragma GCC target ";
22929 esuffix = "";
22932 if (cpu_opt)
22933 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
22934 q, esuffix);
22935 else if (not_valid_p)
22936 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
22937 else
22938 error ("%s%qs%s is invalid", eprefix, q, esuffix);
/* A TREE_LIST chains several argument strings; process each one and
   accumulate failure without stopping early.  */
22943 else if (TREE_CODE (args) == TREE_LIST)
22947 tree value = TREE_VALUE (args);
22948 if (value)
22950 bool ret2 = rs6000_inner_target_options (value, attr_p);
22951 if (!ret2)
22952 ret = false;
22954 args = TREE_CHAIN (args);
22956 while (args != NULL_TREE);
22959 else
22961 error ("attribute %<target%> argument not a string");
22962 return false;
22965 return ret;
22968 /* Print out the target options as a list for -mdebug=target. */
/* Mirrors the structure of rs6000_inner_target_options: ARGS is NULL, a
   STRING_CST of comma-separated options, or a TREE_LIST of strings.
   PREFIX is printed before the first item; subsequent items get ", ".
   Output goes to stderr only.  */
22970 static void
22971 rs6000_debug_target_options (tree args, const char *prefix)
22973 if (args == NULL_TREE)
22974 fprintf (stderr, "%s<NULL>", prefix);
22976 else if (TREE_CODE (args) == STRING_CST)
22978 char *p = ASTRDUP (TREE_STRING_POINTER (args));
22979 char *q;
22981 while ((q = strtok (p, ",")) != NULL)
/* NULL so strtok continues tokenizing the same copied string.  */
22983 p = NULL;
22984 fprintf (stderr, "%s\"%s\"", prefix, q);
22985 prefix = ", ";
22989 else if (TREE_CODE (args) == TREE_LIST)
22993 tree value = TREE_VALUE (args);
22994 if (value)
22996 rs6000_debug_target_options (value, prefix);
22997 prefix = ", ";
22999 args = TREE_CHAIN (args);
23001 while (args != NULL_TREE);
23004 else
23005 gcc_unreachable ();
23007 return;
23011 /* Hook to validate attribute((target("..."))). */
/* Implements TARGET_OPTION_VALID_ATTRIBUTE_P.  Parses ARGS for FNDECL,
   records the resulting target/optimization nodes on the decl, and
   restores the caller's global options before returning.  Returns true
   when the attribute string was fully valid.  */
23013 static bool
23014 rs6000_valid_attribute_p (tree fndecl,
23015 tree ARG_UNUSED (name),
23016 tree args,
23017 int flags)
23019 struct cl_target_option cur_target;
23020 bool ret;
23021 tree old_optimize;
23022 tree new_target, new_optimize;
23023 tree func_optimize;
23025 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23027 if (TARGET_DEBUG_TARGET)
23029 tree tname = DECL_NAME (fndecl);
23030 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23031 if (tname)
23032 fprintf (stderr, "function: %.*s\n",
23033 (int) IDENTIFIER_LENGTH (tname),
23034 IDENTIFIER_POINTER (tname));
23035 else
23036 fprintf (stderr, "function: unknown\n");
23038 fprintf (stderr, "args:");
23039 rs6000_debug_target_options (args, " ");
23040 fprintf (stderr, "\n");
23042 if (flags)
23043 fprintf (stderr, "flags: 0x%x\n", flags);
23045 fprintf (stderr, "--------------------\n");
23048 /* attribute((target("default"))) does nothing, beyond
23049 affecting multi-versioning. */
23050 if (TREE_VALUE (args)
23051 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23052 && TREE_CHAIN (args) == NULL_TREE
23053 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23054 return true;
23056 old_optimize = build_optimization_node (&global_options);
23057 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23059 /* If the function changed the optimization levels as well as setting target
23060 options, start with the optimizations specified. */
23061 if (func_optimize && func_optimize != old_optimize)
23062 cl_optimization_restore (&global_options,
23063 TREE_OPTIMIZATION (func_optimize));
23065 /* The target attributes may also change some optimization flags, so update
23066 the optimization options if necessary. */
/* Save the current target state so it can be restored at the end,
   regardless of whether parsing succeeds.  */
23067 cl_target_option_save (&cur_target, &global_options);
23068 rs6000_cpu_index = rs6000_tune_index = -1;
23069 ret = rs6000_inner_target_options (args, true);
23071 /* Set up any additional state. */
23072 if (ret)
23074 ret = rs6000_option_override_internal (false);
23075 new_target = build_target_option_node (&global_options);
23077 else
23078 new_target = NULL;
23080 new_optimize = build_optimization_node (&global_options);
23082 if (!new_target)
23083 ret = false;
23085 else if (fndecl)
23087 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23089 if (old_optimize != new_optimize)
23090 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the global-state changes made while parsing the attribute.  */
23093 cl_target_option_restore (&global_options, &cur_target);
23095 if (old_optimize != new_optimize)
23096 cl_optimization_restore (&global_options,
23097 TREE_OPTIMIZATION (old_optimize));
23099 return ret;
23103 /* Hook to validate the current #pragma GCC target and set the state, and
23104 update the macros based on what was changed. If ARGS is NULL, then
23105 POP_TARGET is used to reset the options. */
/* Implements TARGET_OPTION_PRAGMA_PARSE.  Returns true on success; on
   failure the global options are left as rs6000_inner_target_options /
   rs6000_option_override_internal left them.  */
23107 bool
23108 rs6000_pragma_target_parse (tree args, tree pop_target)
23110 tree prev_tree = build_target_option_node (&global_options);
23111 tree cur_tree;
23112 struct cl_target_option *prev_opt, *cur_opt;
23113 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23114 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23116 if (TARGET_DEBUG_TARGET)
23118 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23119 fprintf (stderr, "args:");
23120 rs6000_debug_target_options (args, " ");
23121 fprintf (stderr, "\n");
23123 if (pop_target)
23125 fprintf (stderr, "pop_target:\n");
23126 debug_tree (pop_target);
23128 else
23129 fprintf (stderr, "pop_target: <NULL>\n");
23131 fprintf (stderr, "--------------------\n");
/* No args: this is a "#pragma GCC target pop" style reset; restore
   either the popped state or the command-line default.  */
23134 if (! args)
23136 cur_tree = ((pop_target)
23137 ? pop_target
23138 : target_option_default_node);
23139 cl_target_option_restore (&global_options,
23140 TREE_TARGET_OPTION (cur_tree));
23142 else
23144 rs6000_cpu_index = rs6000_tune_index = -1;
23145 if (!rs6000_inner_target_options (args, false)
23146 || !rs6000_option_override_internal (false)
23147 || (cur_tree = build_target_option_node (&global_options))
23148 == NULL_TREE)
23150 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23151 fprintf (stderr, "invalid pragma\n");
23153 return false;
23157 target_option_current_node = cur_tree;
23158 rs6000_activate_target_options (target_option_current_node);
23160 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23161 change the macros that are defined. */
23162 if (rs6000_target_modify_macros_ptr)
23164 prev_opt = TREE_TARGET_OPTION (prev_tree);
23165 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23166 prev_flags = prev_opt->x_rs6000_isa_flags;
23168 cur_opt = TREE_TARGET_OPTION (cur_tree);
23169 cur_flags = cur_opt->x_rs6000_isa_flags;
23170 cur_bumask = cur_opt->x_rs6000_builtin_mask;
/* XOR isolates the bits that changed; only those macros are touched.  */
23172 diff_bumask = (prev_bumask ^ cur_bumask);
23173 diff_flags = (prev_flags ^ cur_flags);
23175 if ((diff_flags != 0) || (diff_bumask != 0))
23177 /* Delete old macros. */
23178 rs6000_target_modify_macros_ptr (false,
23179 prev_flags & diff_flags,
23180 prev_bumask & diff_bumask);
23182 /* Define new macros. */
23183 rs6000_target_modify_macros_ptr (true,
23184 cur_flags & diff_flags,
23185 cur_bumask & diff_bumask);
23189 return true;
23193 /* Remember the last target of rs6000_set_current_function. */
23194 static GTY(()) tree rs6000_previous_fndecl;
23196 /* Restore target's globals from NEW_TREE and invalidate the
23197 rs6000_previous_fndecl cache. */
/* NEW_TREE must be a TARGET_OPTION_NODE.  Saved target globals are
   created lazily on first use for non-default nodes.  Clearing
   rs6000_previous_fndecl forces rs6000_set_current_function to redo its
   work on the next call.  */
23199 void
23200 rs6000_activate_target_options (tree new_tree)
23202 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23203 if (TREE_TARGET_GLOBALS (new_tree))
23204 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23205 else if (new_tree == target_option_default_node)
23206 restore_target_globals (&default_target_globals);
23207 else
23208 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23209 rs6000_previous_fndecl = NULL_TREE;
23212 /* Establish appropriate back-end context for processing the function
23213 FNDECL. The argument might be NULL to indicate processing at top
23214 level, outside of any function scope. */
23215 static void
23216 rs6000_set_current_function (tree fndecl)
23218 if (TARGET_DEBUG_TARGET)
23220 fprintf (stderr, "\n==================== rs6000_set_current_function");
23222 if (fndecl)
23223 fprintf (stderr, ", fndecl %s (%p)",
23224 (DECL_NAME (fndecl)
23225 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23226 : "<unknown>"), (void *)fndecl);
23228 if (rs6000_previous_fndecl)
23229 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23231 fprintf (stderr, "\n");
23234 /* Only change the context if the function changes. This hook is called
23235 several times in the course of compiling a function, and we don't want to
23236 slow things down too much or call target_reinit when it isn't safe. */
23237 if (fndecl == rs6000_previous_fndecl)
23238 return;
/* Determine the previously-active target option node.  */
23240 tree old_tree;
23241 if (rs6000_previous_fndecl == NULL_TREE)
23242 old_tree = target_option_current_node;
23243 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23244 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23245 else
23246 old_tree = target_option_default_node;
/* Determine the node that should now be active.  NULL_TREE here means
   "no change needed".  */
23248 tree new_tree;
23249 if (fndecl == NULL_TREE)
23251 if (old_tree != target_option_current_node)
23252 new_tree = target_option_current_node;
23253 else
23254 new_tree = NULL_TREE;
23256 else
23258 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23259 if (new_tree == NULL_TREE)
23260 new_tree = target_option_default_node;
23263 if (TARGET_DEBUG_TARGET)
23265 if (new_tree)
23267 fprintf (stderr, "\nnew fndecl target specific options:\n");
23268 debug_tree (new_tree);
23271 if (old_tree)
23273 fprintf (stderr, "\nold fndecl target specific options:\n");
23274 debug_tree (old_tree);
23277 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23278 fprintf (stderr, "--------------------\n");
/* Note: rs6000_activate_target_options clears rs6000_previous_fndecl,
   so the cache update below must come after the activation.  */
23281 if (new_tree && old_tree != new_tree)
23282 rs6000_activate_target_options (new_tree);
23284 if (fndecl)
23285 rs6000_previous_fndecl = fndecl;
23289 /* Save the current options */
23291 static void
23292 rs6000_function_specific_save (struct cl_target_option *ptr,
23293 struct gcc_options *opts)
23295 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23296 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23299 /* Restore the current options */
23301 static void
23302 rs6000_function_specific_restore (struct gcc_options *opts,
23303 struct cl_target_option *ptr)
23306 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23307 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23308 (void) rs6000_option_override_internal (false);
23311 /* Print the current options */
23313 static void
23314 rs6000_function_specific_print (FILE *file, int indent,
23315 struct cl_target_option *ptr)
23317 rs6000_print_isa_options (file, indent, "Isa options set",
23318 ptr->x_rs6000_isa_flags);
23320 rs6000_print_isa_options (file, indent, "Isa options explicit",
23321 ptr->x_rs6000_isa_flags_explicit);
23324 /* Helper function to print the current isa or misc options on a line. */
/* FLAGS is decoded against OPTS[0..NUM_ELEMENTS), each bit printed as
   PREFIX + ("no-" when cleared) + name; long lines are wrapped with a
   trailing backslash at MAX_COLUMN.  NOTE(review): some output goes to
   FILE and some directly to stderr (the "<none>" case, the flags header,
   and the wrap continuation) -- confirm whether this mix is intentional
   or whether everything should honor FILE.  */
23326 static void
23327 rs6000_print_options_internal (FILE *file,
23328 int indent,
23329 const char *string,
23330 HOST_WIDE_INT flags,
23331 const char *prefix,
23332 const struct rs6000_opt_mask *opts,
23333 size_t num_elements)
23335 size_t i;
23336 size_t start_column = 0;
23337 size_t cur_column;
23338 size_t max_column = 120;
23339 size_t prefix_len = strlen (prefix);
23340 size_t comma_len = 0;
23341 const char *comma = "";
23343 if (indent)
23344 start_column += fprintf (file, "%*s", indent, "");
23346 if (!flags)
23348 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
23349 return;
23352 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
23354 /* Print the various mask options. */
23355 cur_column = start_column;
23356 for (i = 0; i < num_elements; i++)
23358 bool invert = opts[i].invert;
23359 const char *name = opts[i].name;
23360 const char *no_str = "";
23361 HOST_WIDE_INT mask = opts[i].mask;
23362 size_t len = comma_len + prefix_len + strlen (name);
/* For a normal entry, a clear bit prints as "no-"; for an inverted
   entry (e.g. "update" for OPTION_MASK_NO_UPDATE) a set bit does.
   Each entry's bits are removed from FLAGS as they are consumed.  */
23364 if (!invert)
23366 if ((flags & mask) == 0)
23368 no_str = "no-";
23369 len += sizeof ("no-") - 1;
23372 flags &= ~mask;
23375 else
23377 if ((flags & mask) != 0)
23379 no_str = "no-";
23380 len += sizeof ("no-") - 1;
23383 flags |= mask;
23386 cur_column += len;
23387 if (cur_column > max_column)
23389 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
23390 cur_column = start_column + len;
23391 comma = "";
23394 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23395 comma = ", ";
23396 comma_len = sizeof (", ") - 1;
23399 fputs ("\n", file);
23402 /* Helper function to print the current isa options on a line. */
23404 static void
23405 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23406 HOST_WIDE_INT flags)
23408 rs6000_print_options_internal (file, indent, string, flags, "-m",
23409 &rs6000_opt_masks[0],
23410 ARRAY_SIZE (rs6000_opt_masks));
23413 static void
23414 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23415 HOST_WIDE_INT flags)
23417 rs6000_print_options_internal (file, indent, string, flags, "",
23418 &rs6000_builtin_mask_names[0],
23419 ARRAY_SIZE (rs6000_builtin_mask_names));
23422 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
23423 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23424 -mupper-regs-df, etc.).
23426 If the user used -mno-power8-vector, we need to turn off all of the implicit
23427 ISA 2.07 and 3.0 options that relate to the vector unit.
23429 If the user used -mno-power9-vector, we need to turn off all of the implicit
23430 ISA 3.0 options that relate to the vector unit.
23432 This function does not handle explicit options such as the user specifying
23433 -mdirect-move. These are handled in rs6000_option_override_internal, and
23434 the appropriate error is given if needed.
23436 We return a mask of all of the implicit options that should not be enabled
23437 by default. */
23439 static HOST_WIDE_INT
23440 rs6000_disable_incompatible_switches (void)
23442 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23443 size_t i, j;
/* Ordered from newest to oldest ISA level so each -mno-X also covers the
   options that X would pull in.  */
23445 static const struct {
23446 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23447 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23448 const char *const name; /* name of the switch. */
23449 } flags[] = {
23450 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23451 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23452 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23453 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23456 for (i = 0; i < ARRAY_SIZE (flags); i++)
23458 HOST_WIDE_INT no_flag = flags[i].no_flag;
/* The option is explicitly disabled: -mno-X was given.  */
23460 if ((rs6000_isa_flags & no_flag) == 0
23461 && (rs6000_isa_flags_explicit & no_flag) != 0)
23463 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
/* Dependent options the user ALSO explicitly enabled conflict;
   report each by name.  */
23464 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23465 & rs6000_isa_flags
23466 & dep_flags);
23468 if (set_flags)
23470 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23471 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23473 set_flags &= ~rs6000_opt_masks[j].mask;
23474 error ("%<-mno-%s%> turns off %<-m%s%>",
23475 flags[i].name,
23476 rs6000_opt_masks[j].name);
/* Every conflicting bit must have a name in rs6000_opt_masks.  */
23479 gcc_assert (!set_flags);
23482 rs6000_isa_flags &= ~dep_flags;
23483 ignore_masks |= no_flag | dep_flags;
23487 return ignore_masks;
23491 /* Helper function for printing the function name when debugging. */
23493 static const char *
23494 get_decl_name (tree fn)
23496 tree name;
23498 if (!fn)
23499 return "<null>";
23501 name = DECL_NAME (fn);
23502 if (!name)
23503 return "<no-name>";
23505 return IDENTIFIER_POINTER (name);
23508 /* Return the clone id of the target we are compiling code for in a target
23509 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23510 the priority list for the target clones (ordered from lowest to
23511 highest). */
23513 static int
23514 rs6000_clone_priority (tree fndecl)
23516 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23517 HOST_WIDE_INT isa_masks;
23518 int ret = CLONE_DEFAULT;
23519 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23520 const char *attrs_str = NULL;
/* NOTE(review): attrs is dereferenced without a NULL check, so this
   assumes FNDECL always carries a "target" attribute -- presumably
   guaranteed because it is only called for target_clones versions;
   confirm against callers.  */
23522 attrs = TREE_VALUE (TREE_VALUE (attrs));
23523 attrs_str = TREE_STRING_POINTER (attrs);
23525 /* Return priority zero for default function. Return the ISA needed for the
23526 function if it is not the default. */
23527 if (strcmp (attrs_str, "default") != 0)
23529 if (fn_opts == NULL_TREE)
23530 fn_opts = target_option_default_node;
23532 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23533 isa_masks = rs6000_isa_flags;
23534 else
23535 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
/* Scan from highest priority down; the first clone whose ISA bit is
   enabled wins.  */
23537 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23538 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23539 break;
23542 if (TARGET_DEBUG_TARGET)
23543 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
23544 get_decl_name (fndecl), ret);
23546 return ret;
23549 /* This compares the priority of target features in function DECL1 and DECL2.
23550 It returns positive value if DECL1 is higher priority, negative value if
23551 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23552 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23554 static int
23555 rs6000_compare_version_priority (tree decl1, tree decl2)
23557 int priority1 = rs6000_clone_priority (decl1);
23558 int priority2 = rs6000_clone_priority (decl2);
23559 int ret = priority1 - priority2;
23561 if (TARGET_DEBUG_TARGET)
23562 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23563 get_decl_name (decl1), get_decl_name (decl2), ret);
23565 return ret;
23568 /* Make a dispatcher declaration for the multi-versioned function DECL.
23569 Calls to DECL function will be replaced with calls to the dispatcher
23570 by the front-end. Returns the decl of the dispatcher function. */
23572 static tree
23573 rs6000_get_function_versions_dispatcher (void *decl)
23575 tree fn = (tree) decl;
23576 struct cgraph_node *node = NULL;
23577 struct cgraph_node *default_node = NULL;
23578 struct cgraph_function_version_info *node_v = NULL;
23579 struct cgraph_function_version_info *first_v = NULL;
23581 tree dispatch_decl = NULL;
23583 struct cgraph_function_version_info *default_version_info = NULL;
23584 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23586 if (TARGET_DEBUG_TARGET)
23587 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23588 get_decl_name (fn));
23590 node = cgraph_node::get (fn);
23591 gcc_assert (node != NULL);
23593 node_v = node->function_version ();
23594 gcc_assert (node_v != NULL);
/* Reuse an existing dispatcher if one was already created for this
   version set.  */
23596 if (node_v->dispatcher_resolver != NULL)
23597 return node_v->dispatcher_resolver;
23599 /* Find the default version and make it the first node. */
23600 first_v = node_v;
23601 /* Go to the beginning of the chain. */
23602 while (first_v->prev != NULL)
23603 first_v = first_v->prev;
23605 default_version_info = first_v;
23606 while (default_version_info != NULL)
23608 const tree decl2 = default_version_info->this_node->decl;
23609 if (is_function_default_version (decl2))
23610 break;
23611 default_version_info = default_version_info->next;
23614 /* If there is no default node, just return NULL. */
23615 if (default_version_info == NULL)
23616 return NULL;
23618 /* Make default info the first node. */
/* Unlink the default from its position and splice it in front of
   FIRST_V (doubly-linked list surgery).  */
23619 if (first_v != default_version_info)
23621 default_version_info->prev->next = default_version_info->next;
23622 if (default_version_info->next)
23623 default_version_info->next->prev = default_version_info->prev;
23624 first_v->prev = default_version_info;
23625 default_version_info->next = first_v;
23626 default_version_info->prev = NULL;
23629 default_node = default_version_info->this_node;
23631 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23632 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23633 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23634 "exports hardware capability bits");
23635 #else
23637 if (targetm.has_ifunc_p ())
23639 struct cgraph_function_version_info *it_v = NULL;
23640 struct cgraph_node *dispatcher_node = NULL;
23641 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23643 /* Right now, the dispatching is done via ifunc. */
23644 dispatch_decl = make_dispatcher_decl (default_node->decl);
23646 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23647 gcc_assert (dispatcher_node != NULL);
23648 dispatcher_node->dispatcher_function = 1;
23649 dispatcher_version_info
23650 = dispatcher_node->insert_new_function_version ();
23651 dispatcher_version_info->next = default_version_info;
23652 dispatcher_node->definition = 1;
23654 /* Set the dispatcher for all the versions. */
23655 it_v = default_version_info;
23656 while (it_v != NULL)
23658 it_v->dispatcher_resolver = dispatch_decl;
23659 it_v = it_v->next;
23662 else
23664 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23665 "multiversioning needs ifunc which is not supported "
23666 "on this target");
23668 #endif
23670 return dispatch_decl;
23673 /* Make the resolver function decl to dispatch the versions of a multi-
23674 versioned function, DEFAULT_DECL. Create an empty basic block in the
23675 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23676 function. */
23678 static tree
23679 make_resolver_func (const tree default_decl,
23680 const tree dispatch_decl,
23681 basic_block *empty_bb)
23683 /* Make the resolver function static. The resolver function returns
23684 void *. */
23685 tree decl_name = clone_function_name (default_decl, "resolver");
23686 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23687 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23688 tree decl = build_fn_decl (resolver_name, type);
23689 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
/* Mark the resolver as compiler-generated, local and never inlined.  */
23691 DECL_NAME (decl) = decl_name;
23692 TREE_USED (decl) = 1;
23693 DECL_ARTIFICIAL (decl) = 1;
23694 DECL_IGNORED_P (decl) = 0;
23695 TREE_PUBLIC (decl) = 0;
23696 DECL_UNINLINABLE (decl) = 1;
23698 /* Resolver is not external, body is generated. */
23699 DECL_EXTERNAL (decl) = 0;
23700 DECL_EXTERNAL (dispatch_decl) = 0;
23702 DECL_CONTEXT (decl) = NULL_TREE;
23703 DECL_INITIAL (decl) = make_node (BLOCK);
23704 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23706 /* Build result decl and add to function_decl. */
23707 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23708 DECL_CONTEXT (t) = decl;
23709 DECL_ARTIFICIAL (t) = 1;
23710 DECL_IGNORED_P (t) = 1;
23711 DECL_RESULT (decl) = t;
/* Create a lowered empty body; callers add the dispatch logic into
   *EMPTY_BB afterwards.  */
23713 gimplify_function_tree (decl);
23714 push_cfun (DECL_STRUCT_FUNCTION (decl));
23715 *empty_bb = init_lowered_empty_function (decl, false,
23716 profile_count::uninitialized ());
23718 cgraph_node::add_new_function (decl, true);
23719 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23721 pop_cfun ();
23723 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23724 DECL_ATTRIBUTES (dispatch_decl)
23725 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23727 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23729 return decl;
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
   return a pointer to VERSION_DECL if we are running on a machine that
   supports the index CLONE_ISA hardware architecture bits.  This function will
   be called during version dispatch to decide which function version to
   execute.  It returns the basic block at the end, to which more conditions
   can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     int clone_isa, basic_block new_bb)
{
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gimple_seq gseq = bb_seq (new_bb);

  /* Build "return (void *) &version_decl;" statements.  */
  tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			      build_fold_addr_expr (version_decl));
  tree result_var = create_tmp_var (ptr_type_node);
  gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
  gimple *return_stmt = gimple_build_return (result_var);

  /* The default clone is the fallback: return its address
     unconditionally, no __builtin_cpu_supports test needed.  */
  if (clone_isa == CLONE_DEFAULT)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Build "cond_var = __builtin_cpu_supports ("<isa-name>");".  */
  tree bool_zero = build_int_cst (bool_int_type_node, 0);
  tree cond_var = create_tmp_var (bool_int_type_node);
  tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
  const char *arg_str = rs6000_clone_map[clone_isa].name;
  tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
  gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
  gimple_call_set_lhs (call_cond_stmt, cond_var);

  gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (call_cond_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, call_cond_stmt);

  /* Build "if (cond_var != 0)"; the branch targets are supplied by the
     CFG edges created below, not by the cond itself.  */
  gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
					    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split after the condition: bb1 holds the test, bb2 the "return this
     version" statements, and the true edge bb1->bb2 replaces the
     fallthrough.  */
  basic_block bb1 = new_bb;
  edge e12 = split_block (bb1, if_else_stmt);
  basic_block bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  /* Split again after the return; bb3 becomes the block where the next
     condition can be appended by a later call.  */
  edge e23 = split_block (bb2, return_stmt);
  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  basic_block bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* bb2 returns, so it must flow to the exit block, not to bb3.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();
  return bb3;
}
23808 /* This function generates the dispatch function for multi-versioned functions.
23809 DISPATCH_DECL is the function which will contain the dispatch logic.
23810 FNDECLS are the function choices for dispatch, and is a tree chain.
23811 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23812 code is generated. */
23814 static int
23815 dispatch_function_versions (tree dispatch_decl,
23816 void *fndecls_p,
23817 basic_block *empty_bb)
23819 int ix;
23820 tree ele;
23821 vec<tree> *fndecls;
23822 tree clones[CLONE_MAX];
23824 if (TARGET_DEBUG_TARGET)
23825 fputs ("dispatch_function_versions, top\n", stderr);
23827 gcc_assert (dispatch_decl != NULL
23828 && fndecls_p != NULL
23829 && empty_bb != NULL);
23831 /* fndecls_p is actually a vector. */
23832 fndecls = static_cast<vec<tree> *> (fndecls_p);
23834 /* At least one more version other than the default. */
23835 gcc_assert (fndecls->length () >= 2);
23837 /* The first version in the vector is the default decl. */
23838 memset ((void *) clones, '\0', sizeof (clones));
23839 clones[CLONE_DEFAULT] = (*fndecls)[0];
23841 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23842 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
23843 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
23844 recent glibc. If we ever need to call __builtin_cpu_init, we would need
23845 to insert the code here to do the call. */
23847 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
23849 int priority = rs6000_clone_priority (ele);
23850 if (!clones[priority])
23851 clones[priority] = ele;
23854 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
23855 if (clones[ix])
23857 if (TARGET_DEBUG_TARGET)
23858 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
23859 ix, get_decl_name (clones[ix]));
23861 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
23862 *empty_bb);
23865 return 0;
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
rs6000_generate_version_dispatcher_body (void *node_p)
{
  tree resolver;
  basic_block empty_bb;
  struct cgraph_node *node = (cgraph_node *) node_p;
  struct cgraph_function_version_info *ninfo = node->function_version ();

  /* Already generated for this dispatcher: reuse it.  */
  if (ninfo->dispatcher_resolver)
    return ninfo->dispatcher_resolver;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  /* The first version in the chain corresponds to the default version.  */
  ninfo->dispatcher_resolver = resolver
    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
	     get_decl_name (resolver));

  /* Collect every version decl from the version-info chain, then hand
     them to dispatch_function_versions to fill in the resolver body.  */
  push_cfun (DECL_STRUCT_FUNCTION (resolver));
  auto_vec<tree, 2> fn_ver_vec;

  for (struct cgraph_function_version_info *vinfo = ninfo->next;
       vinfo;
       vinfo = vinfo->next)
    {
      struct cgraph_node *version = vinfo->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (version->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (version->decl);
    }

  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver;
}
23921 /* Hook to determine if one function can safely inline another. */
23923 static bool
23924 rs6000_can_inline_p (tree caller, tree callee)
23926 bool ret = false;
23927 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
23928 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
23930 /* If callee has no option attributes, then it is ok to inline. */
23931 if (!callee_tree)
23932 ret = true;
23934 /* If caller has no option attributes, but callee does then it is not ok to
23935 inline. */
23936 else if (!caller_tree)
23937 ret = false;
23939 else
23941 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
23942 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
23944 /* Callee's options should a subset of the caller's, i.e. a vsx function
23945 can inline an altivec function but a non-vsx function can't inline a
23946 vsx function. */
23947 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
23948 == callee_opts->x_rs6000_isa_flags)
23949 ret = true;
23952 if (TARGET_DEBUG_TARGET)
23953 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
23954 get_decl_name (caller), get_decl_name (callee),
23955 (ret ? "can" : "cannot"));
23957 return ret;
23960 /* Allocate a stack temp and fixup the address so it meets the particular
23961 memory requirements (either offetable or REG+REG addressing). */
23964 rs6000_allocate_stack_temp (machine_mode mode,
23965 bool offsettable_p,
23966 bool reg_reg_p)
23968 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
23969 rtx addr = XEXP (stack, 0);
23970 int strict_p = reload_completed;
23972 if (!legitimate_indirect_address_p (addr, strict_p))
23974 if (offsettable_p
23975 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
23976 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
23978 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
23979 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
23982 return stack;
23985 /* Given a memory reference, if it is not a reg or reg+reg addressing,
23986 convert to such a form to deal with memory reference instructions
23987 like STFIWX and LDBRX that only take reg+reg addressing. */
23990 rs6000_force_indexed_or_indirect_mem (rtx x)
23992 machine_mode mode = GET_MODE (x);
23994 gcc_assert (MEM_P (x));
23995 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
23997 rtx addr = XEXP (x, 0);
23998 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24000 rtx reg = XEXP (addr, 0);
24001 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24002 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24003 gcc_assert (REG_P (reg));
24004 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24005 addr = reg;
24007 else if (GET_CODE (addr) == PRE_MODIFY)
24009 rtx reg = XEXP (addr, 0);
24010 rtx expr = XEXP (addr, 1);
24011 gcc_assert (REG_P (reg));
24012 gcc_assert (GET_CODE (expr) == PLUS);
24013 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24014 addr = reg;
24017 if (GET_CODE (addr) == PLUS)
24019 rtx op0 = XEXP (addr, 0);
24020 rtx op1 = XEXP (addr, 1);
24021 op0 = force_reg (Pmode, op0);
24022 op1 = force_reg (Pmode, op1);
24023 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24025 else
24026 x = replace_equiv_address (x, force_reg (Pmode, addr));
24029 return x;
24032 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24034 On the RS/6000, all integer constants are acceptable, most won't be valid
24035 for particular insns, though. Only easy FP constants are acceptable. */
24037 static bool
24038 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24040 if (TARGET_ELF && tls_referenced_p (x))
24041 return false;
24043 if (CONST_DOUBLE_P (x))
24044 return easy_fp_constant (x, mode);
24046 if (GET_CODE (x) == CONST_VECTOR)
24047 return easy_vector_constant (x, mode);
24049 return true;
24053 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24055 static bool
24056 chain_already_loaded (rtx_insn *last)
24058 for (; last != NULL; last = PREV_INSN (last))
24060 if (NONJUMP_INSN_P (last))
24062 rtx patt = PATTERN (last);
24064 if (GET_CODE (patt) == SET)
24066 rtx lhs = XEXP (patt, 0);
24068 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24069 return true;
24073 return false;
/* Expand code to perform a call under the AIX or ELFv2 ABI.
   VALUE is the return-value rtx (or NULL for a void call), FUNC_DESC the
   callee (symbol or function-descriptor pointer), TLSARG an optional TLS
   argument, and COOKIE the flags word (CALL_LONG etc.).  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;
  bool is_pltseq_longcall;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  is_pltseq_longcall = false;
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      if (TARGET_PLTSEQ)
	is_pltseq_longcall = true;
    }

  /* Handle indirect calls.  */
  if (!SYMBOL_REF_P (func)
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
    {
      if (!rs6000_pcrel_p (cfun))
	{
	  /* Save the TOC into its reserved slot before the call,
	     and prepare to restore it after the call.  */
	  rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
	  rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
						 gen_rtvec (1, stack_toc_offset),
						 UNSPEC_TOCSLOT);
	  toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

	  /* Can we optimize saving the TOC in the prologue or
	     do we need to do it at every call?  */
	  if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
	    cfun->machine->save_toc_in_prologue = true;
	  else
	    {
	      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
	      rtx stack_toc_mem = gen_frame_mem (Pmode,
						 gen_rtx_PLUS (Pmode, stack_ptr,
							       stack_toc_offset));
	      MEM_VOLATILE_P (stack_toc_mem) = 1;
	      if (is_pltseq_longcall)
		{
		  /* Use USPEC_PLTSEQ here to mark the store for linker
		     PLT sequence editing.  */
		  rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
		  rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
		  emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
		}
	      else
		emit_move_insn (stack_toc_mem, toc_reg);
	    }
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* A function pointer in the ELFv2 ABI is just a plain address, but
	     the ABI requires it to be loaded into r12 before the call.  */
	  func_addr = gen_rtx_REG (Pmode, 12);
	  if (!rtx_equal_p (func_addr, func))
	    emit_move_insn (func_addr, func);
	  abi_reg = func_addr;
	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  Needed to mark
	     this insn for linker plt sequence editing too.  */
	  func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
	  if (is_pltseq_longcall)
	    {
	      rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
	      rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	      emit_insn (gen_rtx_SET (func_addr, mark_func));
	      v = gen_rtvec (2, func_addr, func_desc);
	      func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	    }
	  else
	    emit_move_insn (func_addr, abi_reg);
	}
      else
	{
	  /* A function pointer under AIX is a pointer to a data area whose
	     first word contains the actual address of the function, whose
	     second word contains a pointer to its TOC, and whose third word
	     contains a value to place in the static chain register (r11).
	     Note that if we load the static chain, our "trampoline" need
	     not have any executable code.  */

	  /* Load up address of the actual function.  */
	  func = force_reg (Pmode, func);
	  func_addr = gen_reg_rtx (Pmode);
	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));

	  /* Indirect calls via CTR are strongly preferred over indirect
	     calls via LR, so move the address there.  */
	  rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
	  emit_move_insn (ctr_reg, func_addr);
	  func_addr = ctr_reg;

	  /* Prepare to load the TOC of the called function.  Note that the
	     TOC load must happen immediately before the actual call so
	     that unwinding the TOC registers works correctly.  See the
	     comment in frob_update_context.  */
	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
					  gen_rtx_PLUS (Pmode, func,
							func_toc_offset));
	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

	  /* If we have a static chain, load it up.  But, if the call was
	     originally direct, the 3rd word has not been written since no
	     trampoline has been built, so we ought not to load it, lest we
	     override a static chain value.  */
	  if (!(GET_CODE (func_desc) == SYMBOL_REF
		&& SYMBOL_REF_FUNCTION_P (func_desc))
	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
	      && !chain_already_loaded (get_current_sequence ()->next->last))
	    {
	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
					     gen_rtx_PLUS (Pmode, func,
							   func_sc_offset));
	      emit_move_insn (sc_reg, func_sc_mem);
	      abi_reg = sc_reg;
	    }
	}
    }
  else
    {
      /* No TOC register needed for calls from PC-relative callers.  */
      if (!rs6000_pcrel_p (cfun))
	/* Direct calls use the TOC: for local calls, the callee will
	   assume the TOC register is set; for non-local calls, the
	   PLT stub needs the TOC register.  */
	abi_reg = toc_reg;
      func_addr = func;
    }

  /* Create the call: the CALL itself, plus optional TOC load/restore and
     the mandatory LR clobber, wrapped in one PARALLEL.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  n_call = 1;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
24246 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24248 void
24249 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24251 rtx call[2];
24252 rtx insn;
24254 gcc_assert (INTVAL (cookie) == 0);
24256 if (global_tlsarg)
24257 tlsarg = global_tlsarg;
24259 /* Create the call. */
24260 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24261 if (value != NULL_RTX)
24262 call[0] = gen_rtx_SET (value, call[0]);
24264 call[1] = simple_return_rtx;
24266 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24267 insn = emit_call_insn (insn);
24269 /* Note use of the TOC register. */
24270 if (!rs6000_pcrel_p (cfun))
24271 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24272 gen_rtx_REG (Pmode, TOC_REGNUM));
/* Expand code to perform a call under the SYSV4 ABI.
   VALUE is the return-value rtx (or NULL), FUNC_DESC the callee, TLSARG an
   optional TLS argument, and COOKIE the call flags word.  */

void
rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[4];
  rtx insn;
  rtx abi_reg = NULL_RTX;
  int n;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctrl by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, so move the address there.  That can't be left
	 to reload because we want to mark every instruction in an
	 inline PLT call sequence with a reloc, enabling the linker to
	 edit the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  /* Wrap the CTR load in UNSPEC_PLTSEQ so the linker can
	     recognize and edit the inline PLT sequence.  */
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, cookie);
  n = 2;
  /* Non-local secure-PLT PIC calls also need the GOT pointer live.  */
  if (TARGET_SECURE_PLT
      && flag_pic
      && GET_CODE (func_addr) == SYMBOL_REF
      && !SYMBOL_REF_LOCAL_P (func_addr))
    call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);

  call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
/* Expand code to perform a sibling call under the SysV4 ABI.
   Mirrors rs6000_call_sysv, but ends in a simple return instead of
   clobbering LR.  */

void
rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  rtx abi_reg = NULL_RTX;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if ((INTVAL (cookie) & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      func = rs6000_longcall_ref (func_desc, tlsarg);
      /* If the longcall was implemented as an inline PLT call using
	 PLT unspecs then func will be REG:r11.  If not, func will be
	 a pseudo reg.  The inline PLT call sequence supports lazy
	 linking (and longcalls to functions in dlopen'd libraries).
	 The other style of longcalls don't.  The lazy linking entry
	 to the dynamic symbol resolver requires r11 be the function
	 address (as it is for linker generated PLT stubs).  Ensure
	 r11 stays valid to the bctr by marking r11 used by the call.  */
      if (TARGET_PLTSEQ)
	abi_reg = func;
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect sibcalls must go via CTR.  That can't be left to
	 reload because we want to mark every instruction in an inline
	 PLT call sequence with a reloc, enabling the linker to edit
	 the sequence back to a direct call when that makes sense.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      if (abi_reg)
	{
	  /* Wrap the CTR load in UNSPEC_PLTSEQ so the linker can
	     recognize and edit the inline PLT sequence.  */
	  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
	  rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	  emit_insn (gen_rtx_SET (func_addr, mark_func));
	  v = gen_rtvec (2, func_addr, func_desc);
	  func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
	}
      else
	emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call: CALL + cookie USE + simple return.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, cookie);
  call[2] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
#if TARGET_MACHO

/* Expand code to perform a call under the Darwin ABI.
   Modulo handling of mlongcall, this is much the same as sysv.
   if/when the longcall optimisation is removed, we could drop this
   code and use the sysv case (taking care to avoid the tls stuff).

   We can use this for sibcalls too, if needed.  */

void
rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
		      rtx cookie, bool sibcall)
{
  rtx func = func_desc;
  rtx func_addr;
  rtx call[3];
  rtx insn;
  int cookie_val = INTVAL (cookie);
  bool make_island = false;

  /* Handle longcall attributes, there are two cases for Darwin:
     1) Newer linkers are capable of synthesising any branch islands needed.
     2) We need a helper branch island synthesised by the compiler.
     The second case has mostly been retired and we don't use it for m64.
     In fact, it is an optimisation, we could just indirect as sysv does..
     ... however, backwards compatibility for now.
     If we're going to use this, then we need to keep the CALL_LONG bit set,
     so that we can pick up the special insn form later.  */
  if ((cookie_val & CALL_LONG) != 0
      && GET_CODE (func_desc) == SYMBOL_REF)
    {
      /* FIXME: the longcall opt should not hang off this flag, it is most
	 likely incorrect for kernel-mode code-generation.  */
      if (darwin_symbol_stubs && TARGET_32BIT)
	make_island = true; /* Do nothing yet, retain the CALL_LONG flag.  */
      else
	{
	  /* The linker is capable of doing this, but the user explicitly
	     asked for -mlongcall, so we'll do the 'normal' version.  */
	  func = rs6000_longcall_ref (func_desc, NULL_RTX);
	  cookie_val &= ~CALL_LONG; /* Handled, zap it.  */
	}
    }

  /* Handle indirect calls.  */
  if (GET_CODE (func) != SYMBOL_REF)
    {
      func = force_reg (Pmode, func);

      /* Indirect calls via CTR are strongly preferred over indirect
	 calls via LR, and are required for indirect sibcalls, so move
	 the address there.  */
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, func);
    }
  else
    func_addr = func;

  /* Create the call: CALL + cookie USE, ending in either a simple return
     (sibcall) or an LR clobber (normal call).  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));

  if (sibcall)
    call[2] = simple_return_rtx;
  else
    call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
  insn = emit_call_insn (insn);
  /* Now we have the debug info in the insn, we can set up the branch island
     if we're using one.  */
  if (make_island)
    {
      tree funname = get_identifier (XSTR (func_desc, 0));

      if (no_previous_def (funname))
	{
	  rtx label_rtx = gen_label_rtx ();
	  char *label_buf, temp_buf[256];
	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
				       CODE_LABEL_NUMBER (label_rtx));
	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
	  tree labelname = get_identifier (label_buf);
	  add_compiler_branch_island (labelname, funname,
				      insn_line ((const rtx_insn*)insn));
	}
    }
}
#endif
24515 void
24516 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24517 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24519 #if TARGET_MACHO
24520 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24521 #else
24522 gcc_unreachable();
24523 #endif
24527 void
24528 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24529 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24531 #if TARGET_MACHO
24532 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24533 #else
24534 gcc_unreachable();
24535 #endif
24538 /* Return whether we should generate PC-relative code for FNDECL. */
24539 bool
24540 rs6000_fndecl_pcrel_p (const_tree fndecl)
24542 if (DEFAULT_ABI != ABI_ELFv2)
24543 return false;
24545 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24547 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24548 && TARGET_CMODEL == CMODEL_MEDIUM);
24551 /* Return whether we should generate PC-relative code for *FN. */
24552 bool
24553 rs6000_pcrel_p (struct function *fn)
24555 if (DEFAULT_ABI != ABI_ELFv2)
24556 return false;
24558 /* Optimize usual case. */
24559 if (fn == cfun)
24560 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24561 && TARGET_CMODEL == CMODEL_MEDIUM);
24563 return rs6000_fndecl_pcrel_p (fn->decl);
/* Given an address (ADDR), a mode (MODE), and what the format of the
   non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
   for the address.  */

enum insn_form
address_to_insn_form (rtx addr,
		      machine_mode mode,
		      enum non_prefixed_form non_prefixed_format)
{
  /* Single register is easy.  */
  if (REG_P (addr) || SUBREG_P (addr))
    return INSN_FORM_BASE_REG;

  /* If the non prefixed instruction format doesn't support offset addressing,
     make sure only indexed addressing is allowed.

     We special case SDmode so that the register allocator does not try to move
     SDmode through GPR registers, but instead uses the 32-bit integer load and
     store instructions for the floating point registers.  */
  if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
    {
      if (GET_CODE (addr) != PLUS)
	return INSN_FORM_BAD;

      /* Both PLUS operands must be (sub)registers for X-form.  */
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      if (!REG_P (op0) && !SUBREG_P (op0))
	return INSN_FORM_BAD;

      if (!REG_P (op1) && !SUBREG_P (op1))
	return INSN_FORM_BAD;

      return INSN_FORM_X;
    }

  /* Deal with update forms.  */
  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
    return INSN_FORM_UPDATE;

  /* Handle PC-relative symbols and labels.  Check for both local and external
     symbols.  Assume labels are always local.  */
  if (TARGET_PCREL)
    {
      if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
	return INSN_FORM_PCREL_EXTERNAL;

      if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
	return INSN_FORM_PCREL_LOCAL;
    }

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Recognize LO_SUM addresses used with TOC and 32-bit addressing.  */
  if (GET_CODE (addr) == LO_SUM)
    return INSN_FORM_LO_SUM;

  /* Everything below must be an offset address of some form.  */
  if (GET_CODE (addr) != PLUS)
    return INSN_FORM_BAD;

  rtx op0 = XEXP (addr, 0);
  rtx op1 = XEXP (addr, 1);

  /* Check for indexed addresses.  */
  if (REG_P (op1) || SUBREG_P (op1))
    {
      if (REG_P (op0) || SUBREG_P (op0))
	return INSN_FORM_X;

      return INSN_FORM_BAD;
    }

  if (!CONST_INT_P (op1))
    return INSN_FORM_BAD;

  /* Even prefixed instructions only reach 34 bits of signed offset.  */
  HOST_WIDE_INT offset = INTVAL (op1);
  if (!SIGNED_34BIT_OFFSET_P (offset))
    return INSN_FORM_BAD;

  /* Check for local and external PC-relative addresses.  Labels are always
     local.  */
  if (TARGET_PCREL)
    {
      if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
	return INSN_FORM_PCREL_EXTERNAL;

      if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
	return INSN_FORM_PCREL_LOCAL;
    }

  /* If it isn't PC-relative, the address must use a base register.  */
  if (!REG_P (op0) && !SUBREG_P (op0))
    return INSN_FORM_BAD;

  /* Large offsets must be prefixed.  */
  if (!SIGNED_16BIT_OFFSET_P (offset))
    {
      if (TARGET_PREFIXED_ADDR)
	return INSN_FORM_PREFIXED_NUMERIC;

      return INSN_FORM_BAD;
    }

  /* We have a 16-bit offset, see what default instruction format to use.  */
  if (non_prefixed_format == NON_PREFIXED_DEFAULT)
    {
      unsigned size = GET_MODE_SIZE (mode);

      /* On 64-bit systems, assume 64-bit integers need to use DS form
	 addresses (for LD/STD).  VSX vectors need to use DQ form addresses
	 (for LXV and STXV).  TImode is problematical in that its normal usage
	 is expected to be GPRs where it wants a DS instruction format, but if
	 it goes into the vector registers, it wants a DQ instruction
	 format.  */
      if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
	non_prefixed_format = NON_PREFIXED_DS;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
	non_prefixed_format = NON_PREFIXED_DQ;

      else
	non_prefixed_format = NON_PREFIXED_D;
    }

  /* Classify the D/DS/DQ-form addresses.  */
  switch (non_prefixed_format)
    {
    /* Instruction format D, all 16 bits are valid.  */
    case NON_PREFIXED_D:
      return INSN_FORM_D;

    /* Instruction format DS, bottom 2 bits must be 0.  */
    case NON_PREFIXED_DS:
      if ((offset & 3) == 0)
	return INSN_FORM_DS;

      else if (TARGET_PREFIXED_ADDR)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

    /* Instruction format DQ, bottom 4 bits must be 0.  */
    case NON_PREFIXED_DQ:
      if ((offset & 15) == 0)
	return INSN_FORM_DQ;

      else if (TARGET_PREFIXED_ADDR)
	return INSN_FORM_PREFIXED_NUMERIC;

      else
	return INSN_FORM_BAD;

    default:
      break;
    }

  return INSN_FORM_BAD;
}
24729 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24730 instruction format (D/DS/DQ) used for offset memory. */
24732 static enum non_prefixed_form
24733 reg_to_non_prefixed (rtx reg, machine_mode mode)
24735 /* If it isn't a register, use the defaults. */
24736 if (!REG_P (reg) && !SUBREG_P (reg))
24737 return NON_PREFIXED_DEFAULT;
24739 unsigned int r = reg_or_subregno (reg);
24741 /* If we have a pseudo, use the default instruction format. */
24742 if (!HARD_REGISTER_NUM_P (r))
24743 return NON_PREFIXED_DEFAULT;
24745 unsigned size = GET_MODE_SIZE (mode);
24747 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
24748 128-bit floating point, and 128-bit integers. */
24749 if (FP_REGNO_P (r))
24751 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24752 return NON_PREFIXED_D;
24754 else if (size < 8)
24755 return NON_PREFIXED_X;
24757 else if (TARGET_VSX && size >= 16
24758 && (VECTOR_MODE_P (mode)
24759 || FLOAT128_VECTOR_P (mode)
24760 || mode == TImode || mode == CTImode))
24761 return NON_PREFIXED_DQ;
24763 else
24764 return NON_PREFIXED_DEFAULT;
24767 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
24768 128-bit floating point, and 128-bit integers. */
24769 else if (ALTIVEC_REGNO_P (r))
24771 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24772 return NON_PREFIXED_DS;
24774 else if (size < 8)
24775 return NON_PREFIXED_X;
24777 else if (TARGET_VSX && size >= 16
24778 && (VECTOR_MODE_P (mode)
24779 || FLOAT128_VECTOR_P (mode)
24780 || mode == TImode || mode == CTImode))
24781 return NON_PREFIXED_DQ;
24783 else
24784 return NON_PREFIXED_DEFAULT;
24787 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
24788 otherwise. Assume that any other register, such as LR, CRs, etc. will go
24789 through the GPR registers for memory operations. */
24790 else if (TARGET_POWERPC64 && size >= 8)
24791 return NON_PREFIXED_DS;
24793 return NON_PREFIXED_D;
24797 /* Whether a load instruction is a prefixed instruction. This is called from
24798 the prefixed attribute processing. */
24800 bool
24801 prefixed_load_p (rtx_insn *insn)
24803 /* Validate the insn to make sure it is a normal load insn. */
24804 extract_insn_cached (insn);
24805 if (recog_data.n_operands < 2)
24806 return false;
24808 rtx reg = recog_data.operand[0];
24809 rtx mem = recog_data.operand[1];
24811 if (!REG_P (reg) && !SUBREG_P (reg))
24812 return false;
24814 if (!MEM_P (mem))
24815 return false;
24817 /* Prefixed load instructions do not support update or indexed forms. */
24818 if (get_attr_indexed (insn) == INDEXED_YES
24819 || get_attr_update (insn) == UPDATE_YES)
24820 return false;
24822 /* LWA uses the DS format instead of the D format that LWZ uses. */
24823 enum non_prefixed_form non_prefixed;
24824 machine_mode reg_mode = GET_MODE (reg);
24825 machine_mode mem_mode = GET_MODE (mem);
24827 if (mem_mode == SImode && reg_mode == DImode
24828 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
24829 non_prefixed = NON_PREFIXED_DS;
24831 else
24832 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24834 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24837 /* Whether a store instruction is a prefixed instruction. This is called from
24838 the prefixed attribute processing. */
24840 bool
24841 prefixed_store_p (rtx_insn *insn)
24843 /* Validate the insn to make sure it is a normal store insn. */
24844 extract_insn_cached (insn);
24845 if (recog_data.n_operands < 2)
24846 return false;
24848 rtx mem = recog_data.operand[0];
24849 rtx reg = recog_data.operand[1];
24851 if (!REG_P (reg) && !SUBREG_P (reg))
24852 return false;
24854 if (!MEM_P (mem))
24855 return false;
24857 /* Prefixed store instructions do not support update or indexed forms. */
24858 if (get_attr_indexed (insn) == INDEXED_YES
24859 || get_attr_update (insn) == UPDATE_YES)
24860 return false;
24862 machine_mode mem_mode = GET_MODE (mem);
24863 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24864 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24867 /* Whether a load immediate or add instruction is a prefixed instruction. This
24868 is called from the prefixed attribute processing. */
24870 bool
24871 prefixed_paddi_p (rtx_insn *insn)
24873 rtx set = single_set (insn);
24874 if (!set)
24875 return false;
24877 rtx dest = SET_DEST (set);
24878 rtx src = SET_SRC (set);
24880 if (!REG_P (dest) && !SUBREG_P (dest))
24881 return false;
24883 /* Is this a load immediate that can't be done with a simple ADDI or
24884 ADDIS? */
24885 if (CONST_INT_P (src))
24886 return (satisfies_constraint_eI (src)
24887 && !satisfies_constraint_I (src)
24888 && !satisfies_constraint_L (src));
24890 /* Is this a PADDI instruction that can't be done with a simple ADDI or
24891 ADDIS? */
24892 if (GET_CODE (src) == PLUS)
24894 rtx op1 = XEXP (src, 1);
24896 return (CONST_INT_P (op1)
24897 && satisfies_constraint_eI (op1)
24898 && !satisfies_constraint_I (op1)
24899 && !satisfies_constraint_L (op1));
24902 /* If not, is it a load of a PC-relative address? */
24903 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
24904 return false;
24906 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
24907 return false;
24909 enum insn_form iform = address_to_insn_form (src, Pmode,
24910 NON_PREFIXED_DEFAULT);
24912 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
24915 /* Whether the next instruction needs a 'p' prefix issued before the
24916 instruction is printed out. */
24917 static bool next_insn_prefixed_p;
24919 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
24920 outputting the assembler code. On the PowerPC, we remember if the current
24921 insn is a prefixed insn where we need to emit a 'p' before the insn.
24923 In addition, if the insn is part of a PC-relative reference to an external
24924 label optimization, this is recorded also. */
24925 void
24926 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
24928 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
24929 return;
24932 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
24933 We use it to emit a 'p' for prefixed insns that is set in
24934 FINAL_PRESCAN_INSN. */
24935 void
24936 rs6000_asm_output_opcode (FILE *stream)
24938 if (next_insn_prefixed_p)
24939 fprintf (stream, "p");
24941 return;
24945 #ifdef HAVE_GAS_HIDDEN
24946 # define USE_HIDDEN_LINKONCE 1
24947 #else
24948 # define USE_HIDDEN_LINKONCE 0
24949 #endif
24951 /* Fills in the label name that should be used for a 476 link stack thunk. */
24953 void
24954 get_ppc476_thunk_name (char name[32])
24956 gcc_assert (TARGET_LINK_STACK);
24958 if (USE_HIDDEN_LINKONCE)
24959 sprintf (name, "__ppc476.get_thunk");
24960 else
24961 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
24964 /* This function emits the simple thunk routine that is used to preserve
24965 the link stack on the 476 cpu. */
24967 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
24968 static void
24969 rs6000_code_end (void)
24971 char name[32];
24972 tree decl;
24974 if (!TARGET_LINK_STACK)
24975 return;
24977 get_ppc476_thunk_name (name);
24979 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
24980 build_function_type_list (void_type_node, NULL_TREE));
24981 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
24982 NULL_TREE, void_type_node);
24983 TREE_PUBLIC (decl) = 1;
24984 TREE_STATIC (decl) = 1;
24986 #if RS6000_WEAK
24987 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
24989 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
24990 targetm.asm_out.unique_section (decl, 0);
24991 switch_to_section (get_named_section (decl, NULL, 0));
24992 DECL_WEAK (decl) = 1;
24993 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
24994 targetm.asm_out.globalize_label (asm_out_file, name);
24995 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
24996 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
24998 else
24999 #endif
25001 switch_to_section (text_section);
25002 ASM_OUTPUT_LABEL (asm_out_file, name);
25005 DECL_INITIAL (decl) = make_node (BLOCK);
25006 current_function_decl = decl;
25007 allocate_struct_function (decl, false);
25008 init_function_start (decl);
25009 first_function_block_is_cold = false;
25010 /* Make sure unwind info is emitted for the thunk if needed. */
25011 final_start_function (emit_barrier (), asm_out_file, 1);
25013 fputs ("\tblr\n", asm_out_file);
25015 final_end_function ();
25016 init_insn_lengths ();
25017 free_after_compilation (cfun);
25018 set_cfun (NULL);
25019 current_function_decl = NULL;
25022 /* Add r30 to hard reg set if the prologue sets it up and it is not
25023 pic_offset_table_rtx. */
25025 static void
25026 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25028 if (!TARGET_SINGLE_PIC_BASE
25029 && TARGET_TOC
25030 && TARGET_MINIMAL_TOC
25031 && !constant_pool_empty_p ())
25032 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25033 if (cfun->machine->split_stack_argp_used)
25034 add_to_hard_reg_set (&set->set, Pmode, 12);
25036 /* Make sure the hard reg set doesn't include r2, which was possibly added
25037 via PIC_OFFSET_TABLE_REGNUM. */
25038 if (TARGET_TOC)
25039 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25043 /* Helper function for rs6000_split_logical to emit a logical instruction after
25044 spliting the operation to single GPR registers.
25046 DEST is the destination register.
25047 OP1 and OP2 are the input source registers.
25048 CODE is the base operation (AND, IOR, XOR, NOT).
25049 MODE is the machine mode.
25050 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25051 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25052 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25054 static void
25055 rs6000_split_logical_inner (rtx dest,
25056 rtx op1,
25057 rtx op2,
25058 enum rtx_code code,
25059 machine_mode mode,
25060 bool complement_final_p,
25061 bool complement_op1_p,
25062 bool complement_op2_p)
25064 rtx bool_rtx;
25066 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25067 if (op2 && CONST_INT_P (op2)
25068 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25069 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25071 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25072 HOST_WIDE_INT value = INTVAL (op2) & mask;
25074 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25075 if (code == AND)
25077 if (value == 0)
25079 emit_insn (gen_rtx_SET (dest, const0_rtx));
25080 return;
25083 else if (value == mask)
25085 if (!rtx_equal_p (dest, op1))
25086 emit_insn (gen_rtx_SET (dest, op1));
25087 return;
25091 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25092 into separate ORI/ORIS or XORI/XORIS instrucitons. */
25093 else if (code == IOR || code == XOR)
25095 if (value == 0)
25097 if (!rtx_equal_p (dest, op1))
25098 emit_insn (gen_rtx_SET (dest, op1));
25099 return;
25104 if (code == AND && mode == SImode
25105 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25107 emit_insn (gen_andsi3 (dest, op1, op2));
25108 return;
25111 if (complement_op1_p)
25112 op1 = gen_rtx_NOT (mode, op1);
25114 if (complement_op2_p)
25115 op2 = gen_rtx_NOT (mode, op2);
25117 /* For canonical RTL, if only one arm is inverted it is the first. */
25118 if (!complement_op1_p && complement_op2_p)
25119 std::swap (op1, op2);
25121 bool_rtx = ((code == NOT)
25122 ? gen_rtx_NOT (mode, op1)
25123 : gen_rtx_fmt_ee (code, mode, op1, op2));
25125 if (complement_final_p)
25126 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25128 emit_insn (gen_rtx_SET (dest, bool_rtx));
25131 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25132 operations are split immediately during RTL generation to allow for more
25133 optimizations of the AND/IOR/XOR.
25135 OPERANDS is an array containing the destination and two input operands.
25136 CODE is the base operation (AND, IOR, XOR, NOT).
25137 MODE is the machine mode.
25138 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25139 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25140 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25141 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25142 formation of the AND instructions. */
25144 static void
25145 rs6000_split_logical_di (rtx operands[3],
25146 enum rtx_code code,
25147 bool complement_final_p,
25148 bool complement_op1_p,
25149 bool complement_op2_p)
25151 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25152 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25153 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25154 enum hi_lo { hi = 0, lo = 1 };
25155 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25156 size_t i;
25158 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25159 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25160 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25161 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25163 if (code == NOT)
25164 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25165 else
25167 if (!CONST_INT_P (operands[2]))
25169 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25170 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25172 else
25174 HOST_WIDE_INT value = INTVAL (operands[2]);
25175 HOST_WIDE_INT value_hi_lo[2];
25177 gcc_assert (!complement_final_p);
25178 gcc_assert (!complement_op1_p);
25179 gcc_assert (!complement_op2_p);
25181 value_hi_lo[hi] = value >> 32;
25182 value_hi_lo[lo] = value & lower_32bits;
25184 for (i = 0; i < 2; i++)
25186 HOST_WIDE_INT sub_value = value_hi_lo[i];
25188 if (sub_value & sign_bit)
25189 sub_value |= upper_32bits;
25191 op2_hi_lo[i] = GEN_INT (sub_value);
25193 /* If this is an AND instruction, check to see if we need to load
25194 the value in a register. */
25195 if (code == AND && sub_value != -1 && sub_value != 0
25196 && !and_operand (op2_hi_lo[i], SImode))
25197 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25202 for (i = 0; i < 2; i++)
25204 /* Split large IOR/XOR operations. */
25205 if ((code == IOR || code == XOR)
25206 && CONST_INT_P (op2_hi_lo[i])
25207 && !complement_final_p
25208 && !complement_op1_p
25209 && !complement_op2_p
25210 && !logical_const_operand (op2_hi_lo[i], SImode))
25212 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25213 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25214 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25215 rtx tmp = gen_reg_rtx (SImode);
25217 /* Make sure the constant is sign extended. */
25218 if ((hi_16bits & sign_bit) != 0)
25219 hi_16bits |= upper_32bits;
25221 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25222 code, SImode, false, false, false);
25224 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25225 code, SImode, false, false, false);
25227 else
25228 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25229 code, SImode, complement_final_p,
25230 complement_op1_p, complement_op2_p);
25233 return;
25236 /* Split the insns that make up boolean operations operating on multiple GPR
25237 registers. The boolean MD patterns ensure that the inputs either are
25238 exactly the same as the output registers, or there is no overlap.
25240 OPERANDS is an array containing the destination and two input operands.
25241 CODE is the base operation (AND, IOR, XOR, NOT).
25242 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25243 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25244 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25246 void
25247 rs6000_split_logical (rtx operands[3],
25248 enum rtx_code code,
25249 bool complement_final_p,
25250 bool complement_op1_p,
25251 bool complement_op2_p)
25253 machine_mode mode = GET_MODE (operands[0]);
25254 machine_mode sub_mode;
25255 rtx op0, op1, op2;
25256 int sub_size, regno0, regno1, nregs, i;
25258 /* If this is DImode, use the specialized version that can run before
25259 register allocation. */
25260 if (mode == DImode && !TARGET_POWERPC64)
25262 rs6000_split_logical_di (operands, code, complement_final_p,
25263 complement_op1_p, complement_op2_p);
25264 return;
25267 op0 = operands[0];
25268 op1 = operands[1];
25269 op2 = (code == NOT) ? NULL_RTX : operands[2];
25270 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25271 sub_size = GET_MODE_SIZE (sub_mode);
25272 regno0 = REGNO (op0);
25273 regno1 = REGNO (op1);
25275 gcc_assert (reload_completed);
25276 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25277 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25279 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25280 gcc_assert (nregs > 1);
25282 if (op2 && REG_P (op2))
25283 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25285 for (i = 0; i < nregs; i++)
25287 int offset = i * sub_size;
25288 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25289 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25290 rtx sub_op2 = ((code == NOT)
25291 ? NULL_RTX
25292 : simplify_subreg (sub_mode, op2, mode, offset));
25294 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25295 complement_final_p, complement_op1_p,
25296 complement_op2_p);
25299 return;
25303 /* Return true if the peephole2 can combine a load involving a combination of
25304 an addis instruction and a load with an offset that can be fused together on
25305 a power8. */
25307 bool
25308 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25309 rtx addis_value, /* addis value. */
25310 rtx target, /* target register that is loaded. */
25311 rtx mem) /* bottom part of the memory addr. */
25313 rtx addr;
25314 rtx base_reg;
25316 /* Validate arguments. */
25317 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25318 return false;
25320 if (!base_reg_operand (target, GET_MODE (target)))
25321 return false;
25323 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25324 return false;
25326 /* Allow sign/zero extension. */
25327 if (GET_CODE (mem) == ZERO_EXTEND
25328 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25329 mem = XEXP (mem, 0);
25331 if (!MEM_P (mem))
25332 return false;
25334 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25335 return false;
25337 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25338 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25339 return false;
25341 /* Validate that the register used to load the high value is either the
25342 register being loaded, or we can safely replace its use.
25344 This function is only called from the peephole2 pass and we assume that
25345 there are 2 instructions in the peephole (addis and load), so we want to
25346 check if the target register was not used in the memory address and the
25347 register to hold the addis result is dead after the peephole. */
25348 if (REGNO (addis_reg) != REGNO (target))
25350 if (reg_mentioned_p (target, mem))
25351 return false;
25353 if (!peep2_reg_dead_p (2, addis_reg))
25354 return false;
25356 /* If the target register being loaded is the stack pointer, we must
25357 avoid loading any other value into it, even temporarily. */
25358 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25359 return false;
25362 base_reg = XEXP (addr, 0);
25363 return REGNO (addis_reg) == REGNO (base_reg);
25366 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25367 sequence. We adjust the addis register to use the target register. If the
25368 load sign extends, we adjust the code to do the zero extending load, and an
25369 explicit sign extension later since the fusion only covers zero extending
25370 loads.
25372 The operands are:
25373 operands[0] register set with addis (to be replaced with target)
25374 operands[1] value set via addis
25375 operands[2] target register being loaded
25376 operands[3] D-form memory reference using operands[0]. */
25378 void
25379 expand_fusion_gpr_load (rtx *operands)
25381 rtx addis_value = operands[1];
25382 rtx target = operands[2];
25383 rtx orig_mem = operands[3];
25384 rtx new_addr, new_mem, orig_addr, offset;
25385 enum rtx_code plus_or_lo_sum;
25386 machine_mode target_mode = GET_MODE (target);
25387 machine_mode extend_mode = target_mode;
25388 machine_mode ptr_mode = Pmode;
25389 enum rtx_code extend = UNKNOWN;
25391 if (GET_CODE (orig_mem) == ZERO_EXTEND
25392 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25394 extend = GET_CODE (orig_mem);
25395 orig_mem = XEXP (orig_mem, 0);
25396 target_mode = GET_MODE (orig_mem);
25399 gcc_assert (MEM_P (orig_mem));
25401 orig_addr = XEXP (orig_mem, 0);
25402 plus_or_lo_sum = GET_CODE (orig_addr);
25403 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25405 offset = XEXP (orig_addr, 1);
25406 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25407 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25409 if (extend != UNKNOWN)
25410 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25412 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25413 UNSPEC_FUSION_GPR);
25414 emit_insn (gen_rtx_SET (target, new_mem));
25416 if (extend == SIGN_EXTEND)
25418 int sub_off = ((BYTES_BIG_ENDIAN)
25419 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25420 : 0);
25421 rtx sign_reg
25422 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25424 emit_insn (gen_rtx_SET (target,
25425 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25428 return;
25431 /* Emit the addis instruction that will be part of a fused instruction
25432 sequence. */
25434 void
25435 emit_fusion_addis (rtx target, rtx addis_value)
25437 rtx fuse_ops[10];
25438 const char *addis_str = NULL;
25440 /* Emit the addis instruction. */
25441 fuse_ops[0] = target;
25442 if (satisfies_constraint_L (addis_value))
25444 fuse_ops[1] = addis_value;
25445 addis_str = "lis %0,%v1";
25448 else if (GET_CODE (addis_value) == PLUS)
25450 rtx op0 = XEXP (addis_value, 0);
25451 rtx op1 = XEXP (addis_value, 1);
25453 if (REG_P (op0) && CONST_INT_P (op1)
25454 && satisfies_constraint_L (op1))
25456 fuse_ops[1] = op0;
25457 fuse_ops[2] = op1;
25458 addis_str = "addis %0,%1,%v2";
25462 else if (GET_CODE (addis_value) == HIGH)
25464 rtx value = XEXP (addis_value, 0);
25465 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25467 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25468 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25469 if (TARGET_ELF)
25470 addis_str = "addis %0,%2,%1@toc@ha";
25472 else if (TARGET_XCOFF)
25473 addis_str = "addis %0,%1@u(%2)";
25475 else
25476 gcc_unreachable ();
25479 else if (GET_CODE (value) == PLUS)
25481 rtx op0 = XEXP (value, 0);
25482 rtx op1 = XEXP (value, 1);
25484 if (GET_CODE (op0) == UNSPEC
25485 && XINT (op0, 1) == UNSPEC_TOCREL
25486 && CONST_INT_P (op1))
25488 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25489 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25490 fuse_ops[3] = op1;
25491 if (TARGET_ELF)
25492 addis_str = "addis %0,%2,%1+%3@toc@ha";
25494 else if (TARGET_XCOFF)
25495 addis_str = "addis %0,%1+%3@u(%2)";
25497 else
25498 gcc_unreachable ();
25502 else if (satisfies_constraint_L (value))
25504 fuse_ops[1] = value;
25505 addis_str = "lis %0,%v1";
25508 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25510 fuse_ops[1] = value;
25511 addis_str = "lis %0,%1@ha";
25515 if (!addis_str)
25516 fatal_insn ("Could not generate addis value for fusion", addis_value);
25518 output_asm_insn (addis_str, fuse_ops);
25521 /* Emit a D-form load or store instruction that is the second instruction
25522 of a fusion sequence. */
25524 static void
25525 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25527 rtx fuse_ops[10];
25528 char insn_template[80];
25530 fuse_ops[0] = load_reg;
25531 fuse_ops[1] = addis_reg;
25533 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25535 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25536 fuse_ops[2] = offset;
25537 output_asm_insn (insn_template, fuse_ops);
25540 else if (GET_CODE (offset) == UNSPEC
25541 && XINT (offset, 1) == UNSPEC_TOCREL)
25543 if (TARGET_ELF)
25544 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25546 else if (TARGET_XCOFF)
25547 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25549 else
25550 gcc_unreachable ();
25552 fuse_ops[2] = XVECEXP (offset, 0, 0);
25553 output_asm_insn (insn_template, fuse_ops);
25556 else if (GET_CODE (offset) == PLUS
25557 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25558 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25559 && CONST_INT_P (XEXP (offset, 1)))
25561 rtx tocrel_unspec = XEXP (offset, 0);
25562 if (TARGET_ELF)
25563 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25565 else if (TARGET_XCOFF)
25566 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25568 else
25569 gcc_unreachable ();
25571 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25572 fuse_ops[3] = XEXP (offset, 1);
25573 output_asm_insn (insn_template, fuse_ops);
25576 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25578 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25580 fuse_ops[2] = offset;
25581 output_asm_insn (insn_template, fuse_ops);
25584 else
25585 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25587 return;
25590 /* Given an address, convert it into the addis and load offset parts. Addresses
25591 created during the peephole2 process look like:
25592 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25593 (unspec [(...)] UNSPEC_TOCREL)) */
25595 static void
25596 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25598 rtx hi, lo;
25600 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25602 hi = XEXP (addr, 0);
25603 lo = XEXP (addr, 1);
25605 else
25606 gcc_unreachable ();
25608 *p_hi = hi;
25609 *p_lo = lo;
25612 /* Return a string to fuse an addis instruction with a gpr load to the same
25613 register that we loaded up the addis instruction. The address that is used
25614 is the logical address that was formed during peephole2:
25615 (lo_sum (high) (low-part))
25617 The code is complicated, so we call output_asm_insn directly, and just
25618 return "". */
25620 const char *
25621 emit_fusion_gpr_load (rtx target, rtx mem)
25623 rtx addis_value;
25624 rtx addr;
25625 rtx load_offset;
25626 const char *load_str = NULL;
25627 machine_mode mode;
25629 if (GET_CODE (mem) == ZERO_EXTEND)
25630 mem = XEXP (mem, 0);
25632 gcc_assert (REG_P (target) && MEM_P (mem));
25634 addr = XEXP (mem, 0);
25635 fusion_split_address (addr, &addis_value, &load_offset);
25637 /* Now emit the load instruction to the same register. */
25638 mode = GET_MODE (mem);
25639 switch (mode)
25641 case E_QImode:
25642 load_str = "lbz";
25643 break;
25645 case E_HImode:
25646 load_str = "lhz";
25647 break;
25649 case E_SImode:
25650 case E_SFmode:
25651 load_str = "lwz";
25652 break;
25654 case E_DImode:
25655 case E_DFmode:
25656 gcc_assert (TARGET_POWERPC64);
25657 load_str = "ld";
25658 break;
25660 default:
25661 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25664 /* Emit the addis instruction. */
25665 emit_fusion_addis (target, addis_value);
25667 /* Emit the D-form load instruction. */
25668 emit_fusion_load (target, target, load_offset, load_str);
25670 return "";
25674 #ifdef RS6000_GLIBC_ATOMIC_FENV
25675 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25676 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25677 #endif
25679 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25681 static void
25682 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25684 if (!TARGET_HARD_FLOAT)
25686 #ifdef RS6000_GLIBC_ATOMIC_FENV
25687 if (atomic_hold_decl == NULL_TREE)
25689 atomic_hold_decl
25690 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25691 get_identifier ("__atomic_feholdexcept"),
25692 build_function_type_list (void_type_node,
25693 double_ptr_type_node,
25694 NULL_TREE));
25695 TREE_PUBLIC (atomic_hold_decl) = 1;
25696 DECL_EXTERNAL (atomic_hold_decl) = 1;
25699 if (atomic_clear_decl == NULL_TREE)
25701 atomic_clear_decl
25702 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25703 get_identifier ("__atomic_feclearexcept"),
25704 build_function_type_list (void_type_node,
25705 NULL_TREE));
25706 TREE_PUBLIC (atomic_clear_decl) = 1;
25707 DECL_EXTERNAL (atomic_clear_decl) = 1;
25710 tree const_double = build_qualified_type (double_type_node,
25711 TYPE_QUAL_CONST);
25712 tree const_double_ptr = build_pointer_type (const_double);
25713 if (atomic_update_decl == NULL_TREE)
25715 atomic_update_decl
25716 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25717 get_identifier ("__atomic_feupdateenv"),
25718 build_function_type_list (void_type_node,
25719 const_double_ptr,
25720 NULL_TREE));
25721 TREE_PUBLIC (atomic_update_decl) = 1;
25722 DECL_EXTERNAL (atomic_update_decl) = 1;
25725 tree fenv_var = create_tmp_var_raw (double_type_node);
25726 TREE_ADDRESSABLE (fenv_var) = 1;
25727 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
25729 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
25730 *clear = build_call_expr (atomic_clear_decl, 0);
25731 *update = build_call_expr (atomic_update_decl, 1,
25732 fold_convert (const_double_ptr, fenv_addr));
25733 #endif
25734 return;
25737 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
25738 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
25739 tree call_mffs = build_call_expr (mffs, 0);
25741 /* Generates the equivalent of feholdexcept (&fenv_var)
25743 *fenv_var = __builtin_mffs ();
25744 double fenv_hold;
25745 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
25746 __builtin_mtfsf (0xff, fenv_hold); */
25748 /* Mask to clear everything except for the rounding modes and non-IEEE
25749 arithmetic flag. */
25750 const unsigned HOST_WIDE_INT hold_exception_mask =
25751 HOST_WIDE_INT_C (0xffffffff00000007);
25753 tree fenv_var = create_tmp_var_raw (double_type_node);
25755 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
25757 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
25758 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25759 build_int_cst (uint64_type_node,
25760 hold_exception_mask));
25762 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25763 fenv_llu_and);
25765 tree hold_mtfsf = build_call_expr (mtfsf, 2,
25766 build_int_cst (unsigned_type_node, 0xff),
25767 fenv_hold_mtfsf);
25769 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
25771 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
25773 double fenv_clear = __builtin_mffs ();
25774 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
25775 __builtin_mtfsf (0xff, fenv_clear); */
25777 /* Mask to clear everything except for the rounding modes and non-IEEE
25778 arithmetic flag. */
25779 const unsigned HOST_WIDE_INT clear_exception_mask =
25780 HOST_WIDE_INT_C (0xffffffff00000000);
25782 tree fenv_clear = create_tmp_var_raw (double_type_node);
25784 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
25786 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
25787 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
25788 fenv_clean_llu,
25789 build_int_cst (uint64_type_node,
25790 clear_exception_mask));
25792 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25793 fenv_clear_llu_and);
25795 tree clear_mtfsf = build_call_expr (mtfsf, 2,
25796 build_int_cst (unsigned_type_node, 0xff),
25797 fenv_clear_mtfsf);
25799 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
25801 /* Generates the equivalent of feupdateenv (&fenv_var)
25803 double old_fenv = __builtin_mffs ();
25804 double fenv_update;
25805 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
25806 (*(uint64_t*)fenv_var & 0x1ff80fff);
25807 __builtin_mtfsf (0xff, fenv_update); */
25809 const unsigned HOST_WIDE_INT update_exception_mask =
25810 HOST_WIDE_INT_C (0xffffffff1fffff00);
25811 const unsigned HOST_WIDE_INT new_exception_mask =
25812 HOST_WIDE_INT_C (0x1ff80fff);
25814 tree old_fenv = create_tmp_var_raw (double_type_node);
25815 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
25817 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
25818 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
25819 build_int_cst (uint64_type_node,
25820 update_exception_mask));
25822 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25823 build_int_cst (uint64_type_node,
25824 new_exception_mask));
25826 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
25827 old_llu_and, new_llu_and);
25829 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25830 new_llu_mask);
25832 tree update_mtfsf = build_call_expr (mtfsf, 2,
25833 build_int_cst (unsigned_type_node, 0xff),
25834 fenv_update_mtfsf);
25836 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
25839 void
25840 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
25842 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
25844 rtx_tmp0 = gen_reg_rtx (V2DFmode);
25845 rtx_tmp1 = gen_reg_rtx (V2DFmode);
25847 /* The destination of the vmrgew instruction layout is:
25848 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
25849 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
25850 vmrgew instruction will be correct. */
25851 if (BYTES_BIG_ENDIAN)
25853 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
25854 GEN_INT (0)));
25855 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
25856 GEN_INT (3)));
25858 else
25860 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
25861 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
25864 rtx_tmp2 = gen_reg_rtx (V4SFmode);
25865 rtx_tmp3 = gen_reg_rtx (V4SFmode);
25867 emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
25868 emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
25870 if (BYTES_BIG_ENDIAN)
25871 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
25872 else
25873 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
25876 void
25877 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
25879 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
25881 rtx_tmp0 = gen_reg_rtx (V2DImode);
25882 rtx_tmp1 = gen_reg_rtx (V2DImode);
25884 /* The destination of the vmrgew instruction layout is:
25885 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
25886 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
25887 vmrgew instruction will be correct. */
25888 if (BYTES_BIG_ENDIAN)
25890 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
25891 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
25893 else
25895 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
25896 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
25899 rtx_tmp2 = gen_reg_rtx (V4SFmode);
25900 rtx_tmp3 = gen_reg_rtx (V4SFmode);
25902 if (signed_convert)
25904 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
25905 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
25907 else
25909 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
25910 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
25913 if (BYTES_BIG_ENDIAN)
25914 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
25915 else
25916 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
25919 void
25920 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
25921 rtx src2)
25923 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
25925 rtx_tmp0 = gen_reg_rtx (V2DFmode);
25926 rtx_tmp1 = gen_reg_rtx (V2DFmode);
25928 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
25929 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
25931 rtx_tmp2 = gen_reg_rtx (V4SImode);
25932 rtx_tmp3 = gen_reg_rtx (V4SImode);
25934 if (signed_convert)
25936 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
25937 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
25939 else
25941 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
25942 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
25945 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
25948 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
25950 static bool
25951 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
25952 optimization_type opt_type)
25954 switch (op)
25956 case rsqrt_optab:
25957 return (opt_type == OPTIMIZE_FOR_SPEED
25958 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
25960 default:
25961 return true;
25965 /* Implement TARGET_CONSTANT_ALIGNMENT. */
25967 static HOST_WIDE_INT
25968 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
25970 if (TREE_CODE (exp) == STRING_CST
25971 && (STRICT_ALIGNMENT || !optimize_size))
25972 return MAX (align, BITS_PER_WORD);
25973 return align;
25976 /* Implement TARGET_STARTING_FRAME_OFFSET. */
25978 static HOST_WIDE_INT
25979 rs6000_starting_frame_offset (void)
25981 if (FRAME_GROWS_DOWNWARD)
25982 return 0;
25983 return RS6000_STARTING_FRAME_OFFSET;
/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is called
   via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  /* Only C++ mangled names (which start with "_Z") can differ between the
     GCC 8.1 mangling and the current one.  */
  if (!rs6000_passes_ieee128 || name[0] != '_' || name[1] != 'Z')
    return;

  tree saved_id = DECL_ASSEMBLER_NAME (decl);

  /* Recompute the assembler name with the GCC 8.1 mangling temporarily
     enabled, then restore the saved name and the flag.  */
  ieee128_mangling_gcc_8_1 = true;
  lang_hooks.set_decl_assembler_name (decl);
  const char *gcc_8_1_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  SET_DECL_ASSEMBLER_NAME (decl, saved_id);
  ieee128_mangling_gcc_8_1 = false;

  /* If the manglings differ, emit a weak alias from the old name to the
     current one.  */
  if (strcmp (name, gcc_8_1_name) != 0)
    {
      fprintf (stream, "\t.weak %s\n", gcc_8_1_name);
      fprintf (stream, "\t.set %s,%s\n", gcc_8_1_name, name);
    }
}
#endif
26020 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
26021 function names from <foo>l to <foo>f128 if the default long double type is
26022 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26023 include file switches the names on systems that support long double as IEEE
26024 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26025 In the future, glibc will export names like __ieee128_sinf128 and we can
26026 switch to using those instead of using sinf128, which pollutes the user's
26027 namespace.
26029 This will switch the names for Fortran math functions as well (which doesn't
26030 use math.h). However, Fortran needs other changes to the compiler and
26031 library before you can switch the real*16 type at compile time.
26033 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26034 only do this if the default is that long double is IBM extended double, and
26035 the user asked for IEEE 128-bit. */
26037 static tree
26038 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26040 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26041 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
26043 size_t len = IDENTIFIER_LENGTH (id);
26044 const char *name = IDENTIFIER_POINTER (id);
26046 if (name[len - 1] == 'l')
26048 bool uses_ieee128_p = false;
26049 tree type = TREE_TYPE (decl);
26050 machine_mode ret_mode = TYPE_MODE (type);
26052 /* See if the function returns a IEEE 128-bit floating point type or
26053 complex type. */
26054 if (ret_mode == TFmode || ret_mode == TCmode)
26055 uses_ieee128_p = true;
26056 else
26058 function_args_iterator args_iter;
26059 tree arg;
26061 /* See if the function passes a IEEE 128-bit floating point type
26062 or complex type. */
26063 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26065 machine_mode arg_mode = TYPE_MODE (arg);
26066 if (arg_mode == TFmode || arg_mode == TCmode)
26068 uses_ieee128_p = true;
26069 break;
26074 /* If we passed or returned an IEEE 128-bit floating point type,
26075 change the name. */
26076 if (uses_ieee128_p)
26078 char *name2 = (char *) alloca (len + 4);
26079 memcpy (name2, name, len - 1);
26080 strcpy (name2 + len - 1, "f128");
26081 id = get_identifier (name2);
26086 return id;
26089 /* Predict whether the given loop in gimple will be transformed in the RTL
26090 doloop_optimize pass. */
26092 static bool
26093 rs6000_predict_doloop_p (struct loop *loop)
26095 gcc_assert (loop);
26097 /* On rs6000, targetm.can_use_doloop_p is actually
26098 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26099 if (loop->inner != NULL)
26101 if (dump_file && (dump_flags & TDF_DETAILS))
26102 fprintf (dump_file, "Predict doloop failure due to"
26103 " loop nesting.\n");
26104 return false;
26107 return true;
/* The rs6000 target hook vector, assembled from the TARGET_* hook macros
   defined above in this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector root tables for this file, generated by gengtype.  */
#include "gt-rs6000.h"