Add support for large prefixed addresses when adjusting a vector address.
[official-gcc.git] / gcc / config / rs6000 / rs6000.c
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name.  */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Support targetm.vectorize.builtin_mask_for_load.  */
GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif
/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;
/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

extern GTY(()) section *toc_section;
section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV       = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV       = 0x002,
  RECIP_V4SF_DIV     = 0x004,
  RECIP_V2DF_DIV     = 0x008,

  RECIP_SF_RSQRT     = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT     = 0x020,
  RECIP_V4SF_RSQRT   = 0x040,
  RECIP_V2DF_RSQRT   = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE            = 0,
  RECIP_ALL             = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                           | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                           | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION  = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION   = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;           /* option name.  */
  unsigned int mask;            /* mask bits to set.  */
} recip_options[] = {
  { "all",    RECIP_ALL },
  { "none",   RECIP_NONE },
  { "div",    (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV) },
  { "divf",   (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",   (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",  (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
               | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
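/* Illustrative note: the entries in this table compose by ORing, so
   e.g. -mrecip=divf,rsqrtf combines the "divf" and "rsqrtf" masks and
   enables estimates only for single-precision scalar and V4SF divides
   and reciprocal square roots; the strings are matched when the
   -mrecip= option is parsed.  */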
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,            /* default clone.  */
  CLONE_ISA_2_05,               /* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,               /* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,               /* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,               /* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;       /* rs6000_isa mask.  */
  const char *name;             /* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },                                  /* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },          /* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },       /* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },     /* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },     /* ISA 3.00 (power9).  */
};
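/* Illustrative example of how this table is used: a function declared
   with
     __attribute__ ((target_clones ("cpu=power9,default")))
   gets a CLONE_ISA_3_00 variant whose dispatcher tests
   __builtin_cpu_supports ("arch_3_00") at run time to pick the clone.  */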
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about for secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET  0x80    /* quad offset is limited.  */
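/* For example, a mode that is valid in a GPR with both reg+reg and
   reg+offset addresses would have (RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET) set in its RELOAD_REG_GPR entry.  (Illustrative
   note; the actual masks are filled in during target initialization.)  */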
/* Masks of the valid addressing modes for each mode, based on register
   type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;    /* INSN to reload for loading.  */
  enum insn_code reload_store;   /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;          /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
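/* Illustrative note: PRE_INC, PRE_DEC, and PRE_MODIFY all map onto the
   PowerPC update-form instructions; e.g. "stdu r1,-112(r1)" stores and
   updates the base register in a single instruction.  */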
/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal D-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
          != 0);
}
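/* Illustrative note: the ISA 3.0 lxv/stxv instructions use a DQ-form
   displacement, so an offset such as 48 is representable while 50 is
   not, because the offset must be a multiple of 16.  */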
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
        {
          out_set = single_set (out_insn);
          if (!out_set)
            {
              out_pat = PATTERN (out_insn);
              if (GET_CODE (out_pat) == PARALLEL)
                {
                  for (i = 0; i < XVECLEN (out_pat, 0); i++)
                    {
                      out_exp = XVECEXP (out_pat, 0, i);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
        return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
        {
          in_exp = XVECEXP (in_pat, 0, i);
          if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
            continue;
          else if (GET_CODE (in_exp) != SET)
            return false;

          if (MEM_P (SET_DEST (in_exp)))
            {
              out_set = single_set (out_insn);
              if (!out_set)
                {
                  out_pat = PATTERN (out_insn);
                  if (GET_CODE (out_pat) != PARALLEL)
                    return false;
                  for (j = 0; j < XVECLEN (out_pat, 0); j++)
                    {
                      out_exp = XVECEXP (out_pat, 0, j);
                      if ((GET_CODE (out_exp) == CLOBBER)
                          || (GET_CODE (out_exp) == USE))
                        continue;
                      else if (GET_CODE (out_exp) != SET)
                        return false;
                    }
                }
            }
        }
    }

  return store_data_bypass_p (out_insn, in_insn);
}
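/* Illustrative note: this predicate is intended for use in define_bypass
   conditions in the scheduling descriptions, where a bypass latency
   applies only when the consumer is a store and the forwarded value is
   the stored data rather than the address.  */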
/* Processor costs (relative to an add).  */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (8),    /* divsi */
  COSTS_N_INSNS (12),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (18),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  8,                    /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
                                     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode, rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
                                                  reg_class_t, reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode, machine_mode,
                                                reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",    1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",   0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  rs6000_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  rs6000_get_function_versions_dispatcher

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1714 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1715 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1716 rs6000_hard_regno_call_part_clobbered
1718 #undef TARGET_SLOW_UNALIGNED_ACCESS
1719 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1721 #undef TARGET_CAN_CHANGE_MODE_CLASS
1722 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1724 #undef TARGET_CONSTANT_ALIGNMENT
1725 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1727 #undef TARGET_STARTING_FRAME_OFFSET
1728 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1730 #if TARGET_ELF && RS6000_WEAK
1731 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1732 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1733 #endif
1735 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1736 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1738 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1739 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1742 /* Processor table. */
1743 struct rs6000_ptt
1745 const char *const name; /* Canonical processor name. */
1746 const enum processor_type processor; /* Processor type enum value. */
1747 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1750 static struct rs6000_ptt const processor_target_table[] =
1752 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1753 #include "rs6000-cpus.def"
1754 #undef RS6000_CPU
1757 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1758 name is invalid. */
1760 static int
1761 rs6000_cpu_name_lookup (const char *name)
1763 size_t i;
1765 if (name != NULL)
1767 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1768 if (! strcmp (name, processor_target_table[i].name))
1769 return (int)i;
1772 return -1;
1776 /* Return number of consecutive hard regs needed starting at reg REGNO
1777 to hold something of mode MODE.
1778 This is ordinarily the length in words of a value of mode MODE
1779 but can be less for certain modes in special long registers.
1781 POWER and PowerPC GPRs hold 32 bits worth;
1782 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1784 static int
1785 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1787 unsigned HOST_WIDE_INT reg_size;
1789 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1790 128-bit floating point that can go in vector registers, which has VSX
1791 memory addressing. */
1792 if (FP_REGNO_P (regno))
1793 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
1794 ? UNITS_PER_VSX_WORD
1795 : UNITS_PER_FP_WORD);
1797 else if (ALTIVEC_REGNO_P (regno))
1798 reg_size = UNITS_PER_ALTIVEC_WORD;
1800 else
1801 reg_size = UNITS_PER_WORD;
1803 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
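/* A worked example of the round-up division above (a sketch assuming the
   usual sizes UNITS_PER_VSX_WORD == 16, UNITS_PER_FP_WORD == 8, and a
   32-bit UNITS_PER_WORD == 4):

     V4SImode (16 bytes) in a VSX register:   (16 + 16 - 1) / 16 == 1
     IBM TFmode (16 bytes) in an FP register: (16 +  8 - 1) /  8 == 2
     DImode (8 bytes) in a 32-bit GPR:        ( 8 +  4 - 1) /  4 == 2  */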
1806 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1807 MODE. */
1808 static int
1809 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1811 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1813 if (COMPLEX_MODE_P (mode))
1814 mode = GET_MODE_INNER (mode);
1816 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1817 register combinations, and use PTImode where we need to deal with quad
1818 word memory operations. Don't allow quad words in the argument or frame
1819 pointer registers, just registers 0..31. */
1820 if (mode == PTImode)
1821 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1822 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1823 && ((regno & 1) == 0));
1825 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1826 implementations. Don't allow an item to be split between a FP register
1827 and an Altivec register. Allow TImode in all VSX registers if the user
1828 asked for it. */
1829 if (TARGET_VSX && VSX_REGNO_P (regno)
1830 && (VECTOR_MEM_VSX_P (mode)
1831 || FLOAT128_VECTOR_P (mode)
1832 || reg_addr[mode].scalar_in_vmx_p
1833 || mode == TImode
1834 || (TARGET_VADDUQM && mode == V1TImode)))
1836 if (FP_REGNO_P (regno))
1837 return FP_REGNO_P (last_regno);
1839 if (ALTIVEC_REGNO_P (regno))
1841 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1842 return 0;
1844 return ALTIVEC_REGNO_P (last_regno);
1848 /* The GPRs can hold any mode, but values bigger than one register
1849 cannot go past R31. */
1850 if (INT_REGNO_P (regno))
1851 return INT_REGNO_P (last_regno);
1853 /* The float registers (except for VSX vector modes) can only hold floating
1854 modes and DImode. */
1855 if (FP_REGNO_P (regno))
1857 if (FLOAT128_VECTOR_P (mode))
1858 return false;
1860 if (SCALAR_FLOAT_MODE_P (mode)
1861 && (mode != TDmode || (regno % 2) == 0)
1862 && FP_REGNO_P (last_regno))
1863 return 1;
1865 if (GET_MODE_CLASS (mode) == MODE_INT)
1867 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1868 return 1;
1870 if (TARGET_P8_VECTOR && (mode == SImode))
1871 return 1;
1873 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1874 return 1;
1877 return 0;
1880 /* The CR register can only hold CC modes. */
1881 if (CR_REGNO_P (regno))
1882 return GET_MODE_CLASS (mode) == MODE_CC;
1884 if (CA_REGNO_P (regno))
1885 return mode == Pmode || mode == SImode;
1887 /* AltiVec modes can only go in AltiVec registers. */
1888 if (ALTIVEC_REGNO_P (regno))
1889 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1890 || mode == V1TImode);
1892 /* We cannot put non-VSX TImode or PTImode anywhere except in the general
1893 registers, and it must be able to fit within the register set. */
1895 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
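/* Some concrete outcomes of the rules above (illustrative, assuming a
   typical VSX target): TDmode is accepted starting at FPR 32 (f0) but
   rejected at FPR 33 (f1), because the (regno % 2) == 0 test requires an
   even register to start the pair; PTImode is accepted only at even
   GPRs; and a 16-byte V4SImode value is accepted in any Altivec
   register.  */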
1898 /* Implement TARGET_HARD_REGNO_NREGS. */
1900 static unsigned int
1901 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1903 return rs6000_hard_regno_nregs[mode][regno];
1906 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1908 static bool
1909 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1911 return rs6000_hard_regno_mode_ok_p[mode][regno];
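/* Both hooks above are O(1) table lookups; the tables are filled in once
   by rs6000_init_hard_regno_mode_ok below, so the more expensive
   rs6000_hard_regno_mode_ok_uncached logic only runs at start-up.  A
   hypothetical query such as rs6000_hard_regno_mode_ok (33, TDmode)
   therefore returns the cached false from the even-register rule.  */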
1914 /* Implement TARGET_MODES_TIEABLE_P.
1916 PTImode cannot tie with other modes because PTImode is restricted to even
1917 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1918 57744).
1920 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1921 128-bit floating point on VSX systems ties with other vectors. */
1923 static bool
1924 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1926 if (mode1 == PTImode)
1927 return mode2 == PTImode;
1928 if (mode2 == PTImode)
1929 return false;
1931 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1932 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1933 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1934 return false;
1936 if (SCALAR_FLOAT_MODE_P (mode1))
1937 return SCALAR_FLOAT_MODE_P (mode2);
1938 if (SCALAR_FLOAT_MODE_P (mode2))
1939 return false;
1941 if (GET_MODE_CLASS (mode1) == MODE_CC)
1942 return GET_MODE_CLASS (mode2) == MODE_CC;
1943 if (GET_MODE_CLASS (mode2) == MODE_CC)
1944 return false;
1946 return true;
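/* Examples of the ordering above (illustrative): V2DFmode ties with
   V4SFmode, and on VSX systems IEEE 128-bit KFmode ties with the other
   vector modes because the Altivec/VSX test runs before the scalar float
   test; DFmode ties with SFmode (both scalar float); and PTImode ties
   only with itself.  */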
1949 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1951 static bool
1952 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1953 machine_mode mode)
1955 if (TARGET_32BIT
1956 && TARGET_POWERPC64
1957 && GET_MODE_SIZE (mode) > 4
1958 && INT_REGNO_P (regno))
1959 return true;
1961 if (TARGET_VSX
1962 && FP_REGNO_P (regno)
1963 && GET_MODE_SIZE (mode) > 8
1964 && !FLOAT128_2REG_P (mode))
1965 return true;
1967 return false;
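/* Concrete cases of the two tests above (illustrative): with -m32
   -mpowerpc64, a DImode value occupies a single 64-bit GPR, but the
   32-bit ABI only preserves the low 32 bits across calls, so the
   register is partially clobbered; likewise, under VSX a 16-byte vector
   value in an FPR is partially clobbered because the FP save/restore
   convention only covers the low 64 bits.  */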
1970 /* Print interesting facts about registers. */
1971 static void
1972 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1974 int r, m;
1976 for (r = first_regno; r <= last_regno; ++r)
1978 const char *comma = "";
1979 int len;
1981 if (first_regno == last_regno)
1982 fprintf (stderr, "%s:\t", reg_name);
1983 else
1984 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1986 len = 8;
1987 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1988 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1990 if (len > 70)
1992 fprintf (stderr, ",\n\t");
1993 len = 8;
1994 comma = "";
1997 if (rs6000_hard_regno_nregs[m][r] > 1)
1998 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1999 rs6000_hard_regno_nregs[m][r]);
2000 else
2001 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2003 comma = ", ";
2006 if (call_used_or_fixed_reg_p (r))
2008 if (len > 70)
2010 fprintf (stderr, ",\n\t");
2011 len = 8;
2012 comma = "";
2015 len += fprintf (stderr, "%s%s", comma, "call-used");
2016 comma = ", ";
2019 if (fixed_regs[r])
2021 if (len > 70)
2023 fprintf (stderr, ",\n\t");
2024 len = 8;
2025 comma = "";
2028 len += fprintf (stderr, "%s%s", comma, "fixed");
2029 comma = ", ";
2032 if (len > 70)
2034 fprintf (stderr, ",\n\t");
2035 comma = "";
2038 len += fprintf (stderr, "%sreg-class = %s", comma,
2039 reg_class_names[(int)rs6000_regno_regclass[r]]);
2040 comma = ", ";
2042 if (len > 70)
2044 fprintf (stderr, ",\n\t");
2045 comma = "";
2048 fprintf (stderr, "%sregno = %d\n", comma, r);
2052 static const char *
2053 rs6000_debug_vector_unit (enum rs6000_vector v)
2055 const char *ret;
2057 switch (v)
2059 case VECTOR_NONE: ret = "none"; break;
2060 case VECTOR_ALTIVEC: ret = "altivec"; break;
2061 case VECTOR_VSX: ret = "vsx"; break;
2062 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2063 default: ret = "unknown"; break;
2066 return ret;
2069 /* Inner function printing just the address mask for a particular reload
2070 register class. */
2071 DEBUG_FUNCTION char *
2072 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2074 static char ret[8];
2075 char *p = ret;
2077 if ((mask & RELOAD_REG_VALID) != 0)
2078 *p++ = 'v';
2079 else if (keep_spaces)
2080 *p++ = ' ';
2082 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2083 *p++ = 'm';
2084 else if (keep_spaces)
2085 *p++ = ' ';
2087 if ((mask & RELOAD_REG_INDEXED) != 0)
2088 *p++ = 'i';
2089 else if (keep_spaces)
2090 *p++ = ' ';
2092 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2093 *p++ = 'O';
2094 else if ((mask & RELOAD_REG_OFFSET) != 0)
2095 *p++ = 'o';
2096 else if (keep_spaces)
2097 *p++ = ' ';
2099 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2100 *p++ = '+';
2101 else if (keep_spaces)
2102 *p++ = ' ';
2104 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2105 *p++ = '+';
2106 else if (keep_spaces)
2107 *p++ = ' ';
2109 if ((mask & RELOAD_REG_AND_M16) != 0)
2110 *p++ = '&';
2111 else if (keep_spaces)
2112 *p++ = ' ';
2114 *p = '\0';
2116 return ret;
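/* For example, a (hypothetical) mask of RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET prints as "v io   " when
   KEEP_SPACES is set, since each absent flag keeps its column, and as
   "vio" when KEEP_SPACES is clear.  */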
2119 /* Print the address masks in a human readable fashion. */
2120 DEBUG_FUNCTION void
2121 rs6000_debug_print_mode (ssize_t m)
2123 ssize_t rc;
2124 int spaces = 0;
2126 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2127 for (rc = 0; rc < N_RELOAD_REG; rc++)
2128 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2129 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2131 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2132 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2134 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2135 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2136 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2137 spaces = 0;
2139 else
2140 spaces += sizeof (" Reload=sl") - 1;
2142 if (reg_addr[m].scalar_in_vmx_p)
2144 fprintf (stderr, "%*s Upper=y", spaces, "");
2145 spaces = 0;
2147 else
2148 spaces += sizeof (" Upper=y") - 1;
2150 if (rs6000_vector_unit[m] != VECTOR_NONE
2151 || rs6000_vector_mem[m] != VECTOR_NONE)
2153 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2154 spaces, "",
2155 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2156 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2159 fputs ("\n", stderr);
2162 #define DEBUG_FMT_ID "%-32s= "
2163 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2164 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2165 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2167 /* Print various interesting information with -mdebug=reg. */
2168 static void
2169 rs6000_debug_reg_global (void)
2171 static const char *const tf[2] = { "false", "true" };
2172 const char *nl = (const char *)0;
2173 int m;
2174 size_t m1, m2, v;
2175 char costly_num[20];
2176 char nop_num[20];
2177 char flags_buffer[40];
2178 const char *costly_str;
2179 const char *nop_str;
2180 const char *trace_str;
2181 const char *abi_str;
2182 const char *cmodel_str;
2183 struct cl_target_option cl_opts;
2185 /* Modes we want tieable information on. */
2186 static const machine_mode print_tieable_modes[] = {
2187 QImode,
2188 HImode,
2189 SImode,
2190 DImode,
2191 TImode,
2192 PTImode,
2193 SFmode,
2194 DFmode,
2195 TFmode,
2196 IFmode,
2197 KFmode,
2198 SDmode,
2199 DDmode,
2200 TDmode,
2201 V16QImode,
2202 V8HImode,
2203 V4SImode,
2204 V2DImode,
2205 V1TImode,
2206 V32QImode,
2207 V16HImode,
2208 V8SImode,
2209 V4DImode,
2210 V2TImode,
2211 V4SFmode,
2212 V2DFmode,
2213 V8SFmode,
2214 V4DFmode,
2215 CCmode,
2216 CCUNSmode,
2217 CCEQmode,
2220 /* Virtual regs we are interested in. */
2221 const static struct {
2222 int regno; /* register number. */
2223 const char *name; /* register name. */
2224 } virtual_regs[] = {
2225 { STACK_POINTER_REGNUM, "stack pointer:" },
2226 { TOC_REGNUM, "toc: " },
2227 { STATIC_CHAIN_REGNUM, "static chain: " },
2228 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2229 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2230 { ARG_POINTER_REGNUM, "arg pointer: " },
2231 { FRAME_POINTER_REGNUM, "frame pointer:" },
2232 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2233 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2234 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2235 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2236 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2237 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2238 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2239 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2240 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2243 fputs ("\nHard register information:\n", stderr);
2244 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2245 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2246 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2247 LAST_ALTIVEC_REGNO,
2248 "vs");
2249 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2250 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2251 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2252 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2253 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2254 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2256 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2257 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2258 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2260 fprintf (stderr,
2261 "\n"
2262 "d reg_class = %s\n"
2263 "f reg_class = %s\n"
2264 "v reg_class = %s\n"
2265 "wa reg_class = %s\n"
2266 "we reg_class = %s\n"
2267 "wr reg_class = %s\n"
2268 "wx reg_class = %s\n"
2269 "wA reg_class = %s\n"
2270 "\n",
2271 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2272 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2273 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2274 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2275 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2276 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2277 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2278 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2280 nl = "\n";
2281 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2282 rs6000_debug_print_mode (m);
2284 fputs ("\n", stderr);
2286 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2288 machine_mode mode1 = print_tieable_modes[m1];
2289 bool first_time = true;
2291 nl = (const char *)0;
2292 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2294 machine_mode mode2 = print_tieable_modes[m2];
2295 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2297 if (first_time)
2299 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2300 nl = "\n";
2301 first_time = false;
2304 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2308 if (!first_time)
2309 fputs ("\n", stderr);
2312 if (nl)
2313 fputs (nl, stderr);
2315 if (rs6000_recip_control)
2317 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2319 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2320 if (rs6000_recip_bits[m])
2322 fprintf (stderr,
2323 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2324 GET_MODE_NAME (m),
2325 (RS6000_RECIP_AUTO_RE_P (m)
2326 ? "auto"
2327 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2328 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2329 ? "auto"
2330 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2333 fputs ("\n", stderr);
2336 if (rs6000_cpu_index >= 0)
2338 const char *name = processor_target_table[rs6000_cpu_index].name;
2339 HOST_WIDE_INT flags
2340 = processor_target_table[rs6000_cpu_index].target_enable;
2342 sprintf (flags_buffer, "-mcpu=%s flags", name);
2343 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2345 else
2346 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2348 if (rs6000_tune_index >= 0)
2350 const char *name = processor_target_table[rs6000_tune_index].name;
2351 HOST_WIDE_INT flags
2352 = processor_target_table[rs6000_tune_index].target_enable;
2354 sprintf (flags_buffer, "-mtune=%s flags", name);
2355 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2357 else
2358 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2360 cl_target_option_save (&cl_opts, &global_options);
2361 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2362 rs6000_isa_flags);
2364 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2365 rs6000_isa_flags_explicit);
2367 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2368 rs6000_builtin_mask);
2370 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2372 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2373 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2375 switch (rs6000_sched_costly_dep)
2377 case max_dep_latency:
2378 costly_str = "max_dep_latency";
2379 break;
2381 case no_dep_costly:
2382 costly_str = "no_dep_costly";
2383 break;
2385 case all_deps_costly:
2386 costly_str = "all_deps_costly";
2387 break;
2389 case true_store_to_load_dep_costly:
2390 costly_str = "true_store_to_load_dep_costly";
2391 break;
2393 case store_to_load_dep_costly:
2394 costly_str = "store_to_load_dep_costly";
2395 break;
2397 default:
2398 costly_str = costly_num;
2399 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2400 break;
2403 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2405 switch (rs6000_sched_insert_nops)
2407 case sched_finish_regroup_exact:
2408 nop_str = "sched_finish_regroup_exact";
2409 break;
2411 case sched_finish_pad_groups:
2412 nop_str = "sched_finish_pad_groups";
2413 break;
2415 case sched_finish_none:
2416 nop_str = "sched_finish_none";
2417 break;
2419 default:
2420 nop_str = nop_num;
2421 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2422 break;
2425 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2427 switch (rs6000_sdata)
2429 default:
2430 case SDATA_NONE:
2431 break;
2433 case SDATA_DATA:
2434 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2435 break;
2437 case SDATA_SYSV:
2438 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2439 break;
2441 case SDATA_EABI:
2442 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2443 break;
2447 switch (rs6000_traceback)
2449 case traceback_default: trace_str = "default"; break;
2450 case traceback_none: trace_str = "none"; break;
2451 case traceback_part: trace_str = "part"; break;
2452 case traceback_full: trace_str = "full"; break;
2453 default: trace_str = "unknown"; break;
2456 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2458 switch (rs6000_current_cmodel)
2460 case CMODEL_SMALL: cmodel_str = "small"; break;
2461 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2462 case CMODEL_LARGE: cmodel_str = "large"; break;
2463 default: cmodel_str = "unknown"; break;
2466 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2468 switch (rs6000_current_abi)
2470 case ABI_NONE: abi_str = "none"; break;
2471 case ABI_AIX: abi_str = "aix"; break;
2472 case ABI_ELFv2: abi_str = "ELFv2"; break;
2473 case ABI_V4: abi_str = "V4"; break;
2474 case ABI_DARWIN: abi_str = "darwin"; break;
2475 default: abi_str = "unknown"; break;
2478 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2480 if (rs6000_altivec_abi)
2481 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2483 if (rs6000_darwin64_abi)
2484 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2486 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2487 (TARGET_SOFT_FLOAT ? "true" : "false"));
2489 if (TARGET_LINK_STACK)
2490 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2492 if (TARGET_P8_FUSION)
2494 char options[80];
2496 strcpy (options, "power8");
2497 if (TARGET_P8_FUSION_SIGN)
2498 strcat (options, ", sign");
2500 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2503 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2504 TARGET_SECURE_PLT ? "secure" : "bss");
2505 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2506 aix_struct_return ? "aix" : "sysv");
2507 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2508 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2509 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2510 tf[!!rs6000_align_branch_targets]);
2511 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2512 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2513 rs6000_long_double_type_size);
2514 if (rs6000_long_double_type_size > 64)
2516 fprintf (stderr, DEBUG_FMT_S, "long double type",
2517 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2518 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2519 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2521 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2522 (int)rs6000_sched_restricted_insns_priority);
2523 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2524 (int)END_BUILTINS);
2525 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2526 (int)RS6000_BUILTIN_COUNT);
2528 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2529 (int)TARGET_FLOAT128_ENABLE_TYPE);
2531 if (TARGET_VSX)
2532 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2533 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2535 if (TARGET_DIRECT_MOVE_128)
2536 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2537 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2541 /* Update the addr mask bits in reg_addr to help secondary reload and the
2542 legitimate address support figure out the appropriate addressing to
2543 use. */
2545 static void
2546 rs6000_setup_reg_addr_masks (void)
2548 ssize_t rc, reg, m, nregs;
2549 addr_mask_type any_addr_mask, addr_mask;
2551 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2553 machine_mode m2 = (machine_mode) m;
2554 bool complex_p = false;
2555 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2556 size_t msize;
2558 if (COMPLEX_MODE_P (m2))
2560 complex_p = true;
2561 m2 = GET_MODE_INNER (m2);
2564 msize = GET_MODE_SIZE (m2);
2566 /* SDmode is special in that we want to access it only via REG+REG
2567 addressing on power7 and above, since we want to use the LFIWZX and
2568 STFIWZX instructions to load it. */
2569 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2571 any_addr_mask = 0;
2572 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2574 addr_mask = 0;
2575 reg = reload_reg_map[rc].reg;
2577 /* Can mode values go in the GPR/FPR/Altivec registers? */
2578 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2580 bool small_int_vsx_p = (small_int_p
2581 && (rc == RELOAD_REG_FPR
2582 || rc == RELOAD_REG_VMX));
2584 nregs = rs6000_hard_regno_nregs[m][reg];
2585 addr_mask |= RELOAD_REG_VALID;
2587 /* Indicate if the mode takes more than 1 physical register. If
2588 it takes a single register, indicate it can do REG+REG
2589 addressing. Small integers in VSX registers can only do
2590 REG+REG addressing. */
2591 if (small_int_vsx_p)
2592 addr_mask |= RELOAD_REG_INDEXED;
2593 else if (nregs > 1 || m == BLKmode || complex_p)
2594 addr_mask |= RELOAD_REG_MULTIPLE;
2595 else
2596 addr_mask |= RELOAD_REG_INDEXED;
2598 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2599 addressing. If we allow scalars into Altivec registers,
2600 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2602 For VSX systems, we don't allow update addressing for
2603 DFmode/SFmode if those registers can go in both the
2604 traditional floating point registers and Altivec registers.
2605 The load/store instructions for the Altivec registers do not
2606 have update forms. Allowing update addressing seems to break
2607 IV-OPT code using floating point if the index type is
2608 int instead of long (PR target/81550 and target/84042). */
2610 if (TARGET_UPDATE
2611 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2612 && msize <= 8
2613 && !VECTOR_MODE_P (m2)
2614 && !FLOAT128_VECTOR_P (m2)
2615 && !complex_p
2616 && (m != E_DFmode || !TARGET_VSX)
2617 && (m != E_SFmode || !TARGET_P8_VECTOR)
2618 && !small_int_vsx_p)
2620 addr_mask |= RELOAD_REG_PRE_INCDEC;
2622 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2623 we don't allow PRE_MODIFY for some multi-register
2624 operations. */
2625 switch (m)
2627 default:
2628 addr_mask |= RELOAD_REG_PRE_MODIFY;
2629 break;
2631 case E_DImode:
2632 if (TARGET_POWERPC64)
2633 addr_mask |= RELOAD_REG_PRE_MODIFY;
2634 break;
2636 case E_DFmode:
2637 case E_DDmode:
2638 if (TARGET_HARD_FLOAT)
2639 addr_mask |= RELOAD_REG_PRE_MODIFY;
2640 break;
2645 /* GPR and FPR registers can do REG+OFFSET addressing, except
2646 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2647 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2648 if ((addr_mask != 0) && !indexed_only_p
2649 && msize <= 8
2650 && (rc == RELOAD_REG_GPR
2651 || ((msize == 8 || m2 == SFmode)
2652 && (rc == RELOAD_REG_FPR
2653 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2654 addr_mask |= RELOAD_REG_OFFSET;
2656 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2657 instructions are enabled. The offset for 128-bit VSX registers is
2658 only 12 bits. While GPRs can handle the full offset range, VSX
2659 registers can only handle the restricted range. */
2660 else if ((addr_mask != 0) && !indexed_only_p
2661 && msize == 16 && TARGET_P9_VECTOR
2662 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2663 || (m2 == TImode && TARGET_VSX)))
2665 addr_mask |= RELOAD_REG_OFFSET;
2666 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2667 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2670 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2671 addressing on 128-bit types. */
2672 if (rc == RELOAD_REG_VMX && msize == 16
2673 && (addr_mask & RELOAD_REG_VALID) != 0)
2674 addr_mask |= RELOAD_REG_AND_M16;
2676 reg_addr[m].addr_mask[rc] = addr_mask;
2677 any_addr_mask |= addr_mask;
2680 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
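/* As an illustration of what the loop above produces for V2DFmode on a
   power9/VSX configuration (a sketch, not authoritative for every
   subtarget): the GPR class ends up with RELOAD_REG_VALID
   | RELOAD_REG_MULTIPLE | RELOAD_REG_OFFSET, since the mode needs two
   64-bit GPRs; the FPR and VMX classes get RELOAD_REG_VALID
   | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET | RELOAD_REG_QUAD_OFFSET; and
   the VMX class additionally gets RELOAD_REG_AND_M16 for the
   (REG & -16) forms.  */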
2685 /* Initialize the various global tables that are based on register size. */
2686 static void
2687 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2689 ssize_t r, m, c;
2690 int align64;
2691 int align32;
2693 /* Precalculate REGNO_REG_CLASS. */
2694 rs6000_regno_regclass[0] = GENERAL_REGS;
2695 for (r = 1; r < 32; ++r)
2696 rs6000_regno_regclass[r] = BASE_REGS;
2698 for (r = 32; r < 64; ++r)
2699 rs6000_regno_regclass[r] = FLOAT_REGS;
2701 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2702 rs6000_regno_regclass[r] = NO_REGS;
2704 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2705 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2707 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2708 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2709 rs6000_regno_regclass[r] = CR_REGS;
2711 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2712 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2713 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2714 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2715 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2716 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2717 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2719 /* Precalculate the mapping from register class to the simpler reload
2720 register class. We don't need all of the register classes that are
2721 combinations of different classes, just the simple ones that have constraint letters. */
2722 for (c = 0; c < N_REG_CLASSES; c++)
2723 reg_class_to_reg_type[c] = NO_REG_TYPE;
2725 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2726 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2727 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2728 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2729 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2730 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2731 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2732 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2733 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2734 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2736 if (TARGET_VSX)
2738 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2739 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2741 else
2743 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2744 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2747 /* Precalculate the valid memory formats as well as the vector information;
2748 this must be set up before the rs6000_hard_regno_nregs_internal calls
2749 below. */
2750 gcc_assert ((int)VECTOR_NONE == 0);
2751 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2752 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2754 gcc_assert ((int)CODE_FOR_nothing == 0);
2755 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2757 gcc_assert ((int)NO_REGS == 0);
2758 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2760 /* The VSX hardware allows native alignment for vectors, but we control whether
2761 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2762 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2764 align64 = 64;
2765 align32 = 32;
2767 else
2769 align64 = 128;
2770 align32 = 128;
2773 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2774 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2775 if (TARGET_FLOAT128_TYPE)
2777 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2778 rs6000_vector_align[KFmode] = 128;
2780 if (FLOAT128_IEEE_P (TFmode))
2782 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2783 rs6000_vector_align[TFmode] = 128;
2787 /* V2DF mode, VSX only. */
2788 if (TARGET_VSX)
2790 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2791 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2792 rs6000_vector_align[V2DFmode] = align64;
2795 /* V4SF mode, either VSX or Altivec. */
2796 if (TARGET_VSX)
2798 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2799 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2800 rs6000_vector_align[V4SFmode] = align32;
2802 else if (TARGET_ALTIVEC)
2804 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2805 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2806 rs6000_vector_align[V4SFmode] = align32;
2809 /* V16QImode, V8HImode, and V4SImode are Altivec-only modes, but they may
2810 use VSX loads and stores. */
2811 if (TARGET_ALTIVEC)
2813 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2814 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2815 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2816 rs6000_vector_align[V4SImode] = align32;
2817 rs6000_vector_align[V8HImode] = align32;
2818 rs6000_vector_align[V16QImode] = align32;
2820 if (TARGET_VSX)
2822 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2823 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2824 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2826 else
2828 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2829 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2830 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2834 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit. Allow it
2835 under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2836 if (TARGET_VSX)
2838 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2839 rs6000_vector_unit[V2DImode]
2840 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2841 rs6000_vector_align[V2DImode] = align64;
2843 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2844 rs6000_vector_unit[V1TImode]
2845 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2846 rs6000_vector_align[V1TImode] = 128;
2849 /* DFmode, see if we want to use the VSX unit. Memory is handled
2850 differently, so don't set rs6000_vector_mem. */
2851 if (TARGET_VSX)
2853 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2854 rs6000_vector_align[DFmode] = 64;
2857 /* SFmode, see if we want to use the VSX unit. */
2858 if (TARGET_P8_VECTOR)
2860 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2861 rs6000_vector_align[SFmode] = 32;
2864 /* Allow TImode in VSX registers and set the VSX memory macros. */
2865 if (TARGET_VSX)
2867 rs6000_vector_mem[TImode] = VECTOR_VSX;
2868 rs6000_vector_align[TImode] = align64;
2871 /* Register class constraints for the constraints that depend on compile
2872 switches. When the VSX code was added, different constraints were added
2873 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2874 of the VSX registers are used. The register classes for scalar floating
2875 point types are set, based on whether we allow that type into the upper
2876 (Altivec) registers. GCC has register classes to target the Altivec
2877 registers for load/store operations, to select using a VSX memory
2878 operation instead of the traditional floating point operation. The
2879 constraints are:
2881 d - Register class to use with traditional DFmode instructions.
2882 f - Register class to use with traditional SFmode instructions.
2883 v - Altivec register.
2884 wa - Any VSX register.
2885 wc - Reserved to represent individual CR bits (used in LLVM).
2886 wn - always NO_REGS.
2887 wr - GPR if 64-bit mode is permitted.
2888 wx - Float register if we can do 32-bit int stores. */
2890 if (TARGET_HARD_FLOAT)
2892 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2893 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2896 if (TARGET_VSX)
2897 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2899 /* Add conditional constraints based on various options, to allow us to
2900 collapse multiple insn patterns. */
2901 if (TARGET_ALTIVEC)
2902 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2904 if (TARGET_POWERPC64)
2906 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2907 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2910 if (TARGET_STFIWX)
2911 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2913 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2914 if (TARGET_DIRECT_MOVE_128)
2915 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2917 /* Set up the reload helper and direct move functions. */
2918 if (TARGET_VSX || TARGET_ALTIVEC)
2920 if (TARGET_64BIT)
2922 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2923 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2924 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2925 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2926 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2927 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2928 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2929 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2930 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2931 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2932 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2933 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2934 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2935 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2936 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2937 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2938 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2939 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2940 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2941 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2943 if (FLOAT128_VECTOR_P (KFmode))
2945 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
2946 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
2949 if (FLOAT128_VECTOR_P (TFmode))
2951 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
2952 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
2955 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2956 available. */
2957 if (TARGET_NO_SDMODE_STACK)
2959 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2960 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2963 if (TARGET_VSX)
2965 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2966 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2969 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
2971 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2972 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2973 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2974 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2975 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2976 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2977 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2978 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2979 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2981 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2982 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2983 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2984 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2985 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2986 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2987 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2988 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2989 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2991 if (FLOAT128_VECTOR_P (KFmode))
2993 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
2994 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
2997 if (FLOAT128_VECTOR_P (TFmode))
2999 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3000 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3004 else
3006 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3007 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3008 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3009 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3010 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3011 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3012 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3013 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3014 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3015 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3016 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3017 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3018 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3019 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3020 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3021 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3022 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3023 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3024 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3025 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3027 if (FLOAT128_VECTOR_P (KFmode))
3029 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3030 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3033 if (FLOAT128_IEEE_P (TFmode))
3035 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3036 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3039 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3040 available. */
3041 if (TARGET_NO_SDMODE_STACK)
3043 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3044 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3047 if (TARGET_VSX)
3049 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3050 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3053 if (TARGET_DIRECT_MOVE)
3055 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3056 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3057 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3061 reg_addr[DFmode].scalar_in_vmx_p = true;
3062 reg_addr[DImode].scalar_in_vmx_p = true;
3064 if (TARGET_P8_VECTOR)
3066 reg_addr[SFmode].scalar_in_vmx_p = true;
3067 reg_addr[SImode].scalar_in_vmx_p = true;
3069 if (TARGET_P9_VECTOR)
3071 reg_addr[HImode].scalar_in_vmx_p = true;
3072 reg_addr[QImode].scalar_in_vmx_p = true;
3077 /* Precalculate HARD_REGNO_NREGS. */
3078 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3079 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3080 rs6000_hard_regno_nregs[m][r]
3081 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3083 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3084 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3085 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3086 rs6000_hard_regno_mode_ok_p[m][r]
3087 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3089 /* Precalculate CLASS_MAX_NREGS sizes. */
3090 for (c = 0; c < LIM_REG_CLASSES; ++c)
3092 int reg_size;
3094 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3095 reg_size = UNITS_PER_VSX_WORD;
3097 else if (c == ALTIVEC_REGS)
3098 reg_size = UNITS_PER_ALTIVEC_WORD;
3100 else if (c == FLOAT_REGS)
3101 reg_size = UNITS_PER_FP_WORD;
3103 else
3104 reg_size = UNITS_PER_WORD;
3106 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3108 machine_mode m2 = (machine_mode)m;
3109 int reg_size2 = reg_size;
3111 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3112 in VSX. */
3113 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3114 reg_size2 = UNITS_PER_FP_WORD;
3116 rs6000_class_max_nregs[m][c]
3117 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
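/* A worked example of the calculation above (illustrative): IFmode, the
   IBM 128-bit format, is FLOAT128_2REG_P, so even in a VSX register
   class reg_size2 falls back to UNITS_PER_FP_WORD and the mode needs
   (16 + 8 - 1) / 8 == 2 registers, while IEEE KFmode in the same class
   uses the full 16-byte VSX word and needs (16 + 16 - 1) / 16 == 1.  */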
3121 /* Calculate the modes for which we automatically generate code to use the
3122 reciprocal divide and square root instructions. In the future, we may
3123 automatically generate the instructions even if the user did not specify
3124 -mrecip. On older machines, the double-precision reciprocal sqrt estimate
3125 is not accurate enough. */
3126 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3127 if (TARGET_FRES)
3128 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3129 if (TARGET_FRE)
3130 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3131 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3132 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3133 if (VECTOR_UNIT_VSX_P (V2DFmode))
3134 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3136 if (TARGET_FRSQRTES)
3137 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3138 if (TARGET_FRSQRTE)
3139 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3140 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3141 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3142 if (VECTOR_UNIT_VSX_P (V2DFmode))
3143 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3145 if (rs6000_recip_control)
3147 if (!flag_finite_math_only)
3148 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3149 "-ffast-math");
3150 if (flag_trapping_math)
3151 warning (0, "%qs requires %qs or %qs", "-mrecip",
3152 "-fno-trapping-math", "-ffast-math");
3153 if (!flag_reciprocal_math)
3154 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3155 "-ffast-math");
3156 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3158 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3159 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3160 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3162 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3163 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3164 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3166 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3167 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3168 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3170 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3171 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3172 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3174 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3175 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3176 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3178 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3179 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3180 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3182 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3183 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3184 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3186 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3187 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3188 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3192 /* Update the addr mask bits in reg_addr to help secondary reload and the
3193 legitimate address support figure out the appropriate addressing to
3194 use. */
3195 rs6000_setup_reg_addr_masks ();
3197 if (global_init_p || TARGET_DEBUG_TARGET)
3199 if (TARGET_DEBUG_REG)
3200 rs6000_debug_reg_global ();
3202 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3203 fprintf (stderr,
3204 "SImode variable mult cost = %d\n"
3205 "SImode constant mult cost = %d\n"
3206 "SImode short constant mult cost = %d\n"
3207 "DImode multiplication cost = %d\n"
3208 "SImode division cost = %d\n"
3209 "DImode division cost = %d\n"
3210 "Simple fp operation cost = %d\n"
3211 "DFmode multiplication cost = %d\n"
3212 "SFmode division cost = %d\n"
3213 "DFmode division cost = %d\n"
3214 "cache line size = %d\n"
3215 "l1 cache size = %d\n"
3216 "l2 cache size = %d\n"
3217 "simultaneous prefetches = %d\n"
3218 "\n",
3219 rs6000_cost->mulsi,
3220 rs6000_cost->mulsi_const,
3221 rs6000_cost->mulsi_const9,
3222 rs6000_cost->muldi,
3223 rs6000_cost->divsi,
3224 rs6000_cost->divdi,
3225 rs6000_cost->fp,
3226 rs6000_cost->dmul,
3227 rs6000_cost->sdiv,
3228 rs6000_cost->ddiv,
3229 rs6000_cost->cache_line_size,
3230 rs6000_cost->l1_cache_size,
3231 rs6000_cost->l2_cache_size,
3232 rs6000_cost->simultaneous_prefetches);
3236 #if TARGET_MACHO
3237 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3239 static void
3240 darwin_rs6000_override_options (void)
3242 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3243 off. */
3244 rs6000_altivec_abi = 1;
3245 TARGET_ALTIVEC_VRSAVE = 1;
3246 rs6000_current_abi = ABI_DARWIN;
3248 if (DEFAULT_ABI == ABI_DARWIN
3249 && TARGET_64BIT)
3250 darwin_one_byte_bool = 1;
3252 if (TARGET_64BIT && ! TARGET_POWERPC64)
3254 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3255 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3258 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3259 optimisation, and will not work with the most generic case (where the
3260 symbol is undefined external, but there is no symbol stub). */
3261 if (TARGET_64BIT)
3262 rs6000_default_long_calls = 0;
3264 /* ld_classic is (so far) still used for kernel (static) code, and supports
3265 the JBSR longcall / branch islands. */
3266 if (flag_mkernel)
3268 rs6000_default_long_calls = 1;
3270 /* Allow a kext author to do -mkernel -mhard-float. */
3271 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3272 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3275 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3276 Altivec. */
3277 if (!flag_mkernel && !flag_apple_kext
3278 && TARGET_64BIT
3279 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3280 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3282 /* Unless the user (not the configurer) has explicitly overridden
3283 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3284 G4 unless targeting the kernel. */
3285 if (!flag_mkernel
3286 && !flag_apple_kext
3287 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3288 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3289 && ! global_options_set.x_rs6000_cpu_index)
3291 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3294 #endif
3296 /* If not otherwise specified by a target, make 'long double' equivalent to
3297 'double'. */
3299 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3300 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3301 #endif
3303 /* Return the builtin mask for the various options that affect which
3304 builtins are enabled. In the past we used target_flags, but we've run out of
3305 bits, and some options are no longer in target_flags. */
3307 HOST_WIDE_INT
3308 rs6000_builtin_mask_calculate (void)
3310 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3311 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3312 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3313 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3314 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3315 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3316 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3317 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3318 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3319 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3320 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3321 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3322 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3323 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3324 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3325 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3326 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3327 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3328 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3329 | ((TARGET_LONG_DOUBLE_128
3330 && TARGET_HARD_FLOAT
3331 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3332 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3333 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
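/* Capabilities can then be tested with simple mask checks; a
   hypothetical fragment:

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     bool has_vsx_builtins = (mask & RS6000_BTM_VSX) != 0;

   The resulting rs6000_builtin_mask is compared against each builtin's
   required mask before that builtin is made available.  */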
3336 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3337 to clobber the XER[CA] bit because clobbering that bit without telling
3338 the compiler worked just fine with versions of GCC before GCC 5, and
3339 breaking a lot of older code in ways that are hard to track down is
3340 not such a great idea. */
3342 static rtx_insn *
3343 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3344 vec<const char *> &/*constraints*/,
3345 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3347 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3348 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3349 return NULL;
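/* The motivating case is user inline assembly that modifies the carry
   bit without declaring it; a user-code sketch:

     long out, in = 1;
     __asm__ ("addic %0,%1,1" : "=r" (out) : "r" (in));

   The "addic" instruction writes XER[CA], and code like this predates
   GCC 5, so every asm is now assumed to clobber the bit rather than
   silently miscompile such code.  */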
3352 /* Override command line options.
3354 Combine build-specific configuration information with options
3355 specified on the command line to set various state variables which
3356 influence code generation, optimization, and expansion of built-in
3357 functions. Assure that command-line configuration preferences are
3358 compatible with each other and with the build configuration; issue
3359 warnings while adjusting configuration or error messages while
3360 rejecting configuration.
3362 Upon entry to this function:
3364 This function is called once at the beginning of
3365 compilation, and then again at the start and end of compiling
3366 each section of code that has a different configuration, as
3367 indicated, for example, by adding the
3369 __attribute__((__target__("cpu=power9")))
3371 qualifier to a function definition or, for example, by bracketing
3372 code between
3374 #pragma GCC target("altivec")
3378 #pragma GCC reset_options
3380 directives. Parameter global_init_p is true for the initial
3381 invocation, which initializes global variables, and false for all
3382 subsequent invocations.
3385 Various global state information is assumed to be valid. This
3386 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3387 default CPU specified at build configure time, TARGET_DEFAULT,
3388 representing the default set of option flags for the default
3389 target, and global_options_set.x_rs6000_isa_flags, representing
3390 which options were requested on the command line.
3392 Upon return from this function:
3394 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3395 was set by name on the command line. Additionally, if certain
3396 attributes are automatically enabled or disabled by this function
3397 in order to assure compatibility between options and
3398 configuration, the flags associated with those attributes are
3399 also set. By setting these "explicit bits", we avoid the risk
3400 that other code might accidentally overwrite these particular
3401 attributes with "default values".
3403 The various bits of rs6000_isa_flags are set to indicate the
3404 target options that have been selected for the most current
3405 compilation efforts. This has the effect of also turning on the
3406 associated TARGET_XXX values since these are macros which are
3407 generally defined to test the corresponding bit of the
3408 rs6000_isa_flags variable.
3410 The variable rs6000_builtin_mask is set to represent the target
3411 options for the most current compilation efforts, consistent with
3412 the current contents of rs6000_isa_flags. This variable controls
3413 expansion of built-in functions.
3415 Various other global variables and fields of global structures
3416 (over 50 in all) are initialized to reflect the desired options
3417 for the most current compilation efforts. */
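/* To make the re-invocation triggers described above concrete, here is a
   hedged sketch of hypothetical user code (not part of this file); each
   construct causes rs6000_option_override_internal to run again with
   global_init_p == false:

     __attribute__((__target__("cpu=power9")))
     long count_zeros (long x) { return __builtin_ctzl (x); }

     #pragma GCC target("altivec")
     __vector int vadd (__vector int a, __vector int b) { return a + b; }
     #pragma GCC reset_options
*/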
3419 static bool
3420 rs6000_option_override_internal (bool global_init_p)
3422 bool ret = true;
3424 HOST_WIDE_INT set_masks;
3425 HOST_WIDE_INT ignore_masks;
3426 int cpu_index = -1;
3427 int tune_index;
3428 struct cl_target_option *main_target_opt
3429 = ((global_init_p || target_option_default_node == NULL)
3430 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3432 /* Print defaults. */
3433 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3434 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3436 /* Remember the explicit arguments. */
3437 if (global_init_p)
3438 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3440 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3441 library functions, so warn about it. The flag may be useful for
3442 performance studies from time to time though, so don't disable it
3443 entirely. */
3444 if (global_options_set.x_rs6000_alignment_flags
3445 && rs6000_alignment_flags == MASK_ALIGN_POWER
3446 && DEFAULT_ABI == ABI_DARWIN
3447 && TARGET_64BIT)
3448 warning (0, "%qs is not supported for 64-bit Darwin;"
3449 " it is incompatible with the installed C and C++ libraries",
3450 "-malign-power");
3452 /* Numerous experiments show that IRA-based loop pressure
3453 calculation works better for RTL loop invariant motion on targets
3454 with enough (>= 32) registers. It is an expensive optimization,
3455 so it is on only for peak performance. */
3456 if (optimize >= 3 && global_init_p
3457 && !global_options_set.x_flag_ira_loop_pressure)
3458 flag_ira_loop_pressure = 1;
3460 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3461 for tracebacks to be complete, but not if any -fasynchronous-unwind-tables
3462 option was already specified. */
3463 if (flag_sanitize & SANITIZE_USER_ADDRESS
3464 && !global_options_set.x_flag_asynchronous_unwind_tables)
3465 flag_asynchronous_unwind_tables = 1;
3467 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3468 loop unroller is active. It is only checked during unrolling, so
3469 we can just set it on by default. */
3470 if (!global_options_set.x_flag_variable_expansion_in_unroller)
3471 flag_variable_expansion_in_unroller = 1;
3473 /* Set the pointer size. */
3474 if (TARGET_64BIT)
3476 rs6000_pmode = DImode;
3477 rs6000_pointer_size = 64;
3479 else
3481 rs6000_pmode = SImode;
3482 rs6000_pointer_size = 32;
3485 /* Some OSs don't support saving the high part of 64-bit registers on context
3486 switch. Other OSs don't support saving Altivec registers. On those OSs,
3487 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3488 if the user wants either, the user must explicitly specify them and we
3489 won't interfere with the user's specification. */
3491 set_masks = POWERPC_MASKS;
3492 #ifdef OS_MISSING_POWERPC64
3493 if (OS_MISSING_POWERPC64)
3494 set_masks &= ~OPTION_MASK_POWERPC64;
3495 #endif
3496 #ifdef OS_MISSING_ALTIVEC
3497 if (OS_MISSING_ALTIVEC)
3498 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3499 | OTHER_VSX_VECTOR_MASKS);
3500 #endif
3502 /* Don't let the processor default override flags that were set explicitly. */
3503 set_masks &= ~rs6000_isa_flags_explicit;
3505 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3506 the cpu in a target attribute or pragma, but did not specify a tuning
3507 option, use the cpu for the tuning option rather than the option specified
3508 with -mtune on the command line. Process a '--with-cpu' configuration
3509 request as an implicit --cpu. */
3510 if (rs6000_cpu_index >= 0)
3511 cpu_index = rs6000_cpu_index;
3512 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3513 cpu_index = main_target_opt->x_rs6000_cpu_index;
3514 else if (OPTION_TARGET_CPU_DEFAULT)
3515 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3517 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3518 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3519 with those from the cpu, except for options that were explicitly set. If
3520 we don't have a cpu, do not override the target bits set in
3521 TARGET_DEFAULT. */
3522 if (cpu_index >= 0)
3524 rs6000_cpu_index = cpu_index;
3525 rs6000_isa_flags &= ~set_masks;
3526 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3527 & set_masks);
3529 else
3531 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3532 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3533 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3534 to using rs6000_isa_flags, we need to do the initialization here.
3536 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3537 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3538 HOST_WIDE_INT flags;
3539 if (TARGET_DEFAULT)
3540 flags = TARGET_DEFAULT;
3541 else
3543 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3544 const char *default_cpu = (!TARGET_POWERPC64
3545 ? "powerpc"
3546 : (BYTES_BIG_ENDIAN
3547 ? "powerpc64"
3548 : "powerpc64le"));
3549 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3550 flags = processor_target_table[default_cpu_index].target_enable;
3552 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3555 if (rs6000_tune_index >= 0)
3556 tune_index = rs6000_tune_index;
3557 else if (cpu_index >= 0)
3558 rs6000_tune_index = tune_index = cpu_index;
3559 else
3561 size_t i;
3562 enum processor_type tune_proc
3563 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3565 tune_index = -1;
3566 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3567 if (processor_target_table[i].processor == tune_proc)
3569 tune_index = i;
3570 break;
3574 if (cpu_index >= 0)
3575 rs6000_cpu = processor_target_table[cpu_index].processor;
3576 else
3577 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3579 gcc_assert (tune_index >= 0);
3580 rs6000_tune = processor_target_table[tune_index].processor;
3582 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3583 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3584 || rs6000_cpu == PROCESSOR_PPCE5500)
3586 if (TARGET_ALTIVEC)
3587 error ("AltiVec not supported in this target");
3590 /* If we are optimizing big endian systems for space, use the load/store
3591 multiple instructions. */
3592 if (BYTES_BIG_ENDIAN && optimize_size)
3593 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3595 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3596 because the hardware doesn't support the instructions used in little
3597 endian mode, and using them causes an alignment trap. The 750 does not
3598 cause an alignment trap (except when the target address is unaligned). */
3600 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3602 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3603 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3604 warning (0, "%qs is not supported on little endian systems",
3605 "-mmultiple");
3608 /* If little-endian, default to -mstrict-align on older processors.
3609 Testing for htm matches power8 and later. */
3610 if (!BYTES_BIG_ENDIAN
3611 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3612 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3614 if (!rs6000_fold_gimple)
3615 fprintf (stderr,
3616 "gimple folding of rs6000 builtins has been disabled.\n");
3618 /* Add some warnings for VSX. */
3619 if (TARGET_VSX)
3621 const char *msg = NULL;
3622 if (!TARGET_HARD_FLOAT)
3624 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3625 msg = N_("%<-mvsx%> requires hardware floating point");
3626 else
3628 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3629 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3632 else if (TARGET_AVOID_XFORM > 0)
3633 msg = N_("%<-mvsx%> needs indexed addressing");
3634 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3635 & OPTION_MASK_ALTIVEC))
3637 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3638 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3639 else
3640 msg = N_("%<-mno-altivec%> disables vsx");
3643 if (msg)
3645 warning (0, msg);
3646 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3647 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3651 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3652 the -mcpu setting to enable options that conflict. */
3653 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3654 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3655 | OPTION_MASK_ALTIVEC
3656 | OPTION_MASK_VSX)) != 0)
3657 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3658 | OPTION_MASK_DIRECT_MOVE)
3659 & ~rs6000_isa_flags_explicit);
3661 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3662 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3664 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3665 off all of the options that depend on those flags. */
3666 ignore_masks = rs6000_disable_incompatible_switches ();
3668 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3669 unless the user explicitly used the -mno-<option> to disable the code. */
3670 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3671 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3672 else if (TARGET_P9_MINMAX)
3674 if (cpu_index >= 0)
3676 if (cpu_index == PROCESSOR_POWER9)
3678 /* legacy behavior: allow -mcpu=power9 with certain
3679 capabilities explicitly disabled. */
3680 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3682 else
3683 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3684 "for <xxx> less than power9", "-mcpu");
3686 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3687 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3688 & rs6000_isa_flags_explicit))
3689 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3690 were explicitly cleared. */
3691 error ("%qs incompatible with explicitly disabled options",
3692 "-mpower9-minmax");
3693 else
3694 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3696 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3697 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3698 else if (TARGET_VSX)
3699 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3700 else if (TARGET_POPCNTD)
3701 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3702 else if (TARGET_DFP)
3703 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3704 else if (TARGET_CMPB)
3705 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3706 else if (TARGET_FPRND)
3707 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3708 else if (TARGET_POPCNTB)
3709 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3710 else if (TARGET_ALTIVEC)
3711 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3713 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3715 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3716 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3717 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3720 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3722 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3723 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3724 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3727 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3729 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3730 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3731 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3734 if (TARGET_P8_VECTOR && !TARGET_VSX)
3736 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3737 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3738 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3739 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3741 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3742 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3743 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3745 else
3747 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3748 not explicit. */
3749 rs6000_isa_flags |= OPTION_MASK_VSX;
3750 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3754 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3756 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3757 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3758 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3761 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3762 silently turn off quad memory mode. */
3763 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3765 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3766 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3768 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3769 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3771 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3772 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3775 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3776 the words are reversed, but atomic operations can still be done by
3777 swapping the words. */
3778 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3780 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3781 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3782 "mode"));
3784 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3787 /* Assume if the user asked for normal quad memory instructions, they want
3788 the atomic versions as well, unless they explicitly told us not to use quad
3789 word atomic instructions. */
3790 if (TARGET_QUAD_MEMORY
3791 && !TARGET_QUAD_MEMORY_ATOMIC
3792 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3793 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3795 /* If we can shrink-wrap the TOC register save separately, then use
3796 -msave-toc-indirect unless explicitly disabled. */
3797 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
3798 && flag_shrink_wrap_separate
3799 && optimize_function_for_speed_p (cfun))
3800 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
3802 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3803 generating power8 instructions. Power9 does not optimize power8 fusion
3804 cases. */
3805 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3807 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
3808 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3809 else
3810 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3813 /* Setting additional fusion flags turns on base fusion. */
3814 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
3816 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
3818 if (TARGET_P8_FUSION_SIGN)
3819 error ("%qs requires %qs", "-mpower8-fusion-sign",
3820 "-mpower8-fusion");
3822 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
3824 else
3825 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
3828 /* Power8 does not fuse sign extended loads with the addis. If we are
3829 optimizing at high levels for speed, convert a sign extended load into a
3830 zero extending load, and an explicit sign extension. */
3831 if (TARGET_P8_FUSION
3832 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3833 && optimize_function_for_speed_p (cfun)
3834 && optimize >= 3)
3835 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3837 /* ISA 3.0 vector instructions include ISA 2.07. */
3838 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
3840 /* We prefer to not mention undocumented options in
3841 error messages. However, if users have managed to select
3842 power9-vector without selecting power8-vector, they
3843 already know about undocumented flags. */
3844 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) &&
3845 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
3846 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
3847 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
3849 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
3850 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3851 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
3853 else
3855 /* OPTION_MASK_P9_VECTOR is explicit and
3856 OPTION_MASK_P8_VECTOR is not explicit. */
3857 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
3858 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3862 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
3863 support. If we only have ISA 2.06 support, and the user did not specify
3864 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3865 but we don't enable the full vectorization support. */
3866 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3867 TARGET_ALLOW_MOVMISALIGN = 1;
3869 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3871 if (TARGET_ALLOW_MOVMISALIGN > 0
3872 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
3873 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
3875 TARGET_ALLOW_MOVMISALIGN = 0;
3878 /* Determine when unaligned vector accesses are permitted, and when
3879 they are preferred over masked Altivec loads. Note that if
3880 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3881 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3882 not true. */
3883 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3885 if (!TARGET_VSX)
3887 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3888 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
3890 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3893 else if (!TARGET_ALLOW_MOVMISALIGN)
3895 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3896 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
3897 "-mallow-movmisalign");
3899 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3903 /* Use long double size to select the appropriate long double. We use
3904 TYPE_PRECISION to differentiate the 3 different long double types. We map
3905 128 into the precision used for TFmode. */
3906 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
3907 ? 64
3908 : FLOAT_PRECISION_TFmode);
3910 /* Set long double size before the IEEE 128-bit tests. */
3911 if (!global_options_set.x_rs6000_long_double_type_size)
3913 if (main_target_opt != NULL
3914 && (main_target_opt->x_rs6000_long_double_type_size
3915 != default_long_double_size))
3916 error ("target attribute or pragma changes %<long double%> size");
3917 else
3918 rs6000_long_double_type_size = default_long_double_size;
3920 else if (rs6000_long_double_type_size == 128)
3921 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
3922 else if (global_options_set.x_rs6000_ieeequad)
3924 if (global_options.x_rs6000_ieeequad)
3925 error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128");
3926 else
3927 error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128");
3930 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
3931 systems will also set long double to be IEEE 128-bit. AIX and Darwin
3932 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
3933 those systems will not pick up this default. Warn if the user changes the
3934 default unless -Wno-psabi. */
3935 if (!global_options_set.x_rs6000_ieeequad)
3936 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
3938 else
3940 if (global_options.x_rs6000_ieeequad
3941 && (!TARGET_POPCNTD || !TARGET_VSX))
3942 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
3944 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
3946 static bool warned_change_long_double;
3947 if (!warned_change_long_double)
3949 warned_change_long_double = true;
3950 if (TARGET_IEEEQUAD)
3951 warning (OPT_Wpsabi, "Using IEEE extended precision "
3952 "%<long double%>");
3953 else
3954 warning (OPT_Wpsabi, "Using IBM extended precision "
3955 "%<long double%>");
3960 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
3961 systems. In GCC 7, we would enable the IEEE 128-bit floating point
3962 infrastructure (-mfloat128-type) but not enable the actual __float128 type
3963 unless the user used the explicit -mfloat128. In GCC 8, we enable both
3964 the keyword as well as the type. */
3965 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
3967 /* IEEE 128-bit floating point requires VSX support. */
3968 if (TARGET_FLOAT128_KEYWORD)
3970 if (!TARGET_VSX)
3972 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
3973 error ("%qs requires VSX support", "%<-mfloat128%>");
3975 TARGET_FLOAT128_TYPE = 0;
3976 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
3977 | OPTION_MASK_FLOAT128_HW);
3979 else if (!TARGET_FLOAT128_TYPE)
3981 TARGET_FLOAT128_TYPE = 1;
3982 warning (0, "The %<-mfloat128%> option may not be fully supported");
3986 /* Enable the __float128 keyword under Linux by default. */
3987 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
3988 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
3989 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
3991 /* If we are supporting the float128 type and have full ISA 3.0 support,
3992 enable -mfloat128-hardware by default. However, don't enable the
3993 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
3994 because sometimes the compiler wants to put things in an integer
3995 container, and if we don't have __int128 support, it is impossible. */
3996 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
3997 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
3998 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
3999 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
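/* For illustration (hypothetical invocation; behavior hedged): on a
   64-bit Linux target, "gcc -m64 -mcpu=power9" satisfies
   ISA_3_0_MASKS_IEEE, so the default above lets __float128 arithmetic
   use ISA 3.0 hardware instructions such as xsaddqp instead of the
   soft-float library. */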
4001 if (TARGET_FLOAT128_HW
4002 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4004 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4005 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4007 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4010 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4012 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4013 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4015 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4018 /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
4019 if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
4021 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4022 error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
4023 else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
4024 error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
4026 rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
4029 /* -mpcrel requires prefixed load/store addressing. */
4030 if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
4032 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4033 error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
4035 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4038 /* Print the options after updating the defaults. */
4039 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4040 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4042 /* E500mc does "better" if we inline more aggressively. Respect the
4043 user's opinion, though. */
4044 if (rs6000_block_move_inline_limit == 0
4045 && (rs6000_tune == PROCESSOR_PPCE500MC
4046 || rs6000_tune == PROCESSOR_PPCE500MC64
4047 || rs6000_tune == PROCESSOR_PPCE5500
4048 || rs6000_tune == PROCESSOR_PPCE6500))
4049 rs6000_block_move_inline_limit = 128;
4051 /* store_one_arg depends on expand_block_move to handle at least the
4052 size of reg_parm_stack_space. */
4053 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4054 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4056 if (global_init_p)
4058 /* If the appropriate debug option is enabled, replace the target hooks
4059 with debug versions that call the real version and then prints
4060 debugging information. */
4061 if (TARGET_DEBUG_COST)
4063 targetm.rtx_costs = rs6000_debug_rtx_costs;
4064 targetm.address_cost = rs6000_debug_address_cost;
4065 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4068 if (TARGET_DEBUG_ADDR)
4070 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4071 targetm.legitimize_address = rs6000_debug_legitimize_address;
4072 rs6000_secondary_reload_class_ptr
4073 = rs6000_debug_secondary_reload_class;
4074 targetm.secondary_memory_needed
4075 = rs6000_debug_secondary_memory_needed;
4076 targetm.can_change_mode_class
4077 = rs6000_debug_can_change_mode_class;
4078 rs6000_preferred_reload_class_ptr
4079 = rs6000_debug_preferred_reload_class;
4080 rs6000_mode_dependent_address_ptr
4081 = rs6000_debug_mode_dependent_address;
4084 if (rs6000_veclibabi_name)
4086 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4087 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4088 else
4090 error ("unknown vectorization library ABI type (%qs) for "
4091 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4092 ret = false;
4097 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4098 target attribute or pragma which automatically enables both options,
4099 unless the altivec ABI was set. This is set by default for 64-bit, but
4100 not for 32-bit. */
4101 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4103 TARGET_FLOAT128_TYPE = 0;
4104 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4105 | OPTION_MASK_FLOAT128_KEYWORD)
4106 & ~rs6000_isa_flags_explicit);
4109 /* Enable Altivec ABI for AIX -maltivec. */
4110 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4112 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4113 error ("target attribute or pragma changes AltiVec ABI");
4114 else
4115 rs6000_altivec_abi = 1;
4118 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4119 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4120 be explicitly overridden in either case. */
4121 if (TARGET_ELF)
4123 if (!global_options_set.x_rs6000_altivec_abi
4124 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4126 if (main_target_opt != NULL &&
4127 !main_target_opt->x_rs6000_altivec_abi)
4128 error ("target attribute or pragma changes AltiVec ABI");
4129 else
4130 rs6000_altivec_abi = 1;
4134 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4135 So far, the only darwin64 targets are also MACH-O. */
4136 if (TARGET_MACHO
4137 && DEFAULT_ABI == ABI_DARWIN
4138 && TARGET_64BIT)
4140 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4141 error ("target attribute or pragma changes darwin64 ABI");
4142 else
4144 rs6000_darwin64_abi = 1;
4145 /* Default to natural alignment, for better performance. */
4146 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4150 /* Place FP constants in the constant pool instead of TOC
4151 if section anchors are enabled. */
4152 if (flag_section_anchors
4153 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4154 TARGET_NO_FP_IN_TOC = 1;
4156 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4157 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4159 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4160 SUBTARGET_OVERRIDE_OPTIONS;
4161 #endif
4162 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4163 SUBSUBTARGET_OVERRIDE_OPTIONS;
4164 #endif
4165 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4166 SUB3TARGET_OVERRIDE_OPTIONS;
4167 #endif
4169 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4170 after the subtarget override options are done. */
4171 if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4173 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4174 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4176 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4179 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4180 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4182 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4183 && rs6000_tune != PROCESSOR_POWER5
4184 && rs6000_tune != PROCESSOR_POWER6
4185 && rs6000_tune != PROCESSOR_POWER7
4186 && rs6000_tune != PROCESSOR_POWER8
4187 && rs6000_tune != PROCESSOR_POWER9
4188 && rs6000_tune != PROCESSOR_FUTURE
4189 && rs6000_tune != PROCESSOR_PPCA2
4190 && rs6000_tune != PROCESSOR_CELL
4191 && rs6000_tune != PROCESSOR_PPC476);
4192 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4193 || rs6000_tune == PROCESSOR_POWER5
4194 || rs6000_tune == PROCESSOR_POWER7
4195 || rs6000_tune == PROCESSOR_POWER8);
4196 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4197 || rs6000_tune == PROCESSOR_POWER5
4198 || rs6000_tune == PROCESSOR_POWER6
4199 || rs6000_tune == PROCESSOR_POWER7
4200 || rs6000_tune == PROCESSOR_POWER8
4201 || rs6000_tune == PROCESSOR_POWER9
4202 || rs6000_tune == PROCESSOR_FUTURE
4203 || rs6000_tune == PROCESSOR_PPCE500MC
4204 || rs6000_tune == PROCESSOR_PPCE500MC64
4205 || rs6000_tune == PROCESSOR_PPCE5500
4206 || rs6000_tune == PROCESSOR_PPCE6500);
4208 /* Allow debug switches to override the above settings. These are set to -1
4209 in rs6000.opt to indicate the user hasn't directly set the switch. */
4210 if (TARGET_ALWAYS_HINT >= 0)
4211 rs6000_always_hint = TARGET_ALWAYS_HINT;
4213 if (TARGET_SCHED_GROUPS >= 0)
4214 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4216 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4217 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4219 rs6000_sched_restricted_insns_priority
4220 = (rs6000_sched_groups ? 1 : 0);
4222 /* Handle -msched-costly-dep option. */
4223 rs6000_sched_costly_dep
4224 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4226 if (rs6000_sched_costly_dep_str)
4228 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4229 rs6000_sched_costly_dep = no_dep_costly;
4230 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4231 rs6000_sched_costly_dep = all_deps_costly;
4232 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4233 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4234 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4235 rs6000_sched_costly_dep = store_to_load_dep_costly;
4236 else
4237 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4238 atoi (rs6000_sched_costly_dep_str));
4241 /* Handle -minsert-sched-nops option. */
4242 rs6000_sched_insert_nops
4243 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4245 if (rs6000_sched_insert_nops_str)
4247 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4248 rs6000_sched_insert_nops = sched_finish_none;
4249 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4250 rs6000_sched_insert_nops = sched_finish_pad_groups;
4251 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4252 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4253 else
4254 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4255 atoi (rs6000_sched_insert_nops_str));
4258 /* Handle the stack protector options. */
4259 if (!global_options_set.x_rs6000_stack_protector_guard)
4260 #ifdef TARGET_THREAD_SSP_OFFSET
4261 rs6000_stack_protector_guard = SSP_TLS;
4262 #else
4263 rs6000_stack_protector_guard = SSP_GLOBAL;
4264 #endif
4266 #ifdef TARGET_THREAD_SSP_OFFSET
4267 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4268 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4269 #endif
4271 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4273 char *endp;
4274 const char *str = rs6000_stack_protector_guard_offset_str;
4276 errno = 0;
4277 long offset = strtol (str, &endp, 0);
4278 if (!*str || *endp || errno)
4279 error ("%qs is not a valid number in %qs", str,
4280 "-mstack-protector-guard-offset=");
4282 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4283 || (TARGET_64BIT && (offset & 3)))
4284 error ("%qs is not a valid offset in %qs", str,
4285 "-mstack-protector-guard-offset=");
4287 rs6000_stack_protector_guard_offset = offset;
4290 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4292 const char *str = rs6000_stack_protector_guard_reg_str;
4293 int reg = decode_reg_name (str);
4295 if (!IN_RANGE (reg, 1, 31))
4296 error ("%qs is not a valid base register in %qs", str,
4297 "-mstack-protector-guard-reg=");
4299 rs6000_stack_protector_guard_reg = reg;
4302 if (rs6000_stack_protector_guard == SSP_TLS
4303 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4304 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
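/* For illustration (hypothetical invocation): the three guard switches
   validated above are typically used together, e.g. to load the canary
   from a fixed offset off the TCB register (r13 in 64-bit mode):

     gcc -m64 -fstack-protector-strong \
         -mstack-protector-guard=tls \
         -mstack-protector-guard-reg=r13 \
         -mstack-protector-guard-offset=0x7010 foo.c

   The offset must fit in a signed 16-bit displacement and, for -m64,
   be a multiple of 4, per the IN_RANGE check above. */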
4306 if (global_init_p)
4308 #ifdef TARGET_REGNAMES
4309 /* If the user desires alternate register names, copy in the
4310 alternate names now. */
4311 if (TARGET_REGNAMES)
4312 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4313 #endif
4315 /* Set aix_struct_return last, after the ABI is determined.
4316 If -maix-struct-return or -msvr4-struct-return was explicitly
4317 used, don't override with the ABI default. */
4318 if (!global_options_set.x_aix_struct_return)
4319 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4321 #if 0
4322 /* IBM XL compiler defaults to unsigned bitfields. */
4323 if (TARGET_XL_COMPAT)
4324 flag_signed_bitfields = 0;
4325 #endif
4327 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4328 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4330 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4332 /* We can only guarantee the availability of DI pseudo-ops when
4333 assembling for 64-bit targets. */
4334 if (!TARGET_64BIT)
4336 targetm.asm_out.aligned_op.di = NULL;
4337 targetm.asm_out.unaligned_op.di = NULL;
4341 /* Set branch target alignment, if not optimizing for size. */
4342 if (!optimize_size)
4344 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4345 aligned 8-byte to avoid misprediction by the branch predictor. */
4346 if (rs6000_tune == PROCESSOR_TITAN
4347 || rs6000_tune == PROCESSOR_CELL)
4349 if (flag_align_functions && !str_align_functions)
4350 str_align_functions = "8";
4351 if (flag_align_jumps && !str_align_jumps)
4352 str_align_jumps = "8";
4353 if (flag_align_loops && !str_align_loops)
4354 str_align_loops = "8";
4356 if (rs6000_align_branch_targets)
4358 if (flag_align_functions && !str_align_functions)
4359 str_align_functions = "16";
4360 if (flag_align_jumps && !str_align_jumps)
4361 str_align_jumps = "16";
4362 if (flag_align_loops && !str_align_loops)
4364 can_override_loop_align = 1;
4365 str_align_loops = "16";
4369 if (flag_align_jumps && !str_align_jumps)
4370 str_align_jumps = "16";
4371 if (flag_align_loops && !str_align_loops)
4372 str_align_loops = "16";
4375 /* Arrange to save and restore machine status around nested functions. */
4376 init_machine_status = rs6000_init_machine_status;
4378 /* We should always be splitting complex arguments, but we can't break
4379 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4380 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4381 targetm.calls.split_complex_arg = NULL;
4383 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4384 if (DEFAULT_ABI == ABI_AIX)
4385 targetm.calls.custom_function_descriptors = 0;
4388 /* Initialize rs6000_cost with the appropriate target costs. */
4389 if (optimize_size)
4390 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4391 else
4392 switch (rs6000_tune)
4394 case PROCESSOR_RS64A:
4395 rs6000_cost = &rs64a_cost;
4396 break;
4398 case PROCESSOR_MPCCORE:
4399 rs6000_cost = &mpccore_cost;
4400 break;
4402 case PROCESSOR_PPC403:
4403 rs6000_cost = &ppc403_cost;
4404 break;
4406 case PROCESSOR_PPC405:
4407 rs6000_cost = &ppc405_cost;
4408 break;
4410 case PROCESSOR_PPC440:
4411 rs6000_cost = &ppc440_cost;
4412 break;
4414 case PROCESSOR_PPC476:
4415 rs6000_cost = &ppc476_cost;
4416 break;
4418 case PROCESSOR_PPC601:
4419 rs6000_cost = &ppc601_cost;
4420 break;
4422 case PROCESSOR_PPC603:
4423 rs6000_cost = &ppc603_cost;
4424 break;
4426 case PROCESSOR_PPC604:
4427 rs6000_cost = &ppc604_cost;
4428 break;
4430 case PROCESSOR_PPC604e:
4431 rs6000_cost = &ppc604e_cost;
4432 break;
4434 case PROCESSOR_PPC620:
4435 rs6000_cost = &ppc620_cost;
4436 break;
4438 case PROCESSOR_PPC630:
4439 rs6000_cost = &ppc630_cost;
4440 break;
4442 case PROCESSOR_CELL:
4443 rs6000_cost = &ppccell_cost;
4444 break;
4446 case PROCESSOR_PPC750:
4447 case PROCESSOR_PPC7400:
4448 rs6000_cost = &ppc750_cost;
4449 break;
4451 case PROCESSOR_PPC7450:
4452 rs6000_cost = &ppc7450_cost;
4453 break;
4455 case PROCESSOR_PPC8540:
4456 case PROCESSOR_PPC8548:
4457 rs6000_cost = &ppc8540_cost;
4458 break;
4460 case PROCESSOR_PPCE300C2:
4461 case PROCESSOR_PPCE300C3:
4462 rs6000_cost = &ppce300c2c3_cost;
4463 break;
4465 case PROCESSOR_PPCE500MC:
4466 rs6000_cost = &ppce500mc_cost;
4467 break;
4469 case PROCESSOR_PPCE500MC64:
4470 rs6000_cost = &ppce500mc64_cost;
4471 break;
4473 case PROCESSOR_PPCE5500:
4474 rs6000_cost = &ppce5500_cost;
4475 break;
4477 case PROCESSOR_PPCE6500:
4478 rs6000_cost = &ppce6500_cost;
4479 break;
4481 case PROCESSOR_TITAN:
4482 rs6000_cost = &titan_cost;
4483 break;
4485 case PROCESSOR_POWER4:
4486 case PROCESSOR_POWER5:
4487 rs6000_cost = &power4_cost;
4488 break;
4490 case PROCESSOR_POWER6:
4491 rs6000_cost = &power6_cost;
4492 break;
4494 case PROCESSOR_POWER7:
4495 rs6000_cost = &power7_cost;
4496 break;
4498 case PROCESSOR_POWER8:
4499 rs6000_cost = &power8_cost;
4500 break;
4502 case PROCESSOR_POWER9:
4503 case PROCESSOR_FUTURE:
4504 rs6000_cost = &power9_cost;
4505 break;
4507 case PROCESSOR_PPCA2:
4508 rs6000_cost = &ppca2_cost;
4509 break;
4511 default:
4512 gcc_unreachable ();
4515 if (global_init_p)
4517 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4518 param_simultaneous_prefetches,
4519 rs6000_cost->simultaneous_prefetches);
4520 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4521 param_l1_cache_size,
4522 rs6000_cost->l1_cache_size);
4523 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4524 param_l1_cache_line_size,
4525 rs6000_cost->cache_line_size);
4526 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4527 param_l2_cache_size,
4528 rs6000_cost->l2_cache_size);
4530 /* Increase loop peeling limits based on performance analysis. */
4531 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4532 param_max_peeled_insns, 400);
4533 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4534 param_max_completely_peeled_insns, 400);
4536 /* Use the 'model' -fsched-pressure algorithm by default. */
4537 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4538 param_sched_pressure_algorithm,
4539 SCHED_PRESSURE_MODEL);
4541 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
4542 turns -fweb and -frename-registers on. */
4543 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
4544 || (global_options_set.x_flag_unroll_all_loops
4545 && flag_unroll_all_loops))
4547 if (!global_options_set.x_unroll_only_small_loops)
4548 unroll_only_small_loops = 0;
4549 if (!global_options_set.x_flag_rename_registers)
4550 flag_rename_registers = 1;
4551 if (!global_options_set.x_flag_web)
4552 flag_web = 1;
4555 /* If using typedef char *va_list, signal that
4556 __builtin_va_start (&ap, 0) can be optimized to
4557 ap = __builtin_next_arg (0). */
4558 if (DEFAULT_ABI != ABI_V4)
4559 targetm.expand_builtin_va_start = NULL;
4562 /* If not explicitly specified via option, decide whether to generate indexed
4563 load/store instructions. A value of -1 indicates that the
4564 initial value of this variable has not been overwritten. During
4565 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4566 if (TARGET_AVOID_XFORM == -1)
4567 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4568 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4569 need indexed accesses and the type used is the scalar type of the element
4570 being loaded or stored. */
4571 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4572 && !TARGET_ALTIVEC);
4574 /* Set the -mrecip options. */
4575 if (rs6000_recip_name)
4577 char *p = ASTRDUP (rs6000_recip_name);
4578 char *q;
4579 unsigned int mask, i;
4580 bool invert;
4582 while ((q = strtok (p, ",")) != NULL)
4584 p = NULL;
4585 if (*q == '!')
4587 invert = true;
4588 q++;
4590 else
4591 invert = false;
4593 if (!strcmp (q, "default"))
4594 mask = ((TARGET_RECIP_PRECISION)
4595 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4596 else
4598 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4599 if (!strcmp (q, recip_options[i].string))
4601 mask = recip_options[i].mask;
4602 break;
4605 if (i == ARRAY_SIZE (recip_options))
4607 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4608 invert = false;
4609 mask = 0;
4610 ret = false;
4614 if (invert)
4615 rs6000_recip_control &= ~mask;
4616 else
4617 rs6000_recip_control |= mask;
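/* For illustration (hypothetical invocation): the parser above accepts a
   comma-separated list in which a leading '!' clears the corresponding
   mask bits, so the following enables reciprocal square root estimates
   while keeping division exact:

     gcc -O3 -mcpu=power8 -mrecip=rsqrt,!div foo.c
*/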
4621 /* Set the builtin mask from the various options in use that could affect
4622 which builtins are enabled. In the past we used target_flags, but we've
4623 run out of bits, and some options are no longer in target_flags. */
4624 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4625 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4626 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4627 rs6000_builtin_mask);
4629 /* Initialize all of the registers. */
4630 rs6000_init_hard_regno_mode_ok (global_init_p);
4632 /* Save the initial options in case the user uses function-specific options. */
4633 if (global_init_p)
4634 target_option_default_node = target_option_current_node
4635 = build_target_option_node (&global_options);
4637 /* If not explicitly specified via option, decide whether to generate the
4638 extra blrs required to preserve the link stack on some cpus (e.g., 476). */
4639 if (TARGET_LINK_STACK == -1)
4640 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4642 /* Deprecate use of -mno-speculate-indirect-jumps. */
4643 if (!rs6000_speculate_indirect_jumps)
4644 warning (0, "%qs is deprecated and not recommended in any circumstances",
4645 "-mno-speculate-indirect-jumps");
4647 return ret;
4650 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4651 define the target cpu type. */
4653 static void
4654 rs6000_option_override (void)
4656 (void) rs6000_option_override_internal (true);
4660 /* Implement targetm.vectorize.builtin_mask_for_load. */
4661 static tree
4662 rs6000_builtin_mask_for_load (void)
4664 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4665 if ((TARGET_ALTIVEC && !TARGET_VSX)
4666 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4667 return altivec_builtin_mask_for_load;
4668 else
4669 return 0;
4672 /* Implement LOOP_ALIGN. */
4673 align_flags
4674 rs6000_loop_align (rtx label)
4676 basic_block bb;
4677 int ninsns;
4679 /* Don't override loop alignment if -falign-loops was specified. */
4680 if (!can_override_loop_align)
4681 return align_loops;
4683 bb = BLOCK_FOR_INSN (label);
4684 ninsns = num_loop_insns(bb->loop_father);
4686 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4687 if (ninsns > 4 && ninsns <= 8
4688 && (rs6000_tune == PROCESSOR_POWER4
4689 || rs6000_tune == PROCESSOR_POWER5
4690 || rs6000_tune == PROCESSOR_POWER6
4691 || rs6000_tune == PROCESSOR_POWER7
4692 || rs6000_tune == PROCESSOR_POWER8))
4693 return align_flags (5);
4694 else
4695 return align_loops;
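/* For illustration: align_flags (5) requests 2**5 = 32-byte alignment,
   padding a 5-to-8 instruction loop so it fits in one 32-byte icache
   sector on the Power4..Power8 tunings listed above. */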
4698 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4699 after applying N iterations. This routine does not determine
4700 how many iterations are required to reach the desired alignment. */
4702 static bool
4703 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4705 if (is_packed)
4706 return false;
4708 if (TARGET_32BIT)
4710 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4711 return true;
4713 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4714 return true;
4716 return false;
4718 else
4720 if (TARGET_MACHO)
4721 return false;
4723 /* Assume that all other types are naturally aligned. CHECKME! */
4724 return true;
4728 /* Return true if the vector misalignment factor is supported by the
4729 target. */
4730 static bool
4731 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4732 const_tree type,
4733 int misalignment,
4734 bool is_packed)
4736 if (TARGET_VSX)
4738 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4739 return true;
4741 /* Return false if the movmisalign pattern is not supported for this mode. */
4742 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4743 return false;
4745 if (misalignment == -1)
4747 /* Misalignment factor is unknown at compile time but we know
4748 it's word aligned. */
4749 if (rs6000_vector_alignment_reachable (type, is_packed))
4751 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4753 if (element_size == 64 || element_size == 32)
4754 return true;
4757 return false;
4760 /* VSX supports word-aligned vectors. */
4761 if (misalignment % 4 == 0)
4762 return true;
4764 return false;
4767 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4768 static int
4769 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4770 tree vectype, int misalign)
4772 unsigned elements;
4773 tree elem_type;
4775 switch (type_of_cost)
4777 case scalar_stmt:
4778 case scalar_store:
4779 case vector_stmt:
4780 case vector_store:
4781 case vec_to_scalar:
4782 case scalar_to_vec:
4783 case cond_branch_not_taken:
4784 return 1;
4785 case scalar_load:
4786 case vector_load:
4787 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4788 return 2;
4790 case vec_perm:
4791 /* Power7 has only one permute unit, make it a bit expensive. */
4792 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4793 return 3;
4794 else
4795 return 1;
4797 case vec_promote_demote:
4798 /* Power7 has only one permute/pack unit, make it a bit expensive. */
4799 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
4800 return 4;
4801 else
4802 return 1;
4804 case cond_branch_taken:
4805 return 3;
4807 case unaligned_load:
4808 case vector_gather_load:
4809 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4810 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4811 return 2;
4813 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4815 elements = TYPE_VECTOR_SUBPARTS (vectype);
4816 if (elements == 2)
4817 /* Double word aligned. */
4818 return 4;
4820 if (elements == 4)
4822 switch (misalign)
4824 case 8:
4825 /* Double word aligned. */
4826 return 4;
4828 case -1:
4829 /* Unknown misalignment. */
4830 case 4:
4831 case 12:
4832 /* Word aligned. */
4833 return 33;
4835 default:
4836 gcc_unreachable ();
4841 if (TARGET_ALTIVEC)
4842 /* Misaligned loads are not supported. */
4843 gcc_unreachable ();
4845 /* Like rs6000_insn_cost, make load insns cost a bit more. */
4846 return 4;
4848 case unaligned_store:
4849 case vector_scatter_store:
4850 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4851 return 1;
4853 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4855 elements = TYPE_VECTOR_SUBPARTS (vectype);
4856 if (elements == 2)
4857 /* Double word aligned. */
4858 return 2;
4860 if (elements == 4)
4862 switch (misalign)
4864 case 8:
4865 /* Double word aligned. */
4866 return 2;
4868 case -1:
4869 /* Unknown misalignment. */
4870 case 4:
4871 case 12:
4872 /* Word aligned. */
4873 return 23;
4875 default:
4876 gcc_unreachable ();
4881 if (TARGET_ALTIVEC)
4882 /* Misaligned stores are not supported. */
4883 gcc_unreachable ();
4885 return 2;
4887 case vec_construct:
4888 /* This is a rough approximation assuming non-constant elements
4889 constructed into a vector via element insertion. FIXME:
4890 vec_construct is not granular enough for uniformly good
4891 decisions. If the initialization is a splat, this is
4892 cheaper than we estimate. Improve this someday. */
4893 elem_type = TREE_TYPE (vectype);
4894 /* 32-bit vectors loaded into registers are stored as double
4895 precision, so we need 2 permutes, 2 converts, and 1 merge
4896 to construct a vector of short floats from them. */
4897 if (SCALAR_FLOAT_TYPE_P (elem_type)
4898 && TYPE_PRECISION (elem_type) == 32)
4899 return 5;
4900 /* On POWER9, integer vector types are built up in GPRs and then
4901 use a direct move (2 cycles). For POWER8 this is even worse,
4902 as we need two direct moves and a merge, and the direct moves
4903 are five cycles. */
4904 else if (INTEGRAL_TYPE_P (elem_type))
4906 if (TARGET_P9_VECTOR)
4907 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
4908 else
4909 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
4911 else
4912 /* V2DFmode doesn't need a direct move. */
4913 return 2;
4915 default:
4916 gcc_unreachable ();
4920 /* Implement targetm.vectorize.preferred_simd_mode. */
4922 static machine_mode
4923 rs6000_preferred_simd_mode (scalar_mode mode)
4925 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
4927 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
4928 return vmode.require ();
4930 return word_mode;
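/* For illustration: with mode == SFmode, 16 / 4 yields a 4-lane vector,
   so V4SFmode is returned when the vector unit supports it; otherwise
   the function falls back to word_mode (i.e. scalar code). */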
4933 typedef struct _rs6000_cost_data
4935 struct loop *loop_info;
4936 unsigned cost[3];
4937 } rs6000_cost_data;
4939 /* Test for likely overcommitment of vector hardware resources. If a
4940 loop iteration is relatively large, and too large a percentage of
4941 instructions in the loop are vectorized, the cost model may not
4942 adequately reflect delays from unavailable vector resources.
4943 Penalize the loop body cost for this case. */
4945 static void
4946 rs6000_density_test (rs6000_cost_data *data)
4948 const int DENSITY_PCT_THRESHOLD = 85;
4949 const int DENSITY_SIZE_THRESHOLD = 70;
4950 const int DENSITY_PENALTY = 10;
4951 struct loop *loop = data->loop_info;
4952 basic_block *bbs = get_loop_body (loop);
4953 int nbbs = loop->num_nodes;
4954 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
4955 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4956 int i, density_pct;
4958 for (i = 0; i < nbbs; i++)
4960 basic_block bb = bbs[i];
4961 gimple_stmt_iterator gsi;
4963 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4965 gimple *stmt = gsi_stmt (gsi);
4966 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
4968 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4969 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4970 not_vec_cost++;
4974 free (bbs);
4975 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4977 if (density_pct > DENSITY_PCT_THRESHOLD
4978 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4980 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4981 if (dump_enabled_p ())
4982 dump_printf_loc (MSG_NOTE, vect_location,
4983 "density %d%%, cost %d exceeds threshold, penalizing "
4984 "loop body cost by %d%%", density_pct,
4985 vec_cost + not_vec_cost, DENSITY_PENALTY);
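/* A worked example with made-up numbers: vec_cost = 90 and
   not_vec_cost = 5 give density_pct = 9000 / 95 = 94; since 94 > 85 and
   95 > 70, the body cost is raised to 90 * 110 / 100 = 99. */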
4989 /* Implement targetm.vectorize.init_cost. */
4991 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
4992 instruction is needed by the vectorization. */
4993 static bool rs6000_vect_nonmem;
4995 static void *
4996 rs6000_init_cost (struct loop *loop_info)
4998 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4999 data->loop_info = loop_info;
5000 data->cost[vect_prologue] = 0;
5001 data->cost[vect_body] = 0;
5002 data->cost[vect_epilogue] = 0;
5003 rs6000_vect_nonmem = false;
5004 return data;
5007 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5008 For some statements, we would like to further fine-tune the cost on
5009 top of the rs6000_builtin_vectorization_cost handling, which doesn't have
5010 any information on statement operation codes etc. One typical case here is
5011 COND_EXPR: it costs the same as a simple FXU instruction when evaluating
5012 the scalar cost, but it should be priced higher since it is transformed into
5013 either compare + branch or compare + isel instructions. */
5015 static unsigned
5016 adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5017 struct _stmt_vec_info *stmt_info)
5019 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5020 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5022 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5023 if (subcode == COND_EXPR)
5024 return 2;
5027 return 0;
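/* For illustration (hypothetical user code): the ternary below typically
   gimplifies to a COND_EXPR assignment, so its scalar cost is bumped by
   2 to reflect the eventual compare + branch or compare + isel sequence:

     for (int i = 0; i < n; i++)
       a[i] = b[i] > 0 ? b[i] : 0;
*/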
5030 /* Implement targetm.vectorize.add_stmt_cost. */
5032 static unsigned
5033 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5034 struct _stmt_vec_info *stmt_info, int misalign,
5035 enum vect_cost_model_location where)
5037 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5038 unsigned retval = 0;
5040 if (flag_vect_cost_model)
5042 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5043 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5044 misalign);
5045 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5046 /* Statements in an inner loop relative to the loop being
5047 vectorized are weighted more heavily. The value here is
5048 arbitrary and could potentially be improved with analysis. */
5049 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5050 count *= 50; /* FIXME. */
5052 retval = (unsigned) (count * stmt_cost);
5053 cost_data->cost[where] += retval;
5055 /* Check whether we're doing something other than just a copy loop.
5056 Not all such loops may be profitably vectorized; see
5057 rs6000_finish_cost. */
5058 if ((kind == vec_to_scalar || kind == vec_perm
5059 || kind == vec_promote_demote || kind == vec_construct
5060 || kind == scalar_to_vec)
5061 || (where == vect_body && kind == vector_stmt))
5062 rs6000_vect_nonmem = true;
5065 return retval;
5068 /* Implement targetm.vectorize.finish_cost. */
5070 static void
5071 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5072 unsigned *body_cost, unsigned *epilogue_cost)
5074 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5076 if (cost_data->loop_info)
5077 rs6000_density_test (cost_data);
5079 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5080 that require versioning for any reason. The vectorization is at
5081 best a wash inside the loop, and the versioning checks make
5082 profitability highly unlikely and potentially quite harmful. */
5083 if (cost_data->loop_info)
5085 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5086 if (!rs6000_vect_nonmem
5087 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5088 && LOOP_REQUIRES_VERSIONING (vec_info))
5089 cost_data->cost[vect_body] += 10000;
5092 *prologue_cost = cost_data->cost[vect_prologue];
5093 *body_cost = cost_data->cost[vect_body];
5094 *epilogue_cost = cost_data->cost[vect_epilogue];
5097 /* Implement targetm.vectorize.destroy_cost_data. */
5099 static void
5100 rs6000_destroy_cost_data (void *data)
5102 free (data);
5105 /* Implement targetm.loop_unroll_adjust. */
5107 static unsigned
5108 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5110 if (unroll_only_small_loops)
5112 /* TODO: This is hardcoded to 10 right now. It can be refined, for
5113 example, we may want to unroll very small loops more times (4 perhaps).
5114 We also should use a PARAM for this. */
5115 if (loop->ninsns <= 10)
5116 return MIN (2, nunroll);
5117 else
5118 return 0;
5121 return nunroll;
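/* For illustration: under -munroll-only-small-loops, a 9-instruction
   loop with nunroll == 8 is capped at MIN (2, 8) == 2 copies, while an
   11-instruction loop is not unrolled at all. */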
5124 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5125 library with vectorized intrinsics. */
5127 static tree
5128 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5129 tree type_in)
5131 char name[32];
5132 const char *suffix = NULL;
5133 tree fntype, new_fndecl, bdecl = NULL_TREE;
5134 int n_args = 1;
5135 const char *bname;
5136 machine_mode el_mode, in_mode;
5137 int n, in_n;
5139 /* Libmass is suitable for unsafe math only as it does not correctly support
5140 parts of IEEE with the required precision such as denormals. Only support
5141 it if we have VSX to use the simd d2 or f4 functions.
5142 XXX: Add variable length support. */
5143 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5144 return NULL_TREE;
5146 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5147 n = TYPE_VECTOR_SUBPARTS (type_out);
5148 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5149 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5150 if (el_mode != in_mode
5151 || n != in_n)
5152 return NULL_TREE;
5154 switch (fn)
5156 CASE_CFN_ATAN2:
5157 CASE_CFN_HYPOT:
5158 CASE_CFN_POW:
5159 n_args = 2;
5160 gcc_fallthrough ();
5162 CASE_CFN_ACOS:
5163 CASE_CFN_ACOSH:
5164 CASE_CFN_ASIN:
5165 CASE_CFN_ASINH:
5166 CASE_CFN_ATAN:
5167 CASE_CFN_ATANH:
5168 CASE_CFN_CBRT:
5169 CASE_CFN_COS:
5170 CASE_CFN_COSH:
5171 CASE_CFN_ERF:
5172 CASE_CFN_ERFC:
5173 CASE_CFN_EXP2:
5174 CASE_CFN_EXP:
5175 CASE_CFN_EXPM1:
5176 CASE_CFN_LGAMMA:
5177 CASE_CFN_LOG10:
5178 CASE_CFN_LOG1P:
5179 CASE_CFN_LOG2:
5180 CASE_CFN_LOG:
5181 CASE_CFN_SIN:
5182 CASE_CFN_SINH:
5183 CASE_CFN_SQRT:
5184 CASE_CFN_TAN:
5185 CASE_CFN_TANH:
5186 if (el_mode == DFmode && n == 2)
5188 bdecl = mathfn_built_in (double_type_node, fn);
5189 suffix = "d2"; /* pow -> powd2 */
5191 else if (el_mode == SFmode && n == 4)
5193 bdecl = mathfn_built_in (float_type_node, fn);
5194 suffix = "4"; /* powf -> powf4 */
5196 else
5197 return NULL_TREE;
5198 if (!bdecl)
5199 return NULL_TREE;
5200 break;
5202 default:
5203 return NULL_TREE;
5206 gcc_assert (suffix != NULL);
5207 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5208 if (!bname)
5209 return NULL_TREE;
5211 strcpy (name, bname + sizeof ("__builtin_") - 1);
5212 strcat (name, suffix);
5214 if (n_args == 1)
5215 fntype = build_function_type_list (type_out, type_in, NULL);
5216 else if (n_args == 2)
5217 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5218 else
5219 gcc_unreachable ();
5221 /* Build a function declaration for the vectorized function. */
5222 new_fndecl = build_decl (BUILTINS_LOCATION,
5223 FUNCTION_DECL, get_identifier (name), fntype);
5224 TREE_PUBLIC (new_fndecl) = 1;
5225 DECL_EXTERNAL (new_fndecl) = 1;
5226 DECL_IS_NOVOPS (new_fndecl) = 1;
5227 TREE_READONLY (new_fndecl) = 1;
5229 return new_fndecl;
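/* Example of the name mangling above (following the suffix comments):
   for CFN_POW with V2DFmode in/out, BDECL is __builtin_pow, so NAME
   becomes "pow" + "d2" = "powd2"; for V4SFmode it is "powf" + "4" =
   "powf4".  The resulting decl is an external, novops, readonly
   function taking one or two vector arguments.  */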
5232 /* Returns a function decl for a vectorized version of the builtin function
5233 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5234 if it is not available. */
5236 static tree
5237 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5238 tree type_in)
5240 machine_mode in_mode, out_mode;
5241 int in_n, out_n;
5243 if (TARGET_DEBUG_BUILTIN)
5244 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5245 combined_fn_name (combined_fn (fn)),
5246 GET_MODE_NAME (TYPE_MODE (type_out)),
5247 GET_MODE_NAME (TYPE_MODE (type_in)));
5249 if (TREE_CODE (type_out) != VECTOR_TYPE
5250 || TREE_CODE (type_in) != VECTOR_TYPE)
5251 return NULL_TREE;
5253 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5254 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5255 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5256 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5258 switch (fn)
5260 CASE_CFN_COPYSIGN:
5261 if (VECTOR_UNIT_VSX_P (V2DFmode)
5262 && out_mode == DFmode && out_n == 2
5263 && in_mode == DFmode && in_n == 2)
5264 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5265 if (VECTOR_UNIT_VSX_P (V4SFmode)
5266 && out_mode == SFmode && out_n == 4
5267 && in_mode == SFmode && in_n == 4)
5268 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5269 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5270 && out_mode == SFmode && out_n == 4
5271 && in_mode == SFmode && in_n == 4)
5272 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5273 break;
5274 CASE_CFN_CEIL:
5275 if (VECTOR_UNIT_VSX_P (V2DFmode)
5276 && out_mode == DFmode && out_n == 2
5277 && in_mode == DFmode && in_n == 2)
5278 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5279 if (VECTOR_UNIT_VSX_P (V4SFmode)
5280 && out_mode == SFmode && out_n == 4
5281 && in_mode == SFmode && in_n == 4)
5282 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5283 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5284 && out_mode == SFmode && out_n == 4
5285 && in_mode == SFmode && in_n == 4)
5286 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5287 break;
5288 CASE_CFN_FLOOR:
5289 if (VECTOR_UNIT_VSX_P (V2DFmode)
5290 && out_mode == DFmode && out_n == 2
5291 && in_mode == DFmode && in_n == 2)
5292 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5293 if (VECTOR_UNIT_VSX_P (V4SFmode)
5294 && out_mode == SFmode && out_n == 4
5295 && in_mode == SFmode && in_n == 4)
5296 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5297 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5298 && out_mode == SFmode && out_n == 4
5299 && in_mode == SFmode && in_n == 4)
5300 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5301 break;
5302 CASE_CFN_FMA:
5303 if (VECTOR_UNIT_VSX_P (V2DFmode)
5304 && out_mode == DFmode && out_n == 2
5305 && in_mode == DFmode && in_n == 2)
5306 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5307 if (VECTOR_UNIT_VSX_P (V4SFmode)
5308 && out_mode == SFmode && out_n == 4
5309 && in_mode == SFmode && in_n == 4)
5310 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5311 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5312 && out_mode == SFmode && out_n == 4
5313 && in_mode == SFmode && in_n == 4)
5314 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5315 break;
5316 CASE_CFN_TRUNC:
5317 if (VECTOR_UNIT_VSX_P (V2DFmode)
5318 && out_mode == DFmode && out_n == 2
5319 && in_mode == DFmode && in_n == 2)
5320 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5321 if (VECTOR_UNIT_VSX_P (V4SFmode)
5322 && out_mode == SFmode && out_n == 4
5323 && in_mode == SFmode && in_n == 4)
5324 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5325 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5326 && out_mode == SFmode && out_n == 4
5327 && in_mode == SFmode && in_n == 4)
5328 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5329 break;
5330 CASE_CFN_NEARBYINT:
5331 if (VECTOR_UNIT_VSX_P (V2DFmode)
5332 && flag_unsafe_math_optimizations
5333 && out_mode == DFmode && out_n == 2
5334 && in_mode == DFmode && in_n == 2)
5335 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5336 if (VECTOR_UNIT_VSX_P (V4SFmode)
5337 && flag_unsafe_math_optimizations
5338 && out_mode == SFmode && out_n == 4
5339 && in_mode == SFmode && in_n == 4)
5340 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5341 break;
5342 CASE_CFN_RINT:
5343 if (VECTOR_UNIT_VSX_P (V2DFmode)
5344 && !flag_trapping_math
5345 && out_mode == DFmode && out_n == 2
5346 && in_mode == DFmode && in_n == 2)
5347 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5348 if (VECTOR_UNIT_VSX_P (V4SFmode)
5349 && !flag_trapping_math
5350 && out_mode == SFmode && out_n == 4
5351 && in_mode == SFmode && in_n == 4)
5352 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5353 break;
5354 default:
5355 break;
5358 /* Generate calls to libmass if appropriate. */
5359 if (rs6000_veclib_handler)
5360 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5362 return NULL_TREE;
5365 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5367 static tree
5368 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5369 tree type_in)
5371 machine_mode in_mode, out_mode;
5372 int in_n, out_n;
5374 if (TARGET_DEBUG_BUILTIN)
5375 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5376 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5377 GET_MODE_NAME (TYPE_MODE (type_out)),
5378 GET_MODE_NAME (TYPE_MODE (type_in)));
5380 if (TREE_CODE (type_out) != VECTOR_TYPE
5381 || TREE_CODE (type_in) != VECTOR_TYPE)
5382 return NULL_TREE;
5384 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5385 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5386 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5387 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5389 enum rs6000_builtins fn
5390 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
5391 switch (fn)
5393 case RS6000_BUILTIN_RSQRTF:
5394 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5395 && out_mode == SFmode && out_n == 4
5396 && in_mode == SFmode && in_n == 4)
5397 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5398 break;
5399 case RS6000_BUILTIN_RSQRT:
5400 if (VECTOR_UNIT_VSX_P (V2DFmode)
5401 && out_mode == DFmode && out_n == 2
5402 && in_mode == DFmode && in_n == 2)
5403 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5404 break;
5405 case RS6000_BUILTIN_RECIPF:
5406 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5407 && out_mode == SFmode && out_n == 4
5408 && in_mode == SFmode && in_n == 4)
5409 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5410 break;
5411 case RS6000_BUILTIN_RECIP:
5412 if (VECTOR_UNIT_VSX_P (V2DFmode)
5413 && out_mode == DFmode && out_n == 2
5414 && in_mode == DFmode && in_n == 2)
5415 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5416 break;
5417 default:
5418 break;
5420 return NULL_TREE;
5423 /* Default CPU string for rs6000*_file_start functions. */
5424 static const char *rs6000_default_cpu;
5426 #ifdef USING_ELFOS_H
5427 const char *rs6000_machine;
5429 const char *
5430 rs6000_machine_from_flags (void)
5432 HOST_WIDE_INT flags = rs6000_isa_flags;
5434 /* Disable the flags that should never influence the .machine selection. */
5435 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT);
5437 if ((flags & (ISA_FUTURE_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5438 return "future";
5439 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5440 return "power9";
5441 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5442 return "power8";
5443 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5444 return "power7";
5445 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5446 return "power6";
5447 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5448 return "power5";
5449 if ((flags & ISA_2_1_MASKS) != 0)
5450 return "power4";
5451 if ((flags & OPTION_MASK_POWERPC64) != 0)
5452 return "ppc64";
5453 return "ppc";
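/* For instance, compiling with -mcpu=power9 sets ISA 3.0 flag bits that
   are not part of ISA 2.7, so this function returns "power9" and
   emit_asm_machine below writes "\t.machine power9" into the assembly
   file.  (Illustrative; the exact bit sets are defined by the
   ISA_*_MASKS_SERVER macros.)  */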
5456 void
5457 emit_asm_machine (void)
5459 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5461 #endif
5463 /* Do anything needed at the start of the asm file. */
5465 static void
5466 rs6000_file_start (void)
5468 char buffer[80];
5469 const char *start = buffer;
5470 FILE *file = asm_out_file;
5472 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5474 default_file_start ();
5476 if (flag_verbose_asm)
5478 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5480 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5482 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5483 start = "";
5486 if (global_options_set.x_rs6000_cpu_index)
5488 fprintf (file, "%s -mcpu=%s", start,
5489 processor_target_table[rs6000_cpu_index].name);
5490 start = "";
5493 if (global_options_set.x_rs6000_tune_index)
5495 fprintf (file, "%s -mtune=%s", start,
5496 processor_target_table[rs6000_tune_index].name);
5497 start = "";
5500 if (PPC405_ERRATUM77)
5502 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5503 start = "";
5506 #ifdef USING_ELFOS_H
5507 switch (rs6000_sdata)
5509 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5510 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5511 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5512 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5515 if (rs6000_sdata && g_switch_value)
5517 fprintf (file, "%s -G %d", start,
5518 g_switch_value);
5519 start = "";
5521 #endif
5523 if (*start == '\0')
5524 putc ('\n', file);
5527 #ifdef USING_ELFOS_H
5528 rs6000_machine = rs6000_machine_from_flags ();
5529 emit_asm_machine ();
5530 #endif
5532 if (DEFAULT_ABI == ABI_ELFv2)
5533 fprintf (file, "\t.abiversion 2\n");
5537 /* Return nonzero if this function is known to have a null epilogue. */
5539 int
5540 direct_return (void)
5542 if (reload_completed)
5544 rs6000_stack_t *info = rs6000_stack_info ();
5546 if (info->first_gp_reg_save == 32
5547 && info->first_fp_reg_save == 64
5548 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5549 && ! info->lr_save_p
5550 && ! info->cr_save_p
5551 && info->vrsave_size == 0
5552 && ! info->push_p)
5553 return 1;
5556 return 0;
5559 /* Helper for num_insns_constant. Calculate number of instructions to
5560 load VALUE to a single gpr using combinations of addi, addis, ori,
5561 oris and sldi instructions. */
5563 static int
5564 num_insns_constant_gpr (HOST_WIDE_INT value)
5566 /* signed constant loadable with addi */
5567 if (SIGNED_INTEGER_16BIT_P (value))
5568 return 1;
5570 /* constant loadable with addis */
5571 else if ((value & 0xffff) == 0
5572 && (value >> 31 == -1 || value >> 31 == 0))
5573 return 1;
5575 /* PADDI can support up to 34 bit signed integers. */
5576 else if (TARGET_PREFIXED_ADDR && SIGNED_INTEGER_34BIT_P (value))
5577 return 1;
5579 else if (TARGET_POWERPC64)
5581 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5582 HOST_WIDE_INT high = value >> 31;
5584 if (high == 0 || high == -1)
5585 return 2;
5587 high >>= 1;
5589 if (low == 0)
5590 return num_insns_constant_gpr (high) + 1;
5591 else if (high == 0)
5592 return num_insns_constant_gpr (low) + 1;
5593 else
5594 return (num_insns_constant_gpr (high)
5595 + num_insns_constant_gpr (low) + 1);
5598 else
5599 return 2;
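/* Worked example (illustrative): on a 64-bit target, a constant such as
   0x123456789abcdef0 needs the classic five instruction sequence,
   roughly "lis; ori" to build the high 32 bits, "sldi 32" to shift them
   up, then "oris; ori" for the low 32 bits.  The recursion above
   computes this as num_insns_constant_gpr (high) = 2 plus
   num_insns_constant_gpr (low) = 2 plus 1 for the shift.  */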
5602 /* Helper for num_insns_constant. Allow constants formed by the
5603 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
5604 and handle modes that require multiple gprs. */
5606 static int
5607 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
5609 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5610 int total = 0;
5611 while (nregs-- > 0)
5613 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
5614 int insns = num_insns_constant_gpr (low);
5615 if (insns > 2
5616 /* We won't get more than 2 from num_insns_constant_gpr
5617 except when TARGET_POWERPC64 and mode is DImode or
5618 wider, so the register mode must be DImode. */
5619 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
5620 insns = 2;
5621 total += insns;
5622 value >>= BITS_PER_WORD;
5624 return total;
5627 /* Return the number of instructions it takes to form a constant in as
5628 many gprs as are needed for MODE. */
5630 int
5631 num_insns_constant (rtx op, machine_mode mode)
5633 HOST_WIDE_INT val;
5635 switch (GET_CODE (op))
5637 case CONST_INT:
5638 val = INTVAL (op);
5639 break;
5641 case CONST_WIDE_INT:
5643 int insns = 0;
5644 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5645 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
5646 DImode);
5647 return insns;
5650 case CONST_DOUBLE:
5652 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
5654 if (mode == SFmode || mode == SDmode)
5656 long l;
5658 if (mode == SDmode)
5659 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
5660 else
5661 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
5662 /* See the first define_split in rs6000.md handling a
5663 const_double_operand. */
5664 val = l;
5665 mode = SImode;
5667 else if (mode == DFmode || mode == DDmode)
5669 long l[2];
5671 if (mode == DDmode)
5672 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
5673 else
5674 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
5676 /* See the second (32-bit) and third (64-bit) define_split
5677 in rs6000.md handling a const_double_operand. */
5678 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
5679 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
5680 mode = DImode;
5682 else if (mode == TFmode || mode == TDmode
5683 || mode == KFmode || mode == IFmode)
5685 long l[4];
5686 int insns;
5688 if (mode == TDmode)
5689 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
5690 else
5691 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
5693 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
5694 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
5695 insns = num_insns_constant_multi (val, DImode);
5696 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
5697 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
5698 insns += num_insns_constant_multi (val, DImode);
5699 return insns;
5701 else
5702 gcc_unreachable ();
5704 break;
5706 default:
5707 gcc_unreachable ();
5710 return num_insns_constant_multi (val, mode);
5713 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5714 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5715 corresponding element of the vector, but for V4SFmode, the
5716 corresponding "float" is interpreted as an SImode integer. */
5718 HOST_WIDE_INT
5719 const_vector_elt_as_int (rtx op, unsigned int elt)
5721 rtx tmp;
5723 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5724 gcc_assert (GET_MODE (op) != V2DImode
5725 && GET_MODE (op) != V2DFmode);
5727 tmp = CONST_VECTOR_ELT (op, elt);
5728 if (GET_MODE (op) == V4SFmode)
5729 tmp = gen_lowpart (SImode, tmp);
5730 return INTVAL (tmp);
5733 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5734 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5735 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5736 all items are set to the same value and contain COPIES replicas of the
5737 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5738 operand and the others are set to the value of the operand's msb. */
5740 static bool
5741 vspltis_constant (rtx op, unsigned step, unsigned copies)
5743 machine_mode mode = GET_MODE (op);
5744 machine_mode inner = GET_MODE_INNER (mode);
5746 unsigned i;
5747 unsigned nunits;
5748 unsigned bitsize;
5749 unsigned mask;
5751 HOST_WIDE_INT val;
5752 HOST_WIDE_INT splat_val;
5753 HOST_WIDE_INT msb_val;
5755 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5756 return false;
5758 nunits = GET_MODE_NUNITS (mode);
5759 bitsize = GET_MODE_BITSIZE (inner);
5760 mask = GET_MODE_MASK (inner);
5762 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5763 splat_val = val;
5764 msb_val = val >= 0 ? 0 : -1;
5766 /* Construct the value to be splatted, if possible. If not, return 0. */
5767 for (i = 2; i <= copies; i *= 2)
5769 HOST_WIDE_INT small_val;
5770 bitsize /= 2;
5771 small_val = splat_val >> bitsize;
5772 mask >>= bitsize;
5773 if (splat_val != ((HOST_WIDE_INT)
5774 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5775 | (small_val & mask)))
5776 return false;
5777 splat_val = small_val;
5780 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5781 if (EASY_VECTOR_15 (splat_val))
5784 /* Also check if we can splat, and then add the result to itself. Do so if
5785 the value is positive, or if the splat instruction is using OP's mode;
5786 for splat_val < 0, the splat and the add should use the same mode. */
5787 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5788 && (splat_val >= 0 || (step == 1 && copies == 1)))
5791 /* Also check if we are loading up the most significant bit, which can be done
5792 by loading up -1 and shifting the value left by -1 (vector shift counts act modulo the element width, so -1 behaves as width - 1). */
5793 else if (EASY_VECTOR_MSB (splat_val, inner))
5796 else
5797 return false;
5799 /* Check if VAL is present in every STEP-th element, and the
5800 other elements are filled with its most significant bit. */
5801 for (i = 1; i < nunits; ++i)
5803 HOST_WIDE_INT desired_val;
5804 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5805 if ((i & (step - 1)) == 0)
5806 desired_val = val;
5807 else
5808 desired_val = msb_val;
5810 if (desired_val != const_vector_elt_as_int (op, elt))
5811 return false;
5814 return true;
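/* Two illustrative cases: { 5, 5, 5, 5 } in V4SImode is matched with
   STEP = 1, COPIES = 1 (a plain "vspltisw 5"), while a V8HImode vector
   of 0x0101 halfwords is matched with COPIES = 2, since "vspltisb 1"
   replicates the byte 1 and each halfword then reads as 0x0101.  */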
5817 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5818 instruction, filling in the bottom elements with 0 or -1.
5820 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5821 for the number of zero bytes to shift in, or negative for the number of 0xff
5822 bytes to shift in.
5824 OP is a CONST_VECTOR. */
5826 int
5827 vspltis_shifted (rtx op)
5829 machine_mode mode = GET_MODE (op);
5830 machine_mode inner = GET_MODE_INNER (mode);
5832 unsigned i, j;
5833 unsigned nunits;
5834 unsigned mask;
5836 HOST_WIDE_INT val;
5838 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5839 return false;
5841 /* We need to create pseudo registers to do the shift, so don't recognize
5842 shift vector constants after reload. */
5843 if (!can_create_pseudo_p ())
5844 return false;
5846 nunits = GET_MODE_NUNITS (mode);
5847 mask = GET_MODE_MASK (inner);
5849 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5851 /* Check if the value can really be the operand of a vspltis[bhw]. */
5852 if (EASY_VECTOR_15 (val))
5855 /* Also check if we are loading up the most significant bit, which can be done
5856 by loading up -1 and shifting the value left by -1. */
5857 else if (EASY_VECTOR_MSB (val, inner))
5860 else
5861 return 0;
5863 /* Check if VAL is present in every STEP-th element until we find elements
5864 that are 0 or all 1 bits. */
5865 for (i = 1; i < nunits; ++i)
5867 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5868 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5870 /* If the value isn't the splat value, check for the remaining elements
5871 being 0/-1. */
5872 if (val != elt_val)
5874 if (elt_val == 0)
5876 for (j = i+1; j < nunits; ++j)
5878 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5879 if (const_vector_elt_as_int (op, elt2) != 0)
5880 return 0;
5883 return (nunits - i) * GET_MODE_SIZE (inner);
5886 else if ((elt_val & mask) == mask)
5888 for (j = i+1; j < nunits; ++j)
5890 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5891 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5892 return 0;
5895 return -((nunits - i) * GET_MODE_SIZE (inner));
5898 else
5899 return 0;
5903 /* If all elements are equal, we don't need to do VSLDOI. */
5904 return 0;
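/* Illustrative case: on a big-endian target, the V4SImode vector
   { 12, 0, 0, 0 } is recognized with VAL = 12; the loop sees zeros from
   element 1 onward and returns (4 - 1) * 4 = 12, i.e. splat 12 with
   vspltisw and then shift 12 zero bytes in with VSLDOI.  */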
5908 /* Return true if OP is of the given MODE and can be synthesized
5909 with a vspltisb, vspltish or vspltisw. */
5911 bool
5912 easy_altivec_constant (rtx op, machine_mode mode)
5914 unsigned step, copies;
5916 if (mode == VOIDmode)
5917 mode = GET_MODE (op);
5918 else if (mode != GET_MODE (op))
5919 return false;
5921 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5922 constants. */
5923 if (mode == V2DFmode)
5924 return zero_constant (op, mode);
5926 else if (mode == V2DImode)
5928 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
5929 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
5930 return false;
5932 if (zero_constant (op, mode))
5933 return true;
5935 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5936 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5937 return true;
5939 return false;
5942 /* V1TImode is a special container for TImode. Ignore for now. */
5943 else if (mode == V1TImode)
5944 return false;
5946 /* Start with a vspltisw. */
5947 step = GET_MODE_NUNITS (mode) / 4;
5948 copies = 1;
5950 if (vspltis_constant (op, step, copies))
5951 return true;
5953 /* Then try with a vspltish. */
5954 if (step == 1)
5955 copies <<= 1;
5956 else
5957 step >>= 1;
5959 if (vspltis_constant (op, step, copies))
5960 return true;
5962 /* And finally a vspltisb. */
5963 if (step == 1)
5964 copies <<= 1;
5965 else
5966 step >>= 1;
5968 if (vspltis_constant (op, step, copies))
5969 return true;
5971 if (vspltis_shifted (op) != 0)
5972 return true;
5974 return false;
5977 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5978 result is OP. Abort if it is not possible. */
5980 rtx
5981 gen_easy_altivec_constant (rtx op)
5983 machine_mode mode = GET_MODE (op);
5984 int nunits = GET_MODE_NUNITS (mode);
5985 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5986 unsigned step = nunits / 4;
5987 unsigned copies = 1;
5989 /* Start with a vspltisw. */
5990 if (vspltis_constant (op, step, copies))
5991 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5993 /* Then try with a vspltish. */
5994 if (step == 1)
5995 copies <<= 1;
5996 else
5997 step >>= 1;
5999 if (vspltis_constant (op, step, copies))
6000 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6002 /* And finally a vspltisb. */
6003 if (step == 1)
6004 copies <<= 1;
6005 else
6006 step >>= 1;
6008 if (vspltis_constant (op, step, copies))
6009 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6011 gcc_unreachable ();
6014 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6015 instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
6017 Store the number of instructions needed (1 or 2) into the location
6018 pointed to by NUM_INSNS_PTR.
6020 Store the constant being split into the location pointed to by CONSTANT_PTR. */
6022 bool
6023 xxspltib_constant_p (rtx op,
6024 machine_mode mode,
6025 int *num_insns_ptr,
6026 int *constant_ptr)
6028 size_t nunits = GET_MODE_NUNITS (mode);
6029 size_t i;
6030 HOST_WIDE_INT value;
6031 rtx element;
6033 /* Set the returned values to out-of-bounds values. */
6034 *num_insns_ptr = -1;
6035 *constant_ptr = 256;
6037 if (!TARGET_P9_VECTOR)
6038 return false;
6040 if (mode == VOIDmode)
6041 mode = GET_MODE (op);
6043 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6044 return false;
6046 /* Handle (vec_duplicate <constant>). */
6047 if (GET_CODE (op) == VEC_DUPLICATE)
6049 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6050 && mode != V2DImode)
6051 return false;
6053 element = XEXP (op, 0);
6054 if (!CONST_INT_P (element))
6055 return false;
6057 value = INTVAL (element);
6058 if (!IN_RANGE (value, -128, 127))
6059 return false;
6062 /* Handle (const_vector [...]). */
6063 else if (GET_CODE (op) == CONST_VECTOR)
6065 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6066 && mode != V2DImode)
6067 return false;
6069 element = CONST_VECTOR_ELT (op, 0);
6070 if (!CONST_INT_P (element))
6071 return false;
6073 value = INTVAL (element);
6074 if (!IN_RANGE (value, -128, 127))
6075 return false;
6077 for (i = 1; i < nunits; i++)
6079 element = CONST_VECTOR_ELT (op, i);
6080 if (!CONST_INT_P (element))
6081 return false;
6083 if (value != INTVAL (element))
6084 return false;
6088 /* Handle integer constants being loaded into the upper part of the VSX
6089 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6090 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6091 else if (CONST_INT_P (op))
6093 if (!SCALAR_INT_MODE_P (mode))
6094 return false;
6096 value = INTVAL (op);
6097 if (!IN_RANGE (value, -128, 127))
6098 return false;
6100 if (!IN_RANGE (value, -1, 0))
6102 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6103 return false;
6105 if (EASY_VECTOR_15 (value))
6106 return false;
6110 else
6111 return false;
6113 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6114 sign extend. Special case 0/-1 to allow getting any VSX register instead
6115 of an Altivec register. */
6116 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6117 && EASY_VECTOR_15 (value))
6118 return false;
6120 /* Return # of instructions and the constant byte for XXSPLTIB. */
6121 if (mode == V16QImode)
6122 *num_insns_ptr = 1;
6124 else if (IN_RANGE (value, -1, 0))
6125 *num_insns_ptr = 1;
6127 else
6128 *num_insns_ptr = 2;
6130 *constant_ptr = (int) value;
6131 return true;
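/* Example (illustrative): splatting 100 across V16QImode needs just
   "xxspltib 100" (*NUM_INSNS_PTR = 1), while splatting 100 across
   V8HImode needs the xxspltib plus a sign-extending unpack
   (*NUM_INSNS_PTR = 2); in both cases *CONSTANT_PTR is 100.  Splatting
   5 across V8HImode is rejected, since "vspltish 5" is cheaper.  */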
6134 const char *
6135 output_vec_const_move (rtx *operands)
6137 int shift;
6138 machine_mode mode;
6139 rtx dest, vec;
6141 dest = operands[0];
6142 vec = operands[1];
6143 mode = GET_MODE (dest);
6145 if (TARGET_VSX)
6147 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6148 int xxspltib_value = 256;
6149 int num_insns = -1;
6151 if (zero_constant (vec, mode))
6153 if (TARGET_P9_VECTOR)
6154 return "xxspltib %x0,0";
6156 else if (dest_vmx_p)
6157 return "vspltisw %0,0";
6159 else
6160 return "xxlxor %x0,%x0,%x0";
6163 if (all_ones_constant (vec, mode))
6165 if (TARGET_P9_VECTOR)
6166 return "xxspltib %x0,255";
6168 else if (dest_vmx_p)
6169 return "vspltisw %0,-1";
6171 else if (TARGET_P8_VECTOR)
6172 return "xxlorc %x0,%x0,%x0";
6174 else
6175 gcc_unreachable ();
6178 if (TARGET_P9_VECTOR
6179 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6181 if (num_insns == 1)
6183 operands[2] = GEN_INT (xxspltib_value & 0xff);
6184 return "xxspltib %x0,%2";
6187 return "#";
6191 if (TARGET_ALTIVEC)
6193 rtx splat_vec;
6195 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6196 if (zero_constant (vec, mode))
6197 return "vspltisw %0,0";
6199 if (all_ones_constant (vec, mode))
6200 return "vspltisw %0,-1";
6202 /* Do we need to construct a value using VSLDOI? */
6203 shift = vspltis_shifted (vec);
6204 if (shift != 0)
6205 return "#";
6207 splat_vec = gen_easy_altivec_constant (vec);
6208 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6209 operands[1] = XEXP (splat_vec, 0);
6210 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6211 return "#";
6213 switch (GET_MODE (splat_vec))
6215 case E_V4SImode:
6216 return "vspltisw %0,%1";
6218 case E_V8HImode:
6219 return "vspltish %0,%1";
6221 case E_V16QImode:
6222 return "vspltisb %0,%1";
6224 default:
6225 gcc_unreachable ();
6229 gcc_unreachable ();
6232 /* Initialize vector TARGET to VALS. */
6234 void
6235 rs6000_expand_vector_init (rtx target, rtx vals)
6237 machine_mode mode = GET_MODE (target);
6238 machine_mode inner_mode = GET_MODE_INNER (mode);
6239 int n_elts = GET_MODE_NUNITS (mode);
6240 int n_var = 0, one_var = -1;
6241 bool all_same = true, all_const_zero = true;
6242 rtx x, mem;
6243 int i;
6245 for (i = 0; i < n_elts; ++i)
6247 x = XVECEXP (vals, 0, i);
6248 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6249 ++n_var, one_var = i;
6250 else if (x != CONST0_RTX (inner_mode))
6251 all_const_zero = false;
6253 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6254 all_same = false;
6257 if (n_var == 0)
6259 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6260 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6261 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6263 /* Zero register. */
6264 emit_move_insn (target, CONST0_RTX (mode));
6265 return;
6267 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6269 /* Splat immediate. */
6270 emit_insn (gen_rtx_SET (target, const_vec));
6271 return;
6273 else
6275 /* Load from constant pool. */
6276 emit_move_insn (target, const_vec);
6277 return;
6281 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6282 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6284 rtx op[2];
6285 size_t i;
6286 size_t num_elements = all_same ? 1 : 2;
6287 for (i = 0; i < num_elements; i++)
6289 op[i] = XVECEXP (vals, 0, i);
6290 /* Just in case there is a SUBREG with a smaller mode, do a
6291 conversion. */
6292 if (GET_MODE (op[i]) != inner_mode)
6294 rtx tmp = gen_reg_rtx (inner_mode);
6295 convert_move (tmp, op[i], 0);
6296 op[i] = tmp;
6298 /* Allow load with splat double word. */
6299 else if (MEM_P (op[i]))
6301 if (!all_same)
6302 op[i] = force_reg (inner_mode, op[i]);
6304 else if (!REG_P (op[i]))
6305 op[i] = force_reg (inner_mode, op[i]);
6308 if (all_same)
6310 if (mode == V2DFmode)
6311 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6312 else
6313 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6315 else
6317 if (mode == V2DFmode)
6318 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6319 else
6320 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6322 return;
6325 /* Special case initializing vector int if we are on 64-bit systems with
6326 direct move or we have the ISA 3.0 instructions. */
6327 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6328 && TARGET_DIRECT_MOVE_64BIT)
6330 if (all_same)
6332 rtx element0 = XVECEXP (vals, 0, 0);
6333 if (MEM_P (element0))
6334 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6335 else
6336 element0 = force_reg (SImode, element0);
6338 if (TARGET_P9_VECTOR)
6339 emit_insn (gen_vsx_splat_v4si (target, element0));
6340 else
6342 rtx tmp = gen_reg_rtx (DImode);
6343 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6344 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6346 return;
6348 else
6350 rtx elements[4];
6351 size_t i;
6353 for (i = 0; i < 4; i++)
6354 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6356 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6357 elements[2], elements[3]));
6358 return;
6362 /* With single precision floating point on VSX, note that internally single
6363 precision is actually represented as a double. Either make 2 V2DF
6364 vectors and convert these vectors to single precision, or do one
6365 conversion and splat the result to the other elements. */
6366 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6368 if (all_same)
6370 rtx element0 = XVECEXP (vals, 0, 0);
6372 if (TARGET_P9_VECTOR)
6374 if (MEM_P (element0))
6375 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6377 emit_insn (gen_vsx_splat_v4sf (target, element0));
6380 else
6382 rtx freg = gen_reg_rtx (V4SFmode);
6383 rtx sreg = force_reg (SFmode, element0);
6384 rtx cvt = (TARGET_XSCVDPSPN
6385 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6386 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6388 emit_insn (cvt);
6389 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6390 const0_rtx));
6393 else
6395 rtx dbl_even = gen_reg_rtx (V2DFmode);
6396 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6397 rtx flt_even = gen_reg_rtx (V4SFmode);
6398 rtx flt_odd = gen_reg_rtx (V4SFmode);
6399 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6400 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6401 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6402 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6404 /* Use VMRGEW if we can instead of doing a permute. */
6405 if (TARGET_P8_VECTOR)
6407 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6408 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6409 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6410 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6411 if (BYTES_BIG_ENDIAN)
6412 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6413 else
6414 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6416 else
6418 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6419 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6420 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6421 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6422 rs6000_expand_extract_even (target, flt_even, flt_odd);
6425 return;
6428 /* Special case initializing vector short/char that are splats if we are on
6429 64-bit systems with direct move. */
6430 if (all_same && TARGET_DIRECT_MOVE_64BIT
6431 && (mode == V16QImode || mode == V8HImode))
6433 rtx op0 = XVECEXP (vals, 0, 0);
6434 rtx di_tmp = gen_reg_rtx (DImode);
6436 if (!REG_P (op0))
6437 op0 = force_reg (GET_MODE_INNER (mode), op0);
6439 if (mode == V16QImode)
6441 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6442 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6443 return;
6446 if (mode == V8HImode)
6448 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6449 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6450 return;
6454 /* Store value to stack temp. Load vector element. Splat. However, splat
6455 of 64-bit items is not supported on Altivec. */
6456 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6458 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6459 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6460 XVECEXP (vals, 0, 0));
6461 x = gen_rtx_UNSPEC (VOIDmode,
6462 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6463 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6464 gen_rtvec (2,
6465 gen_rtx_SET (target, mem),
6466 x)));
6467 x = gen_rtx_VEC_SELECT (inner_mode, target,
6468 gen_rtx_PARALLEL (VOIDmode,
6469 gen_rtvec (1, const0_rtx)));
6470 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6471 return;
6474 /* One field is non-constant. Load constant then overwrite
6475 varying field. */
6476 if (n_var == 1)
6478 rtx copy = copy_rtx (vals);
6480 /* Load constant part of vector, substitute neighboring value for
6481 varying element. */
6482 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6483 rs6000_expand_vector_init (target, copy);
6485 /* Insert variable. */
6486 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6487 return;
6490 /* Construct the vector in memory one field at a time
6491 and load the whole vector. */
6492 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6493 for (i = 0; i < n_elts; i++)
6494 emit_move_insn (adjust_address_nv (mem, inner_mode,
6495 i * GET_MODE_SIZE (inner_mode)),
6496 XVECEXP (vals, 0, i));
6497 emit_move_insn (target, mem);
6500 /* Set field ELT of TARGET to VAL. */
6502 void
6503 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6505 machine_mode mode = GET_MODE (target);
6506 machine_mode inner_mode = GET_MODE_INNER (mode);
6507 rtx reg = gen_reg_rtx (mode);
6508 rtx mask, mem, x;
6509 int width = GET_MODE_SIZE (inner_mode);
6510 int i;
6512 val = force_reg (GET_MODE (val), val);
6514 if (VECTOR_MEM_VSX_P (mode))
6516 rtx insn = NULL_RTX;
6517 rtx elt_rtx = GEN_INT (elt);
6519 if (mode == V2DFmode)
6520 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
6522 else if (mode == V2DImode)
6523 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
6525 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
6527 if (mode == V4SImode)
6528 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
6529 else if (mode == V8HImode)
6530 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
6531 else if (mode == V16QImode)
6532 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
6533 else if (mode == V4SFmode)
6534 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
6537 if (insn)
6539 emit_insn (insn);
6540 return;
6544 /* Simplify setting single element vectors like V1TImode. */
6545 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6547 emit_move_insn (target, gen_lowpart (mode, val));
6548 return;
6551 /* Load single variable value. */
6552 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6553 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6554 x = gen_rtx_UNSPEC (VOIDmode,
6555 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6556 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6557 gen_rtvec (2,
6558 gen_rtx_SET (reg, mem),
6559 x)));
6561 /* Linear sequence. */
6562 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6563 for (i = 0; i < 16; ++i)
6564 XVECEXP (mask, 0, i) = GEN_INT (i);
6566 /* Set permute mask to insert element into target. */
6567 for (i = 0; i < width; ++i)
6568 XVECEXP (mask, 0, elt*width + i)
6569 = GEN_INT (i + 0x10);
6570 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6572 if (BYTES_BIG_ENDIAN)
6573 x = gen_rtx_UNSPEC (mode,
6574 gen_rtvec (3, target, reg,
6575 force_reg (V16QImode, x)),
6576 UNSPEC_VPERM);
6577 else
6579 if (TARGET_P9_VECTOR)
6580 x = gen_rtx_UNSPEC (mode,
6581 gen_rtvec (3, reg, target,
6582 force_reg (V16QImode, x)),
6583 UNSPEC_VPERMR);
6584 else
6586 /* Invert selector. We prefer to generate VNAND on P8 so
6587 that future fusion opportunities can kick in, but must
6588 generate VNOR elsewhere. */
6589 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6590 rtx iorx = (TARGET_P8_VECTOR
6591 ? gen_rtx_IOR (V16QImode, notx, notx)
6592 : gen_rtx_AND (V16QImode, notx, notx));
6593 rtx tmp = gen_reg_rtx (V16QImode);
6594 emit_insn (gen_rtx_SET (tmp, iorx));
6596 /* Permute with operands reversed and adjusted selector. */
6597 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6598 UNSPEC_VPERM);
6602 emit_insn (gen_rtx_SET (target, x));
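/* As an illustration of the mask above: inserting element 1 of a
   V4SImode vector on a big-endian target uses the permute selector
   bytes { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15 },
   where selector values of 16 and up pick bytes from the register
   holding the new value and the rest keep the original TARGET bytes.  */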
6605 /* Extract field ELT from VEC into TARGET. */
6607 void
6608 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
6610 machine_mode mode = GET_MODE (vec);
6611 machine_mode inner_mode = GET_MODE_INNER (mode);
6612 rtx mem;
6614 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
6616 switch (mode)
6618 default:
6619 break;
6620 case E_V1TImode:
6621 emit_move_insn (target, gen_lowpart (TImode, vec));
6622 break;
6623 case E_V2DFmode:
6624 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
6625 return;
6626 case E_V2DImode:
6627 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
6628 return;
6629 case E_V4SFmode:
6630 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
6631 return;
6632 case E_V16QImode:
6633 if (TARGET_DIRECT_MOVE_64BIT)
6635 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
6636 return;
6638 else
6639 break;
6640 case E_V8HImode:
6641 if (TARGET_DIRECT_MOVE_64BIT)
6643 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
6644 return;
6646 else
6647 break;
6648 case E_V4SImode:
6649 if (TARGET_DIRECT_MOVE_64BIT)
6651 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
6652 return;
6654 break;
6657 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
6658 && TARGET_DIRECT_MOVE_64BIT)
6660 if (GET_MODE (elt) != DImode)
6662 rtx tmp = gen_reg_rtx (DImode);
6663 convert_move (tmp, elt, 0);
6664 elt = tmp;
6666 else if (!REG_P (elt))
6667 elt = force_reg (DImode, elt);
6669 switch (mode)
6671 case E_V1TImode:
6672 emit_move_insn (target, gen_lowpart (TImode, vec));
6673 return;
6675 case E_V2DFmode:
6676 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
6677 return;
6679 case E_V2DImode:
6680 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
6681 return;
6683 case E_V4SFmode:
6684 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
6685 return;
6687 case E_V4SImode:
6688 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
6689 return;
6691 case E_V8HImode:
6692 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
6693 return;
6695 case E_V16QImode:
6696 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
6697 return;
6699 default:
6700 gcc_unreachable ();
6704 /* Allocate mode-sized buffer. */
6705 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6707 emit_move_insn (mem, vec);
6708 if (CONST_INT_P (elt))
6710 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
6712 /* Add offset to field within buffer matching vector element. */
6713 mem = adjust_address_nv (mem, inner_mode,
6714 modulo_elt * GET_MODE_SIZE (inner_mode));
6715 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6717 else
6719 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
6720 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
6721 rtx new_addr = gen_reg_rtx (Pmode);
6723 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
6724 if (ele_size > 1)
6725 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
6726 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
6727 new_addr = change_address (mem, inner_mode, new_addr);
6728 emit_move_insn (target, new_addr);
6732 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
6733 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
6734 temporary (BASE_TMP) to fix up the address. Return the new memory address
6735 that is valid for reads or writes to a given register (SCALAR_REG). */
6737 static rtx
6738 rs6000_adjust_vec_address (rtx scalar_reg,
6739 rtx mem,
6740 rtx element,
6741 rtx base_tmp,
6742 machine_mode scalar_mode)
6744 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6745 rtx addr = XEXP (mem, 0);
6746 rtx element_offset;
6747 rtx new_addr;
6748 bool valid_addr_p;
6750 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
6751 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
6753 /* Calculate what we need to add to the address to get the element
6754 address. */
6755 if (CONST_INT_P (element))
6756 element_offset = GEN_INT (INTVAL (element) * scalar_size);
6757 else
6759 int byte_shift = exact_log2 (scalar_size);
6760 gcc_assert (byte_shift >= 0);
6762 if (byte_shift == 0)
6763 element_offset = element;
6765 else
6767 if (TARGET_POWERPC64)
6768 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
6769 else
6770 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
6772 element_offset = base_tmp;
6776 /* Create the new address pointing to the element within the vector. If we
6777 are adding 0, we don't have to change the address. */
6778 if (element_offset == const0_rtx)
6779 new_addr = addr;
6781 /* A simple indirect address can be converted into a reg + offset
6782 address. */
6783 else if (REG_P (addr) || SUBREG_P (addr))
6784 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
6786 /* Optimize D-FORM addresses with a constant offset and a constant element,
6787 to include the element offset in the address directly. */
6788 else if (GET_CODE (addr) == PLUS)
6790 rtx op0 = XEXP (addr, 0);
6791 rtx op1 = XEXP (addr, 1);
6792 rtx insn;
6794 gcc_assert (REG_P (op0) || SUBREG_P (op0));
6795 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
6797 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
6798 rtx offset_rtx = GEN_INT (offset);
6800 /* 16-bit offset. */
6801 if (SIGNED_INTEGER_16BIT_P (offset)
6802 && (scalar_size < 8 || (offset & 0x3) == 0))
6803 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6805 /* 34-bit offset if we have prefixed addresses. */
6806 else if (TARGET_PREFIXED_ADDR && SIGNED_INTEGER_34BIT_P (offset))
6807 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
6809 else
6811 /* Offset overflowed, move offset to the temporary (which will
6812 likely be split), and do X-FORM addressing. */
6813 emit_move_insn (base_tmp, offset_rtx);
6814 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6817 else
6819 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
6820 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
6822 /* Note, ADDI requires the register being added to be a base
6823 register. If the register was R0, load it up into the temporary
6824 and do the add. */
6825 if (op1_reg_p
6826 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
6828 insn = gen_add3_insn (base_tmp, op1, element_offset);
6829 gcc_assert (insn != NULL_RTX);
6830 emit_insn (insn);
6833 else if (ele_reg_p
6834 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
6836 insn = gen_add3_insn (base_tmp, element_offset, op1);
6837 gcc_assert (insn != NULL_RTX);
6838 emit_insn (insn);
6841 /* Make sure we don't overwrite the temporary if the element being
6842 extracted is variable, and we've put the offset into base_tmp
6843 previously. */
6844 else if (reg_mentioned_p (base_tmp, element_offset))
6845 emit_insn (gen_add2_insn (base_tmp, op1));
6847 else
6849 emit_move_insn (base_tmp, op1);
6850 emit_insn (gen_add2_insn (base_tmp, element_offset));
6853 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
6857 else
6859 emit_move_insn (base_tmp, addr);
6860 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6863 /* If we have a PLUS, we need to see whether the particular register class
6864 allows for D-FORM or X-FORM addressing. */
6865 if (GET_CODE (new_addr) == PLUS)
6867 rtx op1 = XEXP (new_addr, 1);
6868 addr_mask_type addr_mask;
6869 unsigned int scalar_regno = reg_or_subregno (scalar_reg);
6871 gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
6872 if (INT_REGNO_P (scalar_regno))
6873 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
6875 else if (FP_REGNO_P (scalar_regno))
6876 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
6878 else if (ALTIVEC_REGNO_P (scalar_regno))
6879 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
6881 else
6882 gcc_unreachable ();
6884 if (REG_P (op1) || SUBREG_P (op1))
6885 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
6886 else
6887 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
6890 else if (REG_P (new_addr) || SUBREG_P (new_addr))
6891 valid_addr_p = true;
6893 else
6894 valid_addr_p = false;
6896 if (!valid_addr_p)
6898 emit_move_insn (base_tmp, new_addr);
6899 new_addr = base_tmp;
6902 return change_address (mem, scalar_mode, new_addr);
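/* Worked example (illustrative): extracting constant element 2 of a
   V4SImode vector whose address is (plus r3 16) gives
   ELEMENT_OFFSET = 2 * 4 = 8, which folds into the single D-form
   address (plus r3 24); a variable element instead shifts the index
   left by 2 into BASE_TMP and falls back to X-form (reg+reg)
   addressing.  */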
6905 /* Split a variable vec_extract operation into the component instructions. */
6907 void
6908 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
6909 rtx tmp_altivec)
6911 machine_mode mode = GET_MODE (src);
6912 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
6913 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
6914 int byte_shift = exact_log2 (scalar_size);
6916 gcc_assert (byte_shift >= 0);
6918 /* If we are given a memory address, optimize to load just the element. We
6919 don't have to adjust the vector element number on little endian
6920 systems. */
6921 if (MEM_P (src))
6923 int num_elements = GET_MODE_NUNITS (mode);
6924 rtx num_ele_m1 = GEN_INT (num_elements - 1);
6926 emit_insn (gen_anddi3 (element, element, num_ele_m1));
6927 gcc_assert (REG_P (tmp_gpr));
6928 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
6929 tmp_gpr, scalar_mode));
6930 return;
6933 else if (REG_P (src) || SUBREG_P (src))
6935 int num_elements = GET_MODE_NUNITS (mode);
6936 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
6937 int bit_shift = 7 - exact_log2 (num_elements);
6938 rtx element2;
6939 unsigned int dest_regno = reg_or_subregno (dest);
6940 unsigned int src_regno = reg_or_subregno (src);
6941 unsigned int element_regno = reg_or_subregno (element);
6943 gcc_assert (REG_P (tmp_gpr));
6945 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
6946 a general purpose register. */
6947 if (TARGET_P9_VECTOR
6948 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
6949 && INT_REGNO_P (dest_regno)
6950 && ALTIVEC_REGNO_P (src_regno)
6951 && INT_REGNO_P (element_regno))
6953 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
6954 rtx element_si = gen_rtx_REG (SImode, element_regno);
6956 if (mode == V16QImode)
6957 emit_insn (BYTES_BIG_ENDIAN
6958 ? gen_vextublx (dest_si, element_si, src)
6959 : gen_vextubrx (dest_si, element_si, src));
6961 else if (mode == V8HImode)
6963 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6964 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
6965 emit_insn (BYTES_BIG_ENDIAN
6966 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
6967 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
6971 else
6973 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
6974 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
6975 emit_insn (BYTES_BIG_ENDIAN
6976 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
6977 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
6980 return;
6984 gcc_assert (REG_P (tmp_altivec));
6986 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
6987 an XOR, otherwise we need to subtract. The shift amount is chosen so that
6988 VSLO will shift the element into the upper position (adding 3 to convert a
6989 byte shift into a bit shift). */
6990 if (scalar_size == 8)
6992 if (!BYTES_BIG_ENDIAN)
6994 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
6995 element2 = tmp_gpr;
6997 else
6998 element2 = element;
7000 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7001 bit. */
7002 emit_insn (gen_rtx_SET (tmp_gpr,
7003 gen_rtx_AND (DImode,
7004 gen_rtx_ASHIFT (DImode,
7005 element2,
7006 GEN_INT (6)),
7007 GEN_INT (64))));
7009 else
7011 if (!BYTES_BIG_ENDIAN)
7013 rtx num_ele_m1 = GEN_INT (num_elements - 1);
7015 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7016 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7017 element2 = tmp_gpr;
7019 else
7020 element2 = element;
7022 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7025 /* Get the value into the lower byte of the Altivec register where VSLO
7026 expects it. */
7027 if (TARGET_P9_VECTOR)
7028 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7029 else if (can_create_pseudo_p ())
7030 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7031 else
7033 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7034 emit_move_insn (tmp_di, tmp_gpr);
7035 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7038 /* Do the VSLO to get the value into the final location. */
7039 switch (mode)
7041 case E_V2DFmode:
7042 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7043 return;
7045 case E_V2DImode:
7046 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7047 return;
7049 case E_V4SFmode:
7051 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7052 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7053 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7054 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7055 tmp_altivec));
7057 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7058 return;
7061 case E_V4SImode:
7062 case E_V8HImode:
7063 case E_V16QImode:
7065 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7066 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7067 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7068 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7069 tmp_altivec));
7070 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7071 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
7072 GEN_INT (64 - bits_in_element)));
7073 return;
7076 default:
7077 gcc_unreachable ();
7080 return;
7082 else
7083 gcc_unreachable ();
7086 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
7087 selects whether the alignment is ABI-mandated, optional, or
7088 both ABI-mandated and optional alignment. */
7090 unsigned int
7091 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7093 if (how != align_opt)
7095 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7096 align = 128;
7099 if (how != align_abi)
7101 if (TREE_CODE (type) == ARRAY_TYPE
7102 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7104 if (align < BITS_PER_WORD)
7105 align = BITS_PER_WORD;
7109 return align;
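/* For example: a vector type declared with 64-bit alignment is raised
   to 128 bits when ABI alignment applies, and a plain char array is
   raised to BITS_PER_WORD as an optional (optimization-only)
   alignment.  */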
7112 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7113 instructions simply ignore the low bits; VSX memory instructions
7114 are aligned to 4 or 8 bytes. */
7116 static bool
7117 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7119 return (STRICT_ALIGNMENT
7120 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7121 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7122 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7123 && (int) align < VECTOR_ALIGN (mode)))));
7126 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7128 bool
7129 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7131 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7133 if (computed != 128)
7135 static bool warned;
7136 if (!warned && warn_psabi)
7138 warned = true;
7139 inform (input_location,
7140 "the layout of aggregates containing vectors with"
7141 " %d-byte alignment has changed in GCC 5",
7142 computed / BITS_PER_UNIT);
7145 /* In current GCC there is no special case. */
7146 return false;
7149 return false;
7152 /* AIX increases natural record alignment to doubleword if the first
7153 field is an FP double while the FP fields remain word aligned. */
7155 unsigned int
7156 rs6000_special_round_type_align (tree type, unsigned int computed,
7157 unsigned int specified)
7159 unsigned int align = MAX (computed, specified);
7160 tree field = TYPE_FIELDS (type);
7162 /* Skip all non-field decls. */
7163 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7164 field = DECL_CHAIN (field);
7166 if (field != NULL && field != type)
7168 type = TREE_TYPE (field);
7169 while (TREE_CODE (type) == ARRAY_TYPE)
7170 type = TREE_TYPE (type);
7172 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7173 align = MAX (align, 64);
7176 return align;
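/* E.g. on AIX, "struct { double d; int i; }" has its record alignment
   raised to 64 bits because the first field is a DFmode double, even
   though the double itself stays word aligned within the record.  */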
7179 /* Darwin increases record alignment to the natural alignment of
7180 the first field. */
7182 unsigned int
7183 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7184 unsigned int specified)
7186 unsigned int align = MAX (computed, specified);
7188 if (TYPE_PACKED (type))
7189 return align;
7191 /* Find the first field, looking down into aggregates. */
7192 do {
7193 tree field = TYPE_FIELDS (type);
7194 /* Skip all non-field decls. */
7195 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7196 field = DECL_CHAIN (field);
7197 if (! field)
7198 break;
7199 /* A packed field does not contribute any extra alignment. */
7200 if (DECL_PACKED (field))
7201 return align;
7202 type = TREE_TYPE (field);
7203 while (TREE_CODE (type) == ARRAY_TYPE)
7204 type = TREE_TYPE (type);
7205 } while (AGGREGATE_TYPE_P (type));
7207 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7208 align = MAX (align, TYPE_ALIGN (type));
7210 return align;
7213 /* Return 1 for an operand in small memory on V.4/eabi. */
7215 int
7216 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7217 machine_mode mode ATTRIBUTE_UNUSED)
7219 #if TARGET_ELF
7220 rtx sym_ref;
7222 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7223 return 0;
7225 if (DEFAULT_ABI != ABI_V4)
7226 return 0;
7228 if (SYMBOL_REF_P (op))
7229 sym_ref = op;
7231 else if (GET_CODE (op) != CONST
7232 || GET_CODE (XEXP (op, 0)) != PLUS
7233 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
7234 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
7235 return 0;
7237 else
7239 rtx sum = XEXP (op, 0);
7240 HOST_WIDE_INT summand;
7242 /* We have to be careful here, because it is the referenced address
7243 that must be 32k from _SDA_BASE_, not just the symbol. */
7244 summand = INTVAL (XEXP (sum, 1));
7245 if (summand < 0 || summand > g_switch_value)
7246 return 0;
7248 sym_ref = XEXP (sum, 0);
7251 return SYMBOL_REF_SMALL_P (sym_ref);
7252 #else
7253 return 0;
7254 #endif
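/* As an illustration (hypothetical symbol "x", small-data limit taken
   from -G): both

     (symbol_ref "x")
     (const (plus (symbol_ref "x") (const_int 4)))

   are accepted when "x" is flagged SYMBOL_REF_SMALL_P and 4 <=
   g_switch_value, while a negative addend or one above g_switch_value
   is rejected because the referenced address, not just the symbol,
   must be within 32k of _SDA_BASE_.  */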
7257 /* Return true if either operand is a general purpose register. */
7259 bool
7260 gpr_or_gpr_p (rtx op0, rtx op1)
7262 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7263 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7266 /* Return true if this is a move direct operation between GPR registers and
7267 floating point/VSX registers. */
7269 bool
7270 direct_move_p (rtx op0, rtx op1)
7272 if (!REG_P (op0) || !REG_P (op1))
7273 return false;
7275 if (!TARGET_DIRECT_MOVE)
7276 return false;
7278 int regno0 = REGNO (op0);
7279 int regno1 = REGNO (op1);
7280 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
7281 return false;
7283 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
7284 return true;
7286 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
7287 return true;
7289 return false;
7292 /* Return true if ADDR is an acceptable address for a quad memory
7293 operation of mode MODE (either LQ/STQ for general purpose registers, or
7294 LXV/STXV for vector registers under ISA 3.0). STRICT selects strict
7295 checking of the base register: when true, only hard registers (or
7296 allocated pseudos) are acceptable as a base. */
7298 bool
7299 quad_address_p (rtx addr, machine_mode mode, bool strict)
7301 rtx op0, op1;
7303 if (GET_MODE_SIZE (mode) != 16)
7304 return false;
7306 if (legitimate_indirect_address_p (addr, strict))
7307 return true;
7309 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7310 return false;
7312 /* Is this a valid prefixed address? If the bottom four bits of the offset
7313 are non-zero, we could use a prefixed instruction (which does not have the
7314 DQ-form constraint that the traditional instruction had) instead of
7315 forcing the unaligned offset to a GPR. */
7316 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
7317 return true;
7319 if (GET_CODE (addr) != PLUS)
7320 return false;
7322 op0 = XEXP (addr, 0);
7323 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7324 return false;
7326 op1 = XEXP (addr, 1);
7327 if (!CONST_INT_P (op1))
7328 return false;
7330 return quad_address_offset_p (INTVAL (op1));
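/* For instance (offsets chosen for illustration), with a DQ-form
   vector mode and base register r3:

     (plus (reg r3) (const_int 48))   -- accepted, 48 has zero low bits
     (plus (reg r3) (const_int 17))   -- rejected by the DQ-form check;
                                         only a prefixed instruction
                                         could encode it

   since quad_address_offset_p requires a signed 16-bit offset whose
   bottom four bits are zero.  */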
7333 /* Return true if this is a load or store quad operation. This function does
7334 not handle the atomic quad memory instructions. */
7336 bool
7337 quad_load_store_p (rtx op0, rtx op1)
7339 bool ret;
7341 if (!TARGET_QUAD_MEMORY)
7342 ret = false;
7344 else if (REG_P (op0) && MEM_P (op1))
7345 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7346 && quad_memory_operand (op1, GET_MODE (op1))
7347 && !reg_overlap_mentioned_p (op0, op1));
7349 else if (MEM_P (op0) && REG_P (op1))
7350 ret = (quad_memory_operand (op0, GET_MODE (op0))
7351 && quad_int_reg_operand (op1, GET_MODE (op1)));
7353 else
7354 ret = false;
7356 if (TARGET_DEBUG_ADDR)
7358 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7359 ret ? "true" : "false");
7360 debug_rtx (gen_rtx_SET (op0, op1));
7363 return ret;
7366 /* Given an address, return a constant offset term if one exists. */
7368 static rtx
7369 address_offset (rtx op)
7371 if (GET_CODE (op) == PRE_INC
7372 || GET_CODE (op) == PRE_DEC)
7373 op = XEXP (op, 0);
7374 else if (GET_CODE (op) == PRE_MODIFY
7375 || GET_CODE (op) == LO_SUM)
7376 op = XEXP (op, 1);
7378 if (GET_CODE (op) == CONST)
7379 op = XEXP (op, 0);
7381 if (GET_CODE (op) == PLUS)
7382 op = XEXP (op, 1);
7384 if (CONST_INT_P (op))
7385 return op;
7387 return NULL_RTX;
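/* Examples (illustrative):

     (plus (reg) (const_int 8))                        => (const_int 8)
     (lo_sum (reg) (const (plus (symbol_ref "s")
                                (const_int 12))))      => (const_int 12)
     (reg)                                             => NULL_RTX  */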
7390 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
7391 the mode. If we can't find (or don't know) the alignment of the symbol
7392 we assume (optimistically) that it's sufficiently aligned [??? maybe we
7393 should be pessimistic]. Offsets are validated in the same way as for
7394 reg + offset. */
7395 static bool
7396 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
7398 /* We should not get here with this. */
7399 gcc_checking_assert (! mode_supports_dq_form (mode));
7401 if (GET_CODE (x) == CONST)
7402 x = XEXP (x, 0);
7404 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
7405 x = XVECEXP (x, 0, 0);
7407 rtx sym = NULL_RTX;
7408 unsigned HOST_WIDE_INT offset = 0;
7410 if (GET_CODE (x) == PLUS)
7412 sym = XEXP (x, 0);
7413 if (! SYMBOL_REF_P (sym))
7414 return false;
7415 if (!CONST_INT_P (XEXP (x, 1)))
7416 return false;
7417 offset = INTVAL (XEXP (x, 1));
7419 else if (SYMBOL_REF_P (x))
7420 sym = x;
7421 else if (CONST_INT_P (x))
7422 offset = INTVAL (x);
7423 else if (GET_CODE (x) == LABEL_REF)
7424 offset = 0; // We assume code labels are Pmode aligned.
7425 else
7426 return false; // Not sure what we have here.
7428 /* If we don't know the alignment of the thing to which the symbol refers,
7429 we assume optimistically it is "enough".
7430 ??? maybe we should be pessimistic instead. */
7431 unsigned align = 0;
7433 if (sym)
7435 tree decl = SYMBOL_REF_DECL (sym);
7436 #if TARGET_MACHO
7437 if (MACHO_SYMBOL_INDIRECTION_P (sym))
7438 /* The decl in an indirection symbol is the original one, which might
7439 be less aligned than the indirection. Our indirections are always
7440 pointer-aligned. */
7442 else
7443 #endif
7444 if (decl && DECL_ALIGN (decl))
7445 align = DECL_ALIGN_UNIT (decl);
7448 unsigned int extra = 0;
7449 switch (mode)
7451 case E_DFmode:
7452 case E_DDmode:
7453 case E_DImode:
7454 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7455 addressing. */
7456 if (VECTOR_MEM_VSX_P (mode))
7457 return false;
7459 if (!TARGET_POWERPC64)
7460 extra = 4;
7461 else if ((offset & 3) || (align & 3))
7462 return false;
7463 break;
7465 case E_TFmode:
7466 case E_IFmode:
7467 case E_KFmode:
7468 case E_TDmode:
7469 case E_TImode:
7470 case E_PTImode:
7471 extra = 8;
7472 if (!TARGET_POWERPC64)
7473 extra = 12;
7474 else if ((offset & 3) || (align & 3))
7475 return false;
7476 break;
7478 default:
7479 break;
7482 /* We only care if the access(es) would cause a change to the high part. */
7483 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7484 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
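/* The XOR/subtract idiom above sign-extends the low 16 bits of the
   offset.  A worked case (numbers for illustration): offset 0x1fffc
   gives 0x1fffc & 0xffff = 0xfffc, 0xfffc ^ 0x8000 = 0x7ffc, and
   0x7ffc - 0x8000 = -4, i.e. the displacement the low-part relocation
   would actually encode against the matching high part.  */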
7487 /* Return true if the MEM operand is a memory operand suitable for use
7488 with a (full width, possibly multiple) gpr load/store. On
7489 powerpc64 this means the offset must be divisible by 4.
7490 Implements 'Y' constraint.
7492 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7493 a constraint function we know the operand has satisfied a suitable
7494 memory predicate.
7496 Offsetting a lo_sum should not be allowed, except where we know by
7497 alignment that a 32k boundary is not crossed. Note that by
7498 "offsetting" here we mean a further offset to access parts of the
7499 MEM. It's fine to have a lo_sum where the inner address is offset
7500 from a sym, since the same sym+offset will appear in the high part
7501 of the address calculation. */
7503 bool
7504 mem_operand_gpr (rtx op, machine_mode mode)
7506 unsigned HOST_WIDE_INT offset;
7507 int extra;
7508 rtx addr = XEXP (op, 0);
7510 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
7511 if (TARGET_UPDATE
7512 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
7513 && mode_supports_pre_incdec_p (mode)
7514 && legitimate_indirect_address_p (XEXP (addr, 0), false))
7515 return true;
7517 /* Allow prefixed instructions if supported. If the bottom two bits of the
7518 offset are non-zero, we could use a prefixed instruction (which does not
7519 have the DS-form constraint that the traditional instruction had) instead
7520 of forcing the unaligned offset to a GPR. */
7521 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7522 return true;
7524 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
7525 really OK. Doing this early avoids teaching all the other machinery
7526 about them. */
7527 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
7528 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
7530 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
7531 if (!rs6000_offsettable_memref_p (op, mode, false))
7532 return false;
7534 op = address_offset (addr);
7535 if (op == NULL_RTX)
7536 return true;
7538 offset = INTVAL (op);
7539 if (TARGET_POWERPC64 && (offset & 3) != 0)
7540 return false;
7542 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7543 if (extra < 0)
7544 extra = 0;
7546 if (GET_CODE (addr) == LO_SUM)
7547 /* For lo_sum addresses, we must allow any offset except one that
7548 causes a wrap, so test only the low 16 bits. */
7549 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7551 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
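/* Worked example (illustrative): on powerpc64, a DImode access at
   (plus (reg) (const_int 32760)) is accepted: the offset is a
   multiple of 4 and extra is 0.  A TImode access at the same offset
   is rejected, since extra = 16 - 8 = 8 and 32760 + 8 no longer fits
   the signed 16-bit displacement, so the second doubleword would not
   be addressable.  */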
7554 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
7555 enforce an offset divisible by 4 even for 32-bit. */
7557 bool
7558 mem_operand_ds_form (rtx op, machine_mode mode)
7560 unsigned HOST_WIDE_INT offset;
7561 int extra;
7562 rtx addr = XEXP (op, 0);
7564 /* Allow prefixed instructions if supported. If the bottom two bits of the
7565 offset are non-zero, we could use a prefixed instruction (which does not
7566 have the DS-form constraint that the traditional instruction had) instead
7567 of forcing the unaligned offset to a GPR. */
7568 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
7569 return true;
7571 if (!offsettable_address_p (false, mode, addr))
7572 return false;
7574 op = address_offset (addr);
7575 if (op == NULL_RTX)
7576 return true;
7578 offset = INTVAL (op);
7579 if ((offset & 3) != 0)
7580 return false;
7582 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
7583 if (extra < 0)
7584 extra = 0;
7586 if (GET_CODE (addr) == LO_SUM)
7587 /* For lo_sum addresses, we must allow any offset except one that
7588 causes a wrap, so test only the low 16 bits. */
7589 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
7591 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7594 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
7596 static bool
7597 reg_offset_addressing_ok_p (machine_mode mode)
7599 switch (mode)
7601 case E_V16QImode:
7602 case E_V8HImode:
7603 case E_V4SFmode:
7604 case E_V4SImode:
7605 case E_V2DFmode:
7606 case E_V2DImode:
7607 case E_V1TImode:
7608 case E_TImode:
7609 case E_TFmode:
7610 case E_KFmode:
7611 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
7612 ISA 3.0 vector d-form addressing mode was added. While TImode is not
7613 a vector mode, if we want to use the VSX registers to move it around,
7614 we need to restrict ourselves to reg+reg addressing. Similarly for
7615 IEEE 128-bit floating point that is passed in a single vector
7616 register. */
7617 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
7618 return mode_supports_dq_form (mode);
7619 break;
7621 case E_SDmode:
7622 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
7623 addressing for the LFIWZX and STFIWX instructions. */
7624 if (TARGET_NO_SDMODE_STACK)
7625 return false;
7626 break;
7628 default:
7629 break;
7632 return true;
7635 static bool
7636 virtual_stack_registers_memory_p (rtx op)
7638 int regnum;
7640 if (REG_P (op))
7641 regnum = REGNO (op);
7643 else if (GET_CODE (op) == PLUS
7644 && REG_P (XEXP (op, 0))
7645 && CONST_INT_P (XEXP (op, 1)))
7646 regnum = REGNO (XEXP (op, 0));
7648 else
7649 return false;
7651 return (regnum >= FIRST_VIRTUAL_REGISTER
7652 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
7655 /* Return true if a MODE-sized memory access to OP plus OFFSET
7656 is known to not straddle a 32k boundary. This function is used
7657 to determine whether -mcmodel=medium code can use TOC pointer
7658 relative addressing for OP. This means the alignment of the TOC
7659 pointer must also be taken into account, and unfortunately that is
7660 only 8 bytes. */
7662 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
7663 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
7664 #endif
7666 static bool
7667 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
7668 machine_mode mode)
7670 tree decl;
7671 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
7673 if (!SYMBOL_REF_P (op))
7674 return false;
7676 /* ISA 3.0 vector d-form addressing is restricted, don't allow
7677 SYMBOL_REF. */
7678 if (mode_supports_dq_form (mode))
7679 return false;
7681 dsize = GET_MODE_SIZE (mode);
7682 decl = SYMBOL_REF_DECL (op);
7683 if (!decl)
7685 if (dsize == 0)
7686 return false;
7688 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7689 replacing memory addresses with an anchor plus offset. We
7690 could find the decl by rummaging around in the block->objects
7691 VEC for the given offset but that seems like too much work. */
7692 dalign = BITS_PER_UNIT;
7693 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7694 && SYMBOL_REF_ANCHOR_P (op)
7695 && SYMBOL_REF_BLOCK (op) != NULL)
7697 struct object_block *block = SYMBOL_REF_BLOCK (op);
7699 dalign = block->alignment;
7700 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7702 else if (CONSTANT_POOL_ADDRESS_P (op))
7704 /* It would be nice to have get_pool_align ().  */
7705 machine_mode cmode = get_pool_mode (op);
7707 dalign = GET_MODE_ALIGNMENT (cmode);
7710 else if (DECL_P (decl))
7712 dalign = DECL_ALIGN (decl);
7714 if (dsize == 0)
7716 /* Allow BLKmode when the entire object is known to not
7717 cross a 32k boundary. */
7718 if (!DECL_SIZE_UNIT (decl))
7719 return false;
7721 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7722 return false;
7724 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7725 if (dsize > 32768)
7726 return false;
7728 dalign /= BITS_PER_UNIT;
7729 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7730 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7731 return dalign >= dsize;
7734 else
7735 gcc_unreachable ();
7737 /* Find how many bits of the alignment we know for this access. */
7738 dalign /= BITS_PER_UNIT;
7739 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7740 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7741 mask = dalign - 1;
7742 lsb = offset & -offset;
7743 mask &= lsb - 1;
7744 dalign = mask + 1;
7746 return dalign >= dsize;
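/* Worked example for the mask arithmetic above (numbers chosen for
   illustration): with dalign = 8 bytes and offset = 4, lsb = 4 & -4
   = 4, mask = (8 - 1) & (4 - 1) = 3, so the known alignment drops to
   3 + 1 = 4 bytes.  A 4-byte access then passes dalign >= dsize; an
   8-byte access does not.  */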
7749 static bool
7750 constant_pool_expr_p (rtx op)
7752 rtx base, offset;
7754 split_const (op, &base, &offset);
7755 return (SYMBOL_REF_P (base)
7756 && CONSTANT_POOL_ADDRESS_P (base)
7757 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7760 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
7761 use that as the register to put the HIGH value into if register allocation
7762 is already done. */
7764 rtx
7765 create_TOC_reference (rtx symbol, rtx largetoc_reg)
7767 rtx tocrel, tocreg, hi;
7769 gcc_assert (TARGET_TOC);
7771 if (TARGET_DEBUG_ADDR)
7773 if (SYMBOL_REF_P (symbol))
7774 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
7775 XSTR (symbol, 0));
7776 else
7778 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
7779 GET_RTX_NAME (GET_CODE (symbol)));
7780 debug_rtx (symbol);
7784 if (!can_create_pseudo_p ())
7785 df_set_regs_ever_live (TOC_REGISTER, true);
7787 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
7788 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
7789 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
7790 return tocrel;
7792 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
7793 if (largetoc_reg != NULL)
7795 emit_move_insn (largetoc_reg, hi);
7796 hi = largetoc_reg;
7798 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
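/* The RTL produced looks like (sketch, register numbers assumed):

     (unspec [(symbol_ref "x") (reg 2)] UNSPEC_TOCREL)

   for -mcmodel=small, while for the medium/large models, once new
   pseudos can no longer be created, it is split into

     (lo_sum (high (unspec ... UNSPEC_TOCREL))
             (unspec ... UNSPEC_TOCREL))

   with the HIGH part moved into LARGETOC_REG when one is supplied.  */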
7801 /* These are only used to pass through from print_operand/print_operand_address
7802 to rs6000_output_addr_const_extra over the intervening function
7803 output_addr_const which is not target code. */
7804 static const_rtx tocrel_base_oac, tocrel_offset_oac;
7806 /* Return true if OP is a toc pointer relative address (the output
7807 of create_TOC_reference). If STRICT, do not match non-split
7808 -mcmodel=large/medium toc pointer relative addresses. If the pointers
7809 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
7810 TOCREL_OFFSET_RET respectively. */
7812 bool
7813 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
7814 const_rtx *tocrel_offset_ret)
7816 if (!TARGET_TOC)
7817 return false;
7819 if (TARGET_CMODEL != CMODEL_SMALL)
7821 /* When strict ensure we have everything tidy. */
7822 if (strict
7823 && !(GET_CODE (op) == LO_SUM
7824 && REG_P (XEXP (op, 0))
7825 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
7826 return false;
7828 /* When not strict, allow non-split TOC addresses and also allow
7829 (lo_sum (high ..)) TOC addresses created during reload. */
7830 if (GET_CODE (op) == LO_SUM)
7831 op = XEXP (op, 1);
7834 const_rtx tocrel_base = op;
7835 const_rtx tocrel_offset = const0_rtx;
7837 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7839 tocrel_base = XEXP (op, 0);
7840 tocrel_offset = XEXP (op, 1);
7843 if (tocrel_base_ret)
7844 *tocrel_base_ret = tocrel_base;
7845 if (tocrel_offset_ret)
7846 *tocrel_offset_ret = tocrel_offset;
7848 return (GET_CODE (tocrel_base) == UNSPEC
7849 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
7850 && REG_P (XVECEXP (tocrel_base, 0, 1))
7851 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
7854 /* Return true if X is a constant pool address, and also for cmodel=medium
7855 if X is a toc-relative address known to be offsettable within MODE. */
7857 bool
7858 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7859 bool strict)
7861 const_rtx tocrel_base, tocrel_offset;
7862 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
7863 && (TARGET_CMODEL != CMODEL_MEDIUM
7864 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7865 || mode == QImode
7866 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7867 INTVAL (tocrel_offset), mode)));
7870 static bool
7871 legitimate_small_data_p (machine_mode mode, rtx x)
7873 return (DEFAULT_ABI == ABI_V4
7874 && !flag_pic && !TARGET_TOC
7875 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
7876 && small_data_operand (x, mode));
7879 bool
7880 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7881 bool strict, bool worst_case)
7883 unsigned HOST_WIDE_INT offset;
7884 unsigned int extra;
7886 if (GET_CODE (x) != PLUS)
7887 return false;
7888 if (!REG_P (XEXP (x, 0)))
7889 return false;
7890 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7891 return false;
7892 if (mode_supports_dq_form (mode))
7893 return quad_address_p (x, mode, strict);
7894 if (!reg_offset_addressing_ok_p (mode))
7895 return virtual_stack_registers_memory_p (x);
7896 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7897 return true;
7898 if (!CONST_INT_P (XEXP (x, 1)))
7899 return false;
7901 offset = INTVAL (XEXP (x, 1));
7902 extra = 0;
7903 switch (mode)
7905 case E_DFmode:
7906 case E_DDmode:
7907 case E_DImode:
7908 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7909 addressing. */
7910 if (VECTOR_MEM_VSX_P (mode))
7911 return false;
7913 if (!worst_case)
7914 break;
7915 if (!TARGET_POWERPC64)
7916 extra = 4;
7917 else if (offset & 3)
7918 return false;
7919 break;
7921 case E_TFmode:
7922 case E_IFmode:
7923 case E_KFmode:
7924 case E_TDmode:
7925 case E_TImode:
7926 case E_PTImode:
7927 extra = 8;
7928 if (!worst_case)
7929 break;
7930 if (!TARGET_POWERPC64)
7931 extra = 12;
7932 else if (offset & 3)
7933 return false;
7934 break;
7936 default:
7937 break;
7940 if (TARGET_PREFIXED_ADDR)
7941 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
7942 else
7943 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
7946 bool
7947 legitimate_indexed_address_p (rtx x, int strict)
7949 rtx op0, op1;
7951 if (GET_CODE (x) != PLUS)
7952 return false;
7954 op0 = XEXP (x, 0);
7955 op1 = XEXP (x, 1);
7957 return (REG_P (op0) && REG_P (op1)
7958 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7959 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7960 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7961 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7964 bool
7965 avoiding_indexed_address_p (machine_mode mode)
7967 /* Avoid indexed addressing for modes that have non-indexed
7968 load/store instruction forms. */
7969 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7972 bool
7973 legitimate_indirect_address_p (rtx x, int strict)
7975 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
7978 bool
7979 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7981 if (!TARGET_MACHO || !flag_pic
7982 || mode != SImode || !MEM_P (x))
7983 return false;
7984 x = XEXP (x, 0);
7986 if (GET_CODE (x) != LO_SUM)
7987 return false;
7988 if (!REG_P (XEXP (x, 0)))
7989 return false;
7990 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7991 return false;
7992 x = XEXP (x, 1);
7994 return CONSTANT_P (x);
7997 static bool
7998 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8000 if (GET_CODE (x) != LO_SUM)
8001 return false;
8002 if (!REG_P (XEXP (x, 0)))
8003 return false;
8004 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8005 return false;
8006 /* Quad-word addresses are restricted, and we can't use LO_SUM. */
8007 if (mode_supports_dq_form (mode))
8008 return false;
8009 x = XEXP (x, 1);
8011 if (TARGET_ELF || TARGET_MACHO)
8013 bool large_toc_ok;
8015 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8016 return false;
8017 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8018 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8019 recognizes some LO_SUM addresses as valid although this
8020 function says the opposite. In most cases, LRA through different
8021 transformations can generate correct code for address reloads.
8022 It cannot manage only some LO_SUM cases. So we need to add
8023 code here saying that some addresses are still valid. */
8024 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8025 && small_toc_ref (x, VOIDmode));
8026 if (TARGET_TOC && ! large_toc_ok)
8027 return false;
8028 if (GET_MODE_NUNITS (mode) != 1)
8029 return false;
8030 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8031 && !(/* ??? Assume floating point reg based on mode? */
8032 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8033 return false;
8035 return CONSTANT_P (x) || large_toc_ok;
8038 return false;
8042 /* Try machine-dependent ways of modifying an illegitimate address
8043 to be legitimate. If we find one, return the new, valid address.
8044 This is used from only one place: `memory_address' in explow.c.
8046 OLDX is the address as it was before break_out_memory_refs was
8047 called. In some cases it is useful to look at this to decide what
8048 needs to be done.
8050 It is always safe for this function to do nothing. It exists to
8051 recognize opportunities to optimize the output.
8053 On RS/6000, first check for the sum of a register with a constant
8054 integer that is out of range. If so, generate code to add the
8055 constant with the low-order 16 bits masked to the register and force
8056 this result into another register (this can be done with `cau').
8057 Then generate an address of REG+(CONST&0xffff), allowing for the
8058 possibility of bit 16 being a one.
8060 Then check for the sum of a register and something not constant, try to
8061 load the other things into a register and return the sum. */
8063 static rtx
8064 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8065 machine_mode mode)
8067 unsigned int extra;
8069 if (!reg_offset_addressing_ok_p (mode)
8070 || mode_supports_dq_form (mode))
8072 if (virtual_stack_registers_memory_p (x))
8073 return x;
8075 /* In theory we should not be seeing addresses of the form reg+0,
8076 but just in case it is generated, optimize it away. */
8077 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8078 return force_reg (Pmode, XEXP (x, 0));
8080 /* For TImode with load/store quad, restrict addresses to just a single
8081 pointer, so it works with both GPRs and VSX registers. */
8082 /* Make sure both operands are registers. */
8083 else if (GET_CODE (x) == PLUS
8084 && (mode != TImode || !TARGET_VSX))
8085 return gen_rtx_PLUS (Pmode,
8086 force_reg (Pmode, XEXP (x, 0)),
8087 force_reg (Pmode, XEXP (x, 1)));
8088 else
8089 return force_reg (Pmode, x);
8091 if (SYMBOL_REF_P (x))
8093 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8094 if (model != 0)
8095 return rs6000_legitimize_tls_address (x, model);
8098 extra = 0;
8099 switch (mode)
8101 case E_TFmode:
8102 case E_TDmode:
8103 case E_TImode:
8104 case E_PTImode:
8105 case E_IFmode:
8106 case E_KFmode:
8107 /* As in legitimate_offset_address_p we do not assume
8108 worst-case. The mode here is just a hint as to the registers
8109 used. A TImode is usually in gprs, but may actually be in
8110 fprs. Leave worst-case scenario for reload to handle via
8111 insn constraints. PTImode is only GPRs. */
8112 extra = 8;
8113 break;
8114 default:
8115 break;
8118 if (GET_CODE (x) == PLUS
8119 && REG_P (XEXP (x, 0))
8120 && CONST_INT_P (XEXP (x, 1))
8121 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8122 >= 0x10000 - extra))
8124 HOST_WIDE_INT high_int, low_int;
8125 rtx sum;
8126 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8127 if (low_int >= 0x8000 - extra)
8128 low_int = 0;
8129 high_int = INTVAL (XEXP (x, 1)) - low_int;
8130 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8131 GEN_INT (high_int)), 0);
8132 return plus_constant (Pmode, sum, low_int);
8134 else if (GET_CODE (x) == PLUS
8135 && REG_P (XEXP (x, 0))
8136 && !CONST_INT_P (XEXP (x, 1))
8137 && GET_MODE_NUNITS (mode) == 1
8138 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8139 || (/* ??? Assume floating point reg based on mode? */
8140 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8141 && !avoiding_indexed_address_p (mode))
8143 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8144 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8146 else if ((TARGET_ELF
8147 #if TARGET_MACHO
8148 || !MACHO_DYNAMIC_NO_PIC_P
8149 #endif
8150 )
8151 && TARGET_32BIT
8152 && TARGET_NO_TOC_OR_PCREL
8153 && !flag_pic
8154 && !CONST_INT_P (x)
8155 && !CONST_WIDE_INT_P (x)
8156 && !CONST_DOUBLE_P (x)
8157 && CONSTANT_P (x)
8158 && GET_MODE_NUNITS (mode) == 1
8159 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8160 || (/* ??? Assume floating point reg based on mode? */
8161 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8163 rtx reg = gen_reg_rtx (Pmode);
8164 if (TARGET_ELF)
8165 emit_insn (gen_elf_high (reg, x));
8166 else
8167 emit_insn (gen_macho_high (Pmode, reg, x));
8168 return gen_rtx_LO_SUM (Pmode, reg, x);
8170 else if (TARGET_TOC
8171 && SYMBOL_REF_P (x)
8172 && constant_pool_expr_p (x)
8173 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8174 return create_TOC_reference (x, NULL_RTX);
8175 else
8176 return x;
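/* Worked example for the high/low split above (constant chosen for
   illustration): for (plus (reg) (const_int 0x12344)), low_int is
   0x2344 and high_int is 0x10000, so we add 0x10000 into a new
   register (typically a single addis) and return
   (plus (new reg) (const_int 0x2344)), a valid 16-bit offset.  */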
8179 /* Debug version of rs6000_legitimize_address. */
8180 static rtx
8181 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8183 rtx ret;
8184 rtx_insn *insns;
8186 start_sequence ();
8187 ret = rs6000_legitimize_address (x, oldx, mode);
8188 insns = get_insns ();
8189 end_sequence ();
8191 if (ret != x)
8193 fprintf (stderr,
8194 "\nrs6000_legitimize_address: mode %s, old code %s, "
8195 "new code %s, modified\n",
8196 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8197 GET_RTX_NAME (GET_CODE (ret)));
8199 fprintf (stderr, "Original address:\n");
8200 debug_rtx (x);
8202 fprintf (stderr, "oldx:\n");
8203 debug_rtx (oldx);
8205 fprintf (stderr, "New address:\n");
8206 debug_rtx (ret);
8208 if (insns)
8210 fprintf (stderr, "Insns added:\n");
8211 debug_rtx_list (insns, 20);
8214 else
8216 fprintf (stderr,
8217 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8218 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8220 debug_rtx (x);
8223 if (insns)
8224 emit_insn (insns);
8226 return ret;
8229 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8230 We need to emit DTP-relative relocations. */
8232 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8233 static void
8234 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8236 switch (size)
8238 case 4:
8239 fputs ("\t.long\t", file);
8240 break;
8241 case 8:
8242 fputs (DOUBLE_INT_ASM_OP, file);
8243 break;
8244 default:
8245 gcc_unreachable ();
8247 output_addr_const (file, x);
8248 if (TARGET_ELF)
8249 fputs ("@dtprel+0x8000", file);
8250 else if (TARGET_XCOFF && SYMBOL_REF_P (x))
8252 switch (SYMBOL_REF_TLS_MODEL (x))
8254 case 0:
8255 break;
8256 case TLS_MODEL_LOCAL_EXEC:
8257 fputs ("@le", file);
8258 break;
8259 case TLS_MODEL_INITIAL_EXEC:
8260 fputs ("@ie", file);
8261 break;
8262 case TLS_MODEL_GLOBAL_DYNAMIC:
8263 case TLS_MODEL_LOCAL_DYNAMIC:
8264 fputs ("@m", file);
8265 break;
8266 default:
8267 gcc_unreachable ();
8272 /* Return true if X is a symbol that refers to real (rather than emulated)
8273 TLS. */
8275 static bool
8276 rs6000_real_tls_symbol_ref_p (rtx x)
8278 return (SYMBOL_REF_P (x)
8279 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8282 /* In the name of slightly smaller debug output, and to cater to
8283 general assembler lossage, recognize various UNSPEC sequences
8284 and turn them back into a direct symbol reference. */
8286 static rtx
8287 rs6000_delegitimize_address (rtx orig_x)
8289 rtx x, y, offset;
8291 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR)
8292 orig_x = XVECEXP (orig_x, 0, 0);
8294 orig_x = delegitimize_mem_from_attrs (orig_x);
8296 x = orig_x;
8297 if (MEM_P (x))
8298 x = XEXP (x, 0);
8300 y = x;
8301 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
8302 y = XEXP (y, 1);
8304 offset = NULL_RTX;
8305 if (GET_CODE (y) == PLUS
8306 && GET_MODE (y) == Pmode
8307 && CONST_INT_P (XEXP (y, 1)))
8309 offset = XEXP (y, 1);
8310 y = XEXP (y, 0);
8313 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
8315 y = XVECEXP (y, 0, 0);
8317 #ifdef HAVE_AS_TLS
8318 /* Do not associate thread-local symbols with the original
8319 constant pool symbol. */
8320 if (TARGET_XCOFF
8321 && SYMBOL_REF_P (y)
8322 && CONSTANT_POOL_ADDRESS_P (y)
8323 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8324 return orig_x;
8325 #endif
8327 if (offset != NULL_RTX)
8328 y = gen_rtx_PLUS (Pmode, y, offset);
8329 if (!MEM_P (orig_x))
8330 return y;
8331 else
8332 return replace_equiv_address_nv (orig_x, y);
8335 if (TARGET_MACHO
8336 && GET_CODE (orig_x) == LO_SUM
8337 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8339 y = XEXP (XEXP (orig_x, 1), 0);
8340 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8341 return XVECEXP (y, 0, 0);
8344 return orig_x;
8347 /* Return true if X shouldn't be emitted into the debug info.
8348 The linker doesn't like .toc section references from
8349 .debug_* sections, so reject .toc section symbols. */
8351 static bool
8352 rs6000_const_not_ok_for_debug_p (rtx x)
8354 if (GET_CODE (x) == UNSPEC)
8355 return true;
8356 if (SYMBOL_REF_P (x)
8357 && CONSTANT_POOL_ADDRESS_P (x))
8359 rtx c = get_pool_constant (x);
8360 machine_mode cmode = get_pool_mode (x);
8361 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8362 return true;
8365 return false;
8368 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8370 static bool
8371 rs6000_legitimate_combined_insn (rtx_insn *insn)
8373 int icode = INSN_CODE (insn);
8375 /* Reject creating doloop insns. Combine should not be allowed
8376 to create these for a number of reasons:
8377 1) In a nested loop, if combine creates one of these in an
8378 outer loop and the register allocator happens to allocate ctr
8379 to the outer loop insn, then the inner loop can't use ctr.
8380 Inner loops ought to be more highly optimized.
8381 2) Combine often wants to create one of these from what was
8382 originally a three insn sequence, first combining the three
8383 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8384 allocated ctr, the splitter takes us back to the three insn
8385 sequence. It's better to stop combine at the two insn
8386 sequence.
8387 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8388 insns, the register allocator sometimes uses floating point
8389 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8390 jump insn and output reloads are not implemented for jumps,
8391 the ctrsi/ctrdi splitters need to handle all possible cases.
8392 That's a pain, and it gets to be seriously difficult when a
8393 splitter that runs after reload needs memory to transfer from
8394 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8395 for the difficult case. It's better to not create problems
8396 in the first place. */
8397 if (icode != CODE_FOR_nothing
8398 && (icode == CODE_FOR_bdz_si
8399 || icode == CODE_FOR_bdz_di
8400 || icode == CODE_FOR_bdnz_si
8401 || icode == CODE_FOR_bdnz_di
8402 || icode == CODE_FOR_bdztf_si
8403 || icode == CODE_FOR_bdztf_di
8404 || icode == CODE_FOR_bdnztf_si
8405 || icode == CODE_FOR_bdnztf_di))
8406 return false;
8408 return true;
8411 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8413 static GTY(()) rtx rs6000_tls_symbol;
8414 static rtx
8415 rs6000_tls_get_addr (void)
8417 if (!rs6000_tls_symbol)
8418 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8420 return rs6000_tls_symbol;
8423 /* Construct the SYMBOL_REF for TLS GOT references. */
8425 static GTY(()) rtx rs6000_got_symbol;
8426 rtx
8427 rs6000_got_sym (void)
8429 if (!rs6000_got_symbol)
8431 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8432 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8433 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8436 return rs6000_got_symbol;
8439 /* AIX Thread-Local Address support. */
8441 static rtx
8442 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8444 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8445 const char *name;
8446 char *tlsname;
8448 name = XSTR (addr, 0);
8449 /* Append TLS CSECT qualifier, unless the symbol already is qualified
8450 or the symbol will be in the TLS private data section. */
8451 if (name[strlen (name) - 1] != ']'
8452 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8453 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8455 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8456 strcpy (tlsname, name);
8457 strcat (tlsname,
8458 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8459 tlsaddr = copy_rtx (addr);
8460 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8462 else
8463 tlsaddr = addr;
8465 /* Place addr into TOC constant pool. */
8466 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8468 /* Output the TOC entry and create the MEM referencing the value. */
8469 if (constant_pool_expr_p (XEXP (sym, 0))
8470 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8472 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8473 mem = gen_const_mem (Pmode, tocref);
8474 set_mem_alias_set (mem, get_TOC_alias_set ());
8476 else
8477 return sym;
8479 /* Use global-dynamic for local-dynamic. */
8480 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8481 || model == TLS_MODEL_LOCAL_DYNAMIC)
8483 /* Create new TOC reference for @m symbol. */
8484 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8485 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8486 strcpy (tlsname, "*LCM");
8487 strcat (tlsname, name + 3);
8488 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8489 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8490 tocref = create_TOC_reference (modaddr, NULL_RTX);
8491 rtx modmem = gen_const_mem (Pmode, tocref);
8492 set_mem_alias_set (modmem, get_TOC_alias_set ());
8494 rtx modreg = gen_reg_rtx (Pmode);
8495 emit_insn (gen_rtx_SET (modreg, modmem));
8497 tmpreg = gen_reg_rtx (Pmode);
8498 emit_insn (gen_rtx_SET (tmpreg, mem));
8500 dest = gen_reg_rtx (Pmode);
8501 if (TARGET_32BIT)
8502 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8503 else
8504 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8505 return dest;
8507 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8508 else if (TARGET_32BIT)
8510 tlsreg = gen_reg_rtx (SImode);
8511 emit_insn (gen_tls_get_tpointer (tlsreg));
8513 else
8514 tlsreg = gen_rtx_REG (DImode, 13);
8516 /* Load the TOC value into temporary register. */
8517 tmpreg = gen_reg_rtx (Pmode);
8518 emit_insn (gen_rtx_SET (tmpreg, mem));
8519 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8520 gen_rtx_MINUS (Pmode, addr, tlsreg));
8522 /* Add TOC symbol value to TLS pointer. */
8523 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8525 return dest;
8528 /* Passes the TLS argument value used by the global-dynamic and
8529 local-dynamic emit_library_call_value calls in
8530 rs6000_legitimize_tls_address through to rs6000_call_aix and
8531 rs6000_call_sysv, to emit the marker relocs put on __tls_get_addr calls. */
8532 static rtx global_tlsarg;
8534 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8535 this (thread-local) address. */
8537 static rtx
8538 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8540 rtx dest, insn;
8542 if (TARGET_XCOFF)
8543 return rs6000_legitimize_tls_address_aix (addr, model);
8545 dest = gen_reg_rtx (Pmode);
8546 if (model == TLS_MODEL_LOCAL_EXEC
8547 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
8549 rtx tlsreg;
8551 if (TARGET_64BIT)
8553 tlsreg = gen_rtx_REG (Pmode, 13);
8554 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8556 else
8558 tlsreg = gen_rtx_REG (Pmode, 2);
8559 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8561 emit_insn (insn);
8563 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8565 rtx tlsreg, tmp;
8567 tmp = gen_reg_rtx (Pmode);
8568 if (TARGET_64BIT)
8570 tlsreg = gen_rtx_REG (Pmode, 13);
8571 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8573 else
8575 tlsreg = gen_rtx_REG (Pmode, 2);
8576 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
8578 emit_insn (insn);
8579 if (TARGET_64BIT)
8580 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
8581 else
8582 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
8583 emit_insn (insn);
8585 else
8587 rtx got, tga, tmp1, tmp2;
8589 /* We currently use relocations like @got@tlsgd for tls, which
8590 means the linker will handle allocation of tls entries, placing
8591 them in the .got section. So use a pointer to the .got section,
8592 not one to secondary TOC sections used by 64-bit -mminimal-toc,
8593 or to secondary GOT sections used by 32-bit -fPIC. */
8594 if (rs6000_pcrel_p (cfun))
8595 got = const0_rtx;
8596 else if (TARGET_64BIT)
8597 got = gen_rtx_REG (Pmode, 2);
8598 else
8600 if (flag_pic == 1)
8601 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
8602 else
8604 rtx gsym = rs6000_got_sym ();
8605 got = gen_reg_rtx (Pmode);
8606 if (flag_pic == 0)
8607 rs6000_emit_move (got, gsym, Pmode);
8608 else
8610 rtx mem, lab;
8612 tmp1 = gen_reg_rtx (Pmode);
8613 tmp2 = gen_reg_rtx (Pmode);
8614 mem = gen_const_mem (Pmode, tmp1);
8615 lab = gen_label_rtx ();
8616 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
8617 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
8618 if (TARGET_LINK_STACK)
8619 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
8620 emit_move_insn (tmp2, mem);
8621 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
8622 set_unique_reg_note (last, REG_EQUAL, gsym);
8627 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
8629 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
8630 UNSPEC_TLSGD);
8631 tga = rs6000_tls_get_addr ();
8632 rtx argreg = gen_rtx_REG (Pmode, 3);
8633 emit_insn (gen_rtx_SET (argreg, arg));
8634 global_tlsarg = arg;
8635 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
8636 global_tlsarg = NULL_RTX;
8638 /* Make a note so that the result of this call can be CSEd. */
8639 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8640 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8641 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8643 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
8645 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
8646 tga = rs6000_tls_get_addr ();
8647 tmp1 = gen_reg_rtx (Pmode);
8648 rtx argreg = gen_rtx_REG (Pmode, 3);
8649 emit_insn (gen_rtx_SET (argreg, arg));
8650 global_tlsarg = arg;
8651 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
8652 global_tlsarg = NULL_RTX;
8654 /* Make a note so that the result of this call can be CSEd. */
8655 rtvec vec = gen_rtvec (1, copy_rtx (arg));
8656 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
8657 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
8659 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
8661 if (TARGET_64BIT)
8662 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
8663 else
8664 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
8666 else if (rs6000_tls_size == 32)
8668 tmp2 = gen_reg_rtx (Pmode);
8669 if (TARGET_64BIT)
8670 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
8671 else
8672 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
8673 emit_insn (insn);
8674 if (TARGET_64BIT)
8675 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
8676 else
8677 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
8679 else
8681 tmp2 = gen_reg_rtx (Pmode);
8682 if (TARGET_64BIT)
8683 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
8684 else
8685 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
8686 emit_insn (insn);
8687 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
8689 emit_insn (insn);
8691 else
8693 /* IE, or 64-bit offset LE. */
8694 tmp2 = gen_reg_rtx (Pmode);
8695 if (TARGET_64BIT)
8696 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
8697 else
8698 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
8699 emit_insn (insn);
8700 if (rs6000_pcrel_p (cfun))
8702 if (TARGET_64BIT)
8703 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
8704 else
8705 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
8707 else if (TARGET_64BIT)
8708 insn = gen_tls_tls_64 (dest, tmp2, addr);
8709 else
8710 insn = gen_tls_tls_32 (dest, tmp2, addr);
8711 emit_insn (insn);
8715 return dest;
8718 /* Only create the global variable for the stack protect guard if we are using
8719 the global flavor of that guard. */
8720 static tree
8721 rs6000_init_stack_protect_guard (void)
8723 if (rs6000_stack_protector_guard == SSP_GLOBAL)
8724 return default_stack_protect_guard ();
8726 return NULL_TREE;
8729 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8731 static bool
8732 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8734 if (GET_CODE (x) == HIGH
8735 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8736 return true;
8738 /* A TLS symbol in the TOC cannot contain a sum. */
8739 if (GET_CODE (x) == CONST
8740 && GET_CODE (XEXP (x, 0)) == PLUS
8741 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
8742 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8743 return true;
8745 /* Do not place an ELF TLS symbol in the constant pool. */
8746 return TARGET_ELF && tls_referenced_p (x);
8749 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8750 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8751 can be addressed relative to the toc pointer. */
8753 static bool
8754 use_toc_relative_ref (rtx sym, machine_mode mode)
8756 return ((constant_pool_expr_p (sym)
8757 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8758 get_pool_mode (sym)))
8759 || (TARGET_CMODEL == CMODEL_MEDIUM
8760 && SYMBOL_REF_LOCAL_P (sym)
8761 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8764 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8765 that is a valid memory address for an instruction.
8766 The MODE argument is the machine mode for the MEM expression
8767 that wants to use this address.
8769 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
8770 refers to a constant pool entry of an address (or the sum of it
8771 plus a constant), a short (16-bit signed) constant plus a register,
8772 the sum of two registers, or a register indirect, possibly with an
8773 auto-increment. For DFmode, DDmode and DImode with a constant plus
8774 register, we must ensure that both words are addressable, or that we
8775 are on PowerPC64 with the offset word aligned.
8777 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8778 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8779 because adjacent memory cells are accessed by adding word-sized offsets
8780 during assembly output. */
8781 static bool
8782 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8784 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8785 bool quad_offset_p = mode_supports_dq_form (mode);
8787 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8788 if (VECTOR_MEM_ALTIVEC_P (mode)
8789 && GET_CODE (x) == AND
8790 && CONST_INT_P (XEXP (x, 1))
8791 && INTVAL (XEXP (x, 1)) == -16)
8792 x = XEXP (x, 0);
8794 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8795 return 0;
8796 if (legitimate_indirect_address_p (x, reg_ok_strict))
8797 return 1;
8798 if (TARGET_UPDATE
8799 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8800 && mode_supports_pre_incdec_p (mode)
8801 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8802 return 1;
8804 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
8805 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
8806 return 1;
8808 /* Handle restricted vector d-form offsets in ISA 3.0. */
8809 if (quad_offset_p)
8811 if (quad_address_p (x, mode, reg_ok_strict))
8812 return 1;
8814 else if (virtual_stack_registers_memory_p (x))
8815 return 1;
8817 else if (reg_offset_p)
8819 if (legitimate_small_data_p (mode, x))
8820 return 1;
8821 if (legitimate_constant_pool_address_p (x, mode,
8822 reg_ok_strict || lra_in_progress))
8823 return 1;
8826 /* For TImode, if we have TImode in VSX registers, only allow register
8827 indirect addresses. This will allow the values to go in either GPRs
8828 or VSX registers without reloading. The vector types would tend to
8829 go into VSX registers, so we allow REG+REG, while TImode seems
8830 somewhat split, in that some uses are GPR based, and some VSX based. */
8831 /* FIXME: We could loosen this by changing the following to
8832 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
8833 but currently we cannot allow REG+REG addressing for TImode. See
8834 PR72827 for complete details on how this ends up hoodwinking DSE. */
8835 if (mode == TImode && TARGET_VSX)
8836 return 0;
8837 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
8838 if (! reg_ok_strict
8839 && reg_offset_p
8840 && GET_CODE (x) == PLUS
8841 && REG_P (XEXP (x, 0))
8842 && (XEXP (x, 0) == virtual_stack_vars_rtx
8843 || XEXP (x, 0) == arg_pointer_rtx)
8844 && CONST_INT_P (XEXP (x, 1)))
8845 return 1;
8846 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8847 return 1;
8848 if (!FLOAT128_2REG_P (mode)
8849 && (TARGET_HARD_FLOAT
8850 || TARGET_POWERPC64
8851 || (mode != DFmode && mode != DDmode))
8852 && (TARGET_POWERPC64 || mode != DImode)
8853 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8854 && mode != PTImode
8855 && !avoiding_indexed_address_p (mode)
8856 && legitimate_indexed_address_p (x, reg_ok_strict))
8857 return 1;
8858 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8859 && mode_supports_pre_modify_p (mode)
8860 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8861 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8862 reg_ok_strict, false)
8863 || (!avoiding_indexed_address_p (mode)
8864 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8865 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8867 /* There is no prefixed version of the load/store with update. */
8868 rtx addr = XEXP (x, 1);
8869 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
8871 if (reg_offset_p && !quad_offset_p
8872 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8873 return 1;
8874 return 0;
8877 /* Debug version of rs6000_legitimate_address_p. */
8878 static bool
8879 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8880 bool reg_ok_strict)
8882 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8883 fprintf (stderr,
8884 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8885 "strict = %d, reload = %s, code = %s\n",
8886 ret ? "true" : "false",
8887 GET_MODE_NAME (mode),
8888 reg_ok_strict,
8889 (reload_completed ? "after" : "before"),
8890 GET_RTX_NAME (GET_CODE (x)));
8891 debug_rtx (x);
8893 return ret;
8896 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8898 static bool
8899 rs6000_mode_dependent_address_p (const_rtx addr,
8900 addr_space_t as ATTRIBUTE_UNUSED)
8902 return rs6000_mode_dependent_address_ptr (addr);
8905 /* Go to LABEL if ADDR (a legitimate address expression)
8906 has an effect that depends on the machine mode it is used for.
8908 On the RS/6000 this is true of all integral offsets (since AltiVec
8909 and VSX modes don't allow them) and of any pre-increment or decrement.
8911 ??? Except that due to conceptual problems in offsettable_address_p
8912 we can't really report the problems of integral offsets. So leave
8913 this assuming that the adjustable offset must be valid for the
8914 sub-words of a TFmode operand, which is what we had before. */
8916 static bool
8917 rs6000_mode_dependent_address (const_rtx addr)
8919 switch (GET_CODE (addr))
8921 case PLUS:
8922 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8923 is considered a legitimate address before reload, so there
8924 are no offset restrictions in that case. Note that this
8925 condition is safe in strict mode because any address involving
8926 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8927 been rejected as illegitimate. */
8928 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8929 && XEXP (addr, 0) != arg_pointer_rtx
8930 && CONST_INT_P (XEXP (addr, 1)))
8932 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8933 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
8934 if (TARGET_PREFIXED_ADDR)
8935 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
8936 else
8937 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
8939 break;
8941 case LO_SUM:
8942 /* Anything in the constant pool is sufficiently aligned that
8943 all bytes have the same high part address. */
8944 return !legitimate_constant_pool_address_p (addr, QImode, false);
8946 /* Auto-increment cases are now treated generically in recog.c. */
8947 case PRE_MODIFY:
8948 return TARGET_UPDATE;
8950 /* AND is only allowed in Altivec loads. */
8951 case AND:
8952 return true;
8954 default:
8955 break;
8958 return false;
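/* Example (illustrative, non-prefixed target): (plus (reg)
   (const_int 32764)) is mode dependent, because with the worst-case
   extra of 8 (12 on 32-bit) an access to the final words of a TFmode
   operand would need displacement 32764 + 8, which overflows the
   signed 16-bit range, while an SImode access at the same address is
   fine.  */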
8961 /* Debug version of rs6000_mode_dependent_address. */
8962 static bool
8963 rs6000_debug_mode_dependent_address (const_rtx addr)
8965 bool ret = rs6000_mode_dependent_address (addr);
8967 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8968 ret ? "true" : "false");
8969 debug_rtx (addr);
8971 return ret;
8974 /* Implement FIND_BASE_TERM. */
8976 rtx
8977 rs6000_find_base_term (rtx op)
8979 rtx base;
8981 base = op;
8982 if (GET_CODE (base) == CONST)
8983 base = XEXP (base, 0);
8984 if (GET_CODE (base) == PLUS)
8985 base = XEXP (base, 0);
8986 if (GET_CODE (base) == UNSPEC)
8987 switch (XINT (base, 1))
8989 case UNSPEC_TOCREL:
8990 case UNSPEC_MACHOPIC_OFFSET:
8991 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8992 for aliasing purposes. */
8993 return XVECEXP (base, 0, 0);
8996 return op;
8999 /* More elaborate version of recog's offsettable_memref_p predicate
9000 that works around the ??? note of rs6000_mode_dependent_address.
9001 In particular it accepts
9003 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9005 in 32-bit mode, which the recog predicate rejects. */
9007 static bool
9008 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9010 bool worst_case;
9012 if (!MEM_P (op))
9013 return false;
9015 /* First mimic offsettable_memref_p. */
9016 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9017 return true;
9019 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9020 the latter predicate knows nothing about the mode of the memory
9021 reference and, therefore, assumes that it is the largest supported
9022 mode (TFmode). As a consequence, legitimate offsettable memory
9023 references are rejected. rs6000_legitimate_offset_address_p contains
9024 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9025 at least with a little bit of help here given that we know the
9026 actual registers used. */
9027 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9028 || GET_MODE_SIZE (reg_mode) == 4);
9029 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9030 strict, worst_case);
9033 /* Determine the reassociation width to be used in reassociate_bb.
9034 This takes into account how many parallel operations we
9035 can actually do of a given type, and also the latency.
9037 int add/sub 6/cycle
9038 mul 2/cycle
9039 vect add/sub/mul 2/cycle
9040 fp add/sub/mul 2/cycle
9041 dfp 1/cycle
9044 static int
9045 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9046 machine_mode mode)
9048 switch (rs6000_tune)
9050 case PROCESSOR_POWER8:
9051 case PROCESSOR_POWER9:
9052 case PROCESSOR_FUTURE:
9053 if (DECIMAL_FLOAT_MODE_P (mode))
9054 return 1;
9055 if (VECTOR_MODE_P (mode))
9056 return 4;
9057 if (INTEGRAL_MODE_P (mode))
9058 return 1;
9059 if (FLOAT_MODE_P (mode))
9060 return 4;
9061 break;
9062 default:
9063 break;
9065 return 1;
9068 /* Change register usage conditional on target flags. */
9069 static void
9070 rs6000_conditional_register_usage (void)
9072 int i;
9074 if (TARGET_DEBUG_TARGET)
9075 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9077 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9078 if (TARGET_64BIT)
9079 fixed_regs[13] = call_used_regs[13] = 1;
9081 /* Conditionally disable FPRs. */
9082 if (TARGET_SOFT_FLOAT)
9083 for (i = 32; i < 64; i++)
9084 fixed_regs[i] = call_used_regs[i] = 1;
9086 /* The TOC register is not killed across calls in a way that is
9087 visible to the compiler. */
9088 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9089 call_used_regs[2] = 0;
9091 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9092 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9094 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9095 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9096 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9098 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9099 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9100 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9102 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9103 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9105 if (!TARGET_ALTIVEC && !TARGET_VSX)
9107 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9108 fixed_regs[i] = call_used_regs[i] = 1;
9109 call_used_regs[VRSAVE_REGNO] = 1;
9112 if (TARGET_ALTIVEC || TARGET_VSX)
9113 global_regs[VSCR_REGNO] = 1;
9115 if (TARGET_ALTIVEC_ABI)
9117 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9118 call_used_regs[i] = 1;
9120 /* AIX reserves VR20:31 in non-extended ABI mode. */
9121 if (TARGET_XCOFF)
9122 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9123 fixed_regs[i] = call_used_regs[i] = 1;
9128 /* Output insns to set DEST equal to the constant SOURCE as a series of
9129 lis, ori and shl instructions and return TRUE. */
9131 bool
9132 rs6000_emit_set_const (rtx dest, rtx source)
9134 machine_mode mode = GET_MODE (dest);
9135 rtx temp, set;
9136 rtx_insn *insn;
9137 HOST_WIDE_INT c;
9139 gcc_checking_assert (CONST_INT_P (source));
9140 c = INTVAL (source);
9141 switch (mode)
9143 case E_QImode:
9144 case E_HImode:
9145 emit_insn (gen_rtx_SET (dest, source));
9146 return true;
9148 case E_SImode:
9149 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9151 emit_insn (gen_rtx_SET (copy_rtx (temp),
9152 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9153 emit_insn (gen_rtx_SET (dest,
9154 gen_rtx_IOR (SImode, copy_rtx (temp),
9155 GEN_INT (c & 0xffff))));
9156 break;
9158 case E_DImode:
9159 if (!TARGET_POWERPC64)
9161 rtx hi, lo;
9163 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9164 DImode);
9165 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9166 DImode);
9167 emit_move_insn (hi, GEN_INT (c >> 32));
9168 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9169 emit_move_insn (lo, GEN_INT (c));
9171 else
9172 rs6000_emit_set_long_const (dest, c);
9173 break;
9175 default:
9176 gcc_unreachable ();
9179 insn = get_last_insn ();
9180 set = single_set (insn);
9181 if (! CONSTANT_P (SET_SRC (set)))
9182 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9184 return true;
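/* Illustrative sketch (not part of GCC; example_split_si_constant is
   hypothetical): the SImode case above builds a 32-bit constant with a
   lis/ori pair by splitting it at bit 16.  E.g. c = 0x12345678 gives lis
   material 0x12340000 and ori material 0x5678.  */

static void
example_split_si_constant (long long c, long long *lis_imm, long long *ori_imm)
{
  *lis_imm = c & ~0xffffLL;	/* high half, loaded first via lis.  */
  *ori_imm = c & 0xffffLL;	/* low half, OR'ed in via ori.  */
}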
9187 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9188 Output insns to set DEST equal to the constant C as a series of
9189 lis, ori and shl instructions. */
9191 static void
9192 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9194 rtx temp;
9195 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9197 ud1 = c & 0xffff;
9198 c = c >> 16;
9199 ud2 = c & 0xffff;
9200 c = c >> 16;
9201 ud3 = c & 0xffff;
9202 c = c >> 16;
9203 ud4 = c & 0xffff;
9205 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9206 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9207 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9209 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9210 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9212 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9214 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9215 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9216 if (ud1 != 0)
9217 emit_move_insn (dest,
9218 gen_rtx_IOR (DImode, copy_rtx (temp),
9219 GEN_INT (ud1)));
9221 else if (ud3 == 0 && ud4 == 0)
9223 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9225 gcc_assert (ud2 & 0x8000);
9226 emit_move_insn (copy_rtx (temp),
9227 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9228 if (ud1 != 0)
9229 emit_move_insn (copy_rtx (temp),
9230 gen_rtx_IOR (DImode, copy_rtx (temp),
9231 GEN_INT (ud1)));
9232 emit_move_insn (dest,
9233 gen_rtx_ZERO_EXTEND (DImode,
9234 gen_lowpart (SImode,
9235 copy_rtx (temp))));
9237 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9238 || (ud4 == 0 && ! (ud3 & 0x8000)))
9240 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9242 emit_move_insn (copy_rtx (temp),
9243 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9244 if (ud2 != 0)
9245 emit_move_insn (copy_rtx (temp),
9246 gen_rtx_IOR (DImode, copy_rtx (temp),
9247 GEN_INT (ud2)));
9248 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9249 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9250 GEN_INT (16)));
9251 if (ud1 != 0)
9252 emit_move_insn (dest,
9253 gen_rtx_IOR (DImode, copy_rtx (temp),
9254 GEN_INT (ud1)));
9256 else
9258 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9260 emit_move_insn (copy_rtx (temp),
9261 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9262 if (ud3 != 0)
9263 emit_move_insn (copy_rtx (temp),
9264 gen_rtx_IOR (DImode, copy_rtx (temp),
9265 GEN_INT (ud3)));
9267 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9268 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9269 GEN_INT (32)));
9270 if (ud2 != 0)
9271 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9272 gen_rtx_IOR (DImode, copy_rtx (temp),
9273 GEN_INT (ud2 << 16)));
9274 if (ud1 != 0)
9275 emit_move_insn (dest,
9276 gen_rtx_IOR (DImode, copy_rtx (temp),
9277 GEN_INT (ud1)));
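/* Illustrative sketch (not part of GCC; example_split_di_constant is
   hypothetical): the halfword decomposition used by
   rs6000_emit_set_long_const above.  ud[0]..ud[3] correspond to ud1..ud4,
   least significant halfword first; the cases above then test which
   halfwords are redundant sign copies.  */

static void
example_split_di_constant (unsigned long long c, unsigned ud[4])
{
  for (int i = 0; i < 4; i++)
    {
      ud[i] = c & 0xffff;	/* next 16-bit chunk.  */
      c >>= 16;
    }
}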
9281 /* Helper for the following function. Get rid of [r+r] memory refs
9282 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
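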
9284 static void
9285 rs6000_eliminate_indexed_memrefs (rtx operands[2])
9287 if (MEM_P (operands[0])
9288 && !REG_P (XEXP (operands[0], 0))
9289 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
9290 GET_MODE (operands[0]), false))
9291 operands[0]
9292 = replace_equiv_address (operands[0],
9293 copy_addr_to_reg (XEXP (operands[0], 0)));
9295 if (MEM_P (operands[1])
9296 && !REG_P (XEXP (operands[1], 0))
9297 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9298 GET_MODE (operands[1]), false))
9299 operands[1]
9300 = replace_equiv_address (operands[1],
9301 copy_addr_to_reg (XEXP (operands[1], 0)));
9304 /* Generate a vector of constants to permute MODE for a little-endian
9305 storage operation by swapping the two halves of a vector. */
9306 static rtvec
9307 rs6000_const_vec (machine_mode mode)
9309 int i, subparts;
9310 rtvec v;
9312 switch (mode)
9314 case E_V1TImode:
9315 subparts = 1;
9316 break;
9317 case E_V2DFmode:
9318 case E_V2DImode:
9319 subparts = 2;
9320 break;
9321 case E_V4SFmode:
9322 case E_V4SImode:
9323 subparts = 4;
9324 break;
9325 case E_V8HImode:
9326 subparts = 8;
9327 break;
9328 case E_V16QImode:
9329 subparts = 16;
9330 break;
9331 default:
9332 gcc_unreachable();
9335 v = rtvec_alloc (subparts);
9337 for (i = 0; i < subparts / 2; ++i)
9338 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9339 for (i = subparts / 2; i < subparts; ++i)
9340 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9342 return v;
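/* Illustrative sketch (not part of GCC; example_halfswap_selector is
   hypothetical): the selector built above maps element I to element
   (I + SUBPARTS/2) mod SUBPARTS, i.e. { 2, 3, 0, 1 } for V4SImode and
   { 1, 0 } for V2DImode.  */

static void
example_halfswap_selector (int subparts, int *sel)
{
  for (int i = 0; i < subparts; i++)
    sel[i] = (i + subparts / 2) % subparts;	/* swap the two halves.  */
}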
9345 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9346 store operation. */
9347 void
9348 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9350 /* Scalar permutations are easier to express in integer modes than in
9351 floating-point modes, so cast them here. We use V1TImode instead
9352 of TImode to ensure that the values don't go through GPRs. */
9353 if (FLOAT128_VECTOR_P (mode))
9355 dest = gen_lowpart (V1TImode, dest);
9356 source = gen_lowpart (V1TImode, source);
9357 mode = V1TImode;
9360 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9361 scalar. */
9362 if (mode == TImode || mode == V1TImode)
9363 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9364 GEN_INT (64))));
9365 else
9367 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9368 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9372 /* Emit a little-endian load from vector memory location SOURCE to VSX
9373 register DEST in mode MODE. The load is done with two permuting
9374 insns that represent an lxvd2x and an xxpermdi.
9375 void
9376 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9378 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9379 V1TImode). */
9380 if (mode == TImode || mode == V1TImode)
9382 mode = V2DImode;
9383 dest = gen_lowpart (V2DImode, dest);
9384 source = adjust_address (source, V2DImode, 0);
9387 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9388 rs6000_emit_le_vsx_permute (tmp, source, mode);
9389 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9392 /* Emit a little-endian store to vector memory location DEST from VSX
9393 register SOURCE in mode MODE. The store is done with two permuting
9394 insns that represent an xxpermdi and an stxvd2x.
9395 void
9396 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9398 /* This should never be called during or after LRA, because it does
9399 not re-permute the source register. It is intended only for use
9400 during expand. */
9401 gcc_assert (!lra_in_progress && !reload_completed);
9403 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9404 V1TImode). */
9405 if (mode == TImode || mode == V1TImode)
9407 mode = V2DImode;
9408 dest = adjust_address (dest, V2DImode, 0);
9409 source = gen_lowpart (V2DImode, source);
9412 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9413 rs6000_emit_le_vsx_permute (tmp, source, mode);
9414 rs6000_emit_le_vsx_permute (dest, tmp, mode);
9417 /* Emit a sequence representing a little-endian VSX load or store,
9418 moving data from SOURCE to DEST in mode MODE. This is done
9419 separately from rs6000_emit_move to ensure it is called only
9420 during expand. LE VSX loads and stores introduced later are
9421 handled with a split. The expand-time RTL generation allows
9422 us to optimize away redundant pairs of register-permutes. */
9423 void
9424 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9426 gcc_assert (!BYTES_BIG_ENDIAN
9427 && VECTOR_MEM_VSX_P (mode)
9428 && !TARGET_P9_VECTOR
9429 && !gpr_or_gpr_p (dest, source)
9430 && (MEM_P (source) ^ MEM_P (dest)));
9432 if (MEM_P (source))
9434 gcc_assert (REG_P (dest) || SUBREG_P (dest));
9435 rs6000_emit_le_vsx_load (dest, source, mode);
9437 else
9439 if (!REG_P (source))
9440 source = force_reg (mode, source);
9441 rs6000_emit_le_vsx_store (dest, source, mode);
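/* Illustrative sketch (not part of GCC; example_double_halfswap is
   hypothetical): each helper above emits two doubleword-swapping permutes
   (the lxvd2x/stxvd2x plus an xxpermdi).  Swapping twice is the identity,
   which is why redundant adjacent permute pairs can later be optimized
   away.  */

static void
example_double_halfswap (const long long in[2], long long out[2])
{
  long long tmp[2] = { in[1], in[0] };	/* first swap (lxvd2x view).  */
  out[0] = tmp[1];			/* second swap (xxpermdi)...  */
  out[1] = tmp[0];			/* ...restores the original order.  */
}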
9445 /* Return whether an SFmode or SImode move can be done without converting one
9446 mode to another. This arises when we have:
9448 (SUBREG:SF (REG:SI ...))
9449 (SUBREG:SI (REG:SF ...))
9451 and one of the values is in a floating point/vector register, where SFmode
9452 scalars are stored in DFmode format. */
9454 bool
9455 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9457 if (TARGET_ALLOW_SF_SUBREG)
9458 return true;
9460 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9461 return true;
9463 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9464 return true;
9466 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
9467 if (SUBREG_P (dest))
9469 rtx dest_subreg = SUBREG_REG (dest);
9470 rtx src_subreg = SUBREG_REG (src);
9471 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9474 return false;
9478 /* Helper function to change moves with:
9480 (SUBREG:SF (REG:SI)) and
9481 (SUBREG:SI (REG:SF))
9483 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
9484 values are stored as DFmode values in the VSX registers. We need to convert
9485 the bits before we can use a direct move or operate on the bits in the
9486 vector register as an integer type.
9488 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
9490 static bool
9491 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9493 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9494 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9495 && SUBREG_P (source) && sf_subreg_operand (source, mode))
9497 rtx inner_source = SUBREG_REG (source);
9498 machine_mode inner_mode = GET_MODE (inner_source);
9500 if (mode == SImode && inner_mode == SFmode)
9502 emit_insn (gen_movsi_from_sf (dest, inner_source));
9503 return true;
9506 if (mode == SFmode && inner_mode == SImode)
9508 emit_insn (gen_movsf_from_si (dest, inner_source));
9509 return true;
9513 return false;
9516 /* Emit a move from SOURCE to DEST in mode MODE. */
9517 void
9518 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9520 rtx operands[2];
9521 operands[0] = dest;
9522 operands[1] = source;
9524 if (TARGET_DEBUG_ADDR)
9526 fprintf (stderr,
9527 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9528 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9529 GET_MODE_NAME (mode),
9530 lra_in_progress,
9531 reload_completed,
9532 can_create_pseudo_p ());
9533 debug_rtx (dest);
9534 fprintf (stderr, "source:\n");
9535 debug_rtx (source);
9538 /* Check that we get CONST_WIDE_INT only when we should. */
9539 if (CONST_WIDE_INT_P (operands[1])
9540 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9541 gcc_unreachable ();
9543 #ifdef HAVE_AS_GNU_ATTRIBUTE
9544 /* If we use a long double type, set the flags in .gnu_attribute that say
9545 what the long double type is. This is to allow the linker's warning
9546 message for the wrong long double to be useful, even if the function does
9547 not do a call (for example, doing a 128-bit add on power9 if the long
9548 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
9549 used but are not the default long double type. */
9550 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9552 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9553 rs6000_passes_float = rs6000_passes_long_double = true;
9555 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9556 rs6000_passes_float = rs6000_passes_long_double = true;
9558 #endif
9560 /* See if we need to special-case SImode/SFmode SUBREG moves. */
9561 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9562 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9563 return;
9565 /* Check if GCC is setting up a block move that will end up using FP
9566 registers as temporaries. We must make sure this is acceptable. */
9567 if (MEM_P (operands[0])
9568 && MEM_P (operands[1])
9569 && mode == DImode
9570 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9571 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9572 && ! (rs6000_slow_unaligned_access (SImode,
9573 (MEM_ALIGN (operands[0]) > 32
9574 ? 32 : MEM_ALIGN (operands[0])))
9575 || rs6000_slow_unaligned_access (SImode,
9576 (MEM_ALIGN (operands[1]) > 32
9577 ? 32 : MEM_ALIGN (operands[1]))))
9578 && ! MEM_VOLATILE_P (operands [0])
9579 && ! MEM_VOLATILE_P (operands [1]))
9581 emit_move_insn (adjust_address (operands[0], SImode, 0),
9582 adjust_address (operands[1], SImode, 0));
9583 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
9584 adjust_address (copy_rtx (operands[1]), SImode, 4));
9585 return;
9588 if (can_create_pseudo_p () && MEM_P (operands[0])
9589 && !gpc_reg_operand (operands[1], mode))
9590 operands[1] = force_reg (mode, operands[1]);
9592 /* Recognize the case where operand[1] is a reference to thread-local
9593 data and load its address to a register. */
9594 if (tls_referenced_p (operands[1]))
9596 enum tls_model model;
9597 rtx tmp = operands[1];
9598 rtx addend = NULL;
9600 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
9602 addend = XEXP (XEXP (tmp, 0), 1);
9603 tmp = XEXP (XEXP (tmp, 0), 0);
9606 gcc_assert (SYMBOL_REF_P (tmp));
9607 model = SYMBOL_REF_TLS_MODEL (tmp);
9608 gcc_assert (model != 0);
9610 tmp = rs6000_legitimize_tls_address (tmp, model);
9611 if (addend)
9613 tmp = gen_rtx_PLUS (mode, tmp, addend);
9614 tmp = force_operand (tmp, operands[0]);
9616 operands[1] = tmp;
9619 /* 128-bit constant floating-point values on Darwin should really be loaded
9620 as two parts. However, this premature splitting is a problem when DFmode
9621 values can go into Altivec registers. */
9622 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
9623 && !reg_addr[DFmode].scalar_in_vmx_p)
9625 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9626 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9627 DFmode);
9628 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9629 GET_MODE_SIZE (DFmode)),
9630 simplify_gen_subreg (DFmode, operands[1], mode,
9631 GET_MODE_SIZE (DFmode)),
9632 DFmode);
9633 return;
9636 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9637 p1:SD) if p1 is not of floating point class and p0 is spilled, as
9638 there is no analogous movsd_store for this. */
9639 if (lra_in_progress && mode == DDmode
9640 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9641 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9642 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
9643 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9645 enum reg_class cl;
9646 int regno = REGNO (SUBREG_REG (operands[1]));
9648 if (!HARD_REGISTER_NUM_P (regno))
9650 cl = reg_preferred_class (regno);
9651 regno = reg_renumber[regno];
9652 if (regno < 0)
9653 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9655 if (regno >= 0 && ! FP_REGNO_P (regno))
9657 mode = SDmode;
9658 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9659 operands[1] = SUBREG_REG (operands[1]);
9662 if (lra_in_progress
9663 && mode == SDmode
9664 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
9665 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9666 && (REG_P (operands[1])
9667 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
9669 int regno = reg_or_subregno (operands[1]);
9670 enum reg_class cl;
9672 if (!HARD_REGISTER_NUM_P (regno))
9674 cl = reg_preferred_class (regno);
9675 gcc_assert (cl != NO_REGS);
9676 regno = reg_renumber[regno];
9677 if (regno < 0)
9678 regno = ira_class_hard_regs[cl][0];
9680 if (FP_REGNO_P (regno))
9682 if (GET_MODE (operands[0]) != DDmode)
9683 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9684 emit_insn (gen_movsd_store (operands[0], operands[1]));
9686 else if (INT_REGNO_P (regno))
9687 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9688 else
9689 gcc_unreachable();
9690 return;
9692 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9693 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
9694 there is no analogous movsd_load for this. */
9695 if (lra_in_progress && mode == DDmode
9696 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
9697 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9698 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9699 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9701 enum reg_class cl;
9702 int regno = REGNO (SUBREG_REG (operands[0]));
9704 if (!HARD_REGISTER_NUM_P (regno))
9706 cl = reg_preferred_class (regno);
9707 regno = reg_renumber[regno];
9708 if (regno < 0)
9709 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9711 if (regno >= 0 && ! FP_REGNO_P (regno))
9713 mode = SDmode;
9714 operands[0] = SUBREG_REG (operands[0]);
9715 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9718 if (lra_in_progress
9719 && mode == SDmode
9720 && (REG_P (operands[0])
9721 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
9722 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
9723 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9725 int regno = reg_or_subregno (operands[0]);
9726 enum reg_class cl;
9728 if (!HARD_REGISTER_NUM_P (regno))
9730 cl = reg_preferred_class (regno);
9731 gcc_assert (cl != NO_REGS);
9732 regno = reg_renumber[regno];
9733 if (regno < 0)
9734 regno = ira_class_hard_regs[cl][0];
9736 if (FP_REGNO_P (regno))
9738 if (GET_MODE (operands[1]) != DDmode)
9739 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9740 emit_insn (gen_movsd_load (operands[0], operands[1]));
9742 else if (INT_REGNO_P (regno))
9743 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9744 else
9745 gcc_unreachable();
9746 return;
9749 /* FIXME: In the long term, this switch statement should go away
9750 and be replaced by a sequence of tests based on things like
9751 mode == Pmode. */
9752 switch (mode)
9754 case E_HImode:
9755 case E_QImode:
9756 if (CONSTANT_P (operands[1])
9757 && !CONST_INT_P (operands[1]))
9758 operands[1] = force_const_mem (mode, operands[1]);
9759 break;
9761 case E_TFmode:
9762 case E_TDmode:
9763 case E_IFmode:
9764 case E_KFmode:
9765 if (FLOAT128_2REG_P (mode))
9766 rs6000_eliminate_indexed_memrefs (operands);
9767 /* fall through */
9769 case E_DFmode:
9770 case E_DDmode:
9771 case E_SFmode:
9772 case E_SDmode:
9773 if (CONSTANT_P (operands[1])
9774 && ! easy_fp_constant (operands[1], mode))
9775 operands[1] = force_const_mem (mode, operands[1]);
9776 break;
9778 case E_V16QImode:
9779 case E_V8HImode:
9780 case E_V4SFmode:
9781 case E_V4SImode:
9782 case E_V2DFmode:
9783 case E_V2DImode:
9784 case E_V1TImode:
9785 if (CONSTANT_P (operands[1])
9786 && !easy_vector_constant (operands[1], mode))
9787 operands[1] = force_const_mem (mode, operands[1]);
9788 break;
9790 case E_SImode:
9791 case E_DImode:
9792 /* Use the default pattern for the address of ELF small data. */
9793 if (TARGET_ELF
9794 && mode == Pmode
9795 && DEFAULT_ABI == ABI_V4
9796 && (SYMBOL_REF_P (operands[1])
9797 || GET_CODE (operands[1]) == CONST)
9798 && small_data_operand (operands[1], mode))
9800 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9801 return;
9804 /* Use the default pattern for loading up PC-relative addresses. */
9805 if (TARGET_PCREL && mode == Pmode
9806 && pcrel_local_or_external_address (operands[1], Pmode))
9808 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9809 return;
9812 if (DEFAULT_ABI == ABI_V4
9813 && mode == Pmode && mode == SImode
9814 && flag_pic == 1 && got_operand (operands[1], mode))
9816 emit_insn (gen_movsi_got (operands[0], operands[1]));
9817 return;
9820 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9821 && TARGET_NO_TOC_OR_PCREL
9822 && ! flag_pic
9823 && mode == Pmode
9824 && CONSTANT_P (operands[1])
9825 && GET_CODE (operands[1]) != HIGH
9826 && !CONST_INT_P (operands[1]))
9828 rtx target = (!can_create_pseudo_p ()
9829 ? operands[0]
9830 : gen_reg_rtx (mode));
9832 /* If this is a function address on -mcall-aixdesc,
9833 convert it to the address of the descriptor. */
9834 if (DEFAULT_ABI == ABI_AIX
9835 && SYMBOL_REF_P (operands[1])
9836 && XSTR (operands[1], 0)[0] == '.')
9838 const char *name = XSTR (operands[1], 0);
9839 rtx new_ref;
9840 while (*name == '.')
9841 name++;
9842 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9843 CONSTANT_POOL_ADDRESS_P (new_ref)
9844 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9845 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9846 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9847 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9848 operands[1] = new_ref;
9851 if (DEFAULT_ABI == ABI_DARWIN)
9853 #if TARGET_MACHO
9854 /* This is not PIC code, but could require the subset of
9855 indirections used by mdynamic-no-pic. */
9856 if (MACHO_DYNAMIC_NO_PIC_P)
9858 /* Take care of any required data indirection. */
9859 operands[1] = rs6000_machopic_legitimize_pic_address (
9860 operands[1], mode, operands[0]);
9861 if (operands[0] != operands[1])
9862 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9863 return;
9865 #endif
9866 emit_insn (gen_macho_high (Pmode, target, operands[1]));
9867 emit_insn (gen_macho_low (Pmode, operands[0],
9868 target, operands[1]));
9869 return;
9872 emit_insn (gen_elf_high (target, operands[1]));
9873 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9874 return;
9877 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9878 and we have put it in the TOC, we just need to make a TOC-relative
9879 reference to it. */
9880 if (TARGET_TOC
9881 && SYMBOL_REF_P (operands[1])
9882 && use_toc_relative_ref (operands[1], mode))
9883 operands[1] = create_TOC_reference (operands[1], operands[0]);
9884 else if (mode == Pmode
9885 && CONSTANT_P (operands[1])
9886 && GET_CODE (operands[1]) != HIGH
9887 && ((REG_P (operands[0])
9888 && FP_REGNO_P (REGNO (operands[0])))
9889 || !CONST_INT_P (operands[1])
9890 || (num_insns_constant (operands[1], mode)
9891 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9892 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
9893 && (TARGET_CMODEL == CMODEL_SMALL
9894 || can_create_pseudo_p ()
9895 || (REG_P (operands[0])
9896 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9899 #if TARGET_MACHO
9900 /* Darwin uses a special PIC legitimizer. */
9901 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9903 operands[1] =
9904 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9905 operands[0]);
9906 if (operands[0] != operands[1])
9907 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9908 return;
9910 #endif
9912 /* If we are to limit the number of things we put in the TOC and
9913 this is a symbol plus a constant we can add in one insn,
9914 just put the symbol in the TOC and add the constant. */
9915 if (GET_CODE (operands[1]) == CONST
9916 && TARGET_NO_SUM_IN_TOC
9917 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9918 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9919 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9920 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
9921 && ! side_effects_p (operands[0]))
9923 rtx sym =
9924 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9925 rtx other = XEXP (XEXP (operands[1], 0), 1);
9927 sym = force_reg (mode, sym);
9928 emit_insn (gen_add3_insn (operands[0], sym, other));
9929 return;
9932 operands[1] = force_const_mem (mode, operands[1]);
9934 if (TARGET_TOC
9935 && SYMBOL_REF_P (XEXP (operands[1], 0))
9936 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
9938 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9939 operands[0]);
9940 operands[1] = gen_const_mem (mode, tocref);
9941 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9944 break;
9946 case E_TImode:
9947 if (!VECTOR_MEM_VSX_P (TImode))
9948 rs6000_eliminate_indexed_memrefs (operands);
9949 break;
9951 case E_PTImode:
9952 rs6000_eliminate_indexed_memrefs (operands);
9953 break;
9955 default:
9956 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9959 /* Above, we may have called force_const_mem which may have returned
9960 an invalid address. If we can, fix this up; otherwise, reload will
9961 have to deal with it. */
9962 if (MEM_P (operands[1]))
9963 operands[1] = validize_mem (operands[1]);
9965 emit_insn (gen_rtx_SET (operands[0], operands[1]));
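/* Illustrative sketch (not part of GCC; example_split_unaligned_copy is
   hypothetical): the slow-unaligned DImode case above replaces one 8-byte
   memory-to-memory move with two 4-byte moves at offsets 0 and 4, which
   keeps the block move out of FP registers.  */

static void
example_split_unaligned_copy (char *dst, const char *src)
{
  __builtin_memcpy (dst, src, 4);	    /* SImode move at offset 0.  */
  __builtin_memcpy (dst + 4, src + 4, 4);   /* SImode move at offset 4.  */
}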
9969 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
9970 static void
9971 init_float128_ibm (machine_mode mode)
9973 if (!TARGET_XL_COMPAT)
9975 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
9976 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
9977 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
9978 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
9980 if (!TARGET_HARD_FLOAT)
9982 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
9983 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
9984 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
9985 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
9986 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
9987 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
9988 set_optab_libfunc (le_optab, mode, "__gcc_qle");
9989 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
9991 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
9992 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
9993 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
9994 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
9995 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
9996 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
9997 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
9998 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
10001 else
10003 set_optab_libfunc (add_optab, mode, "_xlqadd");
10004 set_optab_libfunc (sub_optab, mode, "_xlqsub");
10005 set_optab_libfunc (smul_optab, mode, "_xlqmul");
10006 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
10009 /* Add various conversions for IFmode to use the traditional TFmode
10010 names. */
10011 if (mode == IFmode)
10013 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
10014 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
10015 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
10016 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
10017 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
10018 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
10020 if (TARGET_POWERPC64)
10022 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
10023 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
10024 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
10025 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
10030 /* Create a decl for either complex long double multiply or complex long double
10031 divide when long double is IEEE 128-bit floating point. We can't use
10032 __multc3 and __divtc3 because the original long double using IBM extended
10033 double used those names. The complex multiply/divide functions are encoded
10034 as builtin functions with a complex result and 4 scalar inputs. */
10036 static void
10037 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
10039 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
10040 name, NULL_TREE);
10042 set_builtin_decl (fncode, fndecl, true);
10044 if (TARGET_DEBUG_BUILTIN)
10045 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
10047 return;
10050 /* Set up IEEE 128-bit floating point routines. Use different names if the
10051 arguments can be passed in a vector register. The historical PowerPC
10052 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
10053 continue to use that if we aren't using vector registers to pass IEEE
10054 128-bit floating point. */
10056 static void
10057 init_float128_ieee (machine_mode mode)
10059 if (FLOAT128_VECTOR_P (mode))
10061 static bool complex_muldiv_init_p = false;
10063 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
10064 we have clone or target attributes, this will be called a second
10065 time. We want to create the built-in function only once. */
10066 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
10068 complex_muldiv_init_p = true;
10069 built_in_function fncode_mul =
10070 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
10071 - MIN_MODE_COMPLEX_FLOAT);
10072 built_in_function fncode_div =
10073 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
10074 - MIN_MODE_COMPLEX_FLOAT);
10076 tree fntype = build_function_type_list (complex_long_double_type_node,
10077 long_double_type_node,
10078 long_double_type_node,
10079 long_double_type_node,
10080 long_double_type_node,
10081 NULL_TREE);
10083 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
10084 create_complex_muldiv ("__divkc3", fncode_div, fntype);
10087 set_optab_libfunc (add_optab, mode, "__addkf3");
10088 set_optab_libfunc (sub_optab, mode, "__subkf3");
10089 set_optab_libfunc (neg_optab, mode, "__negkf2");
10090 set_optab_libfunc (smul_optab, mode, "__mulkf3");
10091 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
10092 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
10093 set_optab_libfunc (abs_optab, mode, "__abskf2");
10094 set_optab_libfunc (powi_optab, mode, "__powikf2");
10096 set_optab_libfunc (eq_optab, mode, "__eqkf2");
10097 set_optab_libfunc (ne_optab, mode, "__nekf2");
10098 set_optab_libfunc (gt_optab, mode, "__gtkf2");
10099 set_optab_libfunc (ge_optab, mode, "__gekf2");
10100 set_optab_libfunc (lt_optab, mode, "__ltkf2");
10101 set_optab_libfunc (le_optab, mode, "__lekf2");
10102 set_optab_libfunc (unord_optab, mode, "__unordkf2");
10104 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
10105 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
10106 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
10107 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
10109 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
10110 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10111 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
10113 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
10114 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
10115 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
10117 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
10118 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
10119 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
10120 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
10121 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
10122 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
10124 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
10125 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
10126 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
10127 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
10129 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
10130 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
10131 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
10132 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
10134 if (TARGET_POWERPC64)
10136 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
10137 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
10138 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
10139 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
10143 else
10145 set_optab_libfunc (add_optab, mode, "_q_add");
10146 set_optab_libfunc (sub_optab, mode, "_q_sub");
10147 set_optab_libfunc (neg_optab, mode, "_q_neg");
10148 set_optab_libfunc (smul_optab, mode, "_q_mul");
10149 set_optab_libfunc (sdiv_optab, mode, "_q_div");
10150 if (TARGET_PPC_GPOPT)
10151 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
10153 set_optab_libfunc (eq_optab, mode, "_q_feq");
10154 set_optab_libfunc (ne_optab, mode, "_q_fne");
10155 set_optab_libfunc (gt_optab, mode, "_q_fgt");
10156 set_optab_libfunc (ge_optab, mode, "_q_fge");
10157 set_optab_libfunc (lt_optab, mode, "_q_flt");
10158 set_optab_libfunc (le_optab, mode, "_q_fle");
10160 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
10161 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
10162 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
10163 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
10164 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
10165 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
10166 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
10167 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
10171 static void
10172 rs6000_init_libfuncs (void)
10174 /* __float128 support. */
10175 if (TARGET_FLOAT128_TYPE)
10177 init_float128_ibm (IFmode);
10178 init_float128_ieee (KFmode);
10181 /* AIX/Darwin/64-bit Linux quad floating point routines. */
10182 if (TARGET_LONG_DOUBLE_128)
10184 if (!TARGET_IEEEQUAD)
10185 init_float128_ibm (TFmode);
10187 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
10188 else
10189 init_float128_ieee (TFmode);
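/* Illustrative sketch (not part of GCC; example_float128_add is
   hypothetical): once the KFmode optabs above are registered, an IEEE
   128-bit addition like this lowers to a call to __addkf3 on targets
   without hardware quad-precision instructions.  */

static __float128
example_float128_add (__float128 a, __float128 b)
{
  return a + b;		/* emitted as __addkf3 (a, b) via the add_optab entry.  */
}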
10193 /* Emit a potentially record-form instruction, setting DST from SRC.
10194 If DOT is 0, that is all; otherwise, set CCREG to the result of the
10195 signed comparison of DST with zero. If DOT is 1, the generated RTL
10196 doesn't care about the DST result; if DOT is 2, it does. If CCREG
10197 is CR0, do a single dot insn (as a PARALLEL); otherwise, do a SET and
10198 a separate COMPARE. */
10200 void
10201 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
10203 if (dot == 0)
10205 emit_move_insn (dst, src);
10206 return;
10209 if (cc_reg_not_cr0_operand (ccreg, CCmode))
10211 emit_move_insn (dst, src);
10212 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
10213 return;
10216 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
10217 if (dot == 1)
10219 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
10220 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
10222 else
10224 rtx set = gen_rtx_SET (dst, src);
10225 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
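/* Illustrative sketch (not part of GCC; example_dot_shape is hypothetical):
   the three RTL shapes emitted above when CCREG is CR0.  */

static const char *
example_dot_shape (int dot)
{
  switch (dot)
    {
    case 0:
      return "(set dst src)";
    case 1:	/* record form, DST value unused.  */
      return "(parallel [(set cc (compare src 0)) (clobber dst)])";
    case 2:	/* record form, DST value used.  */
      return "(parallel [(set cc (compare src 0)) (set dst src)])";
    default:
      return "invalid";
    }
}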
10230 /* A validation routine: say whether CODE, a condition code, and MODE
10231 match. The other alternatives either don't make sense or should
10232 never be generated. */
10234 void
10235 validate_condition_mode (enum rtx_code code, machine_mode mode)
10237 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
10238 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
10239 && GET_MODE_CLASS (mode) == MODE_CC);
10241 /* These don't make sense. */
10242 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
10243 || mode != CCUNSmode);
10245 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
10246 || mode == CCUNSmode);
10248 gcc_assert (mode == CCFPmode
10249 || (code != ORDERED && code != UNORDERED
10250 && code != UNEQ && code != LTGT
10251 && code != UNGT && code != UNLT
10252 && code != UNGE && code != UNLE));
10254 /* These are invalid; the information is not there. */
10255 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
10259 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
10260 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
10261 not zero, store there the bit offset (counted from the right) where
10262 the single stretch of 1 bits begins; and similarly for B, the bit
10263 offset where it ends. */
10265 bool
10266 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
10268 unsigned HOST_WIDE_INT val = INTVAL (mask);
10269 unsigned HOST_WIDE_INT bit;
10270 int nb, ne;
10271 int n = GET_MODE_PRECISION (mode);
10273 if (mode != DImode && mode != SImode)
10274 return false;
10276 if (INTVAL (mask) >= 0)
10278 bit = val & -val;
10279 ne = exact_log2 (bit);
10280 nb = exact_log2 (val + bit);
10282 else if (val + 1 == 0)
10284 nb = n;
10285 ne = 0;
10287 else if (val & 1)
10289 val = ~val;
10290 bit = val & -val;
10291 nb = exact_log2 (bit);
10292 ne = exact_log2 (val + bit);
10294 else
10296 bit = val & -val;
10297 ne = exact_log2 (bit);
10298 if (val + bit == 0)
10299 nb = n;
10300 else
10301 nb = 0;
10304 nb--;
10306 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
10307 return false;
10309 if (b)
10310 *b = nb;
10311 if (e)
10312 *e = ne;
10314 return true;
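/* Illustrative sketch (not part of GCC; example_is_contiguous_ones is
   hypothetical): for the non-wrapping case the tests above amount to
   "adding the lowest set bit clears the entire run", e.g. 0x0ff0 passes
   while 0x0f0f does not (two runs).  */

static int
example_is_contiguous_ones (unsigned long long val)
{
  unsigned long long low = val & -val;	/* isolate the lowest set bit.  */
  return val != 0 && ((val + low) & val) == 0;
}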
10317 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
10318 or rldicr instruction, to implement an AND with it in mode MODE. */
10320 bool
10321 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
10323 int nb, ne;
10325 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10326 return false;
10328 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
10329 does not wrap. */
10330 if (mode == DImode)
10331 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
10333 /* For SImode, rlwinm can do everything. */
10334 if (mode == SImode)
10335 return (nb < 32 && ne < 32);
10337 return false;
10340 /* Return the instruction template for an AND with mask in mode MODE, with
10341 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10343 const char *
10344 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
10346 int nb, ne;
10348 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
10349 gcc_unreachable ();
10351 if (mode == DImode && ne == 0)
10353 operands[3] = GEN_INT (63 - nb);
10354 if (dot)
10355 return "rldicl. %0,%1,0,%3";
10356 return "rldicl %0,%1,0,%3";
10359 if (mode == DImode && nb == 63)
10361 operands[3] = GEN_INT (63 - ne);
10362 if (dot)
10363 return "rldicr. %0,%1,0,%3";
10364 return "rldicr %0,%1,0,%3";
10367 if (nb < 32 && ne < 32)
10369 operands[3] = GEN_INT (31 - nb);
10370 operands[4] = GEN_INT (31 - ne);
10371 if (dot)
10372 return "rlwinm. %0,%1,0,%3,%4";
10373 return "rlwinm %0,%1,0,%3,%4";
10376 gcc_unreachable ();
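/* Illustrative sketch (not part of GCC; example_rlwinm_fields is
   hypothetical): the SImode branch above converts a mask's bit offsets NB
   (highest 1) and NE (lowest 1) to rlwinm's big-endian MB/ME fields.
   E.g. mask 0x0ff0 has nb = 11, ne = 4, giving "rlwinm %0,%1,0,20,27".  */

static void
example_rlwinm_fields (int nb, int ne, int *mb, int *me)
{
  *mb = 31 - nb;	/* mask begin, big-endian bit numbering.  */
  *me = 31 - ne;	/* mask end.  */
}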
10379 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
10380 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
10381 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
10383 bool
10384 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
10386 int nb, ne;
10388 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10389 return false;
10391 int n = GET_MODE_PRECISION (mode);
10392 int sh = -1;
10394 if (CONST_INT_P (XEXP (shift, 1)))
10396 sh = INTVAL (XEXP (shift, 1));
10397 if (sh < 0 || sh >= n)
10398 return false;
10401 rtx_code code = GET_CODE (shift);
10403 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10404 if (sh == 0)
10405 code = ROTATE;
10407 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10408 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10409 code = ASHIFT;
10410 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10412 code = LSHIFTRT;
10413 sh = n - sh;
10416 /* DImode rotates need rld*. */
10417 if (mode == DImode && code == ROTATE)
10418 return (nb == 63 || ne == 0 || ne == sh);
10420 /* SImode rotates need rlw*. */
10421 if (mode == SImode && code == ROTATE)
10422 return (nb < 32 && ne < 32 && sh < 32);
10424 /* Wrap-around masks are only okay for rotates. */
10425 if (ne > nb)
10426 return false;
10428 /* Variable shifts are only okay for rotates. */
10429 if (sh < 0)
10430 return false;
10432 /* Don't allow ASHIFT if the mask is wrong for that. */
10433 if (code == ASHIFT && ne < sh)
10434 return false;
10436 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
10437 if the mask is wrong for that. */
10438 if (nb < 32 && ne < 32 && sh < 32
10439 && !(code == LSHIFTRT && nb >= 32 - sh))
10440 return true;
10442 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
10443 if the mask is wrong for that. */
10444 if (code == LSHIFTRT)
10445 sh = 64 - sh;
10446 if (nb == 63 || ne == 0 || ne == sh)
10447 return !(code == LSHIFTRT && nb >= sh);
10449 return false;
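/* Illustrative sketch (not part of GCC; example_normalize_rotate is
   hypothetical): the normalization above, assuming a constant shift amount.
   A rotate left by SH acts as a plain left shift when the mask ignores the
   wrapped-around low bits (NE >= SH), and as a right shift by N-SH when the
   mask sits entirely below bit SH (NB < SH).  Returns 0 for rotate, 1 for
   ashift, 2 for lshiftrt.  */

static int
example_normalize_rotate (int n, int sh, int nb, int ne, int *new_sh)
{
  *new_sh = sh;
  if (nb >= ne && ne >= sh)
    return 1;			/* behaves as ASHIFT by SH.  */
  if (nb >= ne && nb < sh)
    {
      *new_sh = n - sh;		/* behaves as LSHIFTRT by N-SH.  */
      return 2;
    }
  return 0;			/* keep it as a rotate.  */
}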
10452 /* Return the instruction template for a shift with mask in mode MODE, with
10453 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10455 const char *
10456 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
10458 int nb, ne;
10460 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10461 gcc_unreachable ();
10463 if (mode == DImode && ne == 0)
10465 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10466 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
10467 operands[3] = GEN_INT (63 - nb);
10468 if (dot)
10469 return "rld%I2cl. %0,%1,%2,%3";
10470 return "rld%I2cl %0,%1,%2,%3";
10473 if (mode == DImode && nb == 63)
10475 operands[3] = GEN_INT (63 - ne);
10476 if (dot)
10477 return "rld%I2cr. %0,%1,%2,%3";
10478 return "rld%I2cr %0,%1,%2,%3";
10481 if (mode == DImode
10482 && GET_CODE (operands[4]) != LSHIFTRT
10483 && CONST_INT_P (operands[2])
10484 && ne == INTVAL (operands[2]))
10486 operands[3] = GEN_INT (63 - nb);
10487 if (dot)
10488 return "rld%I2c. %0,%1,%2,%3";
10489 return "rld%I2c %0,%1,%2,%3";
10492 if (nb < 32 && ne < 32)
10494 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10495 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10496 operands[3] = GEN_INT (31 - nb);
10497 operands[4] = GEN_INT (31 - ne);
10498 /* This insn can also be a 64-bit rotate with mask that really makes
10499 it just a shift right (with mask); the %h below are to adjust for
10500 that situation (shift count is >= 32 in that case). */
10501 if (dot)
10502 return "rlw%I2nm. %0,%1,%h2,%3,%4";
10503 return "rlw%I2nm %0,%1,%h2,%3,%4";
10506 gcc_unreachable ();
10509 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
10510 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
10511 ASHIFT, or LSHIFTRT) in mode MODE. */
10513 bool
10514 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
10516 int nb, ne;
10518 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
10519 return false;
10521 int n = GET_MODE_PRECISION (mode);
10523 int sh = INTVAL (XEXP (shift, 1));
10524 if (sh < 0 || sh >= n)
10525 return false;
10527 rtx_code code = GET_CODE (shift);
10529 /* Convert any shift by 0 to a rotate, to simplify the code below. */
10530 if (sh == 0)
10531 code = ROTATE;
10533 /* Convert rotate to simple shift if we can, to make analysis simpler. */
10534 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
10535 code = ASHIFT;
10536 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
10538 code = LSHIFTRT;
10539 sh = n - sh;
10542 /* DImode rotates need rldimi. */
10543 if (mode == DImode && code == ROTATE)
10544 return (ne == sh);
10546 /* SImode rotates need rlwimi. */
10547 if (mode == SImode && code == ROTATE)
10548 return (nb < 32 && ne < 32 && sh < 32);
10550 /* Wrap-around masks are only okay for rotates. */
10551 if (ne > nb)
10552 return false;
10554 /* Don't allow ASHIFT if the mask is wrong for that. */
10555 if (code == ASHIFT && ne < sh)
10556 return false;
10558 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
10559 if the mask is wrong for that. */
10560 if (nb < 32 && ne < 32 && sh < 32
10561 && !(code == LSHIFTRT && nb >= 32 - sh))
10562 return true;
10564 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
10565 if the mask is wrong for that. */
10566 if (code == LSHIFTRT)
10567 sh = 64 - sh;
10568 if (ne == sh)
10569 return !(code == LSHIFTRT && nb >= sh);
10571 return false;
10574 /* Return the instruction template for an insert with mask in mode MODE, with
10575 operands OPERANDS. If DOT is true, make it a record-form instruction. */
10577 const char *
10578 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
10580 int nb, ne;
10582 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
10583 gcc_unreachable ();
10585 /* Prefer rldimi because rlwimi is cracked. */
10586 if (TARGET_POWERPC64
10587 && (!dot || mode == DImode)
10588 && GET_CODE (operands[4]) != LSHIFTRT
10589 && ne == INTVAL (operands[2]))
10591 operands[3] = GEN_INT (63 - nb);
10592 if (dot)
10593 return "rldimi. %0,%1,%2,%3";
10594 return "rldimi %0,%1,%2,%3";
10597 if (nb < 32 && ne < 32)
10599 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
10600 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
10601 operands[3] = GEN_INT (31 - nb);
10602 operands[4] = GEN_INT (31 - ne);
10603 if (dot)
10604 return "rlwimi. %0,%1,%2,%3,%4";
10605 return "rlwimi %0,%1,%2,%3,%4";
10608 gcc_unreachable ();
10611 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
10612 using two machine instructions. */
10614 bool
10615 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
10617 /* There are two kinds of AND we can handle with two insns:
10618 1) those we can do with two rl* insns;
10619 2) ori[s];xori[s].
10621 We do not handle that last case yet. */
10623 /* If there is just one stretch of ones, we can do it. */
10624 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
10625 return true;
10627 /* Otherwise, fill in the lowest "hole"; if we can do the result with
10628 one insn, we can do the whole thing with two. */
10629 unsigned HOST_WIDE_INT val = INTVAL (c);
10630 unsigned HOST_WIDE_INT bit1 = val & -val;
10631 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10632 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10633 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10634 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
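/* Illustrative sketch (not part of GCC; example_fill_lowest_hole is
   hypothetical): the bit algebra above.  For val = 0x2cc (0b1011001100)
   the hole between the two lowest runs (bits 4-5) is filled, giving
   0x2fc (0b1011111100); the filled value is then checked as a mask.  */

static unsigned long long
example_fill_lowest_hole (unsigned long long val)
{
  unsigned long long bit1 = val & -val;		 /* lowest set bit.  */
  unsigned long long bit2 = (val + bit1) & ~val; /* lowest bit of the hole.  */
  unsigned long long val1 = (val + bit1) & val;	 /* val without its lowest run.  */
  unsigned long long bit3 = val1 & -val1;	 /* first set bit above the hole.  */
  return val + bit3 - bit2;			 /* hole filled with ones.  */
}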
10637 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
10638 If EXPAND is true, split rotate-and-mask instructions we generate to
10639 their constituent parts as well (this is used during expand); if DOT
10640 is 1, make the last insn a record-form instruction clobbering the
10641 destination GPR and setting the CC reg (from operands[3]); if 2, set
10642 that GPR as well as the CC reg. */
10644 void
10645 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
10647 gcc_assert (!(expand && dot));
10649 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
10651 /* If it is one stretch of ones, it is DImode; shift left, mask, then
10652 shift right. This generates better code than doing the masks without
10653 shifts, or shifting first right and then left. */
10654 int nb, ne;
10655 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
10657 gcc_assert (mode == DImode);
10659 int shift = 63 - nb;
10660 if (expand)
10662 rtx tmp1 = gen_reg_rtx (DImode);
10663 rtx tmp2 = gen_reg_rtx (DImode);
10664 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
10665 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
10666 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
10668 else
10670 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
10671 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
10672 emit_move_insn (operands[0], tmp);
10673 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
10674 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10676 return;
10679 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
10680 that does the rest. */
10681 unsigned HOST_WIDE_INT bit1 = val & -val;
10682 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
10683 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
10684 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
10686 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
10687 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
10689 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
10691 /* Two "no-rotate"-and-mask instructions, for SImode. */
10692 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
10694 gcc_assert (mode == SImode);
10696 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10697 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
10698 emit_move_insn (reg, tmp);
10699 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10700 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10701 return;
10704 gcc_assert (mode == DImode);
10706 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
10707 insns; we have to do the first in SImode, because it wraps. */
10708 if (mask2 <= 0xffffffff
10709 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
10711 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
10712 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
10713 GEN_INT (mask1));
10714 rtx reg_low = gen_lowpart (SImode, reg);
10715 emit_move_insn (reg_low, tmp);
10716 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
10717 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10718 return;
10721 /* Two rld* insns: rotate, clear the hole in the middle (which now is
10722 at the top end), rotate back and clear the other hole. */
10723 int right = exact_log2 (bit3);
10724 int left = 64 - right;
10726 /* Rotate the mask too. */
10727 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
10729 if (expand)
10731 rtx tmp1 = gen_reg_rtx (DImode);
10732 rtx tmp2 = gen_reg_rtx (DImode);
10733 rtx tmp3 = gen_reg_rtx (DImode);
10734 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
10735 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
10736 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
10737 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
10739 else
10741 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
10742 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
10743 emit_move_insn (operands[0], tmp);
10744 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
10745 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
10746 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
10750 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
10751 for lfq and stfq insns iff the registers are hard registers. */
10753 int
10754 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
10756 /* We might have been passed a SUBREG. */
10757 if (!REG_P (reg1) || !REG_P (reg2))
10758 return 0;
10760 /* We might have been passed non-floating-point registers. */
10761 if (!FP_REGNO_P (REGNO (reg1))
10762 || !FP_REGNO_P (REGNO (reg2)))
10763 return 0;
10765 return (REGNO (reg1) == REGNO (reg2) - 1);
10768 /* Return 1 if addr1 and addr2 are suitable for an lfq or stfq insn.
10769 addr1 and addr2 must be in consecutive memory locations
10770 (addr2 == addr1 + 8). */
10772 int
10773 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
10775 rtx addr1, addr2;
10776 unsigned int reg1, reg2;
10777 int offset1, offset2;
10779 /* The mems cannot be volatile. */
10780 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
10781 return 0;
10783 addr1 = XEXP (mem1, 0);
10784 addr2 = XEXP (mem2, 0);
10786 /* Extract an offset (if used) from the first addr. */
10787 if (GET_CODE (addr1) == PLUS)
10789 /* If not a REG, return zero. */
10790 if (!REG_P (XEXP (addr1, 0)))
10791 return 0;
10792 else
10794 reg1 = REGNO (XEXP (addr1, 0));
10795 /* The offset must be constant! */
10796 if (!CONST_INT_P (XEXP (addr1, 1)))
10797 return 0;
10798 offset1 = INTVAL (XEXP (addr1, 1));
10801 else if (!REG_P (addr1))
10802 return 0;
10803 else
10805 reg1 = REGNO (addr1);
10806 /* This was a simple (mem (reg)) expression. Offset is 0. */
10807 offset1 = 0;
10810 /* And now for the second addr. */
10811 if (GET_CODE (addr2) == PLUS)
10813 /* If not a REG, return zero. */
10814 if (!REG_P (XEXP (addr2, 0)))
10815 return 0;
10816 else
10818 reg2 = REGNO (XEXP (addr2, 0));
10819 /* The offset must be constant. */
10820 if (!CONST_INT_P (XEXP (addr2, 1)))
10821 return 0;
10822 offset2 = INTVAL (XEXP (addr2, 1));
10825 else if (!REG_P (addr2))
10826 return 0;
10827 else
10829 reg2 = REGNO (addr2);
10830 /* This was a simple (mem (reg)) expression. Offset is 0. */
10831 offset2 = 0;
10834 /* Both of these must have the same base register. */
10835 if (reg1 != reg2)
10836 return 0;
10838 /* The offset for the second addr must be 8 more than the first addr. */
10839 if (offset2 != offset1 + 8)
10840 return 0;
10842 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
10843 instructions. */
10844 return 1;
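/* Illustrative sketch (not part of GCC; example_quad_peep_offsets is
   hypothetical): the address test above reduces to "same base register,
   second offset exactly 8 beyond the first", i.e. two adjacent
   doublewords suitable for a single lfq/stfq.  */

static int
example_quad_peep_offsets (unsigned base1, long off1,
			   unsigned base2, long off2)
{
  return base1 == base2 && off2 == off1 + 8;
}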
10847 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
10848 need to use DDmode, in all other cases we can use the same mode. */
10849 static machine_mode
10850 rs6000_secondary_memory_needed_mode (machine_mode mode)
10852 if (lra_in_progress && mode == SDmode)
10853 return DDmode;
10854 return mode;
10857 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
10858 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
10859 only work on the traditional altivec registers, note if an altivec register
10860 was chosen. */
10862 static enum rs6000_reg_type
10863 register_to_reg_type (rtx reg, bool *is_altivec)
10865 HOST_WIDE_INT regno;
10866 enum reg_class rclass;
10868 if (SUBREG_P (reg))
10869 reg = SUBREG_REG (reg);
10871 if (!REG_P (reg))
10872 return NO_REG_TYPE;
10874 regno = REGNO (reg);
10875 if (!HARD_REGISTER_NUM_P (regno))
10877 if (!lra_in_progress && !reload_completed)
10878 return PSEUDO_REG_TYPE;
10880 regno = true_regnum (reg);
10881 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
10882 return PSEUDO_REG_TYPE;
10885 gcc_assert (regno >= 0);
10887 if (is_altivec && ALTIVEC_REGNO_P (regno))
10888 *is_altivec = true;
10890 rclass = rs6000_regno_regclass[regno];
10891 return reg_class_to_reg_type[(int)rclass];
10894 /* Helper function to return the cost of adding a TOC entry address. */
10896 static inline int
10897 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
10899 int ret;
10901 if (TARGET_CMODEL != CMODEL_SMALL)
10902 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
10904 else
10905 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
10907 return ret;
10910 /* Helper function for rs6000_secondary_reload to determine whether the memory
10911 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
10912 needs reloading. Return negative if the memory is not handled by the memory
10913 helper functions and a different reload method should be tried, 0 if no
10914 additional instructions are needed, and positive to give the extra cost for
10915 the memory. */
10917 static int
10918 rs6000_secondary_reload_memory (rtx addr,
10919 enum reg_class rclass,
10920 machine_mode mode)
10922 int extra_cost = 0;
10923 rtx reg, and_arg, plus_arg0, plus_arg1;
10924 addr_mask_type addr_mask;
10925 const char *type = NULL;
10926 const char *fail_msg = NULL;
10928 if (GPR_REG_CLASS_P (rclass))
10929 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
10931 else if (rclass == FLOAT_REGS)
10932 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
10934 else if (rclass == ALTIVEC_REGS)
10935 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
10937 /* For the combined VSX_REGS, turn off Altivec AND -16. */
10938 else if (rclass == VSX_REGS)
10939 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
10940 & ~RELOAD_REG_AND_M16);
10942 /* If the register allocator hasn't made up its mind yet on the register
10943 class to use, settle on defaults to use. */
10944 else if (rclass == NO_REGS)
10946 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
10947 & ~RELOAD_REG_AND_M16);
10949 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
10950 addr_mask &= ~(RELOAD_REG_INDEXED
10951 | RELOAD_REG_PRE_INCDEC
10952 | RELOAD_REG_PRE_MODIFY);
10955 else
10956 addr_mask = 0;
10958 /* If the register isn't valid in this register class, just return now. */
10959 if ((addr_mask & RELOAD_REG_VALID) == 0)
10961 if (TARGET_DEBUG_ADDR)
10963 fprintf (stderr,
10964 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
10965 "not valid in class\n",
10966 GET_MODE_NAME (mode), reg_class_names[rclass]);
10967 debug_rtx (addr);
10970 return -1;
10973 switch (GET_CODE (addr))
10975 /* Does the register class support auto-update forms for this mode? We
10976 don't need a scratch register, since the PowerPC only supports
10977 PRE_INC, PRE_DEC, and PRE_MODIFY. */
10978 case PRE_INC:
10979 case PRE_DEC:
10980 reg = XEXP (addr, 0);
10981 if (!base_reg_operand (addr, GET_MODE (reg)))
10983 fail_msg = "no base register #1";
10984 extra_cost = -1;
10987 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
10989 extra_cost = 1;
10990 type = "update";
10992 break;
10994 case PRE_MODIFY:
10995 reg = XEXP (addr, 0);
10996 plus_arg1 = XEXP (addr, 1);
10997 if (!base_reg_operand (reg, GET_MODE (reg))
10998 || GET_CODE (plus_arg1) != PLUS
10999 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
11001 fail_msg = "bad PRE_MODIFY";
11002 extra_cost = -1;
11005 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11007 extra_cost = 1;
11008 type = "update";
11010 break;
11012 /* Do we need to simulate AND -16 to clear the bottom address bits used
11013 in VMX load/stores? Only allow the AND for vector sizes. */
11014 case AND:
11015 and_arg = XEXP (addr, 0);
11016 if (GET_MODE_SIZE (mode) != 16
11017 || !CONST_INT_P (XEXP (addr, 1))
11018 || INTVAL (XEXP (addr, 1)) != -16)
11020 fail_msg = "bad Altivec AND #1";
11021 extra_cost = -1;
11024 if (rclass != ALTIVEC_REGS)
11026 if (legitimate_indirect_address_p (and_arg, false))
11027 extra_cost = 1;
11029 else if (legitimate_indexed_address_p (and_arg, false))
11030 extra_cost = 2;
11032 else
11034 fail_msg = "bad Altivec AND #2";
11035 extra_cost = -1;
11038 type = "and";
11040 break;
11042 /* If this is an indirect address, make sure it is a base register. */
11043 case REG:
11044 case SUBREG:
11045 if (!legitimate_indirect_address_p (addr, false))
11047 extra_cost = 1;
11048 type = "move";
11050 break;
11052 /* If this is an indexed address, make sure the register class can handle
11053 indexed addresses for this mode. */
11054 case PLUS:
11055 plus_arg0 = XEXP (addr, 0);
11056 plus_arg1 = XEXP (addr, 1);
11058 /* (plus (plus (reg) (constant)) (constant)) is generated during
11059 push_reload processing, so handle it now. */
11060 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
11062 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11064 extra_cost = 1;
11065 type = "offset";
11069 /* (plus (plus (reg) (constant)) (reg)) is also generated during
11070 push_reload processing, so handle it now. */
11071 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
11073 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11075 extra_cost = 1;
11076 type = "indexed #2";
11080 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
11082 fail_msg = "no base register #2";
11083 extra_cost = -1;
11086 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
11088 if ((addr_mask & RELOAD_REG_INDEXED) == 0
11089 || !legitimate_indexed_address_p (addr, false))
11091 extra_cost = 1;
11092 type = "indexed";
11096 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
11097 && CONST_INT_P (plus_arg1))
11099 if (!quad_address_offset_p (INTVAL (plus_arg1)))
11101 extra_cost = 1;
11102 type = "vector d-form offset";
11106 /* Make sure the register class can handle offset addresses. */
11107 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11109 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11111 extra_cost = 1;
11112 type = "offset #2";
11116 else
11118 fail_msg = "bad PLUS";
11119 extra_cost = -1;
11122 break;
11124 case LO_SUM:
11125 /* Quad offsets are restricted and can't handle normal addresses. */
11126 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11128 extra_cost = -1;
11129 type = "vector d-form lo_sum";
11132 else if (!legitimate_lo_sum_address_p (mode, addr, false))
11134 fail_msg = "bad LO_SUM";
11135 extra_cost = -1;
11138 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11140 extra_cost = 1;
11141 type = "lo_sum";
11143 break;
11145 /* Static addresses need to create a TOC entry. */
11146 case CONST:
11147 case SYMBOL_REF:
11148 case LABEL_REF:
11149 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11151 extra_cost = -1;
11152 type = "vector d-form lo_sum #2";
11155 else
11157 type = "address";
11158 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
11160 break;
11162 /* TOC references look like offsettable memory. */
11163 case UNSPEC:
11164 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
11166 fail_msg = "bad UNSPEC";
11167 extra_cost = -1;
11170 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
11172 extra_cost = -1;
11173 type = "vector d-form lo_sum #3";
11176 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11178 extra_cost = 1;
11179 type = "toc reference";
11181 break;
11183 default:
11185 fail_msg = "bad address";
11186 extra_cost = -1;
11190 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
11192 if (extra_cost < 0)
11193 fprintf (stderr,
11194 "rs6000_secondary_reload_memory error: mode = %s, "
11195 "class = %s, addr_mask = '%s', %s\n",
11196 GET_MODE_NAME (mode),
11197 reg_class_names[rclass],
11198 rs6000_debug_addr_mask (addr_mask, false),
11199 (fail_msg != NULL) ? fail_msg : "<bad address>");
11201 else
11202 fprintf (stderr,
11203 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
11204 "addr_mask = '%s', extra cost = %d, %s\n",
11205 GET_MODE_NAME (mode),
11206 reg_class_names[rclass],
11207 rs6000_debug_addr_mask (addr_mask, false),
11208 extra_cost,
11209 (type) ? type : "<none>");
11211 debug_rtx (addr);
11214 return extra_cost;
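/* Worked example (hypothetical operands): for a DQ-form vector mode whose
   address is (plus (reg) (const_int 37)), the PLUS case above notes that 37
   is not a valid quad offset and returns 1 with type "vector d-form offset",
   telling the caller that one extra insn is needed to reload the address
   into a base register.  */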
11217 /* Helper function for rs6000_secondary_reload to return true if a move to a
11218 different register class is really a simple move. */
11220 static bool
11221 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11222 enum rs6000_reg_type from_type,
11223 machine_mode mode)
11225 int size = GET_MODE_SIZE (mode);
11227 /* Add support for various direct moves available. In this function, we only
11228 look at cases where we don't need any extra registers, and one or more
11229 simple move insns are issued. Originally small integers are not allowed
11230 in FPR/VSX registers. Single precision binary floating is not a simple
11231 move because we need to convert to the single precision memory layout.
11232 The 4-byte SDmode can be moved. TDmode values are disallowed since they
11233 need special direct move handling, which we do not support yet. */
11234 if (TARGET_DIRECT_MOVE
11235 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11236 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11238 if (TARGET_POWERPC64)
11240 /* ISA 2.07: MTVSRD or MFVSRD. */
11241 if (size == 8)
11242 return true;
11244 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
11245 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11246 return true;
11249 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11250 if (TARGET_P8_VECTOR)
11252 if (mode == SImode)
11253 return true;
11255 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11256 return true;
11259 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
11260 if (mode == SDmode)
11261 return true;
11264 /* Move to/from SPR. */
11265 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11266 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11267 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11268 return true;
11270 return false;
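/* Examples (illustrative): on a 64-bit ISA 2.07 target, a DImode move
   between a GPR and a VSX register is a single mtvsrd or mfvsrd, so this
   returns true; an SFmode move the same way returns false, since the value
   must be converted to and from the double-precision register layout and is
   therefore not a simple move.  */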
11273 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
11274 special direct moves that involve allocating an extra register. If there is
11275 an appropriate helper insn, record its code and extra cost in SRI and
11276 return true; otherwise return false. */
11278 static bool
11279 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11280 enum rs6000_reg_type from_type,
11281 machine_mode mode,
11282 secondary_reload_info *sri,
11283 bool altivec_p)
11285 bool ret = false;
11286 enum insn_code icode = CODE_FOR_nothing;
11287 int cost = 0;
11288 int size = GET_MODE_SIZE (mode);
11290 if (TARGET_POWERPC64 && size == 16)
11292 /* Handle moving 128-bit values from GPRs to VSX registers on
11293 ISA 2.07 (power8, power9) when running in 64-bit mode using
11294 XXPERMDI to glue the two 64-bit values back together. */
11295 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11297 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
11298 icode = reg_addr[mode].reload_vsx_gpr;
11301 /* Handle moving 128-bit values from VSX registers to GPRs on
11302 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11303 bottom 64-bit value. */
11304 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11306 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
11307 icode = reg_addr[mode].reload_gpr_vsx;
11311 else if (TARGET_POWERPC64 && mode == SFmode)
11313 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11315 cost = 3; /* xscvdpspn, mfvsrd, and. */
11316 icode = reg_addr[mode].reload_gpr_vsx;
11319 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11321 cost = 2; /* mtvsrwz, xscvspdpn. */
11322 icode = reg_addr[mode].reload_vsx_gpr;
11326 else if (!TARGET_POWERPC64 && size == 8)
11328 /* Handle moving 64-bit values from GPRs to floating point registers on
11329 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
11330 32-bit values back together. Altivec register classes must be handled
11331 specially since a different instruction is used, and the secondary
11332 reload support requires a single instruction class in the scratch
11333 register constraint. However, right now TFmode is not allowed in
11334 Altivec registers, so the pattern will never match. */
11335 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
11337 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
11338 icode = reg_addr[mode].reload_fpr_gpr;
11342 if (icode != CODE_FOR_nothing)
11344 ret = true;
11345 if (sri)
11347 sri->icode = icode;
11348 sri->extra_cost = cost;
11352 return ret;
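/* Illustrative case: moving a 16-byte vector from GPRs to a VSX register on
   a 64-bit direct-move target picks reg_addr[mode].reload_vsx_gpr with an
   extra cost of 3 (two mtvsrd's plus one xxpermdi), fills in SRI, and
   returns true.  */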
11355 /* Return whether a move between two register classes can be done either
11356 directly (simple move) or via a pattern that uses a single extra temporary
11357 (using ISA 2.07's direct move in this case). */
11359 static bool
11360 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
11361 enum rs6000_reg_type from_type,
11362 machine_mode mode,
11363 secondary_reload_info *sri,
11364 bool altivec_p)
11366 /* Fall back to load/store reloads if either type is not a register. */
11367 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
11368 return false;
11370 /* If we haven't allocated registers yet, assume the move can be done for the
11371 standard register types. */
11372 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
11373 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
11374 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
11375 return true;
11377 /* Moves within the same set of registers are simple moves for non-specialized
11378 registers. */
11379 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
11380 return true;
11382 /* Check whether a simple move can be done directly. */
11383 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
11385 if (sri)
11387 sri->icode = CODE_FOR_nothing;
11388 sri->extra_cost = 0;
11390 return true;
11393 /* Now check if we can do it in a few steps. */
11394 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
11395 altivec_p);
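/* Decision sketch (illustrative): pseudo-to-pseudo moves before register
   allocation return true immediately; a GPR<->VSX DImode move on a 64-bit
   ISA 2.07 target passes the simple-move test with no extra cost; a 16-byte
   GPR<->VSX move falls through to the direct-move helper, which records the
   scratch pattern and its cost in SRI.  */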
11398 /* Inform reload about cases where moving X with a mode MODE to a register in
11399 RCLASS requires an extra scratch or immediate register. Return the class
11400 needed for the immediate register.
11402 For VSX and Altivec, we may need a register to convert sp+offset into
11403 reg+sp.
11405 For misaligned 64-bit gpr loads and stores we need a register to
11406 convert an offset address to indirect. */
11408 static reg_class_t
11409 rs6000_secondary_reload (bool in_p,
11410 rtx x,
11411 reg_class_t rclass_i,
11412 machine_mode mode,
11413 secondary_reload_info *sri)
11415 enum reg_class rclass = (enum reg_class) rclass_i;
11416 reg_class_t ret = ALL_REGS;
11417 enum insn_code icode;
11418 bool default_p = false;
11419 bool done_p = false;
11421 /* Allow subreg of memory before/during reload. */
11422 bool memory_p = (MEM_P (x)
11423 || (!reload_completed && SUBREG_P (x)
11424 && MEM_P (SUBREG_REG (x))));
11426 sri->icode = CODE_FOR_nothing;
11427 sri->t_icode = CODE_FOR_nothing;
11428 sri->extra_cost = 0;
11429 icode = ((in_p)
11430 ? reg_addr[mode].reload_load
11431 : reg_addr[mode].reload_store);
11433 if (REG_P (x) || register_operand (x, mode))
11435 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
11436 bool altivec_p = (rclass == ALTIVEC_REGS);
11437 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
11439 if (!in_p)
11440 std::swap (to_type, from_type);
11442 /* Can we do a direct move of some sort? */
11443 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
11444 altivec_p))
11446 icode = (enum insn_code)sri->icode;
11447 default_p = false;
11448 done_p = true;
11449 ret = NO_REGS;
11453 /* Make sure 0.0 is not reloaded or forced into memory. */
11454 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
11456 ret = NO_REGS;
11457 default_p = false;
11458 done_p = true;
11461 /* If this is a scalar floating point value and we want to load it into the
11462 traditional Altivec registers, move it via a traditional floating
11463 point register, unless we have D-form addressing. Also make sure that
11464 non-zero constants use an FPR. */
11465 if (!done_p && reg_addr[mode].scalar_in_vmx_p
11466 && !mode_supports_vmx_dform (mode)
11467 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
11468 && (memory_p || CONST_DOUBLE_P (x)))
11470 ret = FLOAT_REGS;
11471 default_p = false;
11472 done_p = true;
11475 /* Handle reload of load/stores if we have reload helper functions. */
11476 if (!done_p && icode != CODE_FOR_nothing && memory_p)
11478 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
11479 mode);
11481 if (extra_cost >= 0)
11483 done_p = true;
11484 ret = NO_REGS;
11485 if (extra_cost > 0)
11487 sri->extra_cost = extra_cost;
11488 sri->icode = icode;
11493 /* Handle unaligned loads and stores of integer registers. */
11494 if (!done_p && TARGET_POWERPC64
11495 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11496 && memory_p
11497 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
11499 rtx addr = XEXP (x, 0);
11500 rtx off = address_offset (addr);
11502 if (off != NULL_RTX)
11504 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11505 unsigned HOST_WIDE_INT offset = INTVAL (off);
11507 /* We need a secondary reload when our legitimate_address_p
11508 says the address is good (as otherwise the entire address
11509 will be reloaded), and the offset is not a multiple of
11510 four or we have an address wrap. Address wrap will only
11511 occur for LO_SUMs since legitimate_offset_address_p
11512 rejects addresses for 16-byte mems that will wrap. */
11513 if (GET_CODE (addr) == LO_SUM
11514 ? (1 /* legitimate_address_p allows any offset for lo_sum */
11515 && ((offset & 3) != 0
11516 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
11517 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
11518 && (offset & 3) != 0))
11520 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
11521 if (in_p)
11522 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
11523 : CODE_FOR_reload_di_load);
11524 else
11525 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
11526 : CODE_FOR_reload_di_store);
11527 sri->extra_cost = 2;
11528 ret = NO_REGS;
11529 done_p = true;
11531 else
11532 default_p = true;
11534 else
11535 default_p = true;
11538 if (!done_p && !TARGET_POWERPC64
11539 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
11540 && memory_p
11541 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
11543 rtx addr = XEXP (x, 0);
11544 rtx off = address_offset (addr);
11546 if (off != NULL_RTX)
11548 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
11549 unsigned HOST_WIDE_INT offset = INTVAL (off);
11551 /* We need a secondary reload when our legitimate_address_p
11552 says the address is good (as otherwise the entire address
11553 will be reloaded), and we have a wrap.
11555 legitimate_lo_sum_address_p allows LO_SUM addresses to
11556 have any offset so test for wrap in the low 16 bits.
11558 legitimate_offset_address_p checks for the range
11559 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
11560 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
11561 [0x7ff4,0x7fff] respectively, so test for the
11562 intersection of these ranges, [0x7ffc,0x7fff] and
11563 [0x7ff4,0x7ff7] respectively.
11565 Note that the address we see here may have been
11566 manipulated by legitimize_reload_address. */
11567 if (GET_CODE (addr) == LO_SUM
11568 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
11569 : offset - (0x8000 - extra) < UNITS_PER_WORD)
11571 if (in_p)
11572 sri->icode = CODE_FOR_reload_si_load;
11573 else
11574 sri->icode = CODE_FOR_reload_si_store;
11575 sri->extra_cost = 2;
11576 ret = NO_REGS;
11577 done_p = true;
11579 else
11580 default_p = true;
11582 else
11583 default_p = true;
11586 if (!done_p)
11587 default_p = true;
11589 if (default_p)
11590 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
11592 gcc_assert (ret != ALL_REGS);
11594 if (TARGET_DEBUG_ADDR)
11596 fprintf (stderr,
11597 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
11598 "mode = %s",
11599 reg_class_names[ret],
11600 in_p ? "true" : "false",
11601 reg_class_names[rclass],
11602 GET_MODE_NAME (mode));
11604 if (reload_completed)
11605 fputs (", after reload", stderr);
11607 if (!done_p)
11608 fputs (", done_p not set", stderr);
11610 if (default_p)
11611 fputs (", default secondary reload", stderr);
11613 if (sri->icode != CODE_FOR_nothing)
11614 fprintf (stderr, ", reload func = %s, extra cost = %d",
11615 insn_data[sri->icode].name, sri->extra_cost);
11617 else if (sri->extra_cost > 0)
11618 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
11620 fputs ("\n", stderr);
11621 debug_rtx (x);
11624 return ret;
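/* Worked example (hypothetical operands): a 64-bit target loading a DImode
   GPR from (plus (reg) (const_int 6)): the offset is in range but not a
   multiple of 4, so CODE_FOR_reload_di_load is chosen with extra_cost = 2
   and NO_REGS is returned, routing the address through a scratch
   register.  */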
11627 /* Better tracing for rs6000_secondary_reload_inner. */
11629 static void
11630 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
11631 bool store_p)
11633 rtx set, clobber;
11635 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
11637 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
11638 store_p ? "store" : "load");
11640 if (store_p)
11641 set = gen_rtx_SET (mem, reg);
11642 else
11643 set = gen_rtx_SET (reg, mem);
11645 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11646 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11649 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
11650 ATTRIBUTE_NORETURN;
11652 static void
11653 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
11654 bool store_p)
11656 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
11657 gcc_unreachable ();
11660 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
11661 reload helper functions. These were identified in
11662 rs6000_secondary_reload_memory, and if reload decided to use the secondary
11663 reload, it calls the insns:
11664 reload_<RELOAD:mode>_<P:mptrsize>_store
11665 reload_<RELOAD:mode>_<P:mptrsize>_load
11667 which in turn calls this function, to do whatever is necessary to create
11668 valid addresses. */
11670 void
11671 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
11673 int regno = true_regnum (reg);
11674 machine_mode mode = GET_MODE (reg);
11675 addr_mask_type addr_mask;
11676 rtx addr;
11677 rtx new_addr;
11678 rtx op_reg, op0, op1;
11679 rtx and_op;
11680 rtx cc_clobber;
11681 rtvec rv;
11683 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
11684 || !base_reg_operand (scratch, GET_MODE (scratch)))
11685 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11687 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
11688 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
11690 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
11691 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
11693 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
11694 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
11696 else
11697 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11699 /* Make sure the mode is valid in this register class. */
11700 if ((addr_mask & RELOAD_REG_VALID) == 0)
11701 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11703 if (TARGET_DEBUG_ADDR)
11704 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
11706 new_addr = addr = XEXP (mem, 0);
11707 switch (GET_CODE (addr))
11709 /* Does the register class support auto update forms for this mode? If
11710 not, do the update now. We don't need a scratch register, since the
11711 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
11712 case PRE_INC:
11713 case PRE_DEC:
11714 op_reg = XEXP (addr, 0);
11715 if (!base_reg_operand (op_reg, Pmode))
11716 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11718 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
11720 int delta = GET_MODE_SIZE (mode);
11721 if (GET_CODE (addr) == PRE_DEC)
11722 delta = -delta;
11723 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
11724 new_addr = op_reg;
11726 break;
11728 case PRE_MODIFY:
11729 op0 = XEXP (addr, 0);
11730 op1 = XEXP (addr, 1);
11731 if (!base_reg_operand (op0, Pmode)
11732 || GET_CODE (op1) != PLUS
11733 || !rtx_equal_p (op0, XEXP (op1, 0)))
11734 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11736 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
11738 emit_insn (gen_rtx_SET (op0, op1));
11739 new_addr = reg;
11741 break;
11743 /* Do we need to simulate AND -16 to clear the bottom address bits used
11744 in VMX load/stores? */
11745 case AND:
11746 op0 = XEXP (addr, 0);
11747 op1 = XEXP (addr, 1);
11748 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
11750 if (REG_P (op0) || SUBREG_P (op0))
11751 op_reg = op0;
11753 else if (GET_CODE (op1) == PLUS)
11755 emit_insn (gen_rtx_SET (scratch, op1));
11756 op_reg = scratch;
11759 else
11760 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11762 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
11763 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
11764 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
11765 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
11766 new_addr = scratch;
11768 break;
11770 /* If this is an indirect address, make sure it is a base register. */
11771 case REG:
11772 case SUBREG:
11773 if (!base_reg_operand (addr, GET_MODE (addr)))
11775 emit_insn (gen_rtx_SET (scratch, addr));
11776 new_addr = scratch;
11778 break;
11780 /* If this is an indexed address, make sure the register class can handle
11781 indexed addresses for this mode. */
11782 case PLUS:
11783 op0 = XEXP (addr, 0);
11784 op1 = XEXP (addr, 1);
11785 if (!base_reg_operand (op0, Pmode))
11786 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11788 else if (int_reg_operand (op1, Pmode))
11790 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11792 emit_insn (gen_rtx_SET (scratch, addr));
11793 new_addr = scratch;
11797 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
11799 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
11800 || !quad_address_p (addr, mode, false))
11802 emit_insn (gen_rtx_SET (scratch, addr));
11803 new_addr = scratch;
11807 /* Make sure the register class can handle offset addresses. */
11808 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
11810 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11812 emit_insn (gen_rtx_SET (scratch, addr));
11813 new_addr = scratch;
11817 else
11818 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11820 break;
11822 case LO_SUM:
11823 op0 = XEXP (addr, 0);
11824 op1 = XEXP (addr, 1);
11825 if (!base_reg_operand (op0, Pmode))
11826 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11828 else if (int_reg_operand (op1, Pmode))
11830 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
11832 emit_insn (gen_rtx_SET (scratch, addr));
11833 new_addr = scratch;
11837 /* Quad offsets are restricted and can't handle normal addresses. */
11838 else if (mode_supports_dq_form (mode))
11840 emit_insn (gen_rtx_SET (scratch, addr));
11841 new_addr = scratch;
11844 /* Make sure the register class can handle offset addresses. */
11845 else if (legitimate_lo_sum_address_p (mode, addr, false))
11847 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
11849 emit_insn (gen_rtx_SET (scratch, addr));
11850 new_addr = scratch;
11854 else
11855 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11857 break;
11859 case SYMBOL_REF:
11860 case CONST:
11861 case LABEL_REF:
11862 rs6000_emit_move (scratch, addr, Pmode);
11863 new_addr = scratch;
11864 break;
11866 default:
11867 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
11870 /* Adjust the address if it changed. */
11871 if (addr != new_addr)
11873 mem = replace_equiv_address_nv (mem, new_addr);
11874 if (TARGET_DEBUG_ADDR)
11875 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
11878 /* Now create the move. */
11879 if (store_p)
11880 emit_insn (gen_rtx_SET (mem, reg));
11881 else
11882 emit_insn (gen_rtx_SET (reg, mem));
11884 return;
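/* Illustrative fix-up (hypothetical operands): for a vector load from
   (plus (reg) (const_int 32)) into an Altivec register whose class lacks
   reg+offset addressing, the PLUS case above copies the whole address into
   the scratch register and the access becomes an indirect load through that
   scratch before the final move is emitted.  */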
11887 /* Convert reloads involving 64-bit gprs and misaligned offset
11888 addressing, or multiple 32-bit gprs and offsets that are too large,
11889 to use indirect addressing. */
11891 void
11892 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
11894 int regno = true_regnum (reg);
11895 enum reg_class rclass;
11896 rtx addr;
11897 rtx scratch_or_premodify = scratch;
11899 if (TARGET_DEBUG_ADDR)
11901 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
11902 store_p ? "store" : "load");
11903 fprintf (stderr, "reg:\n");
11904 debug_rtx (reg);
11905 fprintf (stderr, "mem:\n");
11906 debug_rtx (mem);
11907 fprintf (stderr, "scratch:\n");
11908 debug_rtx (scratch);
11911 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
11912 gcc_assert (MEM_P (mem));
11913 rclass = REGNO_REG_CLASS (regno);
11914 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
11915 addr = XEXP (mem, 0);
11917 if (GET_CODE (addr) == PRE_MODIFY)
11919 gcc_assert (REG_P (XEXP (addr, 0))
11920 && GET_CODE (XEXP (addr, 1)) == PLUS
11921 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
11922 scratch_or_premodify = XEXP (addr, 0);
11923 addr = XEXP (addr, 1);
11925 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
11927 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
11929 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
11931 /* Now create the move. */
11932 if (store_p)
11933 emit_insn (gen_rtx_SET (mem, reg));
11934 else
11935 emit_insn (gen_rtx_SET (reg, mem));
11937 return;
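/* Example flow (hypothetical operands): for a misaligned DImode GPR load
   from (plus (reg) (const_int 6)), the PLUS address is moved into the
   scratch register with rs6000_emit_move, the MEM is rewritten to be
   indirect through that scratch, and the plain load is then emitted.  */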
11940 /* Given an rtx X being reloaded into a reg required to be
11941 in class CLASS, return the class of reg to actually use.
11942 In general this is just CLASS; but on some machines
11943 in some cases it is preferable to use a more restrictive class.
11945 On the RS/6000, we have to return NO_REGS when we want to reload a
11946 floating-point CONST_DOUBLE to force it to be copied to memory.
11948 We also don't want to reload integer values into floating-point
11949 registers if we can at all help it. In fact, this can
11950 cause reload to die, if it tries to generate a reload of CTR
11951 into a FP register and discovers it doesn't have the memory location
11952 required.
11954 ??? Would it be a good idea to have reload do the converse, that is
11955 try to reload floating modes into FP registers if possible?
11956 */
11958 static enum reg_class
11959 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
11961 machine_mode mode = GET_MODE (x);
11962 bool is_constant = CONSTANT_P (x);
11964 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
11965 reload class for it. */
11966 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
11967 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
11968 return NO_REGS;
11970 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
11971 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
11972 return NO_REGS;
11974 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
11975 the reloading of address expressions using PLUS into floating point
11976 registers. */
11977 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
11979 if (is_constant)
11981 /* Zero is always allowed in all VSX registers. */
11982 if (x == CONST0_RTX (mode))
11983 return rclass;
11985 /* If this is a vector constant that can be formed with a few Altivec
11986 instructions, we want altivec registers. */
11987 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
11988 return ALTIVEC_REGS;
11990 /* If this is an integer constant that can easily be loaded into
11991 vector registers, allow it. */
11992 if (CONST_INT_P (x))
11994 HOST_WIDE_INT value = INTVAL (x);
11996 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
11997 2.06 can generate it in the Altivec registers with
11998 VSPLTI<x>. */
11999 if (value == -1)
12001 if (TARGET_P8_VECTOR)
12002 return rclass;
12003 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
12004 return ALTIVEC_REGS;
12005 else
12006 return NO_REGS;
12009 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
12010 a sign extend in the Altivec registers. */
12011 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
12012 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
12013 return ALTIVEC_REGS;
12016 /* Force constant to memory. */
12017 return NO_REGS;
12020 /* D-form addressing can easily reload the value. */
12021 if (mode_supports_vmx_dform (mode)
12022 || mode_supports_dq_form (mode))
12023 return rclass;
12025 /* If this is a scalar floating point value and we don't have D-form
12026 addressing, prefer the traditional floating point registers so that we
12027 can use D-form (register+offset) addressing. */
12028 if (rclass == VSX_REGS
12029 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
12030 return FLOAT_REGS;
12032 /* Prefer the Altivec registers if Altivec is handling the vector
12033 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
12034 loads. */
12035 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
12036 || mode == V1TImode)
12037 return ALTIVEC_REGS;
12039 return rclass;
12042 if (is_constant || GET_CODE (x) == PLUS)
12044 if (reg_class_subset_p (GENERAL_REGS, rclass))
12045 return GENERAL_REGS;
12046 if (reg_class_subset_p (BASE_REGS, rclass))
12047 return BASE_REGS;
12048 return NO_REGS;
12051 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == GEN_OR_FLOAT_REGS)
12052 return GENERAL_REGS;
12054 return rclass;
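/* Examples (illustrative): a CONST_INT of -1 destined for VSX_REGS stays in
   VSX_REGS on ISA 2.07 (XXLORC can form it in any VSX register) but is
   narrowed to ALTIVEC_REGS on ISA 2.06 (VSPLTI<x> only); an SFmode value in
   VSX_REGS without D-form vector addressing is steered to FLOAT_REGS so
   that reg+offset addressing remains usable.  */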
12057 /* Debug version of rs6000_preferred_reload_class. */
12058 static enum reg_class
12059 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
12061 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
12063 fprintf (stderr,
12064 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
12065 "mode = %s, x:\n",
12066 reg_class_names[ret], reg_class_names[rclass],
12067 GET_MODE_NAME (GET_MODE (x)));
12068 debug_rtx (x);
12070 return ret;
12073 /* If we are copying between FP or AltiVec registers and anything else, we need
12074 a memory location. The exception is when we are targeting ppc64 and the
12075 instructions to move between FPRs and GPRs are available. Also, under VSX, you
12076 can copy vector registers from the FP register set to the Altivec register
12077 set and vice versa. */
12079 static bool
12080 rs6000_secondary_memory_needed (machine_mode mode,
12081 reg_class_t from_class,
12082 reg_class_t to_class)
12084 enum rs6000_reg_type from_type, to_type;
12085 bool altivec_p = ((from_class == ALTIVEC_REGS)
12086 || (to_class == ALTIVEC_REGS));
12088 /* If a simple/direct move is available, we don't need secondary memory. */
12089 from_type = reg_class_to_reg_type[(int)from_class];
12090 to_type = reg_class_to_reg_type[(int)to_class];
12092 if (rs6000_secondary_reload_move (to_type, from_type, mode,
12093 (secondary_reload_info *)0, altivec_p))
12094 return false;
12096 /* If we have a floating point or vector register class, we need to use
12097 memory to transfer the data. */
12098 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
12099 return true;
12101 return false;
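/* Examples (illustrative): a DImode copy between FLOAT_REGS and
   GENERAL_REGS needs secondary memory on a target without direct moves, but
   not on an ISA 2.07 target where mtvsrd/mfvsrd make it a simple move;
   GPR-to-GPR copies never need secondary memory.  */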
12104 /* Debug version of rs6000_secondary_memory_needed. */
12105 static bool
12106 rs6000_debug_secondary_memory_needed (machine_mode mode,
12107 reg_class_t from_class,
12108 reg_class_t to_class)
12110 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
12112 fprintf (stderr,
12113 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
12114 "to_class = %s, mode = %s\n",
12115 ret ? "true" : "false",
12116 reg_class_names[from_class],
12117 reg_class_names[to_class],
12118 GET_MODE_NAME (mode));
12120 return ret;
12123 /* Return the register class of a scratch register needed to copy IN into
12124 or out of a register in RCLASS in MODE. If it can be done directly,
12125 NO_REGS is returned. */
12127 static enum reg_class
12128 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
12129 rtx in)
12131 int regno;
12133 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
12134 #if TARGET_MACHO
12135 && MACHOPIC_INDIRECT
12136 #endif
12139 /* We cannot copy a symbolic operand directly into anything
12140 other than BASE_REGS for TARGET_ELF. So indicate that a
12141 register from BASE_REGS is needed as an intermediate
12142 register.
12144 On Darwin, pic addresses require a load from memory, which
12145 needs a base register. */
12146 if (rclass != BASE_REGS
12147 && (SYMBOL_REF_P (in)
12148 || GET_CODE (in) == HIGH
12149 || GET_CODE (in) == LABEL_REF
12150 || GET_CODE (in) == CONST))
12151 return BASE_REGS;
12154 if (REG_P (in))
12156 regno = REGNO (in);
12157 if (!HARD_REGISTER_NUM_P (regno))
12159 regno = true_regnum (in);
12160 if (!HARD_REGISTER_NUM_P (regno))
12161 regno = -1;
12164 else if (SUBREG_P (in))
12166 regno = true_regnum (in);
12167 if (!HARD_REGISTER_NUM_P (regno))
12168 regno = -1;
12170 else
12171 regno = -1;
12173 /* If we have VSX register moves, prefer moving scalar values between
12174 Altivec registers and GPR by going via an FPR (and then via memory)
12175 instead of reloading the secondary memory address for Altivec moves. */
12176 if (TARGET_VSX
12177 && GET_MODE_SIZE (mode) < 16
12178 && !mode_supports_vmx_dform (mode)
12179 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
12180 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
12181 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12182 && (regno >= 0 && INT_REGNO_P (regno)))))
12183 return FLOAT_REGS;
12185 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
12186 into anything. */
12187 if (rclass == GENERAL_REGS || rclass == BASE_REGS
12188 || (regno >= 0 && INT_REGNO_P (regno)))
12189 return NO_REGS;
12191 /* Constants, memory, and VSX registers can go into VSX registers (both the
12192 traditional floating point and the altivec registers). */
12193 if (rclass == VSX_REGS
12194 && (regno == -1 || VSX_REGNO_P (regno)))
12195 return NO_REGS;
12197 /* Constants, memory, and FP registers can go into FP registers. */
12198 if ((regno == -1 || FP_REGNO_P (regno))
12199 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
12200 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
12202 /* Memory and AltiVec registers can go into AltiVec registers. */
12203 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
12204 && rclass == ALTIVEC_REGS)
12205 return NO_REGS;
12207 /* We can copy among the CR registers. */
12208 if ((rclass == CR_REGS || rclass == CR0_REGS)
12209 && regno >= 0 && CR_REGNO_P (regno))
12210 return NO_REGS;
12212 /* Otherwise, we need GENERAL_REGS. */
12213 return GENERAL_REGS;
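/* Examples (illustrative): copying a SYMBOL_REF into FLOAT_REGS on an ELF
   target needs a BASE_REGS intermediate; a DFmode copy between an Altivec
   register and a GPR under VSX without D-form addressing is funneled
   through FLOAT_REGS, avoiding a secondary memory reload.  */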
12216 /* Debug version of rs6000_secondary_reload_class. */
12217 static enum reg_class
12218 rs6000_debug_secondary_reload_class (enum reg_class rclass,
12219 machine_mode mode, rtx in)
12221 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
12222 fprintf (stderr,
12223 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
12224 "mode = %s, input rtx:\n",
12225 reg_class_names[ret], reg_class_names[rclass],
12226 GET_MODE_NAME (mode));
12227 debug_rtx (in);
12229 return ret;
12232 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
12234 static bool
12235 rs6000_can_change_mode_class (machine_mode from,
12236 machine_mode to,
12237 reg_class_t rclass)
12239 unsigned from_size = GET_MODE_SIZE (from);
12240 unsigned to_size = GET_MODE_SIZE (to);
12242 if (from_size != to_size)
12244 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
12246 if (reg_classes_intersect_p (xclass, rclass))
12248 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
12249 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
12250 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
12251 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
12253 /* Don't allow 64-bit types to overlap with 128-bit types that take a
12254 single register under VSX because the scalar part of the register
12255 is in the upper 64-bits, and not the lower 64-bits. Types like
12256 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
12257 IEEE floating point can't overlap, and neither can small
12258 values. */
12260 if (to_float128_vector_p && from_float128_vector_p)
12261 return true;
12263 else if (to_float128_vector_p || from_float128_vector_p)
12264 return false;
12266 /* TDmode in floating-mode registers must always go into a register
12267 pair with the most significant word in the even-numbered register
12268 to match ISA requirements. In little-endian mode, this does not
12269 match subreg numbering, so we cannot allow subregs. */
12270 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
12271 return false;
12273 if (from_size < 8 || to_size < 8)
12274 return false;
12276 if (from_size == 8 && (8 * to_nregs) != to_size)
12277 return false;
12279 if (to_size == 8 && (8 * from_nregs) != from_size)
12280 return false;
12282 return true;
12284 else
12285 return true;
12288 /* Since the VSX register set includes traditional floating point registers
12289 and altivec registers, just check for the size being different instead of
12290 trying to check whether the modes are vector modes. Otherwise it won't
12291 allow, say, DF and DI to change classes. For types like TFmode and TDmode
12292 that take 2 64-bit registers, rather than a single 128-bit register, don't
12293 allow subregs of those types to other 128 bit types. */
12294 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
12296 unsigned num_regs = (from_size + 15) / 16;
12297 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
12298 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
12299 return false;
12301 return (from_size == 8 || from_size == 16);
12304 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
12305 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
12306 return false;
12308 return true;
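/* Examples (illustrative): within VSX registers, a subreg between DFmode
   and DImode (both 8 bytes) is allowed, as is one between two same-size
   128-bit types, but DImode <-> V2DImode is rejected because the scalar
   occupies the upper 64 bits of the 128-bit register.  */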
12311 /* Debug version of rs6000_can_change_mode_class. */
12312 static bool
12313 rs6000_debug_can_change_mode_class (machine_mode from,
12314 machine_mode to,
12315 reg_class_t rclass)
12317 bool ret = rs6000_can_change_mode_class (from, to, rclass);
12319 fprintf (stderr,
12320 "rs6000_can_change_mode_class, return %s, from = %s, "
12321 "to = %s, rclass = %s\n",
12322 ret ? "true" : "false",
12323 GET_MODE_NAME (from), GET_MODE_NAME (to),
12324 reg_class_names[rclass]);
12326 return ret;
12329 /* Return a string to do a move operation of 128 bits of data. */
12331 const char *
12332 rs6000_output_move_128bit (rtx operands[])
12334 rtx dest = operands[0];
12335 rtx src = operands[1];
12336 machine_mode mode = GET_MODE (dest);
12337 int dest_regno;
12338 int src_regno;
12339 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
12340 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
12342 if (REG_P (dest))
12344 dest_regno = REGNO (dest);
12345 dest_gpr_p = INT_REGNO_P (dest_regno);
12346 dest_fp_p = FP_REGNO_P (dest_regno);
12347 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
12348 dest_vsx_p = dest_fp_p | dest_vmx_p;
12350 else
12352 dest_regno = -1;
12353 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
12356 if (REG_P (src))
12358 src_regno = REGNO (src);
12359 src_gpr_p = INT_REGNO_P (src_regno);
12360 src_fp_p = FP_REGNO_P (src_regno);
12361 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
12362 src_vsx_p = src_fp_p | src_vmx_p;
12364 else
12366 src_regno = -1;
12367 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
12370 /* Register moves. */
12371 if (dest_regno >= 0 && src_regno >= 0)
12373 if (dest_gpr_p)
12375 if (src_gpr_p)
12376 return "#";
12378 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
12379 return (WORDS_BIG_ENDIAN
12380 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
12381 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
12383 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
12384 return "#";
12387 else if (TARGET_VSX && dest_vsx_p)
12389 if (src_vsx_p)
12390 return "xxlor %x0,%x1,%x1";
12392 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
12393 return (WORDS_BIG_ENDIAN
12394 ? "mtvsrdd %x0,%1,%L1"
12395 : "mtvsrdd %x0,%L1,%1");
12397 else if (TARGET_DIRECT_MOVE && src_gpr_p)
12398 return "#";
12401 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
12402 return "vor %0,%1,%1";
12404 else if (dest_fp_p && src_fp_p)
12405 return "#";
12408 /* Loads. */
12409 else if (dest_regno >= 0 && MEM_P (src))
12411 if (dest_gpr_p)
12413 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12414 return "lq %0,%1";
12415 else
12416 return "#";
12419 else if (TARGET_ALTIVEC && dest_vmx_p
12420 && altivec_indexed_or_indirect_operand (src, mode))
12421 return "lvx %0,%y1";
12423 else if (TARGET_VSX && dest_vsx_p)
12425 if (mode_supports_dq_form (mode)
12426 && quad_address_p (XEXP (src, 0), mode, true))
12427 return "lxv %x0,%1";
12429 else if (TARGET_P9_VECTOR)
12430 return "lxvx %x0,%y1";
12432 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12433 return "lxvw4x %x0,%y1";
12435 else
12436 return "lxvd2x %x0,%y1";
12439 else if (TARGET_ALTIVEC && dest_vmx_p)
12440 return "lvx %0,%y1";
12442 else if (dest_fp_p)
12443 return "#";
12446 /* Stores. */
12447 else if (src_regno >= 0 && MEM_P (dest))
12449 if (src_gpr_p)
12451 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
12452 return "stq %1,%0";
12453 else
12454 return "#";
12457 else if (TARGET_ALTIVEC && src_vmx_p
12458 && altivec_indexed_or_indirect_operand (dest, mode))
12459 return "stvx %1,%y0";
12461 else if (TARGET_VSX && src_vsx_p)
12463 if (mode_supports_dq_form (mode)
12464 && quad_address_p (XEXP (dest, 0), mode, true))
12465 return "stxv %x1,%0";
12467 else if (TARGET_P9_VECTOR)
12468 return "stxvx %x1,%y0";
12470 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
12471 return "stxvw4x %x1,%y0";
12473 else
12474 return "stxvd2x %x1,%y0";
12477 else if (TARGET_ALTIVEC && src_vmx_p)
12478 return "stvx %1,%y0";
12480 else if (src_fp_p)
12481 return "#";
12484 /* Constants. */
12485 else if (dest_regno >= 0
12486 && (CONST_INT_P (src)
12487 || CONST_WIDE_INT_P (src)
12488 || CONST_DOUBLE_P (src)
12489 || GET_CODE (src) == CONST_VECTOR))
12491 if (dest_gpr_p)
12492 return "#";
12494 else if ((dest_vmx_p && TARGET_ALTIVEC)
12495 || (dest_vsx_p && TARGET_VSX))
12496 return output_vec_const_move (operands);
12499 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
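/* Examples (illustrative): a VSX-to-VSX vector copy emits
   "xxlor %x0,%x1,%x1"; a GPR-to-GPR copy returns "#" so the move is split
   into word-sized pieces later; a V2DImode load on a pre-ISA-3.0 VSX target
   emits "lxvd2x %x0,%y1".  */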
12502 /* Validate a 128-bit move. */
12503 bool
12504 rs6000_move_128bit_ok_p (rtx operands[])
12506 machine_mode mode = GET_MODE (operands[0]);
12507 return (gpc_reg_operand (operands[0], mode)
12508 || gpc_reg_operand (operands[1], mode));
12511 /* Return true if a 128-bit move needs to be split. */
12512 bool
12513 rs6000_split_128bit_ok_p (rtx operands[])
12515 if (!reload_completed)
12516 return false;
12518 if (!gpr_or_gpr_p (operands[0], operands[1]))
12519 return false;
12521 if (quad_load_store_p (operands[0], operands[1]))
12522 return false;
12524 return true;
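/* Illustrative: after reload, a TImode move held entirely in GPRs that is
   not eligible for the lq/stq quad memory insns returns true here and is
   split into individual word-sized moves.  */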
12528 /* Given a comparison operation, return the bit number in CCR to test. We
12529 know this is a valid comparison.
12531 SCC_P is 1 if this is for an scc. That means that %D will have been
12532 used instead of %C, so the bits will be in different places.
12534 Return -1 if OP isn't a valid comparison for some reason. */
12536 int
12537 ccr_bit (rtx op, int scc_p)
12539 enum rtx_code code = GET_CODE (op);
12540 machine_mode cc_mode;
12541 int cc_regnum;
12542 int base_bit;
12543 rtx reg;
12545 if (!COMPARISON_P (op))
12546 return -1;
12548 reg = XEXP (op, 0);
12550 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
12551 return -1;
12553 cc_mode = GET_MODE (reg);
12554 cc_regnum = REGNO (reg);
12555 base_bit = 4 * (cc_regnum - CR0_REGNO);
12557 validate_condition_mode (code, cc_mode);
12559 /* When generating a sCOND operation, only positive conditions are
12560 allowed. */
12561 if (scc_p)
12562 switch (code)
12564 case EQ:
12565 case GT:
12566 case LT:
12567 case UNORDERED:
12568 case GTU:
12569 case LTU:
12570 break;
12571 default:
12572 return -1;
12575 switch (code)
12577 case NE:
12578 return scc_p ? base_bit + 3 : base_bit + 2;
12579 case EQ:
12580 return base_bit + 2;
12581 case GT: case GTU: case UNLE:
12582 return base_bit + 1;
12583 case LT: case LTU: case UNGE:
12584 return base_bit;
12585 case ORDERED: case UNORDERED:
12586 return base_bit + 3;
12588 case GE: case GEU:
12589 /* If scc, we will have done a cror to put the bit in the
12590 unordered position. So test that bit. For integer, this is ! LT
12591 unless this is an scc insn. */
12592 return scc_p ? base_bit + 3 : base_bit;
12594 case LE: case LEU:
12595 return scc_p ? base_bit + 3 : base_bit + 1;
12597 default:
12598 return -1;
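/* Worked example (illustrative): a GT comparison whose operand lives in CR1
   has base_bit = 4, so ccr_bit returns 5 (GT is bit 1 within the four-bit
   CR field); with SCC_P set, a GE comparison instead tests the unordered
   position, base_bit + 3.  */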
12602 /* Return the GOT register. */
12604 rtx
12605 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
12607 /* The second flow pass currently (June 1999) can't update
12608 regs_ever_live without disturbing other parts of the compiler, so
12609 update it here to make the prolog/epilogue code happy. */
12610 if (!can_create_pseudo_p ()
12611 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
12612 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
12614 crtl->uses_pic_offset_table = 1;
12616 return pic_offset_table_rtx;
12619 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
12621 /* Write out a function code label. */
12623 void
12624 rs6000_output_function_entry (FILE *file, const char *fname)
12626 if (fname[0] != '.')
12628 switch (DEFAULT_ABI)
12630 default:
12631 gcc_unreachable ();
12633 case ABI_AIX:
12634 if (DOT_SYMBOLS)
12635 putc ('.', file);
12636 else
12637 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
12638 break;
12640 case ABI_ELFv2:
12641 case ABI_V4:
12642 case ABI_DARWIN:
12643 break;
12647 RS6000_OUTPUT_BASENAME (file, fname);
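/* Example (illustrative): under the AIX ABI with dot-symbols, the entry
   label for "foo" is printed as ".foo"; the ELFv2, V4, and Darwin ABIs use
   the bare name.  */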
12650 /* Print an operand. Recognize special options, documented below. */
12652 #if TARGET_ELF
12653 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
12654 only introduced by the linker, when applying the sda21
12655 relocation. */
12656 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
12657 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
12658 #else
12659 #define SMALL_DATA_RELOC "sda21"
12660 #define SMALL_DATA_REG 0
12661 #endif
12663 void
12664 print_operand (FILE *file, rtx x, int code)
12666 int i;
12667 unsigned HOST_WIDE_INT uval;
12669 switch (code)
12671 /* %a is output_address. */
12673 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
12674 output_operand. */
12676 case 'D':
12677 /* Like 'J' but get to the GT bit only. */
12678 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12680 output_operand_lossage ("invalid %%D value");
12681 return;
12684 /* Bit 1 is GT bit. */
12685 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
12687 /* Add one for shift count in rlinm for scc. */
12688 fprintf (file, "%d", i + 1);
12689 return;
12691 case 'e':
12692 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
12693 if (! INT_P (x))
12695 output_operand_lossage ("invalid %%e value");
12696 return;
12699 uval = INTVAL (x);
12700 if ((uval & 0xffff) == 0 && uval != 0)
12701 putc ('s', file);
12702 return;
12704 case 'E':
12705 /* X is a CR register. Print the number of the EQ bit of the CR. */
12706 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12707 output_operand_lossage ("invalid %%E value");
12708 else
12709 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
12710 return;
12712 case 'f':
12713 /* X is a CR register. Print the shift count needed to move it
12714 to the high-order four bits. */
12715 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12716 output_operand_lossage ("invalid %%f value");
12717 else
12718 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
12719 return;
12721 case 'F':
12722 /* Similar, but print the count for the rotate in the opposite
12723 direction. */
12724 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12725 output_operand_lossage ("invalid %%F value");
12726 else
12727 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
12728 return;
12730 case 'G':
12731 /* X is a constant integer. If it is negative, print "m",
12732 otherwise print "z". This is to make an aze or ame insn. */
12733 if (!CONST_INT_P (x))
12734 output_operand_lossage ("invalid %%G value");
12735 else if (INTVAL (x) >= 0)
12736 putc ('z', file);
12737 else
12738 putc ('m', file);
12739 return;
12741 case 'h':
12742 /* If constant, output low-order five bits. Otherwise, write
12743 normally. */
12744 if (INT_P (x))
12745 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
12746 else
12747 print_operand (file, x, 0);
12748 return;
12750 case 'H':
12751 /* If constant, output low-order six bits. Otherwise, write
12752 normally. */
12753 if (INT_P (x))
12754 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
12755 else
12756 print_operand (file, x, 0);
12757 return;
12759 case 'I':
12760 /* Print `i' if this is a constant, else nothing. */
12761 if (INT_P (x))
12762 putc ('i', file);
12763 return;
12765 case 'j':
12766 /* Write the bit number in CCR for jump. */
12767 i = ccr_bit (x, 0);
12768 if (i == -1)
12769 output_operand_lossage ("invalid %%j code");
12770 else
12771 fprintf (file, "%d", i);
12772 return;
12774 case 'J':
12775 /* Similar, but add one for shift count in rlinm for scc and pass
12776 scc flag to `ccr_bit'. */
12777 i = ccr_bit (x, 1);
12778 if (i == -1)
12779 output_operand_lossage ("invalid %%J code");
12780 else
12781 /* If we want bit 31, write a shift count of zero, not 32. */
12782 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12783 return;
12785 case 'k':
12786 /* X must be a constant. Write the 1's complement of the
12787 constant. */
12788 if (! INT_P (x))
12789 output_operand_lossage ("invalid %%k value");
12790 else
12791 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
12792 return;
12794 case 'K':
12795 /* X must be a symbolic constant on ELF. Write an
12796 expression suitable for an 'addi' that adds in the low 16
12797 bits of the MEM. */
12798 if (GET_CODE (x) == CONST)
12800 if (GET_CODE (XEXP (x, 0)) != PLUS
12801 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
12802 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
12803 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
12804 output_operand_lossage ("invalid %%K value");
12806 print_operand_address (file, x);
12807 fputs ("@l", file);
12808 return;
12810 /* %l is output_asm_label. */
12812 case 'L':
12813 /* Write second word of DImode or DFmode reference. Works on register
12814 or non-indexed memory only. */
12815 if (REG_P (x))
12816 fputs (reg_names[REGNO (x) + 1], file);
12817 else if (MEM_P (x))
12819 machine_mode mode = GET_MODE (x);
12820 /* Handle possible auto-increment. Since it is pre-increment and
12821 we have already done it, we can just use an offset of one word. */
12822 if (GET_CODE (XEXP (x, 0)) == PRE_INC
12823 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
12824 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12825 UNITS_PER_WORD));
12826 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
12827 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
12828 UNITS_PER_WORD));
12829 else
12830 output_address (mode, XEXP (adjust_address_nv (x, SImode,
12831 UNITS_PER_WORD),
12832 0));
12834 if (small_data_operand (x, GET_MODE (x)))
12835 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
12836 reg_names[SMALL_DATA_REG]);
12838 return;
12840 case 'N': /* Unused */
12841 /* Write the number of elements in the vector times 4. */
12842 if (GET_CODE (x) != PARALLEL)
12843 output_operand_lossage ("invalid %%N value");
12844 else
12845 fprintf (file, "%d", XVECLEN (x, 0) * 4);
12846 return;
12848 case 'O': /* Unused */
12849 /* Similar, but subtract 1 first. */
12850 if (GET_CODE (x) != PARALLEL)
12851 output_operand_lossage ("invalid %%O value");
12852 else
12853 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
12854 return;
12856 case 'p':
12857 /* X is a CONST_INT that is a power of two. Output the logarithm. */
12858 if (! INT_P (x)
12859 || INTVAL (x) < 0
12860 || (i = exact_log2 (INTVAL (x))) < 0)
12861 output_operand_lossage ("invalid %%p value");
12862 else
12863 fprintf (file, "%d", i);
12864 return;
12866 case 'P':
12867 /* The operand must be an indirect memory reference. The result
12868 is the register name. */
12869 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
12870 || REGNO (XEXP (x, 0)) >= 32)
12871 output_operand_lossage ("invalid %%P value");
12872 else
12873 fputs (reg_names[REGNO (XEXP (x, 0))], file);
12874 return;
12876 case 'q':
12877 /* This outputs the logical code corresponding to a boolean
12878 expression. The expression may have one or both operands
12879 negated (if one, only the first one). For condition register
12880 logical operations, it will also treat the negated
12881 CR codes as NOTs, but not handle NOTs of them. */
12883 const char *const *t = 0;
12884 const char *s;
12885 enum rtx_code code = GET_CODE (x);
12886 static const char * const tbl[3][3] = {
12887 { "and", "andc", "nor" },
12888 { "or", "orc", "nand" },
12889 { "xor", "eqv", "xor" } };
12891 if (code == AND)
12892 t = tbl[0];
12893 else if (code == IOR)
12894 t = tbl[1];
12895 else if (code == XOR)
12896 t = tbl[2];
12897 else
12898 output_operand_lossage ("invalid %%q value");
12900 if (GET_CODE (XEXP (x, 0)) != NOT)
12901 s = t[0];
12902 else
12904 if (GET_CODE (XEXP (x, 1)) == NOT)
12905 s = t[2];
12906 else
12907 s = t[1];
12910 fputs (s, file);
12912 return;
12914 case 'Q':
12915 if (! TARGET_MFCRF)
12916 return;
12917 fputc (',', file);
12918 /* FALLTHRU */
12920 case 'R':
12921 /* X is a CR register. Print the mask for `mtcrf'. */
12922 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12923 output_operand_lossage ("invalid %%R value");
12924 else
12925 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
12926 return;
12928 case 's':
12929 /* Low 5 bits of 32 - value. */
12930 if (! INT_P (x))
12931 output_operand_lossage ("invalid %%s value");
12932 else
12933 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
12934 return;
12936 case 't':
12937 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
12938 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
12940 output_operand_lossage ("invalid %%t value");
12941 return;
12944 /* Bit 3 is OV bit. */
12945 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
12947 /* If we want bit 31, write a shift count of zero, not 32. */
12948 fprintf (file, "%d", i == 31 ? 0 : i + 1);
12949 return;
12951 case 'T':
12952 /* Print the symbolic name of a branch target register. */
12953 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
12954 x = XVECEXP (x, 0, 0);
12955 if (!REG_P (x) || (REGNO (x) != LR_REGNO
12956 && REGNO (x) != CTR_REGNO))
12957 output_operand_lossage ("invalid %%T value");
12958 else if (REGNO (x) == LR_REGNO)
12959 fputs ("lr", file);
12960 else
12961 fputs ("ctr", file);
12962 return;
12964 case 'u':
12965 /* High-order or low-order 16 bits of constant, whichever is non-zero,
12966 for use in unsigned operand. */
12967 if (! INT_P (x))
12969 output_operand_lossage ("invalid %%u value");
12970 return;
12973 uval = INTVAL (x);
12974 if ((uval & 0xffff) == 0)
12975 uval >>= 16;
12977 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
12978 return;
12980 case 'v':
12981 /* High-order 16 bits of constant for use in signed operand. */
12982 if (! INT_P (x))
12983 output_operand_lossage ("invalid %%v value");
12984 else
12985 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
12986 (INTVAL (x) >> 16) & 0xffff);
12987 return;
12989 case 'U':
12990 /* Print `u' if this has an auto-increment or auto-decrement. */
12991 if (MEM_P (x)
12992 && (GET_CODE (XEXP (x, 0)) == PRE_INC
12993 || GET_CODE (XEXP (x, 0)) == PRE_DEC
12994 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
12995 putc ('u', file);
12996 return;
12998 case 'V':
12999 /* Print the trap code for this operand. */
13000 switch (GET_CODE (x))
13002 case EQ:
13003 fputs ("eq", file); /* 4 */
13004 break;
13005 case NE:
13006 fputs ("ne", file); /* 24 */
13007 break;
13008 case LT:
13009 fputs ("lt", file); /* 16 */
13010 break;
13011 case LE:
13012 fputs ("le", file); /* 20 */
13013 break;
13014 case GT:
13015 fputs ("gt", file); /* 8 */
13016 break;
13017 case GE:
13018 fputs ("ge", file); /* 12 */
13019 break;
13020 case LTU:
13021 fputs ("llt", file); /* 2 */
13022 break;
13023 case LEU:
13024 fputs ("lle", file); /* 6 */
13025 break;
13026 case GTU:
13027 fputs ("lgt", file); /* 1 */
13028 break;
13029 case GEU:
13030 fputs ("lge", file); /* 5 */
13031 break;
13032 default:
13033 output_operand_lossage ("invalid %%V value");
13035 break;
13037 case 'w':
13038 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
13039 normally. */
13040 if (INT_P (x))
13041 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
13042 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
13043 else
13044 print_operand (file, x, 0);
13045 return;
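/* A standalone sketch of the sign-extension idiom above: xor-ing the low
   16 bits with 0x8000 and then subtracting 0x8000 reproduces the signed
   value, e.g. 0xffff becomes -1 and 0x7fff stays 32767.  */
#include <stdint.h>

static int32_t
sketch_sext16 (uint32_t v)
{
  return (int32_t) (((v & 0xffff) ^ 0x8000) - 0x8000);
}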
13047 case 'x':
13048 /* X is a FPR or Altivec register used in a VSX context. */
13049 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
13050 output_operand_lossage ("invalid %%x value");
13051 else
13053 int reg = REGNO (x);
13054 int vsx_reg = (FP_REGNO_P (reg)
13055 ? reg - 32
13056 : reg - FIRST_ALTIVEC_REGNO + 32);
13058 #ifdef TARGET_REGNAMES
13059 if (TARGET_REGNAMES)
13060 fprintf (file, "%%vs%d", vsx_reg);
13061 else
13062 #endif
13063 fprintf (file, "%d", vsx_reg);
13065 return;
13067 case 'X':
13068 if (MEM_P (x)
13069 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
13070 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
13071 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
13072 putc ('x', file);
13073 return;
13075 case 'Y':
13076 /* Like 'L', for third word of TImode/PTImode */
13077 if (REG_P (x))
13078 fputs (reg_names[REGNO (x) + 2], file);
13079 else if (MEM_P (x))
13081 machine_mode mode = GET_MODE (x);
13082 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13083 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13084 output_address (mode, plus_constant (Pmode,
13085 XEXP (XEXP (x, 0), 0), 8));
13086 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13087 output_address (mode, plus_constant (Pmode,
13088 XEXP (XEXP (x, 0), 0), 8));
13089 else
13090 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
13091 if (small_data_operand (x, GET_MODE (x)))
13092 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13093 reg_names[SMALL_DATA_REG]);
13095 return;
13097 case 'z':
13098 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13099 x = XVECEXP (x, 0, 1);
13100 /* X is a SYMBOL_REF. Write out the name preceded by a
13101 period and without any trailing data in brackets. Used for function
13102 names. If we are configured for System V (or the embedded ABI) on
13103 the PowerPC, do not emit the period, since those systems do not use
13104 TOCs and the like. */
13105 if (!SYMBOL_REF_P (x))
13107 output_operand_lossage ("invalid %%z value");
13108 return;
13111 /* For macho, check to see if we need a stub. */
13112 if (TARGET_MACHO)
13114 const char *name = XSTR (x, 0);
13115 #if TARGET_MACHO
13116 if (darwin_symbol_stubs
13117 && MACHOPIC_INDIRECT
13118 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13119 name = machopic_indirection_name (x, /*stub_p=*/true);
13120 #endif
13121 assemble_name (file, name);
13123 else if (!DOT_SYMBOLS)
13124 assemble_name (file, XSTR (x, 0));
13125 else
13126 rs6000_output_function_entry (file, XSTR (x, 0));
13127 return;
13129 case 'Z':
13130 /* Like 'L', for last word of TImode/PTImode. */
13131 if (REG_P (x))
13132 fputs (reg_names[REGNO (x) + 3], file);
13133 else if (MEM_P (x))
13135 machine_mode mode = GET_MODE (x);
13136 if (GET_CODE (XEXP (x, 0)) == PRE_INC
13137 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
13138 output_address (mode, plus_constant (Pmode,
13139 XEXP (XEXP (x, 0), 0), 12));
13140 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13141 output_address (mode, plus_constant (Pmode,
13142 XEXP (XEXP (x, 0), 0), 12));
13143 else
13144 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
13145 if (small_data_operand (x, GET_MODE (x)))
13146 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13147 reg_names[SMALL_DATA_REG]);
13149 return;
13151 /* Print AltiVec memory operand. */
13152 case 'y':
13154 rtx tmp;
13156 gcc_assert (MEM_P (x));
13158 tmp = XEXP (x, 0);
13160 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
13161 && GET_CODE (tmp) == AND
13162 && CONST_INT_P (XEXP (tmp, 1))
13163 && INTVAL (XEXP (tmp, 1)) == -16)
13164 tmp = XEXP (tmp, 0);
13165 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
13166 && GET_CODE (tmp) == PRE_MODIFY)
13167 tmp = XEXP (tmp, 1);
13168 if (REG_P (tmp))
13169 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
13170 else
13172 if (GET_CODE (tmp) != PLUS
13173 || !REG_P (XEXP (tmp, 0))
13174 || !REG_P (XEXP (tmp, 1)))
13176 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
13177 break;
13180 if (REGNO (XEXP (tmp, 0)) == 0)
13181 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
13182 reg_names[ REGNO (XEXP (tmp, 0)) ]);
13183 else
13184 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
13185 reg_names[ REGNO (XEXP (tmp, 1)) ]);
13187 break;
13190 case 0:
13191 if (REG_P (x))
13192 fprintf (file, "%s", reg_names[REGNO (x)]);
13193 else if (MEM_P (x))
13195 /* We need to handle PRE_INC and PRE_DEC here, since we need to
13196 know the width from the mode. */
13197 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
13198 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
13199 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13200 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
13201 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
13202 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
13203 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
13204 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
13205 else
13206 output_address (GET_MODE (x), XEXP (x, 0));
13208 else if (toc_relative_expr_p (x, false,
13209 &tocrel_base_oac, &tocrel_offset_oac))
13210 /* This hack along with a corresponding hack in
13211 rs6000_output_addr_const_extra arranges to output addends
13212 where the assembler expects to find them. E.g.
13213 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
13214 without this hack would be output as "x@toc+4". We
13215 want "x+4@toc". */
13216 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13217 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
13218 output_addr_const (file, XVECEXP (x, 0, 0));
13219 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
13220 output_addr_const (file, XVECEXP (x, 0, 1));
13221 else
13222 output_addr_const (file, x);
13223 return;
13225 case '&':
13226 if (const char *name = get_some_local_dynamic_name ())
13227 assemble_name (file, name);
13228 else
13229 output_operand_lossage ("'%%&' used without any "
13230 "local dynamic TLS references");
13231 return;
13233 default:
13234 output_operand_lossage ("invalid %%xn code");
13238 /* Print the address of an operand. */
13240 void
13241 print_operand_address (FILE *file, rtx x)
13243 if (REG_P (x))
13244 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
13246 /* Is it a PC-relative address? */
13247 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
13249 HOST_WIDE_INT offset;
13251 if (GET_CODE (x) == CONST)
13252 x = XEXP (x, 0);
13254 if (GET_CODE (x) == PLUS)
13256 offset = INTVAL (XEXP (x, 1));
13257 x = XEXP (x, 0);
13259 else
13260 offset = 0;
13262 output_addr_const (file, x);
13264 if (offset)
13265 fprintf (file, "%+" PRId64, offset);
13267 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
13268 fprintf (file, "@got");
13270 fprintf (file, "@pcrel");
13272 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
13273 || GET_CODE (x) == LABEL_REF)
13275 output_addr_const (file, x);
13276 if (small_data_operand (x, GET_MODE (x)))
13277 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
13278 reg_names[SMALL_DATA_REG]);
13279 else
13280 gcc_assert (!TARGET_TOC);
13282 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13283 && REG_P (XEXP (x, 1)))
13285 if (REGNO (XEXP (x, 0)) == 0)
13286 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
13287 reg_names[ REGNO (XEXP (x, 0)) ]);
13288 else
13289 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
13290 reg_names[ REGNO (XEXP (x, 1)) ]);
13292 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
13293 && CONST_INT_P (XEXP (x, 1)))
13294 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
13295 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
13296 #if TARGET_MACHO
13297 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13298 && CONSTANT_P (XEXP (x, 1)))
13300 fprintf (file, "lo16(");
13301 output_addr_const (file, XEXP (x, 1));
13302 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13304 #endif
13305 #if TARGET_ELF
13306 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
13307 && CONSTANT_P (XEXP (x, 1)))
13309 output_addr_const (file, XEXP (x, 1));
13310 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
13312 #endif
13313 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
13315 /* This hack along with a corresponding hack in
13316 rs6000_output_addr_const_extra arranges to output addends
13317 where the assembler expects to find them. E.g.
13318 (lo_sum (reg 9)
13319 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
13320 without this hack would be output as "x@toc+8@l(9)". We
13321 want "x+8@toc@l(9)". */
13322 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
13323 if (GET_CODE (x) == LO_SUM)
13324 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
13325 else
13326 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
13328 else
13329 output_addr_const (file, x);
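/* For illustration, the address forms printed above include (the register
   numbers and the symbol name are placeholders): "0(9)" for a bare
   register, "sym@pcrel" or "sym@got@pcrel" for PC-relative local and
   external symbols, "9,10" for an indexed address, "16(9)" for register
   plus constant, and "sym@l(9)" for an ELF LO_SUM.  */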
13332 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13334 bool
13335 rs6000_output_addr_const_extra (FILE *file, rtx x)
13337 if (GET_CODE (x) == UNSPEC)
13338 switch (XINT (x, 1))
13340 case UNSPEC_TOCREL:
13341 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
13342 && REG_P (XVECEXP (x, 0, 1))
13343 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
13344 output_addr_const (file, XVECEXP (x, 0, 0));
13345 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
13347 if (INTVAL (tocrel_offset_oac) >= 0)
13348 fprintf (file, "+");
13349 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
13351 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
13353 putc ('-', file);
13354 assemble_name (file, toc_label_name);
13355 need_toc_init = 1;
13357 else if (TARGET_ELF)
13358 fputs ("@toc", file);
13359 return true;
13361 #if TARGET_MACHO
13362 case UNSPEC_MACHOPIC_OFFSET:
13363 output_addr_const (file, XVECEXP (x, 0, 0));
13364 putc ('-', file);
13365 machopic_output_function_base_name (file);
13366 return true;
13367 #endif
13369 return false;
13372 /* Target hook for assembling integer objects. The PowerPC version has
13373 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
13374 is defined. It also needs to handle DI-mode objects on 64-bit
13375 targets. */
13377 static bool
13378 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
13380 #ifdef RELOCATABLE_NEEDS_FIXUP
13381 /* Special handling for SI values. */
13382 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
13384 static int recurse = 0;
13386 /* For -mrelocatable, we mark all addresses that need to be fixed up in
13387 the .fixup section. Since the TOC section is already relocated, we
13388 don't need to mark it here. We used to skip the text section, but it
13389 should never be valid for relocated addresses to be placed in the text
13390 section. */
13391 if (DEFAULT_ABI == ABI_V4
13392 && (TARGET_RELOCATABLE || flag_pic > 1)
13393 && in_section != toc_section
13394 && !recurse
13395 && !CONST_SCALAR_INT_P (x)
13396 && CONSTANT_P (x))
13398 char buf[256];
13400 recurse = 1;
13401 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
13402 fixuplabelno++;
13403 ASM_OUTPUT_LABEL (asm_out_file, buf);
13404 fprintf (asm_out_file, "\t.long\t(");
13405 output_addr_const (asm_out_file, x);
13406 fprintf (asm_out_file, ")@fixup\n");
13407 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
13408 ASM_OUTPUT_ALIGN (asm_out_file, 2);
13409 fprintf (asm_out_file, "\t.long\t");
13410 assemble_name (asm_out_file, buf);
13411 fprintf (asm_out_file, "\n\t.previous\n");
13412 recurse = 0;
13413 return true;
13415 /* Remove initial .'s to turn a -mcall-aixdesc function
13416 address into the address of the descriptor, not the function
13417 itself. */
13418 else if (SYMBOL_REF_P (x)
13419 && XSTR (x, 0)[0] == '.'
13420 && DEFAULT_ABI == ABI_AIX)
13422 const char *name = XSTR (x, 0);
13423 while (*name == '.')
13424 name++;
13426 fprintf (asm_out_file, "\t.long\t%s\n", name);
13427 return true;
13430 #endif /* RELOCATABLE_NEEDS_FIXUP */
13431 return default_assemble_integer (x, size, aligned_p);
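/* For illustration (the symbol name and exact label syntax are
   placeholders for a typical ELF target), the fixup path above emits
   roughly:

	.LCP0:
		.long	(sym)@fixup
		.section	".fixup","aw"
		.align	2
		.long	.LCP0
		.previous

   so the .fixup section accumulates the addresses of all words that need
   to be relocated at run time.  */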
13434 /* Return a template string for assembly to emit when making an
13435 external call. FUNOP is the call mem argument operand number. */
13437 static const char *
13438 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
13440 /* -Wformat-overflow workaround, without which gcc thinks that %u
13441 might produce 10 digits. */
13442 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13444 char arg[12];
13445 arg[0] = 0;
13446 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13448 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13449 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
13450 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13451 sprintf (arg, "(%%&@tlsld)");
13454 /* The magic 32768 offset here corresponds to the offset of
13455 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
13456 char z[11];
13457 sprintf (z, "%%z%u%s", funop,
13458 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
13459 ? "+32768" : ""));
13461 static char str[32]; /* 1 spare */
13462 if (rs6000_pcrel_p (cfun))
13463 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
13464 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
13465 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13466 sibcall ? "" : "\n\tnop");
13467 else if (DEFAULT_ABI == ABI_V4)
13468 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
13469 flag_pic ? "@plt" : "");
13470 #if TARGET_MACHO
13471 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
13472 else if (DEFAULT_ABI == ABI_DARWIN)
13474 /* The cookie is in operand func+2. */
13475 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
13476 int cookie = INTVAL (operands[funop + 2]);
13477 if (cookie & CALL_LONG)
13479 tree funname = get_identifier (XSTR (operands[funop], 0));
13480 tree labelname = get_prev_label (funname);
13481 gcc_checking_assert (labelname && !sibcall);
13483 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
13484 instruction will reach 'foo', otherwise link as 'bl L42'".
13485 "L42" should be a 'branch island', that will do a far jump to
13486 'foo'. Branch islands are generated in
13487 macho_branch_islands(). */
13488 sprintf (str, "jbsr %%z%u,%.10s", funop,
13489 IDENTIFIER_POINTER (labelname));
13491 else
13492 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
13493 after the call. */
13494 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
13496 #endif
13497 else
13498 gcc_unreachable ();
13499 return str;
13502 const char *
13503 rs6000_call_template (rtx *operands, unsigned int funop)
13505 return rs6000_call_template_1 (operands, funop, false);
13508 const char *
13509 rs6000_sibcall_template (rtx *operands, unsigned int funop)
13511 return rs6000_call_template_1 (operands, funop, true);
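/* A standalone sketch of the template selection above for funop == 0 and
   no TLS argument; the enum and function names here are invented for the
   sketch.  */
#include <stdio.h>

enum sketch_abi { SK_PCREL, SK_AIX_OR_ELFV2, SK_V4_PIC, SK_V4_NOPIC };

static const char *
sketch_call_template (enum sketch_abi abi, int sibcall)
{
  static char str[32];
  if (abi == SK_PCREL)
    sprintf (str, "b%s %%z0@notoc", sibcall ? "" : "l");
  else if (abi == SK_AIX_OR_ELFV2)
    sprintf (str, "b%s %%z0%s", sibcall ? "" : "l", sibcall ? "" : "\n\tnop");
  else
    sprintf (str, "b%s %%z0%s", sibcall ? "" : "l",
	     abi == SK_V4_PIC ? "@plt" : "");
  return str;
}

int
main (void)
{
  puts (sketch_call_template (SK_AIX_OR_ELFV2, 0));	/* bl %z0 plus nop */
  puts (sketch_call_template (SK_V4_PIC, 1));		/* b %z0@plt */
  return 0;
}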
13514 /* As above, for indirect calls. */
13516 static const char *
13517 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
13518 bool sibcall)
13520 /* -Wformat-overflow workaround, without which gcc thinks that %u
13521 might produce 10 digits. Note that -Wformat-overflow will not
13522 currently warn here for str[], so do not rely on a warning to
13523 ensure str[] is correctly sized. */
13524 gcc_assert (funop <= MAX_RECOG_OPERANDS);
13526 /* Currently, funop is either 0 or 1. The maximum string is always
13527 a !speculate 64-bit __tls_get_addr call.
13529 ABI_ELFv2, pcrel:
13530 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13531 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
13532 . 9 crset 2\n\t
13533 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13534 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
13535 . 8 beq%T1l-
13536 .---
13537 .142
13539 ABI_AIX:
13540 . 9 ld 2,%3\n\t
13541 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13542 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13543 . 9 crset 2\n\t
13544 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13545 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13546 . 10 beq%T1l-\n\t
13547 . 10 ld 2,%4(1)
13548 .---
13549 .151
13551 ABI_ELFv2:
13552 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13553 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
13554 . 9 crset 2\n\t
13555 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13556 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
13557 . 10 beq%T1l-\n\t
13558 . 10 ld 2,%3(1)
13559 .---
13560 .142
13562 ABI_V4:
13563 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13564 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
13565 . 9 crset 2\n\t
13566 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
13567 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
13568 . 8 beq%T1l-
13569 .---
13570 .142 */
13571 static char str[160]; /* 8 spare */
13572 char *s = str;
13573 const char *ptrload = TARGET_64BIT ? "d" : "wz";
13575 if (DEFAULT_ABI == ABI_AIX)
13576 s += sprintf (s,
13577 "l%s 2,%%%u\n\t",
13578 ptrload, funop + 2);
13580 /* We don't need the extra code to stop indirect call speculation if
13581 calling via LR. */
13582 bool speculate = (TARGET_MACHO
13583 || rs6000_speculate_indirect_jumps
13584 || (REG_P (operands[funop])
13585 && REGNO (operands[funop]) == LR_REGNO));
13587 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
13589 const char *rel64 = TARGET_64BIT ? "64" : "";
13590 char tls[29];
13591 tls[0] = 0;
13592 if (GET_CODE (operands[funop + 1]) == UNSPEC)
13594 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
13595 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
13596 rel64, funop + 1);
13597 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
13598 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
13599 rel64);
13602 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
13603 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13604 && flag_pic == 2 ? "+32768" : "");
13605 if (!speculate)
13607 s += sprintf (s,
13608 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
13609 tls, rel64, notoc, funop, addend);
13610 s += sprintf (s, "crset 2\n\t");
13612 s += sprintf (s,
13613 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
13614 tls, rel64, notoc, funop, addend);
13616 else if (!speculate)
13617 s += sprintf (s, "crset 2\n\t");
13619 if (rs6000_pcrel_p (cfun))
13621 if (speculate)
13622 sprintf (s, "b%%T%ul", funop);
13623 else
13624 sprintf (s, "beq%%T%ul-", funop);
13626 else if (DEFAULT_ABI == ABI_AIX)
13628 if (speculate)
13629 sprintf (s,
13630 "b%%T%ul\n\t"
13631 "l%s 2,%%%u(1)",
13632 funop, ptrload, funop + 3);
13633 else
13634 sprintf (s,
13635 "beq%%T%ul-\n\t"
13636 "l%s 2,%%%u(1)",
13637 funop, ptrload, funop + 3);
13639 else if (DEFAULT_ABI == ABI_ELFv2)
13641 if (speculate)
13642 sprintf (s,
13643 "b%%T%ul\n\t"
13644 "l%s 2,%%%u(1)",
13645 funop, ptrload, funop + 2);
13646 else
13647 sprintf (s,
13648 "beq%%T%ul-\n\t"
13649 "l%s 2,%%%u(1)",
13650 funop, ptrload, funop + 2);
13652 else
13654 if (speculate)
13655 sprintf (s,
13656 "b%%T%u%s",
13657 funop, sibcall ? "" : "l");
13658 else
13659 sprintf (s,
13660 "beq%%T%u%s-%s",
13661 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
13663 return str;
13666 const char *
13667 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
13669 return rs6000_indirect_call_template_1 (operands, funop, false);
13672 const char *
13673 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
13675 return rs6000_indirect_call_template_1 (operands, funop, true);
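/* A standalone check of the size accounting above: the longest template
   (ABI_AIX, !speculate, 64-bit) totals 151 characters, which leaves the
   advertised spare room in str[160] including the terminating NUL.  */
#include <assert.h>
#include <string.h>

int
main (void)
{
  size_t n = strlen ("ld 2,%3\n\t")
	     + strlen (".reloc .,R_PPC64_TLSGD,%2\n\t")
	     + strlen (".reloc .,R_PPC64_PLTSEQ,%z1\n\t")
	     + strlen ("crset 2\n\t")
	     + strlen (".reloc .,R_PPC64_TLSGD,%2\n\t")
	     + strlen (".reloc .,R_PPC64_PLTCALL,%z1\n\t")
	     + strlen ("beq%T1l-\n\t")
	     + strlen ("ld 2,%4(1)");
  assert (n == 151 && n + 1 <= 160);
  return 0;
}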
13678 #if HAVE_AS_PLTSEQ
13679 /* Output indirect call insns. WHICH identifies the type of sequence. */
13680 const char *
13681 rs6000_pltseq_template (rtx *operands, int which)
13683 const char *rel64 = TARGET_64BIT ? "64" : "";
13684 char tls[30];
13685 tls[0] = 0;
13686 if (GET_CODE (operands[3]) == UNSPEC)
13688 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
13689 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
13690 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
13691 off, rel64);
13692 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
13693 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
13694 off, rel64);
13697 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
13698 static char str[96]; /* 10 spare */
13699 char off = WORDS_BIG_ENDIAN ? '2' : '4';
13700 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
13701 && flag_pic == 2 ? "+32768" : "");
13702 switch (which)
13704 case RS6000_PLTSEQ_TOCSAVE:
13705 sprintf (str,
13706 "st%s\n\t"
13707 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
13708 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
13709 tls, rel64);
13710 break;
13711 case RS6000_PLTSEQ_PLT16_HA:
13712 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
13713 sprintf (str,
13714 "lis %%0,0\n\t"
13715 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
13716 tls, off, rel64);
13717 else
13718 sprintf (str,
13719 "addis %%0,%%1,0\n\t"
13720 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
13721 tls, off, rel64, addend);
13722 break;
13723 case RS6000_PLTSEQ_PLT16_LO:
13724 sprintf (str,
13725 "l%s %%0,0(%%1)\n\t"
13726 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
13727 TARGET_64BIT ? "d" : "wz",
13728 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
13729 break;
13730 case RS6000_PLTSEQ_MTCTR:
13731 sprintf (str,
13732 "mtctr %%1\n\t"
13733 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
13734 tls, rel64, addend);
13735 break;
13736 case RS6000_PLTSEQ_PLT_PCREL34:
13737 sprintf (str,
13738 "pl%s %%0,0(0),1\n\t"
13739 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
13740 TARGET_64BIT ? "d" : "wz",
13741 tls, rel64);
13742 break;
13743 default:
13744 gcc_unreachable ();
13746 return str;
13748 #endif
13750 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
13751 /* Emit an assembler directive to set symbol visibility for DECL to
13752 VIS. */
13754 static void
13755 rs6000_assemble_visibility (tree decl, int vis)
13757 if (TARGET_XCOFF)
13758 return;
13760 /* Functions need to have their entry point symbol visibility set as
13761 well as their descriptor symbol visibility. */
13762 if (DEFAULT_ABI == ABI_AIX
13763 && DOT_SYMBOLS
13764 && TREE_CODE (decl) == FUNCTION_DECL)
13766 static const char * const visibility_types[] = {
13767 NULL, "protected", "hidden", "internal"
13770 const char *name, *type;
13772 name = ((* targetm.strip_name_encoding)
13773 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
13774 type = visibility_types[vis];
13776 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
13777 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
13779 else
13780 default_assemble_visibility (decl, vis);
13782 #endif
13784 enum rtx_code
13785 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
13787 /* Reversal of FP compares needs care -- an ordered compare
13788 becomes an unordered compare and vice versa. */
13789 if (mode == CCFPmode
13790 && (!flag_finite_math_only
13791 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
13792 || code == UNEQ || code == LTGT))
13793 return reverse_condition_maybe_unordered (code);
13794 else
13795 return reverse_condition (code);
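/* For example, reversing GE yields UNLT when the compare may be unordered,
   so that the reversed test is also true for NaN operands, but plain LT
   under flag_finite_math_only, where no NaNs can occur.  */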
13798 /* Generate a compare for CODE. Return a brand-new rtx that
13799 represents the result of the compare. */
13801 static rtx
13802 rs6000_generate_compare (rtx cmp, machine_mode mode)
13804 machine_mode comp_mode;
13805 rtx compare_result;
13806 enum rtx_code code = GET_CODE (cmp);
13807 rtx op0 = XEXP (cmp, 0);
13808 rtx op1 = XEXP (cmp, 1);
13810 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13811 comp_mode = CCmode;
13812 else if (FLOAT_MODE_P (mode))
13813 comp_mode = CCFPmode;
13814 else if (code == GTU || code == LTU
13815 || code == GEU || code == LEU)
13816 comp_mode = CCUNSmode;
13817 else if ((code == EQ || code == NE)
13818 && unsigned_reg_p (op0)
13819 && (unsigned_reg_p (op1)
13820 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
13821 /* These are unsigned values; perhaps there will be a later
13822 ordering compare that can be shared with this one. */
13823 comp_mode = CCUNSmode;
13824 else
13825 comp_mode = CCmode;
13827 /* If we have an unsigned compare, make sure we don't have a signed value as
13828 an immediate. */
13829 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
13830 && INTVAL (op1) < 0)
13832 op0 = copy_rtx_if_shared (op0);
13833 op1 = force_reg (GET_MODE (op0), op1);
13834 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
13837 /* First, the compare. */
13838 compare_result = gen_reg_rtx (comp_mode);
13840 /* IEEE 128-bit support in VSX registers when we do not have hardware
13841 support. */
13842 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
13844 rtx libfunc = NULL_RTX;
13845 bool check_nan = false;
13846 rtx dest;
13848 switch (code)
13850 case EQ:
13851 case NE:
13852 libfunc = optab_libfunc (eq_optab, mode);
13853 break;
13855 case GT:
13856 case GE:
13857 libfunc = optab_libfunc (ge_optab, mode);
13858 break;
13860 case LT:
13861 case LE:
13862 libfunc = optab_libfunc (le_optab, mode);
13863 break;
13865 case UNORDERED:
13866 case ORDERED:
13867 libfunc = optab_libfunc (unord_optab, mode);
13868 code = (code == UNORDERED) ? NE : EQ;
13869 break;
13871 case UNGE:
13872 case UNGT:
13873 check_nan = true;
13874 libfunc = optab_libfunc (ge_optab, mode);
13875 code = (code == UNGE) ? GE : GT;
13876 break;
13878 case UNLE:
13879 case UNLT:
13880 check_nan = true;
13881 libfunc = optab_libfunc (le_optab, mode);
13882 code = (code == UNLE) ? LE : LT;
13883 break;
13885 case UNEQ:
13886 case LTGT:
13887 check_nan = true;
13888 libfunc = optab_libfunc (eq_optab, mode);
13889 code = (code == UNEQ) ? EQ : NE;
13890 break;
13892 default:
13893 gcc_unreachable ();
13896 gcc_assert (libfunc);
13898 if (!check_nan)
13899 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13900 SImode, op0, mode, op1, mode);
13902 /* The library signals an exception for signalling NaNs, so we need to
13903 handle isgreater, etc. by first checking isordered. */
13904 else
13906 rtx ne_rtx, normal_dest, unord_dest;
13907 rtx unord_func = optab_libfunc (unord_optab, mode);
13908 rtx join_label = gen_label_rtx ();
13909 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
13910 rtx unord_cmp = gen_reg_rtx (comp_mode);
13913 /* Test for either value being a NaN. */
13914 gcc_assert (unord_func);
13915 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
13916 SImode, op0, mode, op1, mode);
13918 /* Set the value to 1 if either value is a NaN, and jump to the join
13919 label. */
13920 dest = gen_reg_rtx (SImode);
13921 emit_move_insn (dest, const1_rtx);
13922 emit_insn (gen_rtx_SET (unord_cmp,
13923 gen_rtx_COMPARE (comp_mode, unord_dest,
13924 const0_rtx)));
13926 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
13927 emit_jump_insn (gen_rtx_SET (pc_rtx,
13928 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
13929 join_ref,
13930 pc_rtx)));
13932 /* Do the normal comparison, knowing that the values are not
13933 NaNs. */
13934 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
13935 SImode, op0, mode, op1, mode);
13937 emit_insn (gen_cstoresi4 (dest,
13938 gen_rtx_fmt_ee (code, SImode, normal_dest,
13939 const0_rtx),
13940 normal_dest, const0_rtx));
13942 /* Join NaN and non-NaN paths. Compare dest against 0. */
13943 emit_label (join_label);
13944 code = NE;
13947 emit_insn (gen_rtx_SET (compare_result,
13948 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
13951 else
13953 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
13954 CLOBBERs to match cmptf_internal2 pattern. */
13955 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
13956 && FLOAT128_IBM_P (GET_MODE (op0))
13957 && TARGET_HARD_FLOAT)
13958 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13959 gen_rtvec (10,
13960 gen_rtx_SET (compare_result,
13961 gen_rtx_COMPARE (comp_mode, op0, op1)),
13962 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13963 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13964 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13965 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13966 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13967 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13968 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13969 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
13970 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
13971 else if (GET_CODE (op1) == UNSPEC
13972 && XINT (op1, 1) == UNSPEC_SP_TEST)
13974 rtx op1b = XVECEXP (op1, 0, 0);
13975 comp_mode = CCEQmode;
13976 compare_result = gen_reg_rtx (CCEQmode);
13977 if (TARGET_64BIT)
13978 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
13979 else
13980 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
13982 else
13983 emit_insn (gen_rtx_SET (compare_result,
13984 gen_rtx_COMPARE (comp_mode, op0, op1)));
13987 validate_condition_mode (code, GET_MODE (compare_result));
13989 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
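/* A standalone sketch of the NaN-guarded path above: it is the usual way
   isgreater-style predicates are built from soft-float library calls, and
   in plain C corresponds to something like this.  */
#include <math.h>

static int
sketch_ungt (double a, double b)
{
  if (isunordered (a, b))	/* either operand NaN: UN* compares are true */
    return 1;
  return a > b;			/* operands known ordered, plain compare */
}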
13993 /* Return the diagnostic message string if the binary operation OP is
13994 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13996 static const char*
13997 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
13998 const_tree type1,
13999 const_tree type2)
14001 machine_mode mode1 = TYPE_MODE (type1);
14002 machine_mode mode2 = TYPE_MODE (type2);
14004 /* For complex modes, use the inner type. */
14005 if (COMPLEX_MODE_P (mode1))
14006 mode1 = GET_MODE_INNER (mode1);
14008 if (COMPLEX_MODE_P (mode2))
14009 mode2 = GET_MODE_INNER (mode2);
14011 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
14012 double to intermix unless -mfloat128-convert. */
14013 if (mode1 == mode2)
14014 return NULL;
14016 if (!TARGET_FLOAT128_CVT)
14018 if ((mode1 == KFmode && mode2 == IFmode)
14019 || (mode1 == IFmode && mode2 == KFmode))
14020 return N_("__float128 and __ibm128 cannot be used in the same "
14021 "expression");
14023 if (TARGET_IEEEQUAD
14024 && ((mode1 == IFmode && mode2 == TFmode)
14025 || (mode1 == TFmode && mode2 == IFmode)))
14026 return N_("__ibm128 and long double cannot be used in the same "
14027 "expression");
14029 if (!TARGET_IEEEQUAD
14030 && ((mode1 == KFmode && mode2 == TFmode)
14031 || (mode1 == TFmode && mode2 == KFmode)))
14032 return N_("__float128 and long double cannot be used in the same "
14033 "expression");
14036 return NULL;
14040 /* Expand floating point conversion to/from __float128 and __ibm128. */
14042 void
14043 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
14045 machine_mode dest_mode = GET_MODE (dest);
14046 machine_mode src_mode = GET_MODE (src);
14047 convert_optab cvt = unknown_optab;
14048 bool do_move = false;
14049 rtx libfunc = NULL_RTX;
14050 rtx dest2;
14051 typedef rtx (*rtx_2func_t) (rtx, rtx);
14052 rtx_2func_t hw_convert = (rtx_2func_t)0;
14053 size_t kf_or_tf;
14055 struct hw_conv_t {
14056 rtx_2func_t from_df;
14057 rtx_2func_t from_sf;
14058 rtx_2func_t from_si_sign;
14059 rtx_2func_t from_si_uns;
14060 rtx_2func_t from_di_sign;
14061 rtx_2func_t from_di_uns;
14062 rtx_2func_t to_df;
14063 rtx_2func_t to_sf;
14064 rtx_2func_t to_si_sign;
14065 rtx_2func_t to_si_uns;
14066 rtx_2func_t to_di_sign;
14067 rtx_2func_t to_di_uns;
14068 } hw_conversions[2] = {
14069 /* conversions to/from KFmode */
14071 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
14072 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
14073 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
14074 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
14075 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
14076 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
14077 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
14078 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
14079 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
14080 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
14081 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
14082 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
14085 /* conversions to/from TFmode */
14087 gen_extenddftf2_hw, /* TFmode <- DFmode. */
14088 gen_extendsftf2_hw, /* TFmode <- SFmode. */
14089 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
14090 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
14091 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
14092 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
14093 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
14094 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
14095 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
14096 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
14097 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
14098 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
14102 if (dest_mode == src_mode)
14103 gcc_unreachable ();
14105 /* Eliminate memory operations. */
14106 if (MEM_P (src))
14107 src = force_reg (src_mode, src);
14109 if (MEM_P (dest))
14111 rtx tmp = gen_reg_rtx (dest_mode);
14112 rs6000_expand_float128_convert (tmp, src, unsigned_p);
14113 rs6000_emit_move (dest, tmp, dest_mode);
14114 return;
14117 /* Convert to IEEE 128-bit floating point. */
14118 if (FLOAT128_IEEE_P (dest_mode))
14120 if (dest_mode == KFmode)
14121 kf_or_tf = 0;
14122 else if (dest_mode == TFmode)
14123 kf_or_tf = 1;
14124 else
14125 gcc_unreachable ();
14127 switch (src_mode)
14129 case E_DFmode:
14130 cvt = sext_optab;
14131 hw_convert = hw_conversions[kf_or_tf].from_df;
14132 break;
14134 case E_SFmode:
14135 cvt = sext_optab;
14136 hw_convert = hw_conversions[kf_or_tf].from_sf;
14137 break;
14139 case E_KFmode:
14140 case E_IFmode:
14141 case E_TFmode:
14142 if (FLOAT128_IBM_P (src_mode))
14143 cvt = sext_optab;
14144 else
14145 do_move = true;
14146 break;
14148 case E_SImode:
14149 if (unsigned_p)
14151 cvt = ufloat_optab;
14152 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
14154 else
14156 cvt = sfloat_optab;
14157 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
14159 break;
14161 case E_DImode:
14162 if (unsigned_p)
14164 cvt = ufloat_optab;
14165 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
14167 else
14169 cvt = sfloat_optab;
14170 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
14172 break;
14174 default:
14175 gcc_unreachable ();
14179 /* Convert from IEEE 128-bit floating point. */
14180 else if (FLOAT128_IEEE_P (src_mode))
14182 if (src_mode == KFmode)
14183 kf_or_tf = 0;
14184 else if (src_mode == TFmode)
14185 kf_or_tf = 1;
14186 else
14187 gcc_unreachable ();
14189 switch (dest_mode)
14191 case E_DFmode:
14192 cvt = trunc_optab;
14193 hw_convert = hw_conversions[kf_or_tf].to_df;
14194 break;
14196 case E_SFmode:
14197 cvt = trunc_optab;
14198 hw_convert = hw_conversions[kf_or_tf].to_sf;
14199 break;
14201 case E_KFmode:
14202 case E_IFmode:
14203 case E_TFmode:
14204 if (FLOAT128_IBM_P (dest_mode))
14205 cvt = trunc_optab;
14206 else
14207 do_move = true;
14208 break;
14210 case E_SImode:
14211 if (unsigned_p)
14213 cvt = ufix_optab;
14214 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
14216 else
14218 cvt = sfix_optab;
14219 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
14221 break;
14223 case E_DImode:
14224 if (unsigned_p)
14226 cvt = ufix_optab;
14227 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
14229 else
14231 cvt = sfix_optab;
14232 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
14234 break;
14236 default:
14237 gcc_unreachable ();
14241 /* Both IBM format. */
14242 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
14243 do_move = true;
14245 else
14246 gcc_unreachable ();
14248 /* Handle conversion between TFmode/KFmode/IFmode. */
14249 if (do_move)
14250 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
14252 /* Handle conversion if we have hardware support. */
14253 else if (TARGET_FLOAT128_HW && hw_convert)
14254 emit_insn ((hw_convert) (dest, src));
14256 /* Call an external function to do the conversion. */
14257 else if (cvt != unknown_optab)
14259 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
14260 gcc_assert (libfunc != NULL_RTX);
14262 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
14263 src, src_mode);
14265 gcc_assert (dest2 != NULL_RTX);
14266 if (!rtx_equal_p (dest, dest2))
14267 emit_move_insn (dest, dest2);
14270 else
14271 gcc_unreachable ();
14273 return;
14277 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
14278 can be used as that dest register. Return the dest register. */
14280 rtx
14281 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
14283 if (op2 == const0_rtx)
14284 return op1;
14286 if (GET_CODE (scratch) == SCRATCH)
14287 scratch = gen_reg_rtx (mode);
14289 if (logical_operand (op2, mode))
14290 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
14291 else
14292 emit_insn (gen_rtx_SET (scratch,
14293 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
14295 return scratch;
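/* A standalone sketch of the reduction above: an EQ/NE test against a
   nonzero OP2 becomes a test of OP1 ^ OP2 or OP1 - OP2 against zero,
   whichever form the operand permits.  */
static long
sketch_eqne_scratch (long op1, long op2, int logical_ok)
{
  return logical_ok ? op1 ^ op2 : op1 - op2;	/* zero iff op1 == op2 */
}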
14298 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
14299 requires this. The result is mode MODE. */
14300 static rtx
14301 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
14303 rtx cond[2];
14304 int n = 0;
14305 if (code == LTGT || code == LE || code == UNLT)
14306 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
14307 if (code == LTGT || code == GE || code == UNGT)
14308 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
14309 if (code == LE || code == GE || code == UNEQ)
14310 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
14311 if (code == UNLT || code == UNGT || code == UNEQ)
14312 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
14314 gcc_assert (n == 2);
14316 rtx cc = gen_reg_rtx (CCEQmode);
14317 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
14318 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
14320 return cc;
14323 void
14324 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
14326 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
14327 rtx_code cond_code = GET_CODE (condition_rtx);
14329 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
14330 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
14331 condition_rtx = rs6000_emit_fp_cror (cond_code, mode, condition_rtx);
14332 else if (cond_code == NE
14333 || cond_code == GE || cond_code == LE
14334 || cond_code == GEU || cond_code == LEU
14335 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
14337 rtx not_result = gen_reg_rtx (CCEQmode);
14338 rtx not_op, rev_cond_rtx;
14339 machine_mode cc_mode;
14341 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
14343 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
14344 SImode, XEXP (condition_rtx, 0), const0_rtx);
14345 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
14346 emit_insn (gen_rtx_SET (not_result, not_op));
14347 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
14350 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
14351 if (op_mode == VOIDmode)
14352 op_mode = GET_MODE (XEXP (operands[1], 1));
14354 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
14356 PUT_MODE (condition_rtx, DImode);
14357 convert_move (operands[0], condition_rtx, 0);
14359 else
14361 PUT_MODE (condition_rtx, SImode);
14362 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
14366 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0]. */
14368 void
14369 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
14371 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
14372 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
14373 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
14374 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
14377 /* Return the string to output a conditional branch to LABEL, which is
14378 the operand template of the label, or NULL if the branch is really a
14379 conditional return.
14381 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
14382 condition code register and its mode specifies what kind of
14383 comparison we made.
14385 REVERSED is nonzero if we should reverse the sense of the comparison.
14387 INSN is the insn. */
14389 char *
14390 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
14392 static char string[64];
14393 enum rtx_code code = GET_CODE (op);
14394 rtx cc_reg = XEXP (op, 0);
14395 machine_mode mode = GET_MODE (cc_reg);
14396 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
14397 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
14398 int really_reversed = reversed ^ need_longbranch;
14399 char *s = string;
14400 const char *ccode;
14401 const char *pred;
14402 rtx note;
14404 validate_condition_mode (code, mode);
14406 /* Work out which way this really branches. We could use
14407 reverse_condition_maybe_unordered here always but this
14408 makes the resulting assembler clearer. */
14409 if (really_reversed)
14411 /* Reversal of FP compares needs care -- an ordered compare
14412 becomes an unordered compare and vice versa. */
14413 if (mode == CCFPmode)
14414 code = reverse_condition_maybe_unordered (code);
14415 else
14416 code = reverse_condition (code);
14419 switch (code)
14421 /* Not all of these are actually distinct opcodes, but
14422 we distinguish them for clarity of the resulting assembler. */
14423 case NE: case LTGT:
14424 ccode = "ne"; break;
14425 case EQ: case UNEQ:
14426 ccode = "eq"; break;
14427 case GE: case GEU:
14428 ccode = "ge"; break;
14429 case GT: case GTU: case UNGT:
14430 ccode = "gt"; break;
14431 case LE: case LEU:
14432 ccode = "le"; break;
14433 case LT: case LTU: case UNLT:
14434 ccode = "lt"; break;
14435 case UNORDERED: ccode = "un"; break;
14436 case ORDERED: ccode = "nu"; break;
14437 case UNGE: ccode = "nl"; break;
14438 case UNLE: ccode = "ng"; break;
14439 default:
14440 gcc_unreachable ();
14443 /* Maybe we have a guess as to how likely the branch is. */
14444 pred = "";
14445 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
14446 if (note != NULL_RTX)
14448 /* PROB is the difference from 50%. */
14449 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
14450 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
14452 /* Only hint for highly probable/improbable branches on newer cpus when
14453 we have real profile data, as static prediction overrides processor
14454 dynamic prediction. For older cpus we may as well always hint, but
14455 assume not taken for branches that are very close to 50% as a
14456 mispredicted taken branch is more expensive than a
14457 mispredicted not-taken branch. */
14458 if (rs6000_always_hint
14459 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
14460 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
14461 && br_prob_note_reliable_p (note)))
14463 if (abs (prob) > REG_BR_PROB_BASE / 20
14464 && ((prob > 0) ^ need_longbranch))
14465 pred = "+";
14466 else
14467 pred = "-";
14471 if (label == NULL)
14472 s += sprintf (s, "b%slr%s ", ccode, pred);
14473 else
14474 s += sprintf (s, "b%s%s ", ccode, pred);
14476 /* We need to escape any '%' characters in the reg_names string.
14477 Assume they'd only be the first character.... */
14478 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
14479 *s++ = '%';
14480 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
14482 if (label != NULL)
14484 /* If the branch distance was too far, we may have to use an
14485 unconditional branch to go the distance. */
14486 if (need_longbranch)
14487 s += sprintf (s, ",$+8\n\tb %s", label);
14488 else
14489 s += sprintf (s, ",%s", label);
14492 return string;
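/* For illustration, with symbolic CR names the strings built above look
   like "beq cr0,.L2", or "bne- cr7,.L3" with a static hint; when the
   label is out of conditional-branch range the condition is reversed and
   the branch jumps over an unconditional one: "bne cr0,$+8\n\tb .L2".  */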
14495 /* Return insn for VSX or Altivec comparisons. */
14497 static rtx
14498 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
14500 rtx mask;
14501 machine_mode mode = GET_MODE (op0);
14503 switch (code)
14505 default:
14506 break;
14508 case GE:
14509 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14510 return NULL_RTX;
14511 /* FALLTHRU */
14513 case EQ:
14514 case GT:
14515 case GTU:
14516 case ORDERED:
14517 case UNORDERED:
14518 case UNEQ:
14519 case LTGT:
14520 mask = gen_reg_rtx (mode);
14521 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
14522 return mask;
14525 return NULL_RTX;
14528 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
14529 DMODE is the expected destination mode. This is a recursive function. */
14531 static rtx
14532 rs6000_emit_vector_compare (enum rtx_code rcode,
14533 rtx op0, rtx op1,
14534 machine_mode dmode)
14536 rtx mask;
14537 bool swap_operands = false;
14538 bool try_again = false;
14540 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
14541 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
14543 /* See if the comparison works as is. */
14544 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14545 if (mask)
14546 return mask;
14548 switch (rcode)
14550 case LT:
14551 rcode = GT;
14552 swap_operands = true;
14553 try_again = true;
14554 break;
14555 case LTU:
14556 rcode = GTU;
14557 swap_operands = true;
14558 try_again = true;
14559 break;
14560 case NE:
14561 case UNLE:
14562 case UNLT:
14563 case UNGE:
14564 case UNGT:
14565 /* Invert condition and try again.
14566 e.g., A != B becomes ~(A==B). */
14568 enum rtx_code rev_code;
14569 enum insn_code nor_code;
14570 rtx mask2;
14572 rev_code = reverse_condition_maybe_unordered (rcode);
14573 if (rev_code == UNKNOWN)
14574 return NULL_RTX;
14576 nor_code = optab_handler (one_cmpl_optab, dmode);
14577 if (nor_code == CODE_FOR_nothing)
14578 return NULL_RTX;
14580 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
14581 if (!mask2)
14582 return NULL_RTX;
14584 mask = gen_reg_rtx (dmode);
14585 emit_insn (GEN_FCN (nor_code) (mask, mask2));
14586 return mask;
14588 break;
14589 case GE:
14590 case GEU:
14591 case LE:
14592 case LEU:
14593 /* Try GT/GTU/LT/LTU OR EQ */
14595 rtx c_rtx, eq_rtx;
14596 enum insn_code ior_code;
14597 enum rtx_code new_code;
14599 switch (rcode)
14601 case GE:
14602 new_code = GT;
14603 break;
14605 case GEU:
14606 new_code = GTU;
14607 break;
14609 case LE:
14610 new_code = LT;
14611 break;
14613 case LEU:
14614 new_code = LTU;
14615 break;
14617 default:
14618 gcc_unreachable ();
14621 ior_code = optab_handler (ior_optab, dmode);
14622 if (ior_code == CODE_FOR_nothing)
14623 return NULL_RTX;
14625 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
14626 if (!c_rtx)
14627 return NULL_RTX;
14629 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
14630 if (!eq_rtx)
14631 return NULL_RTX;
14633 mask = gen_reg_rtx (dmode);
14634 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
14635 return mask;
14637 break;
14638 default:
14639 return NULL_RTX;
14642 if (try_again)
14644 if (swap_operands)
14645 std::swap (op0, op1);
14647 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
14648 if (mask)
14649 return mask;
14652 /* You only get two chances. */
14653 return NULL_RTX;
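/* A standalone sketch, on a single 32-bit lane, of the rewrites above,
   where each compare yields an all-ones or all-zeros mask: NE is the
   complement of EQ, GE is GT or-ed with EQ, and LT/LTU are obtained by
   swapping the operands of GT/GTU.  */
#include <stdint.h>

static uint32_t lane_eq (int32_t a, int32_t b) { return a == b ? ~0u : 0u; }
static uint32_t lane_gt (int32_t a, int32_t b) { return a > b ? ~0u : 0u; }
static uint32_t lane_ne (int32_t a, int32_t b) { return ~lane_eq (a, b); }
static uint32_t lane_ge (int32_t a, int32_t b) { return lane_gt (a, b) | lane_eq (a, b); }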
14656 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
14657 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
14658 operands for the relation operation COND. */
14660 int
14661 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
14662 rtx cond, rtx cc_op0, rtx cc_op1)
14664 machine_mode dest_mode = GET_MODE (dest);
14665 machine_mode mask_mode = GET_MODE (cc_op0);
14666 enum rtx_code rcode = GET_CODE (cond);
14667 machine_mode cc_mode = CCmode;
14668 rtx mask;
14669 rtx cond2;
14670 bool invert_move = false;
14672 if (VECTOR_UNIT_NONE_P (dest_mode))
14673 return 0;
14675 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
14676 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
14678 switch (rcode)
14680 /* Swap operands if we can, and otherwise fall back to doing the
14681 operation as specified and using a NOR to invert the test. */
14682 case NE:
14683 case UNLE:
14684 case UNLT:
14685 case UNGE:
14686 case UNGT:
14687 /* Invert condition and try again.
14688 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
14689 invert_move = true;
14690 rcode = reverse_condition_maybe_unordered (rcode);
14691 if (rcode == UNKNOWN)
14692 return 0;
14693 break;
14695 case GE:
14696 case LE:
14697 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
14699 /* Invert condition to avoid compound test. */
14700 invert_move = true;
14701 rcode = reverse_condition (rcode);
14703 break;
14705 case GTU:
14706 case GEU:
14707 case LTU:
14708 case LEU:
14709 /* Mark unsigned tests with CCUNSmode. */
14710 cc_mode = CCUNSmode;
14712 /* Invert condition to avoid compound test if necessary. */
14713 if (rcode == GEU || rcode == LEU)
14715 invert_move = true;
14716 rcode = reverse_condition (rcode);
14718 break;
14720 default:
14721 break;
14724 /* Get the vector mask for the given relational operations. */
14725 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
14727 if (!mask)
14728 return 0;
14730 if (invert_move)
14731 std::swap (op_true, op_false);
14733 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
14734 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
14735 && (GET_CODE (op_true) == CONST_VECTOR
14736 || GET_CODE (op_false) == CONST_VECTOR))
14738 rtx constant_0 = CONST0_RTX (dest_mode);
14739 rtx constant_m1 = CONSTM1_RTX (dest_mode);
14741 if (op_true == constant_m1 && op_false == constant_0)
14743 emit_move_insn (dest, mask);
14744 return 1;
14747 else if (op_true == constant_0 && op_false == constant_m1)
14749 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
14750 return 1;
14753 /* If we can't use the vector comparison directly, perhaps we can use
14754 the mask for the true or false fields, instead of loading up a
14755 constant. */
14756 if (op_true == constant_m1)
14757 op_true = mask;
14759 if (op_false == constant_0)
14760 op_false = mask;
14763 if (!REG_P (op_true) && !SUBREG_P (op_true))
14764 op_true = force_reg (dest_mode, op_true);
14766 if (!REG_P (op_false) && !SUBREG_P (op_false))
14767 op_false = force_reg (dest_mode, op_false);
14769 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
14770 CONST0_RTX (dest_mode));
14771 emit_insn (gen_rtx_SET (dest,
14772 gen_rtx_IF_THEN_ELSE (dest_mode,
14773 cond2,
14774 op_true,
14775 op_false)));
14776 return 1;
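/* A standalone sketch, on a single 32-bit lane, of the final select
   above: conceptually the vsel/xxsel operation keeps the true value where
   the mask bits are set and the false value elsewhere.  */
#include <stdint.h>

static uint32_t
lane_sel (uint32_t mask, uint32_t op_true, uint32_t op_false)
{
  return (mask & op_true) | (~mask & op_false);
}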
14779 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
14780 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of
14781 the last comparison is nonzero/true, FALSE_COND if it is zero/false. Return
14782 0 if the hardware has no such operation. */
14784 static int
14785 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14787 enum rtx_code code = GET_CODE (op);
14788 rtx op0 = XEXP (op, 0);
14789 rtx op1 = XEXP (op, 1);
14790 machine_mode compare_mode = GET_MODE (op0);
14791 machine_mode result_mode = GET_MODE (dest);
14792 bool max_p = false;
14794 if (result_mode != compare_mode)
14795 return 0;
14797 if (code == GE || code == GT)
14798 max_p = true;
14799 else if (code == LE || code == LT)
14800 max_p = false;
14801 else
14802 return 0;
14804 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
14807 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
14808 max_p = !max_p;
14810 else
14811 return 0;
14813 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
14814 return 1;
14817 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
14818 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied
14819 to the operands of the last comparison is nonzero/true, FALSE_COND if it is
14820 zero/false. Return 0 if the hardware has no such operation. */
14822 static int
14823 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14825 enum rtx_code code = GET_CODE (op);
14826 rtx op0 = XEXP (op, 0);
14827 rtx op1 = XEXP (op, 1);
14828 machine_mode result_mode = GET_MODE (dest);
14829 rtx compare_rtx;
14830 rtx cmove_rtx;
14831 rtx clobber_rtx;
14833 if (!can_create_pseudo_p ())
14834 return 0;
14836 switch (code)
14838 case EQ:
14839 case GE:
14840 case GT:
14841 break;
14843 case NE:
14844 case LT:
14845 case LE:
14846 code = swap_condition (code);
14847 std::swap (op0, op1);
14848 break;
14850 default:
14851 return 0;
14854 /* Generate: [(parallel [(set (dest)
14855 (if_then_else (op (cmp1) (cmp2))
14856 (true)
14857 (false)))
14858 (clobber (scratch))])]. */
14860 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
14861 cmove_rtx = gen_rtx_SET (dest,
14862 gen_rtx_IF_THEN_ELSE (result_mode,
14863 compare_rtx,
14864 true_cond,
14865 false_cond));
14867 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
14868 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14869 gen_rtvec (2, cmove_rtx, clobber_rtx)));
14871 return 1;
14874 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
14875 operands of the last comparison is nonzero/true, FALSE_COND if it
14876 is zero/false. Return 0 if the hardware has no such operation. */
14878 int
14879 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
14881 enum rtx_code code = GET_CODE (op);
14882 rtx op0 = XEXP (op, 0);
14883 rtx op1 = XEXP (op, 1);
14884 machine_mode compare_mode = GET_MODE (op0);
14885 machine_mode result_mode = GET_MODE (dest);
14886 rtx temp;
14887 bool is_against_zero;
14889 /* These modes should always match. */
14890 if (GET_MODE (op1) != compare_mode
14891 /* In the isel case however, we can use a compare immediate, so
14892 op1 may be a small constant. */
14893 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
14894 return 0;
14895 if (GET_MODE (true_cond) != result_mode)
14896 return 0;
14897 if (GET_MODE (false_cond) != result_mode)
14898 return 0;
14900 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
14901 if (TARGET_P9_MINMAX
14902 && (compare_mode == SFmode || compare_mode == DFmode)
14903 && (result_mode == SFmode || result_mode == DFmode))
14905 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
14906 return 1;
14908 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
14909 return 1;
14912 /* Don't allow using floating point comparisons for integer results for
14913 now. */
14914 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
14915 return 0;
14917 /* First, work out if the hardware can do this at all, or
14918 if it's too slow.... */
14919 if (!FLOAT_MODE_P (compare_mode))
14921 if (TARGET_ISEL)
14922 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
14923 return 0;
14926 is_against_zero = op1 == CONST0_RTX (compare_mode);
14928 /* A floating-point subtract might overflow, underflow, or produce
14929 an inexact result, thus changing the floating-point flags, so it
14930 can't be generated if we care about that. It's safe if one side
14931 of the construct is zero, since then no subtract will be
14932 generated. */
14933 if (SCALAR_FLOAT_MODE_P (compare_mode)
14934 && flag_trapping_math && ! is_against_zero)
14935 return 0;
14937 /* Eliminate half of the comparisons by switching operands, this
14938 makes the remaining code simpler. */
14939 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
14940 || code == LTGT || code == LT || code == UNLE)
14942 code = reverse_condition_maybe_unordered (code);
14943 temp = true_cond;
14944 true_cond = false_cond;
14945 false_cond = temp;
14948 /* UNEQ and LTGT take four instructions for a comparison with zero;
14949 it'll probably be faster to use a branch here too. */
14950 if (code == UNEQ && HONOR_NANS (compare_mode))
14951 return 0;
14953 /* We're going to try to implement comparisons by performing
14954 a subtract, then comparing against zero. Unfortunately,
14955 Inf - Inf is NaN which is not zero, and so if we don't
14956 know that the operand is finite and the comparison
14957 would treat EQ different to UNORDERED, we can't do it. */
14958 if (HONOR_INFINITIES (compare_mode)
14959 && code != GT && code != UNGE
14960 && (!CONST_DOUBLE_P (op1)
14961 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
14962 /* Constructs of the form (a OP b ? a : b) are safe. */
14963 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
14964 || (! rtx_equal_p (op0, true_cond)
14965 && ! rtx_equal_p (op1, true_cond))))
14966 return 0;
14968 /* At this point we know we can use fsel. */
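/* For reference (a sketch of the ISA semantics, not of any particular
   pattern below): fsel FRT,FRA,FRC,FRB computes
   FRT = (FRA >= 0.0) ? FRC : FRB, which is why every remaining case is
   massaged into a single "op0 GE 0" test selecting TRUE_COND or
   FALSE_COND.  */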
14970 /* Reduce the comparison to a comparison against zero. */
14971 if (! is_against_zero)
14973 temp = gen_reg_rtx (compare_mode);
14974 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
14975 op0 = temp;
14976 op1 = CONST0_RTX (compare_mode);
14979 /* If we don't care about NaNs we can reduce some of the comparisons
14980 down to faster ones. */
14981 if (! HONOR_NANS (compare_mode))
14982 switch (code)
14984 case GT:
14985 code = LE;
14986 temp = true_cond;
14987 true_cond = false_cond;
14988 false_cond = temp;
14989 break;
14990 case UNGE:
14991 code = GE;
14992 break;
14993 case UNEQ:
14994 code = EQ;
14995 break;
14996 default:
14997 break;
15000 /* Now, reduce everything down to a GE. */
15001 switch (code)
15003 case GE:
15004 break;
15006 case LE:
15007 temp = gen_reg_rtx (compare_mode);
15008 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15009 op0 = temp;
15010 break;
15012 case ORDERED:
15013 temp = gen_reg_rtx (compare_mode);
15014 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
15015 op0 = temp;
15016 break;
15018 case EQ:
15019 temp = gen_reg_rtx (compare_mode);
15020 emit_insn (gen_rtx_SET (temp,
15021 gen_rtx_NEG (compare_mode,
15022 gen_rtx_ABS (compare_mode, op0))));
15023 op0 = temp;
15024 break;
15026 case UNGE:
15027 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
15028 temp = gen_reg_rtx (result_mode);
15029 emit_insn (gen_rtx_SET (temp,
15030 gen_rtx_IF_THEN_ELSE (result_mode,
15031 gen_rtx_GE (VOIDmode,
15032 op0, op1),
15033 true_cond, false_cond)));
15034 false_cond = true_cond;
15035 true_cond = temp;
15037 temp = gen_reg_rtx (compare_mode);
15038 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15039 op0 = temp;
15040 break;
15042 case GT:
15043 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
15044 temp = gen_reg_rtx (result_mode);
15045 emit_insn (gen_rtx_SET (temp,
15046 gen_rtx_IF_THEN_ELSE (result_mode,
15047 gen_rtx_GE (VOIDmode,
15048 op0, op1),
15049 true_cond, false_cond)));
15050 true_cond = false_cond;
15051 false_cond = temp;
15053 temp = gen_reg_rtx (compare_mode);
15054 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
15055 op0 = temp;
15056 break;
15058 default:
15059 gcc_unreachable ();
15062 emit_insn (gen_rtx_SET (dest,
15063 gen_rtx_IF_THEN_ELSE (result_mode,
15064 gen_rtx_GE (VOIDmode,
15065 op0, op1),
15066 true_cond, false_cond)));
15067 return 1;
15070 /* Same as above, but for ints (isel). */
15073 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
15075 rtx condition_rtx, cr;
15076 machine_mode mode = GET_MODE (dest);
15077 enum rtx_code cond_code;
15078 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
15079 bool signedp;
15081 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
15082 return 0;
15084 /* We still have to do the compare, because isel doesn't do a
15085 compare; it just looks at the CRx bits set by a previous compare
15086 instruction. */
15087 condition_rtx = rs6000_generate_compare (op, mode);
15088 cond_code = GET_CODE (condition_rtx);
15089 cr = XEXP (condition_rtx, 0);
15090 signedp = GET_MODE (cr) == CCmode;
15092 isel_func = (mode == SImode
15093 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
15094 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
15096 switch (cond_code)
15098 case LT: case GT: case LTU: case GTU: case EQ:
15099 /* isel handles these directly. */
15100 break;
15102 default:
15103 /* We need to swap the sense of the comparison. */
15105 std::swap (false_cond, true_cond);
15106 PUT_CODE (condition_rtx, reverse_condition (cond_code));
15108 break;
15111 false_cond = force_reg (mode, false_cond);
15112 if (true_cond != const0_rtx)
15113 true_cond = force_reg (mode, true_cond);
15115 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
15117 return 1;
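/* Illustrative shape of the isel expansion (a sketch, not literal output):
   for  int f (int a, int b, int t, int e) { return a < b ? t : e; }
   compiled with -misel we get roughly

	cmpw	cr0,ra,rb
	isel	rd,rt,re,0	# CR bit 0 is CR0.LT: rd = (a < b) ? rt : re

   i.e. no branch at all; isel merely reads a CR bit set by the compare.  */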
15120 void
15121 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
15123 machine_mode mode = GET_MODE (op0);
15124 enum rtx_code c;
15125 rtx target;
15127 /* VSX/altivec have direct min/max insns. */
15128 if ((code == SMAX || code == SMIN)
15129 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15130 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
15132 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
15133 return;
15136 if (code == SMAX || code == SMIN)
15137 c = GE;
15138 else
15139 c = GEU;
15141 if (code == SMAX || code == UMAX)
15142 target = emit_conditional_move (dest, c, op0, op1, mode,
15143 op0, op1, mode, 0);
15144 else
15145 target = emit_conditional_move (dest, c, op0, op1, mode,
15146 op1, op0, mode, 0);
15147 gcc_assert (target);
15148 if (target != dest)
15149 emit_move_insn (dest, target);
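/* In other words, SMAX is realized as the conditional move
   (op0 GE op1 ? op0 : op1) and SMIN as (op0 GE op1 ? op1 : op0); the
   unsigned variants use GEU instead.  */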
15152 /* A subroutine of the atomic operation splitters. Jump to LABEL if
15153 COND is true. Mark the jump as unlikely to be taken. */
15155 static void
15156 emit_unlikely_jump (rtx cond, rtx label)
15158 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
15159 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
15160 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
15163 /* A subroutine of the atomic operation splitters. Emit a load-locked
15164 instruction in MODE. For QI/HImode, possibly use a pattern that includes
15165 the zero_extend operation. */
15167 static void
15168 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
15170 rtx (*fn) (rtx, rtx) = NULL;
15172 switch (mode)
15174 case E_QImode:
15175 fn = gen_load_lockedqi;
15176 break;
15177 case E_HImode:
15178 fn = gen_load_lockedhi;
15179 break;
15180 case E_SImode:
15181 if (GET_MODE (mem) == QImode)
15182 fn = gen_load_lockedqi_si;
15183 else if (GET_MODE (mem) == HImode)
15184 fn = gen_load_lockedhi_si;
15185 else
15186 fn = gen_load_lockedsi;
15187 break;
15188 case E_DImode:
15189 fn = gen_load_lockeddi;
15190 break;
15191 case E_TImode:
15192 fn = gen_load_lockedti;
15193 break;
15194 default:
15195 gcc_unreachable ();
15197 emit_insn (fn (reg, mem));
15200 /* A subroutine of the atomic operation splitters. Emit a store-conditional
15201 instruction in MODE. */
15203 static void
15204 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
15206 rtx (*fn) (rtx, rtx, rtx) = NULL;
15208 switch (mode)
15210 case E_QImode:
15211 fn = gen_store_conditionalqi;
15212 break;
15213 case E_HImode:
15214 fn = gen_store_conditionalhi;
15215 break;
15216 case E_SImode:
15217 fn = gen_store_conditionalsi;
15218 break;
15219 case E_DImode:
15220 fn = gen_store_conditionaldi;
15221 break;
15222 case E_TImode:
15223 fn = gen_store_conditionalti;
15224 break;
15225 default:
15226 gcc_unreachable ();
15229 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
15230 if (PPC405_ERRATUM77)
15231 emit_insn (gen_hwsync ());
15233 emit_insn (fn (res, mem, val));
15236 /* Expand barriers before and after a load_locked/store_cond sequence. */
15238 static rtx
15239 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
15241 rtx addr = XEXP (mem, 0);
15243 if (!legitimate_indirect_address_p (addr, reload_completed)
15244 && !legitimate_indexed_address_p (addr, reload_completed))
15246 addr = force_reg (Pmode, addr);
15247 mem = replace_equiv_address_nv (mem, addr);
15250 switch (model)
15252 case MEMMODEL_RELAXED:
15253 case MEMMODEL_CONSUME:
15254 case MEMMODEL_ACQUIRE:
15255 break;
15256 case MEMMODEL_RELEASE:
15257 case MEMMODEL_ACQ_REL:
15258 emit_insn (gen_lwsync ());
15259 break;
15260 case MEMMODEL_SEQ_CST:
15261 emit_insn (gen_hwsync ());
15262 break;
15263 default:
15264 gcc_unreachable ();
15266 return mem;
15269 static void
15270 rs6000_post_atomic_barrier (enum memmodel model)
15272 switch (model)
15274 case MEMMODEL_RELAXED:
15275 case MEMMODEL_CONSUME:
15276 case MEMMODEL_RELEASE:
15277 break;
15278 case MEMMODEL_ACQUIRE:
15279 case MEMMODEL_ACQ_REL:
15280 case MEMMODEL_SEQ_CST:
15281 emit_insn (gen_isync ());
15282 break;
15283 default:
15284 gcc_unreachable ();
15288 /* A subroutine of the various atomic expanders. For sub-word operations,
15289 we must adjust things to operate on SImode. Given the original MEM,
15290 return a new aligned memory. Also build and return the quantities by
15291 which to shift and mask. */
15293 static rtx
15294 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
15296 rtx addr, align, shift, mask, mem;
15297 HOST_WIDE_INT shift_mask;
15298 machine_mode mode = GET_MODE (orig_mem);
15300 /* For smaller modes, we have to implement this via SImode. */
15301 shift_mask = (mode == QImode ? 0x18 : 0x10);
15303 addr = XEXP (orig_mem, 0);
15304 addr = force_reg (GET_MODE (addr), addr);
15306 /* Aligned memory containing subword. Generate a new memory. We
15307 do not want any of the existing MEM_ATTR data, as we're now
15308 accessing memory outside the original object. */
15309 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
15310 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15311 mem = gen_rtx_MEM (SImode, align);
15312 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
15313 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
15314 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
15316 /* Shift amount for subword relative to aligned word. */
15317 shift = gen_reg_rtx (SImode);
15318 addr = gen_lowpart (SImode, addr);
15319 rtx tmp = gen_reg_rtx (SImode);
15320 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
15321 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
15322 if (BYTES_BIG_ENDIAN)
15323 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
15324 shift, 1, OPTAB_LIB_WIDEN);
15325 *pshift = shift;
15327 /* Mask for insertion. */
15328 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
15329 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
15330 *pmask = mask;
15332 return mem;
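/* A worked example of the arithmetic above (illustrative only): for a
   QImode access at address A with A % 4 == 2 on a little-endian target,
   align = A & -4, shift = (A << 3) & 0x18 = 16 and mask = 0xff << 16, so
   the byte occupies bits 16..23 of the aligned SImode word.  On a
   big-endian target the XOR with 0x18 turns that shift into 8.  */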
15335 /* A subroutine of the various atomic expanders. For sub-word operands,
15336 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
15338 static rtx
15339 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
15341 rtx x;
15343 x = gen_reg_rtx (SImode);
15344 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
15345 gen_rtx_NOT (SImode, mask),
15346 oldval)));
15348 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
15350 return x;
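/* That is, the result is (OLDVAL & ~MASK) | NEWVAL, on the assumption that
   the caller has already shifted NEWVAL into position with no bits set
   outside MASK.  */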
15353 /* A subroutine of the various atomic expanders. For sub-word operands,
15354 extract WIDE to NARROW via SHIFT. */
15356 static void
15357 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
15359 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
15360 wide, 1, OPTAB_LIB_WIDEN);
15361 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
15364 /* Expand an atomic compare and swap operation. */
15366 void
15367 rs6000_expand_atomic_compare_and_swap (rtx operands[])
15369 rtx boolval, retval, mem, oldval, newval, cond;
15370 rtx label1, label2, x, mask, shift;
15371 machine_mode mode, orig_mode;
15372 enum memmodel mod_s, mod_f;
15373 bool is_weak;
15375 boolval = operands[0];
15376 retval = operands[1];
15377 mem = operands[2];
15378 oldval = operands[3];
15379 newval = operands[4];
15380 is_weak = (INTVAL (operands[5]) != 0);
15381 mod_s = memmodel_base (INTVAL (operands[6]));
15382 mod_f = memmodel_base (INTVAL (operands[7]));
15383 orig_mode = mode = GET_MODE (mem);
15385 mask = shift = NULL_RTX;
15386 if (mode == QImode || mode == HImode)
15388 /* Before power8, we didn't have access to lbarx/lharx, so generate a
15389 lwarx plus shift/mask operations. With power8, we need to do the
15390 comparison in SImode, but the store is still done in QI/HImode. */
15391 oldval = convert_modes (SImode, mode, oldval, 1);
15393 if (!TARGET_SYNC_HI_QI)
15395 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15397 /* Shift and mask OLDVAL into position within the word. */
15398 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
15399 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15401 /* Shift and mask NEWVAL into position within the word. */
15402 newval = convert_modes (SImode, mode, newval, 1);
15403 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
15404 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15407 /* Prepare to adjust the return value. */
15408 retval = gen_reg_rtx (SImode);
15409 mode = SImode;
15411 else if (reg_overlap_mentioned_p (retval, oldval))
15412 oldval = copy_to_reg (oldval);
15414 if (mode != TImode && !reg_or_short_operand (oldval, mode))
15415 oldval = copy_to_mode_reg (mode, oldval);
15417 if (reg_overlap_mentioned_p (retval, newval))
15418 newval = copy_to_reg (newval);
15420 mem = rs6000_pre_atomic_barrier (mem, mod_s);
15422 label1 = NULL_RTX;
15423 if (!is_weak)
15425 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15426 emit_label (XEXP (label1, 0));
15428 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15430 emit_load_locked (mode, retval, mem);
15432 x = retval;
15433 if (mask)
15434 x = expand_simple_binop (SImode, AND, retval, mask,
15435 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15437 cond = gen_reg_rtx (CCmode);
15438 /* If we have TImode, synthesize a comparison. */
15439 if (mode != TImode)
15440 x = gen_rtx_COMPARE (CCmode, x, oldval);
15441 else
15443 rtx xor1_result = gen_reg_rtx (DImode);
15444 rtx xor2_result = gen_reg_rtx (DImode);
15445 rtx or_result = gen_reg_rtx (DImode);
15446 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
15447 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
15448 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
15449 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
15451 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
15452 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
15453 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
15454 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
15457 emit_insn (gen_rtx_SET (cond, x));
15459 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15460 emit_unlikely_jump (x, label2);
15462 x = newval;
15463 if (mask)
15464 x = rs6000_mask_atomic_subword (retval, newval, mask);
15466 emit_store_conditional (orig_mode, cond, mem, x);
15468 if (!is_weak)
15470 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15471 emit_unlikely_jump (x, label1);
15474 if (!is_mm_relaxed (mod_f))
15475 emit_label (XEXP (label2, 0));
15477 rs6000_post_atomic_barrier (mod_s);
15479 if (is_mm_relaxed (mod_f))
15480 emit_label (XEXP (label2, 0));
15482 if (shift)
15483 rs6000_finish_atomic_subword (operands[1], retval, shift);
15484 else if (mode != GET_MODE (operands[1]))
15485 convert_move (operands[1], retval, 1);
15487 /* In all cases, CR0 contains EQ on success, and NE on failure. */
15488 x = gen_rtx_EQ (SImode, cond, const0_rtx);
15489 emit_insn (gen_rtx_SET (boolval, x));
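/* The code built above has the classic larx/stcx. shape (a sketch of the
   SImode strong-CAS case, not literal assembler output):

	loop:	lwarx	ret,0,mem	# load-locked
		cmpw	cr0,ret,oldval
		bne-	cr0,fail	# mismatch: CR0 carries NE for BOOLVAL
		stwcx.	new,0,mem	# store-conditional
		bne-	cr0,loop	# reservation lost: retry
	fail:

   bracketed by whatever barriers the memory model required above.  */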
15492 /* Expand an atomic exchange operation. */
15494 void
15495 rs6000_expand_atomic_exchange (rtx operands[])
15497 rtx retval, mem, val, cond;
15498 machine_mode mode;
15499 enum memmodel model;
15500 rtx label, x, mask, shift;
15502 retval = operands[0];
15503 mem = operands[1];
15504 val = operands[2];
15505 model = memmodel_base (INTVAL (operands[3]));
15506 mode = GET_MODE (mem);
15508 mask = shift = NULL_RTX;
15509 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
15511 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15513 /* Shift and mask VAL into position within the word. */
15514 val = convert_modes (SImode, mode, val, 1);
15515 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15516 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15518 /* Prepare to adjust the return value. */
15519 retval = gen_reg_rtx (SImode);
15520 mode = SImode;
15523 mem = rs6000_pre_atomic_barrier (mem, model);
15525 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
15526 emit_label (XEXP (label, 0));
15528 emit_load_locked (mode, retval, mem);
15530 x = val;
15531 if (mask)
15532 x = rs6000_mask_atomic_subword (retval, val, mask);
15534 cond = gen_reg_rtx (CCmode);
15535 emit_store_conditional (mode, cond, mem, x);
15537 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15538 emit_unlikely_jump (x, label);
15540 rs6000_post_atomic_barrier (model);
15542 if (shift)
15543 rs6000_finish_atomic_subword (operands[0], retval, shift);
15546 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
15547 to perform. MEM is the memory on which to operate. VAL is the second
15548 operand of the binary operator. BEFORE and AFTER are optional locations to
15549 return the value of MEM either before or after the operation. MODEL_RTX
15550 is a CONST_INT containing the memory model to use. */
15552 void
15553 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15554 rtx orig_before, rtx orig_after, rtx model_rtx)
15556 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15557 machine_mode mode = GET_MODE (mem);
15558 machine_mode store_mode = mode;
15559 rtx label, x, cond, mask, shift;
15560 rtx before = orig_before, after = orig_after;
15562 mask = shift = NULL_RTX;
15563 /* On power8, we want to use SImode for the operation itself. On previous
15564 systems, do the operation on the containing aligned word and shift/mask
15565 to get the proper byte or halfword. */
15566 if (mode == QImode || mode == HImode)
15568 if (TARGET_SYNC_HI_QI)
15570 val = convert_modes (SImode, mode, val, 1);
15572 /* Prepare to adjust the return value. */
15573 before = gen_reg_rtx (SImode);
15574 if (after)
15575 after = gen_reg_rtx (SImode);
15576 mode = SImode;
15578 else
15580 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15582 /* Shift and mask VAL into position within the word. */
15583 val = convert_modes (SImode, mode, val, 1);
15584 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15585 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15587 switch (code)
15589 case IOR:
15590 case XOR:
15591 /* We've already zero-extended VAL. That is sufficient to
15592 make certain that it does not affect other bits. */
15593 mask = NULL;
15594 break;
15596 case AND:
15597 /* If we make certain that all of the other bits in VAL are
15598 set, that will be sufficient to not affect other bits. */
15599 x = gen_rtx_NOT (SImode, mask);
15600 x = gen_rtx_IOR (SImode, x, val);
15601 emit_insn (gen_rtx_SET (val, x));
15602 mask = NULL;
15603 break;
15605 case NOT:
15606 case PLUS:
15607 case MINUS:
15608 /* These will all affect bits outside the field and need
15609 adjustment via MASK within the loop. */
15610 break;
15612 default:
15613 gcc_unreachable ();
15616 /* Prepare to adjust the return value. */
15617 before = gen_reg_rtx (SImode);
15618 if (after)
15619 after = gen_reg_rtx (SImode);
15620 store_mode = mode = SImode;
15624 mem = rs6000_pre_atomic_barrier (mem, model);
15626 label = gen_label_rtx ();
15627 emit_label (label);
15628 label = gen_rtx_LABEL_REF (VOIDmode, label);
15630 if (before == NULL_RTX)
15631 before = gen_reg_rtx (mode);
15633 emit_load_locked (mode, before, mem);
15635 if (code == NOT)
15637 x = expand_simple_binop (mode, AND, before, val,
15638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15639 after = expand_simple_unop (mode, NOT, x, after, 1);
15641 else
15643 after = expand_simple_binop (mode, code, before, val,
15644 after, 1, OPTAB_LIB_WIDEN);
15647 x = after;
15648 if (mask)
15650 x = expand_simple_binop (SImode, AND, after, mask,
15651 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15652 x = rs6000_mask_atomic_subword (before, x, mask);
15654 else if (store_mode != mode)
15655 x = convert_modes (store_mode, mode, x, 1);
15657 cond = gen_reg_rtx (CCmode);
15658 emit_store_conditional (store_mode, cond, mem, x);
15660 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
15661 emit_unlikely_jump (x, label);
15663 rs6000_post_atomic_barrier (model);
15665 if (shift)
15667 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
15668 then do the calculations in an SImode register. */
15669 if (orig_before)
15670 rs6000_finish_atomic_subword (orig_before, before, shift);
15671 if (orig_after)
15672 rs6000_finish_atomic_subword (orig_after, after, shift);
15674 else if (store_mode != mode)
15676 /* QImode/HImode on machines with lbarx/lharx where we do the native
15677 operation and then do the calculations in an SImode register. */
15678 if (orig_before)
15679 convert_move (orig_before, before, 1);
15680 if (orig_after)
15681 convert_move (orig_after, after, 1);
15683 else if (orig_after && after != orig_after)
15684 emit_move_insn (orig_after, after);
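/* For instance, an SImode __atomic_fetch_add expands to roughly (a sketch,
   not literal output):

	loop:	lwarx	before,0,mem
		add	after,before,val
		stwcx.	after,0,mem
		bne-	loop

   with BEFORE/AFTER then copied out to ORIG_BEFORE/ORIG_AFTER as
   requested.  */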
15687 /* Emit instructions to move SRC to DST. Called by splitters for
15688 multi-register moves. It will emit at most one instruction for
15689 each register that is accessed; that is, it won't emit li/lis pairs
15690 (or equivalent for 64-bit code). One of SRC or DST must be a hard
15691 register. */
15693 void
15694 rs6000_split_multireg_move (rtx dst, rtx src)
15696 /* The register number of the first register being moved. */
15697 int reg;
15698 /* The mode that is to be moved. */
15699 machine_mode mode;
15700 /* The mode that the move is being done in, and its size. */
15701 machine_mode reg_mode;
15702 int reg_mode_size;
15703 /* The number of registers that will be moved. */
15704 int nregs;
15706 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
15707 mode = GET_MODE (dst);
15708 nregs = hard_regno_nregs (reg, mode);
15709 if (FP_REGNO_P (reg))
15710 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
15711 (TARGET_HARD_FLOAT ? DFmode : SFmode);
15712 else if (ALTIVEC_REGNO_P (reg))
15713 reg_mode = V16QImode;
15714 else
15715 reg_mode = word_mode;
15716 reg_mode_size = GET_MODE_SIZE (reg_mode);
15718 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
15720 /* TDmode residing in FP registers is special, since the ISA requires that
15721 the lower-numbered word of a register pair is always the most significant
15722 word, even in little-endian mode. This does not match the usual subreg
15723 semantics, so we cannot use simplify_gen_subreg in those cases. Access
15724 the appropriate constituent registers "by hand" in little-endian mode.
15726 Note we do not need to check for destructive overlap here since TDmode
15727 can only reside in even/odd register pairs. */
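/* Illustration of the loop below, assuming nregs == 2 in little-endian
   mode: iteration i == 0 copies FP register REGNO + 1 and i == 1 copies
   REGNO + 0, giving the ISA-mandated word order rather than the
   endian-dependent subreg order.  */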
15728 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
15730 rtx p_src, p_dst;
15731 int i;
15733 for (i = 0; i < nregs; i++)
15735 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
15736 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
15737 else
15738 p_src = simplify_gen_subreg (reg_mode, src, mode,
15739 i * reg_mode_size);
15741 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
15742 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
15743 else
15744 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
15745 i * reg_mode_size);
15747 emit_insn (gen_rtx_SET (p_dst, p_src));
15750 return;
15753 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
15755 /* Move register range backwards, if we might have destructive
15756 overlap. */
15757 int i;
15758 for (i = nregs - 1; i >= 0; i--)
15759 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15760 i * reg_mode_size),
15761 simplify_gen_subreg (reg_mode, src, mode,
15762 i * reg_mode_size)));
15764 else
15766 int i;
15767 int j = -1;
15768 bool used_update = false;
15769 rtx restore_basereg = NULL_RTX;
15771 if (MEM_P (src) && INT_REGNO_P (reg))
15773 rtx breg;
15775 if (GET_CODE (XEXP (src, 0)) == PRE_INC
15776 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
15778 rtx delta_rtx;
15779 breg = XEXP (XEXP (src, 0), 0);
15780 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
15781 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
15782 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
15783 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15784 src = replace_equiv_address (src, breg);
15786 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
15788 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
15790 rtx basereg = XEXP (XEXP (src, 0), 0);
15791 if (TARGET_UPDATE)
15793 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
15794 emit_insn (gen_rtx_SET (ndst,
15795 gen_rtx_MEM (reg_mode,
15796 XEXP (src, 0))));
15797 used_update = true;
15799 else
15800 emit_insn (gen_rtx_SET (basereg,
15801 XEXP (XEXP (src, 0), 1)));
15802 src = replace_equiv_address (src, basereg);
15804 else
15806 rtx basereg = gen_rtx_REG (Pmode, reg);
15807 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
15808 src = replace_equiv_address (src, basereg);
15812 breg = XEXP (src, 0);
15813 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
15814 breg = XEXP (breg, 0);
15816 /* If the base register we are using to address memory is
15817 also a destination reg, then change that register last. */
15818 if (REG_P (breg)
15819 && REGNO (breg) >= REGNO (dst)
15820 && REGNO (breg) < REGNO (dst) + nregs)
15821 j = REGNO (breg) - REGNO (dst);
15823 else if (MEM_P (dst) && INT_REGNO_P (reg))
15825 rtx breg;
15827 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
15828 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
15830 rtx delta_rtx;
15831 breg = XEXP (XEXP (dst, 0), 0);
15832 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
15833 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
15834 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
15836 /* We have to update the breg before doing the store.
15837 Use store with update, if available. */
15839 if (TARGET_UPDATE)
15841 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15842 emit_insn (TARGET_32BIT
15843 ? (TARGET_POWERPC64
15844 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
15845 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
15846 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
15847 used_update = true;
15849 else
15850 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
15851 dst = replace_equiv_address (dst, breg);
15853 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
15854 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
15856 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
15858 rtx basereg = XEXP (XEXP (dst, 0), 0);
15859 if (TARGET_UPDATE)
15861 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
15862 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
15863 XEXP (dst, 0)),
15864 nsrc));
15865 used_update = true;
15867 else
15868 emit_insn (gen_rtx_SET (basereg,
15869 XEXP (XEXP (dst, 0), 1)));
15870 dst = replace_equiv_address (dst, basereg);
15872 else
15874 rtx basereg = XEXP (XEXP (dst, 0), 0);
15875 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
15876 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
15877 && REG_P (basereg)
15878 && REG_P (offsetreg)
15879 && REGNO (basereg) != REGNO (offsetreg));
15880 if (REGNO (basereg) == 0)
15882 rtx tmp = offsetreg;
15883 offsetreg = basereg;
15884 basereg = tmp;
15886 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
15887 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
15888 dst = replace_equiv_address (dst, basereg);
15891 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
15892 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
15895 for (i = 0; i < nregs; i++)
15897 /* Calculate index to next subword. */
15898 ++j;
15899 if (j == nregs)
15900 j = 0;
15902 /* If the compiler already emitted the move of the first word by
15903 a store with update, there is no need to do anything. */
15904 if (j == 0 && used_update)
15905 continue;
15907 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
15908 j * reg_mode_size),
15909 simplify_gen_subreg (reg_mode, src, mode,
15910 j * reg_mode_size)));
15912 if (restore_basereg != NULL_RTX)
15913 emit_insn (restore_basereg);
15917 static GTY(()) alias_set_type TOC_alias_set = -1;
15919 alias_set_type
15920 get_TOC_alias_set (void)
15922 if (TOC_alias_set == -1)
15923 TOC_alias_set = new_alias_set ();
15924 return TOC_alias_set;
15927 /* The mode the ABI uses for a word. This is not the same as word_mode
15928 for -m32 -mpowerpc64. This is used to implement various target hooks. */
15930 static scalar_int_mode
15931 rs6000_abi_word_mode (void)
15933 return TARGET_32BIT ? SImode : DImode;
15936 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
15937 static char *
15938 rs6000_offload_options (void)
15940 if (TARGET_64BIT)
15941 return xstrdup ("-foffload-abi=lp64");
15942 else
15943 return xstrdup ("-foffload-abi=ilp32");
15947 /* A quick summary of the various types of 'constant-pool tables'
15948 under PowerPC:
15950	Target		Flags		Name		One table per
15951	AIX		(none)		AIX TOC		object file
15952	AIX		-mfull-toc	AIX TOC		object file
15953	AIX		-mminimal-toc	AIX minimal TOC	translation unit
15954	SVR4/EABI	(none)		SVR4 SDATA	object file
15955	SVR4/EABI	-fpic		SVR4 pic	object file
15956	SVR4/EABI	-fPIC		SVR4 PIC	translation unit
15957	SVR4/EABI	-mrelocatable	EABI TOC	function
15958	SVR4/EABI	-maix		AIX TOC		object file
15959	SVR4/EABI	-maix -mminimal-toc
15960			AIX minimal TOC	translation unit
15962	Name			Reg.	Set by	entries	contains:
15963				made by	addrs?	fp?	sum?
15965	AIX TOC			2	crt0	as	Y	option	option
15966	AIX minimal TOC		30	prolog	gcc	Y	Y	option
15967	SVR4 SDATA		13	crt0	gcc	N	Y	N
15968	SVR4 pic		30	prolog	ld	Y	not yet	N
15969	SVR4 PIC		30	prolog	gcc	Y	option	option
15970	EABI TOC		30	prolog	gcc	Y	option	option
15974 /* Hash functions for the hash table. */
15976 static unsigned
15977 rs6000_hash_constant (rtx k)
15979 enum rtx_code code = GET_CODE (k);
15980 machine_mode mode = GET_MODE (k);
15981 unsigned result = (code << 3) ^ mode;
15982 const char *format;
15983 int flen, fidx;
15985 format = GET_RTX_FORMAT (code);
15986 flen = strlen (format);
15987 fidx = 0;
15989 switch (code)
15991 case LABEL_REF:
15992 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
15994 case CONST_WIDE_INT:
15996 int i;
15997 flen = CONST_WIDE_INT_NUNITS (k);
15998 for (i = 0; i < flen; i++)
15999 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
16000 return result;
16003 case CONST_DOUBLE:
16004 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
16006 case CODE_LABEL:
16007 fidx = 3;
16008 break;
16010 default:
16011 break;
16014 for (; fidx < flen; fidx++)
16015 switch (format[fidx])
16017 case 's':
16019 unsigned i, len;
16020 const char *str = XSTR (k, fidx);
16021 len = strlen (str);
16022 result = result * 613 + len;
16023 for (i = 0; i < len; i++)
16024 result = result * 613 + (unsigned) str[i];
16025 break;
16027 case 'u':
16028 case 'e':
16029 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
16030 break;
16031 case 'i':
16032 case 'n':
16033 result = result * 613 + (unsigned) XINT (k, fidx);
16034 break;
16035 case 'w':
16036 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
16037 result = result * 613 + (unsigned) XWINT (k, fidx);
16038 else
16040 size_t i;
16041 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
16042 result = result * 613 + (unsigned) (XWINT (k, fidx)
16043 >> CHAR_BIT * i);
16045 break;
16046 case '0':
16047 break;
16048 default:
16049 gcc_unreachable ();
16052 return result;
16055 hashval_t
16056 toc_hasher::hash (toc_hash_struct *thc)
16058 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
16061 /* Compare H1 and H2 for equivalence. */
16063 bool
16064 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
16066 rtx r1 = h1->key;
16067 rtx r2 = h2->key;
16069 if (h1->key_mode != h2->key_mode)
16070 return 0;
16072 return rtx_equal_p (r1, r2);
16075 /* These are the names given by the C++ front-end to vtables, and
16076 vtable-like objects. Ideally, this logic should not be here;
16077 instead, there should be some programmatic way of inquiring as
16078 to whether or not an object is a vtable. */
16080 #define VTABLE_NAME_P(NAME) \
16081 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
16082 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
16083 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
16084 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
16085 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
16087 #ifdef NO_DOLLAR_IN_LABEL
16088 /* Return a GGC-allocated character string translating dollar signs in
16089 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
16091 const char *
16092 rs6000_xcoff_strip_dollar (const char *name)
16094 char *strip, *p;
16095 const char *q;
16096 size_t len;
16098 q = (const char *) strchr (name, '$');
16100 if (q == 0 || q == name)
16101 return name;
16103 len = strlen (name);
16104 strip = XALLOCAVEC (char, len + 1);
16105 strcpy (strip, name);
16106 p = strip + (q - name);
16107 while (p)
16109 *p = '_';
16110 p = strchr (p + 1, '$');
16113 return ggc_alloc_string (strip, len);
16115 #endif
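/* For example, rs6000_xcoff_strip_dollar ("f$g$h") returns "f_g_h", while
   a NAME with no '$', or with '$' only in the first position, is returned
   unchanged.  */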
16117 void
16118 rs6000_output_symbol_ref (FILE *file, rtx x)
16120 const char *name = XSTR (x, 0);
16122 /* Currently C++ toc references to vtables can be emitted before it
16123 is decided whether the vtable is public or private. If this is
16124 the case, then the linker will eventually complain that there is
16125 a reference to an unknown section. Thus, for vtables only,
16126 we emit the TOC reference to reference the identifier and not the
16127 symbol. */
16128 if (VTABLE_NAME_P (name))
16130 RS6000_OUTPUT_BASENAME (file, name);
16132 else
16133 assemble_name (file, name);
16136 /* Output a TOC entry. We derive the entry name from what is being
16137 written. */
16139 void
16140 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
16142 char buf[256];
16143 const char *name = buf;
16144 rtx base = x;
16145 HOST_WIDE_INT offset = 0;
16147 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
16149 /* When the linker won't eliminate them, don't output duplicate
16150 TOC entries (this happens on AIX if there is any kind of TOC,
16151 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
16152 CODE_LABELs. */
16153 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
16155 struct toc_hash_struct *h;
16157 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
16158 time because GGC is not initialized at that point. */
16159 if (toc_hash_table == NULL)
16160 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
16162 h = ggc_alloc<toc_hash_struct> ();
16163 h->key = x;
16164 h->key_mode = mode;
16165 h->labelno = labelno;
16167 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
16168 if (*found == NULL)
16169 *found = h;
16170 else /* This is indeed a duplicate.
16171 Set this label equal to that label. */
16173 fputs ("\t.set ", file);
16174 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16175 fprintf (file, "%d,", labelno);
16176 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
16177 fprintf (file, "%d\n", ((*found)->labelno));
16179 #ifdef HAVE_AS_TLS
16180 if (TARGET_XCOFF && SYMBOL_REF_P (x)
16181 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
16182 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
16184 fputs ("\t.set ", file);
16185 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16186 fprintf (file, "%d,", labelno);
16187 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
16188 fprintf (file, "%d\n", ((*found)->labelno));
16190 #endif
16191 return;
16195 /* If we're going to put a double constant in the TOC, make sure it's
16196 aligned properly when strict alignment is on. */
16197 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
16198 && STRICT_ALIGNMENT
16199 && GET_MODE_BITSIZE (mode) >= 64
16200 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
16201 ASM_OUTPUT_ALIGN (file, 3);
16204 (*targetm.asm_out.internal_label) (file, "LC", labelno);
16206 /* Handle FP constants specially. Note that if we have a minimal
16207 TOC, things we put here aren't actually in the TOC, so we can allow
16208 FP constants. */
16209 if (CONST_DOUBLE_P (x)
16210 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
16211 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
16213 long k[4];
16215 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16216 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
16217 else
16218 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16220 if (TARGET_64BIT)
16222 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16223 fputs (DOUBLE_INT_ASM_OP, file);
16224 else
16225 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16226 k[0] & 0xffffffff, k[1] & 0xffffffff,
16227 k[2] & 0xffffffff, k[3] & 0xffffffff);
16228 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
16229 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16230 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
16231 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
16232 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
16233 return;
16235 else
16237 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16238 fputs ("\t.long ", file);
16239 else
16240 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
16241 k[0] & 0xffffffff, k[1] & 0xffffffff,
16242 k[2] & 0xffffffff, k[3] & 0xffffffff);
16243 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
16244 k[0] & 0xffffffff, k[1] & 0xffffffff,
16245 k[2] & 0xffffffff, k[3] & 0xffffffff);
16246 return;
16249 else if (CONST_DOUBLE_P (x)
16250 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
16252 long k[2];
16254 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16255 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
16256 else
16257 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
16259 if (TARGET_64BIT)
16261 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16262 fputs (DOUBLE_INT_ASM_OP, file);
16263 else
16264 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16265 k[0] & 0xffffffff, k[1] & 0xffffffff);
16266 fprintf (file, "0x%lx%08lx\n",
16267 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
16268 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
16269 return;
16271 else
16273 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16274 fputs ("\t.long ", file);
16275 else
16276 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
16277 k[0] & 0xffffffff, k[1] & 0xffffffff);
16278 fprintf (file, "0x%lx,0x%lx\n",
16279 k[0] & 0xffffffff, k[1] & 0xffffffff);
16280 return;
16283 else if (CONST_DOUBLE_P (x)
16284 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
16286 long l;
16288 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
16289 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
16290 else
16291 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
16293 if (TARGET_64BIT)
16295 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16296 fputs (DOUBLE_INT_ASM_OP, file);
16297 else
16298 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16299 if (WORDS_BIG_ENDIAN)
16300 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
16301 else
16302 fprintf (file, "0x%lx\n", l & 0xffffffff);
16303 return;
16305 else
16307 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16308 fputs ("\t.long ", file);
16309 else
16310 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
16311 fprintf (file, "0x%lx\n", l & 0xffffffff);
16312 return;
16315 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
16317 unsigned HOST_WIDE_INT low;
16318 HOST_WIDE_INT high;
16320 low = INTVAL (x) & 0xffffffff;
16321 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
16323 /* TOC entries are always Pmode-sized, so when big-endian
16324 smaller integer constants in the TOC need to be padded.
16325 (This is still a win over putting the constants in
16326 a separate constant pool, because then we'd have
16327 to have both a TOC entry _and_ the actual constant.)
16329 For a 32-bit target, CONST_INT values are loaded and shifted
16330 entirely within `low' and can be stored in one TOC entry. */
16332 /* It would be easy to make this work, but it doesn't now. */
16333 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
16335 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
16337 low |= high << 32;
16338 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
16339 high = (HOST_WIDE_INT) low >> 32;
16340 low &= 0xffffffff;
16343 if (TARGET_64BIT)
16345 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16346 fputs (DOUBLE_INT_ASM_OP, file);
16347 else
16348 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16349 (long) high & 0xffffffff, (long) low & 0xffffffff);
16350 fprintf (file, "0x%lx%08lx\n",
16351 (long) high & 0xffffffff, (long) low & 0xffffffff);
16352 return;
16354 else
16356 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
16358 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16359 fputs ("\t.long ", file);
16360 else
16361 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
16362 (long) high & 0xffffffff, (long) low & 0xffffffff);
16363 fprintf (file, "0x%lx,0x%lx\n",
16364 (long) high & 0xffffffff, (long) low & 0xffffffff);
16366 else
16368 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16369 fputs ("\t.long ", file);
16370 else
16371 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
16372 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
16374 return;
16378 if (GET_CODE (x) == CONST)
16380 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
16381 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
16383 base = XEXP (XEXP (x, 0), 0);
16384 offset = INTVAL (XEXP (XEXP (x, 0), 1));
16387 switch (GET_CODE (base))
16389 case SYMBOL_REF:
16390 name = XSTR (base, 0);
16391 break;
16393 case LABEL_REF:
16394 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
16395 CODE_LABEL_NUMBER (XEXP (base, 0)));
16396 break;
16398 case CODE_LABEL:
16399 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
16400 break;
16402 default:
16403 gcc_unreachable ();
16406 if (TARGET_ELF || TARGET_MINIMAL_TOC)
16407 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
16408 else
16410 fputs ("\t.tc ", file);
16411 RS6000_OUTPUT_BASENAME (file, name);
16413 if (offset < 0)
16414 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
16415 else if (offset)
16416 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
16418 /* Mark large TOC symbols on AIX with [TE] so they are mapped
16419 after other TOC symbols, reducing overflow of small TOC access
16420 to [TC] symbols. */
16421 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
16422 ? "[TE]," : "[TC],", file);
16425 /* Currently C++ toc references to vtables can be emitted before it
16426 is decided whether the vtable is public or private. If this is
16427 the case, then the linker will eventually complain that there is
16428 a TOC reference to an unknown section. Thus, for vtables only,
16429 we emit the TOC reference to reference the symbol and not the
16430 section. */
16431 if (VTABLE_NAME_P (name))
16433 RS6000_OUTPUT_BASENAME (file, name);
16434 if (offset < 0)
16435 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
16436 else if (offset > 0)
16437 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
16439 else
16440 output_addr_const (file, x);
16442 #if HAVE_AS_TLS
16443 if (TARGET_XCOFF && SYMBOL_REF_P (base))
16445 switch (SYMBOL_REF_TLS_MODEL (base))
16447 case 0:
16448 break;
16449 case TLS_MODEL_LOCAL_EXEC:
16450 fputs ("@le", file);
16451 break;
16452 case TLS_MODEL_INITIAL_EXEC:
16453 fputs ("@ie", file);
16454 break;
16455 /* Use global-dynamic for local-dynamic. */
16456 case TLS_MODEL_GLOBAL_DYNAMIC:
16457 case TLS_MODEL_LOCAL_DYNAMIC:
16458 putc ('\n', file);
16459 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
16460 fputs ("\t.tc .", file);
16461 RS6000_OUTPUT_BASENAME (file, name);
16462 fputs ("[TC],", file);
16463 output_addr_const (file, x);
16464 fputs ("@m", file);
16465 break;
16466 default:
16467 gcc_unreachable ();
16470 #endif
16472 putc ('\n', file);
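/* A sample of what the code above emits (illustrative, assuming 64-bit
   AIX without -mminimal-toc): the DFmode constant 1.0 has target words
   k[0] = 0x3ff00000 and k[1] = 0x0, so its entry is

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while ELF targets emit just DOUBLE_INT_ASM_OP followed by the value.  */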
16475 /* Output an assembler pseudo-op to write an ASCII string of N characters
16476 starting at P to FILE.
16478 On the RS/6000, we have to do this using the .byte operation and
16479 write out special characters outside the quoted string.
16480 Also, the assembler is broken; very long strings are truncated,
16481 so we must artificially break them up early. */
16483 void
16484 output_ascii (FILE *file, const char *p, int n)
16486 char c;
16487 int i, count_string;
16488 const char *for_string = "\t.byte \"";
16489 const char *for_decimal = "\t.byte ";
16490 const char *to_close = NULL;
16492 count_string = 0;
16493 for (i = 0; i < n; i++)
16495 c = *p++;
16496 if (c >= ' ' && c < 0177)
16498 if (for_string)
16499 fputs (for_string, file);
16500 putc (c, file);
16502 /* Write two quotes to get one. */
16503 if (c == '"')
16505 putc (c, file);
16506 ++count_string;
16509 for_string = NULL;
16510 for_decimal = "\"\n\t.byte ";
16511 to_close = "\"\n";
16512 ++count_string;
16514 if (count_string >= 512)
16516 fputs (to_close, file);
16518 for_string = "\t.byte \"";
16519 for_decimal = "\t.byte ";
16520 to_close = NULL;
16521 count_string = 0;
16524 else
16526 if (for_decimal)
16527 fputs (for_decimal, file);
16528 fprintf (file, "%d", c);
16530 for_string = "\n\t.byte \"";
16531 for_decimal = ", ";
16532 to_close = "\n";
16533 count_string = 0;
16537 /* Now close the string if we have written one. Then end the line. */
16538 if (to_close)
16539 fputs (to_close, file);
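/* For example, output_ascii (file, "A\n", 2) produces (traced from the
   loop above):

	.byte "A"
	.byte 10

   printable runs stay inside one quoted string; other bytes are written
   in decimal.  */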
16542 /* Generate a unique section name for FILENAME for a section type
16543 represented by SECTION_DESC. Output goes into BUF.
16545 SECTION_DESC can be any string, as long as it is different for each
16546 possible section type.
16548 We name the section in the same manner as xlc. The name begins with an
16549 underscore followed by the filename (after stripping any leading directory
16550 names) with the last period replaced by the string SECTION_DESC. If
16551 FILENAME does not contain a period, SECTION_DESC is appended to the end of
16552 the name. */
16554 void
16555 rs6000_gen_section_name (char **buf, const char *filename,
16556 const char *section_desc)
16558 const char *q, *after_last_slash, *last_period = 0;
16559 char *p;
16560 int len;
16562 after_last_slash = filename;
16563 for (q = filename; *q; q++)
16565 if (*q == '/')
16566 after_last_slash = q + 1;
16567 else if (*q == '.')
16568 last_period = q;
16571 len = strlen (after_last_slash) + strlen (section_desc) + 2;
16572 *buf = (char *) xmalloc (len);
16574 p = *buf;
16575 *p++ = '_';
16577 for (q = after_last_slash; *q; q++)
16579 if (q == last_period)
16581 strcpy (p, section_desc);
16582 p += strlen (section_desc);
16583 break;
16586 else if (ISALNUM (*q))
16587 *p++ = *q;
16590 if (last_period == 0)
16591 strcpy (p, section_desc);
16592 else
16593 *p = '\0';
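/* For example (an illustration of the rules above),
   rs6000_gen_section_name (&buf, "src/foo.c", "data") yields "_foodata":
   the directory part is stripped, non-alphanumeric characters are dropped,
   and everything from the last period onward is replaced by
   SECTION_DESC.  */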
16596 /* Emit profile function. */
16598 void
16599 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
16601 /* Non-standard profiling for kernels, which just saves LR then calls
16602 _mcount without worrying about arg saves. The idea is to change
16603 the function prologue as little as possible as it isn't easy to
16604 account for arg save/restore code added just for _mcount. */
16605 if (TARGET_PROFILE_KERNEL)
16606 return;
16608 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
16610 #ifndef NO_PROFILE_COUNTERS
16611 # define NO_PROFILE_COUNTERS 0
16612 #endif
16613 if (NO_PROFILE_COUNTERS)
16614 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16615 LCT_NORMAL, VOIDmode);
16616 else
16618 char buf[30];
16619 const char *label_name;
16620 rtx fun;
16622 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16623 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
16624 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
16626 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
16627 LCT_NORMAL, VOIDmode, fun, Pmode);
16630 else if (DEFAULT_ABI == ABI_DARWIN)
16632 const char *mcount_name = RS6000_MCOUNT;
16633 int caller_addr_regno = LR_REGNO;
16635 /* Be conservative and always set this, at least for now. */
16636 crtl->uses_pic_offset_table = 1;
16638 #if TARGET_MACHO
16639 /* For PIC code, set up a stub and collect the caller's address
16640 from r0, which is where the prologue puts it. */
16641 if (MACHOPIC_INDIRECT
16642 && crtl->uses_pic_offset_table)
16643 caller_addr_regno = 0;
16644 #endif
16645 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
16646 LCT_NORMAL, VOIDmode,
16647 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
16651 /* Write function profiler code. */
16653 void
16654 output_function_profiler (FILE *file, int labelno)
16656 char buf[100];
16658 switch (DEFAULT_ABI)
16660 default:
16661 gcc_unreachable ();
16663 case ABI_V4:
16664 if (!TARGET_32BIT)
16666 warning (0, "no profiling of 64-bit code for this ABI");
16667 return;
16669 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
16670 fprintf (file, "\tmflr %s\n", reg_names[0]);
16671 if (NO_PROFILE_COUNTERS)
16673 asm_fprintf (file, "\tstw %s,4(%s)\n",
16674 reg_names[0], reg_names[1]);
16676 else if (TARGET_SECURE_PLT && flag_pic)
16678 if (TARGET_LINK_STACK)
16680 char name[32];
16681 get_ppc476_thunk_name (name);
16682 asm_fprintf (file, "\tbl %s\n", name);
16684 else
16685 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
16686 asm_fprintf (file, "\tstw %s,4(%s)\n",
16687 reg_names[0], reg_names[1]);
16688 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16689 asm_fprintf (file, "\taddis %s,%s,",
16690 reg_names[12], reg_names[12]);
16691 assemble_name (file, buf);
16692 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
16693 assemble_name (file, buf);
16694 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
16696 else if (flag_pic == 1)
16698 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
16699 asm_fprintf (file, "\tstw %s,4(%s)\n",
16700 reg_names[0], reg_names[1]);
16701 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
16702 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
16703 assemble_name (file, buf);
16704 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
16706 else if (flag_pic > 1)
16708 asm_fprintf (file, "\tstw %s,4(%s)\n",
16709 reg_names[0], reg_names[1]);
16710 /* Now, we need to get the address of the label. */
16711 if (TARGET_LINK_STACK)
16713 char name[32];
16714 get_ppc476_thunk_name (name);
16715 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
16716 assemble_name (file, buf);
16717 fputs ("-.\n1:", file);
16718 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16719 asm_fprintf (file, "\taddi %s,%s,4\n",
16720 reg_names[11], reg_names[11]);
16722 else
16724 fputs ("\tbcl 20,31,1f\n\t.long ", file);
16725 assemble_name (file, buf);
16726 fputs ("-.\n1:", file);
16727 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
16729 asm_fprintf (file, "\tlwz %s,0(%s)\n",
16730 reg_names[0], reg_names[11]);
16731 asm_fprintf (file, "\tadd %s,%s,%s\n",
16732 reg_names[0], reg_names[0], reg_names[11]);
16734 else
16736 asm_fprintf (file, "\tlis %s,", reg_names[12]);
16737 assemble_name (file, buf);
16738 fputs ("@ha\n", file);
16739 asm_fprintf (file, "\tstw %s,4(%s)\n",
16740 reg_names[0], reg_names[1]);
16741 asm_fprintf (file, "\tla %s,", reg_names[0]);
16742 assemble_name (file, buf);
16743 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
16746 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
16747 fprintf (file, "\tbl %s%s\n",
16748 RS6000_MCOUNT, flag_pic ? "@plt" : "");
16749 break;
16751 case ABI_AIX:
16752 case ABI_ELFv2:
16753 case ABI_DARWIN:
16754 /* Don't do anything, done in output_profile_hook (). */
16755 break;
16761 /* The following variable value is the last issued insn. */
16763 static rtx_insn *last_scheduled_insn;
16765 /* The following variable helps to balance issuing of load and
16766 store instructions. */
16768 static int load_store_pendulum;
16770 /* The following variable helps pair divide insns during scheduling. */
16771 static int divide_cnt;
16772 /* The following variable helps pair and alternate vector and vector load
16773 insns during scheduling. */
16774 static int vec_pairing;
16777 /* Power4 load update and store update instructions are cracked into a
16778 load or store and an integer insn which are executed in the same cycle.
16779 Branches have their own dispatch slot which does not count against the
16780 GCC issue rate, but it changes the program flow so there are no other
16781 instructions to issue in this cycle. */
16783 static int
16784 rs6000_variable_issue_1 (rtx_insn *insn, int more)
16786 last_scheduled_insn = insn;
16787 if (GET_CODE (PATTERN (insn)) == USE
16788 || GET_CODE (PATTERN (insn)) == CLOBBER)
16790 cached_can_issue_more = more;
16791 return cached_can_issue_more;
16794 if (insn_terminates_group_p (insn, current_group))
16796 cached_can_issue_more = 0;
16797 return cached_can_issue_more;
16800 /* If the insn has no reservation but we reach here, leave MORE unchanged. */
16801 if (recog_memoized (insn) < 0)
16802 return more;
16804 if (rs6000_sched_groups)
16806 if (is_microcoded_insn (insn))
16807 cached_can_issue_more = 0;
16808 else if (is_cracked_insn (insn))
16809 cached_can_issue_more = more > 2 ? more - 2 : 0;
16810 else
16811 cached_can_issue_more = more - 1;
16813 return cached_can_issue_more;
16816 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
16817 return 0;
16819 cached_can_issue_more = more - 1;
16820 return cached_can_issue_more;
16823 static int
16824 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
16826 int r = rs6000_variable_issue_1 (insn, more);
16827 if (verbose)
16828 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
16829 return r;
16832 /* Adjust the cost of a scheduling dependency. Return the new cost of
16833 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
16835 static int
16836 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
16837 unsigned int)
16839 enum attr_type attr_type;
16841 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
16842 return cost;
16844 switch (dep_type)
16846 case REG_DEP_TRUE:
16848 /* Data dependency; DEP_INSN writes a register that INSN reads
16849 some cycles later. */
16851 /* Separate a load from a narrower, dependent store. */
16852 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
16853 || rs6000_tune == PROCESSOR_FUTURE)
16854 && GET_CODE (PATTERN (insn)) == SET
16855 && GET_CODE (PATTERN (dep_insn)) == SET
16856 && MEM_P (XEXP (PATTERN (insn), 1))
16857 && MEM_P (XEXP (PATTERN (dep_insn), 0))
16858 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
16859 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
16860 return cost + 14;
16862 attr_type = get_attr_type (insn);
16864 switch (attr_type)
16866 case TYPE_JMPREG:
16867 /* Tell the first scheduling pass about the latency between
16868 a mtctr and bctr (and mtlr and br/blr). The first
16869 scheduling pass will not know about this latency since
16870 the mtctr instruction, which has the latency associated
16871 to it, will be generated by reload. */
16872 return 4;
16873 case TYPE_BRANCH:
16874 /* Leave some extra cycles between a compare and its
16875 dependent branch, to inhibit expensive mispredicts. */
16876 if ((rs6000_tune == PROCESSOR_PPC603
16877 || rs6000_tune == PROCESSOR_PPC604
16878 || rs6000_tune == PROCESSOR_PPC604e
16879 || rs6000_tune == PROCESSOR_PPC620
16880 || rs6000_tune == PROCESSOR_PPC630
16881 || rs6000_tune == PROCESSOR_PPC750
16882 || rs6000_tune == PROCESSOR_PPC7400
16883 || rs6000_tune == PROCESSOR_PPC7450
16884 || rs6000_tune == PROCESSOR_PPCE5500
16885 || rs6000_tune == PROCESSOR_PPCE6500
16886 || rs6000_tune == PROCESSOR_POWER4
16887 || rs6000_tune == PROCESSOR_POWER5
16888 || rs6000_tune == PROCESSOR_POWER7
16889 || rs6000_tune == PROCESSOR_POWER8
16890 || rs6000_tune == PROCESSOR_POWER9
16891 || rs6000_tune == PROCESSOR_FUTURE
16892 || rs6000_tune == PROCESSOR_CELL)
16893 && recog_memoized (dep_insn)
16894 && (INSN_CODE (dep_insn) >= 0))
16896 switch (get_attr_type (dep_insn))
16898 case TYPE_CMP:
16899 case TYPE_FPCOMPARE:
16900 case TYPE_CR_LOGICAL:
16901 return cost + 2;
16902 case TYPE_EXTS:
16903 case TYPE_MUL:
16904 if (get_attr_dot (dep_insn) == DOT_YES)
16905 return cost + 2;
16906 else
16907 break;
16908 case TYPE_SHIFT:
16909 if (get_attr_dot (dep_insn) == DOT_YES
16910 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
16911 return cost + 2;
16912 else
16913 break;
16914 default:
16915 break;
16917 break;
16919 case TYPE_STORE:
16920 case TYPE_FPSTORE:
16921 if ((rs6000_tune == PROCESSOR_POWER6)
16922 && recog_memoized (dep_insn)
16923 && (INSN_CODE (dep_insn) >= 0))
16926 if (GET_CODE (PATTERN (insn)) != SET)
16927 /* If this happens, we have to extend this to schedule
16928 optimally. Return default for now. */
16929 return cost;
16931 /* Adjust the cost for the case where the value written
16932 by a fixed point operation is used as the address
16933 gen value on a store. */
16934 switch (get_attr_type (dep_insn))
16936 case TYPE_LOAD:
16937 case TYPE_CNTLZ:
16939 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16940 return get_attr_sign_extend (dep_insn)
16941 == SIGN_EXTEND_YES ? 6 : 4;
16942 break;
16944 case TYPE_SHIFT:
16946 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16947 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
16948 6 : 3;
16949 break;
16951 case TYPE_INTEGER:
16952 case TYPE_ADD:
16953 case TYPE_LOGICAL:
16954 case TYPE_EXTS:
16955 case TYPE_INSERT:
16957 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16958 return 3;
16959 break;
16961 case TYPE_STORE:
16962 case TYPE_FPLOAD:
16963 case TYPE_FPSTORE:
16965 if (get_attr_update (dep_insn) == UPDATE_YES
16966 && ! rs6000_store_data_bypass_p (dep_insn, insn))
16967 return 3;
16968 break;
16970 case TYPE_MUL:
16972 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16973 return 17;
16974 break;
16976 case TYPE_DIV:
16978 if (! rs6000_store_data_bypass_p (dep_insn, insn))
16979 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
16980 break;
16982 default:
16983 break;
16986 break;
16988 case TYPE_LOAD:
16989 if ((rs6000_tune == PROCESSOR_POWER6)
16990 && recog_memoized (dep_insn)
16991 && (INSN_CODE (dep_insn) >= 0))
16994 /* Adjust the cost for the case where the value written
16995 by a fixed point instruction is used within the address
16996 gen portion of a subsequent load(u)(x). */
16997 switch (get_attr_type (dep_insn))
16999 case TYPE_LOAD:
17000 case TYPE_CNTLZ:
17002 if (set_to_load_agen (dep_insn, insn))
17003 return get_attr_sign_extend (dep_insn)
17004 == SIGN_EXTEND_YES ? 6 : 4;
17005 break;
17007 case TYPE_SHIFT:
17009 if (set_to_load_agen (dep_insn, insn))
17010 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
17011 6 : 3;
17012 break;
17014 case TYPE_INTEGER:
17015 case TYPE_ADD:
17016 case TYPE_LOGICAL:
17017 case TYPE_EXTS:
17018 case TYPE_INSERT:
17020 if (set_to_load_agen (dep_insn, insn))
17021 return 3;
17022 break;
17024 case TYPE_STORE:
17025 case TYPE_FPLOAD:
17026 case TYPE_FPSTORE:
17028 if (get_attr_update (dep_insn) == UPDATE_YES
17029 && set_to_load_agen (dep_insn, insn))
17030 return 3;
17031 break;
17033 case TYPE_MUL:
17035 if (set_to_load_agen (dep_insn, insn))
17036 return 17;
17037 break;
17039 case TYPE_DIV:
17041 if (set_to_load_agen (dep_insn, insn))
17042 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
17043 break;
17045 default:
17046 break;
17049 break;
17051 case TYPE_FPLOAD:
17052 if ((rs6000_tune == PROCESSOR_POWER6)
17053 && get_attr_update (insn) == UPDATE_NO
17054 && recog_memoized (dep_insn)
17055 && (INSN_CODE (dep_insn) >= 0)
17056 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
17057 return 2;
17059 default:
17060 break;
17063 /* Fall out to return default cost. */
17065 break;
17067 case REG_DEP_OUTPUT:
17068 /* Output dependency; DEP_INSN writes a register that INSN writes some
17069 cycles later. */
17070 if ((rs6000_tune == PROCESSOR_POWER6)
17071 && recog_memoized (dep_insn)
17072 && (INSN_CODE (dep_insn) >= 0))
17074 attr_type = get_attr_type (insn);
17076 switch (attr_type)
17078 case TYPE_FP:
17079 case TYPE_FPSIMPLE:
17080 if (get_attr_type (dep_insn) == TYPE_FP
17081 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
17082 return 1;
17083 break;
17084 case TYPE_FPLOAD:
17085 if (get_attr_update (insn) == UPDATE_NO
17086 && get_attr_type (dep_insn) == TYPE_MFFGPR)
17087 return 2;
17088 break;
17089 default:
17090 break;
17093 /* Fall through, no cost for output dependency. */
17094 /* FALLTHRU */
17096 case REG_DEP_ANTI:
17097 /* Anti dependency; DEP_INSN reads a register that INSN writes some
17098 cycles later. */
17099 return 0;
17101 default:
17102 gcc_unreachable ();
17105 return cost;
17108 /* Debug version of rs6000_adjust_cost. */
17110 static int
17111 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
17112 int cost, unsigned int dw)
17114 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
17116 if (ret != cost)
17118 const char *dep;
17120 switch (dep_type)
17122 default: dep = "unknown dependency"; break;
17123 case REG_DEP_TRUE: dep = "data dependency"; break;
17124 case REG_DEP_OUTPUT: dep = "output dependency"; break;
17125 case REG_DEP_ANTI: dep = "anti dependency"; break;
17128 fprintf (stderr,
17129 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
17130 "%s, insn:\n", ret, cost, dep);
17132 debug_rtx (insn);
17135 return ret;
17138 /* Return true if INSN is microcoded, false otherwise. */
17141 static bool
17142 is_microcoded_insn (rtx_insn *insn)
17144 if (!insn || !NONDEBUG_INSN_P (insn)
17145 || GET_CODE (PATTERN (insn)) == USE
17146 || GET_CODE (PATTERN (insn)) == CLOBBER)
17147 return false;
17149 if (rs6000_tune == PROCESSOR_CELL)
17150 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
17152 if (rs6000_sched_groups
17153 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17155 enum attr_type type = get_attr_type (insn);
17156 if ((type == TYPE_LOAD
17157 && get_attr_update (insn) == UPDATE_YES
17158 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
17159 || ((type == TYPE_LOAD || type == TYPE_STORE)
17160 && get_attr_update (insn) == UPDATE_YES
17161 && get_attr_indexed (insn) == INDEXED_YES)
17162 || type == TYPE_MFCR)
17163 return true;
17166 return false;
17169 /* The function returns true if INSN is cracked into 2 instructions
17170 by the processor (and therefore occupies 2 issue slots). */
17172 static bool
17173 is_cracked_insn (rtx_insn *insn)
17175 if (!insn || !NONDEBUG_INSN_P (insn)
17176 || GET_CODE (PATTERN (insn)) == USE
17177 || GET_CODE (PATTERN (insn)) == CLOBBER)
17178 return false;
17180 if (rs6000_sched_groups
17181 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
17183 enum attr_type type = get_attr_type (insn);
17184 if ((type == TYPE_LOAD
17185 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
17186 && get_attr_update (insn) == UPDATE_NO)
17187 || (type == TYPE_LOAD
17188 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
17189 && get_attr_update (insn) == UPDATE_YES
17190 && get_attr_indexed (insn) == INDEXED_NO)
17191 || (type == TYPE_STORE
17192 && get_attr_update (insn) == UPDATE_YES
17193 && get_attr_indexed (insn) == INDEXED_NO)
17194 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
17195 && get_attr_update (insn) == UPDATE_YES)
17196 || (type == TYPE_CR_LOGICAL
17197 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
17198 || (type == TYPE_EXTS
17199 && get_attr_dot (insn) == DOT_YES)
17200 || (type == TYPE_SHIFT
17201 && get_attr_dot (insn) == DOT_YES
17202 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
17203 || (type == TYPE_MUL
17204 && get_attr_dot (insn) == DOT_YES)
17205 || type == TYPE_DIV
17206 || (type == TYPE_INSERT
17207 && get_attr_size (insn) == SIZE_32))
17208 return true;
17211 return false;
17214 /* The function returns true if INSN can be issued only from
17215 the branch slot. */
17217 static bool
17218 is_branch_slot_insn (rtx_insn *insn)
17220 if (!insn || !NONDEBUG_INSN_P (insn)
17221 || GET_CODE (PATTERN (insn)) == USE
17222 || GET_CODE (PATTERN (insn)) == CLOBBER)
17223 return false;
17225 if (rs6000_sched_groups)
17227 enum attr_type type = get_attr_type (insn);
17228 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
17229 return true;
17230 return false;
17233 return false;
17236 /* Return true if OUT_INSN sets a value that is
17237 used in the address generation computation of IN_INSN. */
17238 static bool
17239 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
17241 rtx out_set, in_set;
17243 /* For performance reasons, only handle the simple case where
17244 both insns are a single_set. */
17245 out_set = single_set (out_insn);
17246 if (out_set)
17248 in_set = single_set (in_insn);
17249 if (in_set)
17250 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
17253 return false;
17256 /* Try to determine base/offset/size parts of the given MEM.
17257 Return true if successful, false if the values couldn't
17258 all be determined.
17260 This function only looks for REG or REG+CONST address forms.
17261 REG+REG address form will return false. */
17263 static bool
17264 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
17265 HOST_WIDE_INT *size)
17267 rtx addr_rtx;
17268 if (MEM_SIZE_KNOWN_P (mem))
17269 *size = MEM_SIZE (mem);
17270 else
17271 return false;
17273 addr_rtx = XEXP (mem, 0);
17274 if (GET_CODE (addr_rtx) == PRE_MODIFY)
17275 addr_rtx = XEXP (addr_rtx, 1);
17277 *offset = 0;
17278 while (GET_CODE (addr_rtx) == PLUS
17279 && CONST_INT_P (XEXP (addr_rtx, 1)))
17281 *offset += INTVAL (XEXP (addr_rtx, 1));
17282 addr_rtx = XEXP (addr_rtx, 0);
17284 if (!REG_P (addr_rtx))
17285 return false;
17287 *base = addr_rtx;
17288 return true;
17291 /* Return true if the target storage location of MEM1 is
17292 adjacent to the target storage location of MEM2. */
17295 static bool
17296 adjacent_mem_locations (rtx mem1, rtx mem2)
17298 rtx reg1, reg2;
17299 HOST_WIDE_INT off1, size1, off2, size2;
17301 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17302 && get_memref_parts (mem2, &reg2, &off2, &size2))
17303 return ((REGNO (reg1) == REGNO (reg2))
17304 && ((off1 + size1 == off2)
17305 || (off2 + size2 == off1)));
17307 return false;
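#if 0
/* Illustrative sketch, not part of the compiler: once get_memref_parts
   has shown that both accesses use the same base register, the
   adjacency test above is pure offset arithmetic.  A hypothetical
   stand-alone model using plain integers:  */
#include <stdbool.h>

typedef long hwi;	/* stand-in for HOST_WIDE_INT */

/* Accesses [off1, off1 + size1) and [off2, off2 + size2) from the same
   base register are adjacent when one ends exactly where the other
   begins.  */
static bool
adjacent_p (hwi off1, hwi size1, hwi off2, hwi size2)
{
  return off1 + size1 == off2 || off2 + size2 == off1;
}

/* E.g. adjacent_p (0, 8, 8, 8) is true (bytes 0..7 then 8..15), while
   adjacent_p (0, 8, 16, 8) leaves an 8-byte gap and is false.  */
#endif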
17310 /* This function returns true if it can be determined that the two MEM
17311 locations overlap by at least 1 byte based on base reg/offset/size. */
17313 static bool
17314 mem_locations_overlap (rtx mem1, rtx mem2)
17316 rtx reg1, reg2;
17317 HOST_WIDE_INT off1, size1, off2, size2;
17319 if (get_memref_parts (mem1, &reg1, &off1, &size1)
17320 && get_memref_parts (mem2, &reg2, &off2, &size2))
17321 return ((REGNO (reg1) == REGNO (reg2))
17322 && (((off1 <= off2) && (off1 + size1 > off2))
17323 || ((off2 <= off1) && (off2 + size2 > off1))));
17325 return false;
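#if 0
/* Illustrative sketch, not part of the compiler: with a common base
   register, the overlap test above is interval intersection on
   half-open ranges [off, off + size).  A hypothetical stand-alone
   model:  */
#include <stdbool.h>

typedef long hwi;	/* stand-in for HOST_WIDE_INT */

static bool
overlap_p (hwi off1, hwi size1, hwi off2, hwi size2)
{
  return (off1 <= off2 && off1 + size1 > off2)
	 || (off2 <= off1 && off2 + size2 > off1);
}

/* E.g. overlap_p (0, 8, 4, 8) is true (bytes 4..7 are shared), while
   overlap_p (0, 8, 8, 8) is false: adjacency is not overlap.  */
#endif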
17328 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
17329 Increase the priority to execute INSN earlier, reduce the
17330 priority to execute INSN later. */
17334 static int
17335 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
17337 rtx load_mem, str_mem;
17338 /* On machines (like the 750) which have asymmetric integer units,
17339 where one integer unit can do multiplies and divides and the other
17340 can't, reduce the priority of multiply/divide so it is scheduled
17341 before other integer operations. */
17343 #if 0
17344 if (! INSN_P (insn))
17345 return priority;
17347 if (GET_CODE (PATTERN (insn)) == USE)
17348 return priority;
17350 switch (rs6000_tune) {
17351 case PROCESSOR_PPC750:
17352 switch (get_attr_type (insn))
17354 default:
17355 break;
17357 case TYPE_MUL:
17358 case TYPE_DIV:
17359 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
17360 priority, priority);
17361 if (priority >= 0 && priority < 0x01000000)
17362 priority >>= 3;
17363 break;
17366 #endif
17368 if (insn_must_be_first_in_group (insn)
17369 && reload_completed
17370 && current_sched_info->sched_max_insns_priority
17371 && rs6000_sched_restricted_insns_priority)
17374 /* Prioritize insns that can be dispatched only in the first
17375 dispatch slot. */
17376 if (rs6000_sched_restricted_insns_priority == 1)
17377 /* Attach highest priority to insn. This means that in
17378 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
17379 precede 'priority' (critical path) considerations. */
17380 return current_sched_info->sched_max_insns_priority;
17381 else if (rs6000_sched_restricted_insns_priority == 2)
17382 /* Increase priority of insn by a minimal amount. This means that in
17383 haifa-sched.c:ready_sort(), only 'priority' (critical path)
17384 considerations precede dispatch-slot restriction considerations. */
17385 return (priority + 1);
17388 if (rs6000_tune == PROCESSOR_POWER6
17389 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
17390 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
17391 /* Attach highest priority to insn if the scheduler has just issued two
17392 stores and this instruction is a load, or two loads and this instruction
17393 is a store. Power6 wants loads and stores scheduled alternately
17394 when possible. */
17395 return current_sched_info->sched_max_insns_priority;
17397 return priority;
17400 /* Return true if the instruction is nonpipelined on the Cell. */
17401 static bool
17402 is_nonpipeline_insn (rtx_insn *insn)
17404 enum attr_type type;
17405 if (!insn || !NONDEBUG_INSN_P (insn)
17406 || GET_CODE (PATTERN (insn)) == USE
17407 || GET_CODE (PATTERN (insn)) == CLOBBER)
17408 return false;
17410 type = get_attr_type (insn);
17411 if (type == TYPE_MUL
17412 || type == TYPE_DIV
17413 || type == TYPE_SDIV
17414 || type == TYPE_DDIV
17415 || type == TYPE_SSQRT
17416 || type == TYPE_DSQRT
17417 || type == TYPE_MFCR
17418 || type == TYPE_MFCRF
17419 || type == TYPE_MFJMPR)
17421 return true;
17423 return false;
17427 /* Return how many instructions the machine can issue per cycle. */
17429 static int
17430 rs6000_issue_rate (void)
17432 /* Unless scheduling for register pressure, use issue rate of 1 for
17433 first scheduling pass to decrease degradation. */
17434 if (!reload_completed && !flag_sched_pressure)
17435 return 1;
17437 switch (rs6000_tune) {
17438 case PROCESSOR_RS64A:
17439 case PROCESSOR_PPC601: /* ? */
17440 case PROCESSOR_PPC7450:
17441 return 3;
17442 case PROCESSOR_PPC440:
17443 case PROCESSOR_PPC603:
17444 case PROCESSOR_PPC750:
17445 case PROCESSOR_PPC7400:
17446 case PROCESSOR_PPC8540:
17447 case PROCESSOR_PPC8548:
17448 case PROCESSOR_CELL:
17449 case PROCESSOR_PPCE300C2:
17450 case PROCESSOR_PPCE300C3:
17451 case PROCESSOR_PPCE500MC:
17452 case PROCESSOR_PPCE500MC64:
17453 case PROCESSOR_PPCE5500:
17454 case PROCESSOR_PPCE6500:
17455 case PROCESSOR_TITAN:
17456 return 2;
17457 case PROCESSOR_PPC476:
17458 case PROCESSOR_PPC604:
17459 case PROCESSOR_PPC604e:
17460 case PROCESSOR_PPC620:
17461 case PROCESSOR_PPC630:
17462 return 4;
17463 case PROCESSOR_POWER4:
17464 case PROCESSOR_POWER5:
17465 case PROCESSOR_POWER6:
17466 case PROCESSOR_POWER7:
17467 return 5;
17468 case PROCESSOR_POWER8:
17469 return 7;
17470 case PROCESSOR_POWER9:
17471 case PROCESSOR_FUTURE:
17472 return 6;
17473 default:
17474 return 1;
17478 /* Return how many instructions to look ahead for better insn
17479 scheduling. */
17481 static int
17482 rs6000_use_sched_lookahead (void)
17484 switch (rs6000_tune)
17486 case PROCESSOR_PPC8540:
17487 case PROCESSOR_PPC8548:
17488 return 4;
17490 case PROCESSOR_CELL:
17491 return (reload_completed ? 8 : 0);
17493 default:
17494 return 0;
17498 /* We are choosing insn from the ready queue. Return zero if INSN can be
17499 chosen. */
17500 static int
17501 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
17503 if (ready_index == 0)
17504 return 0;
17506 if (rs6000_tune != PROCESSOR_CELL)
17507 return 0;
17509 gcc_assert (insn != NULL_RTX && INSN_P (insn));
17511 if (!reload_completed
17512 || is_nonpipeline_insn (insn)
17513 || is_microcoded_insn (insn))
17514 return 1;
17516 return 0;
17519 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
17520 and return true. */
17522 static bool
17523 find_mem_ref (rtx pat, rtx *mem_ref)
17525 const char * fmt;
17526 int i, j;
17528 /* stack_tie does not produce any real memory traffic. */
17529 if (tie_operand (pat, VOIDmode))
17530 return false;
17532 if (MEM_P (pat))
17534 *mem_ref = pat;
17535 return true;
17538 /* Recursively process the pattern. */
17539 fmt = GET_RTX_FORMAT (GET_CODE (pat));
17541 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
17543 if (fmt[i] == 'e')
17545 if (find_mem_ref (XEXP (pat, i), mem_ref))
17546 return true;
17548 else if (fmt[i] == 'E')
17549 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
17551 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
17552 return true;
17556 return false;
17559 /* Determine if PAT is a PATTERN of a load insn. */
17561 static bool
17562 is_load_insn1 (rtx pat, rtx *load_mem)
17564 if (!pat)
17565 return false;
17567 if (GET_CODE (pat) == SET)
17568 return find_mem_ref (SET_SRC (pat), load_mem);
17570 if (GET_CODE (pat) == PARALLEL)
17572 int i;
17574 for (i = 0; i < XVECLEN (pat, 0); i++)
17575 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
17576 return true;
17579 return false;
17582 /* Determine if INSN loads from memory. */
17584 static bool
17585 is_load_insn (rtx insn, rtx *load_mem)
17587 if (!insn || !INSN_P (insn))
17588 return false;
17590 if (CALL_P (insn))
17591 return false;
17593 return is_load_insn1 (PATTERN (insn), load_mem);
17596 /* Determine if PAT is a PATTERN of a store insn. */
17598 static bool
17599 is_store_insn1 (rtx pat, rtx *str_mem)
17601 if (!pat)
17602 return false;
17604 if (GET_CODE (pat) == SET)
17605 return find_mem_ref (SET_DEST (pat), str_mem);
17607 if (GET_CODE (pat) == PARALLEL)
17609 int i;
17611 for (i = 0; i < XVECLEN (pat, 0); i++)
17612 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
17613 return true;
17616 return false;
17619 /* Determine if INSN stores to memory. */
17621 static bool
17622 is_store_insn (rtx insn, rtx *str_mem)
17624 if (!insn || !INSN_P (insn))
17625 return false;
17627 return is_store_insn1 (PATTERN (insn), str_mem);
17630 /* Return whether TYPE is a Power9 pairable vector instruction type. */
17632 static bool
17633 is_power9_pairable_vec_type (enum attr_type type)
17635 switch (type)
17637 case TYPE_VECSIMPLE:
17638 case TYPE_VECCOMPLEX:
17639 case TYPE_VECDIV:
17640 case TYPE_VECCMP:
17641 case TYPE_VECPERM:
17642 case TYPE_VECFLOAT:
17643 case TYPE_VECFDIV:
17644 case TYPE_VECDOUBLE:
17645 return true;
17646 default:
17647 break;
17649 return false;
17652 /* Returns whether the dependence between INSN and NEXT is considered
17653 costly by the given target. */
17655 static bool
17656 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
17658 rtx insn;
17659 rtx next;
17660 rtx load_mem, str_mem;
17662 /* If the flag is not enabled, no dependence is considered costly;
17663 allow all dependent insns in the same group.
17664 This is the most aggressive option. */
17665 if (rs6000_sched_costly_dep == no_dep_costly)
17666 return false;
17668 /* If the flag is set to 1, a dependence is always considered costly;
17669 do not allow dependent instructions in the same group.
17670 This is the most conservative option. */
17671 if (rs6000_sched_costly_dep == all_deps_costly)
17672 return true;
17674 insn = DEP_PRO (dep);
17675 next = DEP_CON (dep);
17677 if (rs6000_sched_costly_dep == store_to_load_dep_costly
17678 && is_load_insn (next, &load_mem)
17679 && is_store_insn (insn, &str_mem))
17680 /* Prevent load after store in the same group. */
17681 return true;
17683 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
17684 && is_load_insn (next, &load_mem)
17685 && is_store_insn (insn, &str_mem)
17686 && DEP_TYPE (dep) == REG_DEP_TRUE
17687 && mem_locations_overlap(str_mem, load_mem))
17688 /* Prevent load after store in the same group if it is a true
17689 dependence. */
17690 return true;
17692 /* The flag is set to X; dependences with latency >= X are considered costly,
17693 and will not be scheduled in the same group. */
17694 if (rs6000_sched_costly_dep <= max_dep_latency
17695 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
17696 return true;
17698 return false;
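#if 0
/* Illustrative sketch, not part of the compiler: when the costly-dep
   flag is given a plain number N (at most max_dep_latency), the final
   test above compares the dependence latency, less the distance
   already separating the two insns, against N.  A hypothetical
   stand-alone form:  */
#include <stdbool.h>

static bool
latency_makes_dep_costly_p (int cost, int distance, int threshold)
{
  return cost - distance >= threshold;
}

/* E.g. a 4-cycle dependence with 1 insn already between the pair is
   costly for threshold 3 (4 - 1 >= 3) but not for threshold 4.  */
#endif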
17701 /* Return the next insn after INSN that is found before TAIL is reached,
17702 skipping any "non-active" insns - insns that will not actually occupy
17703 an issue slot. Return NULL_RTX if such an insn is not found. */
17705 static rtx_insn *
17706 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
17708 if (insn == NULL_RTX || insn == tail)
17709 return NULL;
17711 while (1)
17713 insn = NEXT_INSN (insn);
17714 if (insn == NULL_RTX || insn == tail)
17715 return NULL;
17717 if (CALL_P (insn)
17718 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
17719 || (NONJUMP_INSN_P (insn)
17720 && GET_CODE (PATTERN (insn)) != USE
17721 && GET_CODE (PATTERN (insn)) != CLOBBER
17722 && INSN_CODE (insn) != CODE_FOR_stack_tie))
17723 break;
17725 return insn;
17728 /* Move instruction at POS to the end of the READY list. */
17730 static void
17731 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
17733 rtx_insn *tmp;
17734 int i;
17736 tmp = ready[pos];
17737 for (i = pos; i < lastpos; i++)
17738 ready[i] = ready[i + 1];
17739 ready[lastpos] = tmp;
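#if 0
/* Illustrative sketch, not part of the compiler: the helper above is a
   left rotation of ready[pos..lastpos].  Note that the scheduler issues
   from the END of the ready list, so moving an insn to ready[lastpos]
   makes it the next candidate.  A hypothetical stand-alone demo:  */
#include <stdio.h>

int
main (void)
{
  const char *ready[] = { "A", "B", "C", "D" };
  int pos = 1, lastpos = 3;

  const char *tmp = ready[pos];
  for (int i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;		/* now A C D B; B issues next */

  for (int i = 0; i <= lastpos; i++)
    printf ("%s ", ready[i]);
  printf ("\n");
  return 0;
}
#endif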
17742 /* Do Power6 specific sched_reorder2 reordering of ready list. */
17744 static int
17745 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
17747 /* For Power6, we need to handle some special cases to try to keep the
17748 store queue from overflowing and triggering expensive flushes.
17750 This code monitors how load and store instructions are being issued
17751 and skews the ready list one way or the other to increase the likelihood
17752 that a desired instruction is issued at the proper time.
17754 A couple of things are done. First, we maintain a "load_store_pendulum"
17755 to track the current state of load/store issue.
17757 - If the pendulum is at zero, then no loads or stores have been
17758 issued in the current cycle so we do nothing.
17760 - If the pendulum is 1, then a single load has been issued in this
17761 cycle and we attempt to locate another load in the ready list to
17762 issue with it.
17764 - If the pendulum is -2, then two stores have already been
17765 issued in this cycle, so we increase the priority of the first load
17766 in the ready list to increase its likelihood of being chosen first
17767 in the next cycle.
17769 - If the pendulum is -1, then a single store has been issued in this
17770 cycle and we attempt to locate another store in the ready list to
17771 issue with it, preferring a store to an adjacent memory location to
17772 facilitate store pairing in the store queue.
17774 - If the pendulum is 2, then two loads have already been
17775 issued in this cycle, so we increase the priority of the first store
17776 in the ready list to increase its likelihood of being chosen first
17777 in the next cycle.
17779 - If the pendulum < -2 or > 2, then do nothing.
17781 Note: This code covers the most common scenarios. There exist
17782 non-load/store instructions which make use of the LSU and which
17783 would need to be accounted for to strictly model the behavior
17784 of the machine. Those instructions are currently unaccounted
17785 for to help minimize the compile time overhead of this code. */
17787 int pos;
17788 rtx load_mem, str_mem;
17790 if (is_store_insn (last_scheduled_insn, &str_mem))
17791 /* Issuing a store, swing the load_store_pendulum to the left */
17792 load_store_pendulum--;
17793 else if (is_load_insn (last_scheduled_insn, &load_mem))
17794 /* Issuing a load, swing the load_store_pendulum to the right */
17795 load_store_pendulum++;
17796 else
17797 return cached_can_issue_more;
17799 /* If the pendulum is balanced, or there is only one instruction on
17800 the ready list, then all is well, so return. */
17801 if ((load_store_pendulum == 0) || (lastpos <= 0))
17802 return cached_can_issue_more;
17804 if (load_store_pendulum == 1)
17806 /* A load has been issued in this cycle. Scan the ready list
17807 for another load to issue with it */
17808 pos = lastpos;
17810 while (pos >= 0)
17812 if (is_load_insn (ready[pos], &load_mem))
17814 /* Found a load. Move it to the head of the ready list,
17815 and adjust its priority so that it is more likely to
17816 stay there. */
17817 move_to_end_of_ready (ready, pos, lastpos);
17819 if (!sel_sched_p ()
17820 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17821 INSN_PRIORITY (ready[lastpos])++;
17822 break;
17824 pos--;
17827 else if (load_store_pendulum == -2)
17829 /* Two stores have been issued in this cycle. Increase the
17830 priority of the first load in the ready list to favor it for
17831 issuing in the next cycle. */
17832 pos = lastpos;
17834 while (pos >= 0)
17836 if (is_load_insn (ready[pos], &load_mem)
17837 && !sel_sched_p ()
17838 && INSN_PRIORITY_KNOWN (ready[pos]))
17840 INSN_PRIORITY (ready[pos])++;
17842 /* Adjust the pendulum to account for the fact that a load
17843 was found and increased in priority. This is to prevent
17844 increasing the priority of multiple loads */
17845 load_store_pendulum--;
17847 break;
17849 pos--;
17852 else if (load_store_pendulum == -1)
17854 /* A store has been issued in this cycle. Scan the ready list for
17855 another store to issue with it, preferring a store to an adjacent
17856 memory location */
17857 int first_store_pos = -1;
17859 pos = lastpos;
17861 while (pos >= 0)
17863 if (is_store_insn (ready[pos], &str_mem))
17865 rtx str_mem2;
17866 /* Maintain the index of the first store found on the
17867 list */
17868 if (first_store_pos == -1)
17869 first_store_pos = pos;
17871 if (is_store_insn (last_scheduled_insn, &str_mem2)
17872 && adjacent_mem_locations (str_mem, str_mem2))
17874 /* Found an adjacent store. Move it to the head of the
17875 ready list, and adjust its priority so that it is
17876 more likely to stay there. */
17877 move_to_end_of_ready (ready, pos, lastpos);
17879 if (!sel_sched_p ()
17880 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17881 INSN_PRIORITY (ready[lastpos])++;
17883 first_store_pos = -1;
17885 break;
17888 pos--;
17891 if (first_store_pos >= 0)
17893 /* An adjacent store wasn't found, but a non-adjacent store was,
17894 so move the non-adjacent store to the front of the ready
17895 list, and adjust its priority so that it is more likely to
17896 stay there. */
17897 move_to_end_of_ready (ready, first_store_pos, lastpos);
17898 if (!sel_sched_p ()
17899 && INSN_PRIORITY_KNOWN (ready[lastpos]))
17900 INSN_PRIORITY (ready[lastpos])++;
17903 else if (load_store_pendulum == 2)
17905 /* Two loads have been issued in this cycle. Increase the priority
17906 of the first store in the ready list to favor it for issuing in
17907 the next cycle. */
17908 pos = lastpos;
17910 while (pos >= 0)
17912 if (is_store_insn (ready[pos], &str_mem)
17913 && !sel_sched_p ()
17914 && INSN_PRIORITY_KNOWN (ready[pos]))
17916 INSN_PRIORITY (ready[pos])++;
17918 /* Adjust the pendulum to account for the fact that a store
17919 was found and increased in priority. This is to prevent
17920 increasing the priority of multiple stores */
17921 load_store_pendulum++;
17923 break;
17925 pos--;
17929 return cached_can_issue_more;
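#if 0
/* Illustrative sketch, not part of the compiler: a hypothetical
   stand-alone model of the load/store pendulum described above,
   ignoring the per-cycle reset done in rs6000_sched_reorder.  Feeding
   it a stream of 'L' (load) and 'S' (store) issues shows how the
   counter swings.  */
#include <stdio.h>

int
main (void)
{
  const char *stream = "LLSSLS";
  int pendulum = 0;

  for (const char *p = stream; *p; p++)
    {
      pendulum += (*p == 'L') ? 1 : -1;
      /* pendulum == 1 or -1: look for a second insn of the same kind;
	 pendulum == 2 or -2: boost the first insn of the other kind.  */
      printf ("%c -> pendulum = %d\n", *p, pendulum);
    }
  return 0;
}
#endif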
17932 /* Do Power9 specific sched_reorder2 reordering of ready list. */
17934 static int
17935 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
17937 int pos;
17938 enum attr_type type, type2;
17940 type = get_attr_type (last_scheduled_insn);
17942 /* Try to issue fixed point divides back-to-back in pairs so they will be
17943 routed to separate execution units and execute in parallel. */
17944 if (type == TYPE_DIV && divide_cnt == 0)
17946 /* First divide has been scheduled. */
17947 divide_cnt = 1;
17949 /* Scan the ready list looking for another divide; if found,
17950 move it to the end of the list so it is chosen next. */
17951 pos = lastpos;
17952 while (pos >= 0)
17954 if (recog_memoized (ready[pos]) >= 0
17955 && get_attr_type (ready[pos]) == TYPE_DIV)
17957 move_to_end_of_ready (ready, pos, lastpos);
17958 break;
17960 pos--;
17963 else
17965 /* Last insn was the 2nd divide or not a divide, reset the counter. */
17966 divide_cnt = 0;
17968 /* The best dispatch throughput for vector and vector load insns can be
17969 achieved by interleaving a vector and vector load such that they'll
17970 dispatch to the same superslice. If this pairing cannot be achieved
17971 then it is best to pair vector insns together and vector load insns
17972 together.
17974 To aid in this pairing, vec_pairing maintains the current state with
17975 the following values:
17977 0 : Initial state, no vecload/vector pairing has been started.
17979 1 : A vecload or vector insn has been issued and a candidate for
17980 pairing has been found and moved to the end of the ready
17981 list. */
17982 if (type == TYPE_VECLOAD)
17984 /* Issued a vecload. */
17985 if (vec_pairing == 0)
17987 int vecload_pos = -1;
17988 /* We issued a single vecload; look for a vector insn to pair it
17989 with. If one isn't found, try to pair another vecload. */
17990 pos = lastpos;
17991 while (pos >= 0)
17993 if (recog_memoized (ready[pos]) >= 0)
17995 type2 = get_attr_type (ready[pos]);
17996 if (is_power9_pairable_vec_type (type2))
17998 /* Found a vector insn to pair with, move it to the
17999 end of the ready list so it is scheduled next. */
18000 move_to_end_of_ready (ready, pos, lastpos);
18001 vec_pairing = 1;
18002 return cached_can_issue_more;
18004 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
18005 /* Remember position of first vecload seen. */
18006 vecload_pos = pos;
18008 pos--;
18010 if (vecload_pos >= 0)
18012 /* Didn't find a vector to pair with but did find a vecload,
18013 move it to the end of the ready list. */
18014 move_to_end_of_ready (ready, vecload_pos, lastpos);
18015 vec_pairing = 1;
18016 return cached_can_issue_more;
18020 else if (is_power9_pairable_vec_type (type))
18022 /* Issued a vector operation. */
18023 if (vec_pairing == 0)
18025 int vec_pos = -1;
18026 /* We issued a single vector insn; look for a vecload to pair it
18027 with. If one isn't found, try to pair another vector. */
18028 pos = lastpos;
18029 while (pos >= 0)
18031 if (recog_memoized (ready[pos]) >= 0)
18033 type2 = get_attr_type (ready[pos]);
18034 if (type2 == TYPE_VECLOAD)
18036 /* Found a vecload insn to pair with, move it to the
18037 end of the ready list so it is scheduled next. */
18038 move_to_end_of_ready (ready, pos, lastpos);
18039 vec_pairing = 1;
18040 return cached_can_issue_more;
18042 else if (is_power9_pairable_vec_type (type2)
18043 && vec_pos == -1)
18044 /* Remember position of first vector insn seen. */
18045 vec_pos = pos;
18047 pos--;
18049 if (vec_pos >= 0)
18051 /* Didn't find a vecload to pair with but did find a vector
18052 insn, move it to the end of the ready list. */
18053 move_to_end_of_ready (ready, vec_pos, lastpos);
18054 vec_pairing = 1;
18055 return cached_can_issue_more;
18060 /* We've either finished a vec/vecload pair, couldn't find an insn to
18061 continue the current pair, or the last insn had nothing to do
18062 with pairing. In any case, reset the state. */
18063 vec_pairing = 0;
18066 return cached_can_issue_more;
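#if 0
/* Illustrative sketch, not part of the compiler: a hypothetical
   stand-alone model of the pairing preference above.  A vecload
   prefers a vector op as its partner, a vector op prefers a vecload,
   and either falls back to its own kind; the list is scanned from the
   issue end.  */
#include <stdio.h>

enum kind { OTHER, VEC, VECLOAD };

static int
pick_partner (enum kind last, const enum kind *ready, int n)
{
  enum kind want = (last == VECLOAD) ? VEC : VECLOAD;
  int fallback = -1;

  for (int i = n - 1; i >= 0; i--)
    {
      if (ready[i] == want)
	return i;			/* ideal interleaved pair */
      if (ready[i] == last && fallback < 0)
	fallback = i;			/* same-kind pairing if needed */
    }
  return fallback;			/* -1: nothing to pair */
}

int
main (void)
{
  enum kind ready[] = { OTHER, VECLOAD, VEC };
  printf ("partner index = %d\n", pick_partner (VECLOAD, ready, 3));
  return 0;
}
#endif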
18069 /* We are about to begin issuing insns for this clock cycle. */
18071 static int
18072 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
18073 rtx_insn **ready ATTRIBUTE_UNUSED,
18074 int *pn_ready ATTRIBUTE_UNUSED,
18075 int clock_var ATTRIBUTE_UNUSED)
18077 int n_ready = *pn_ready;
18079 if (sched_verbose)
18080 fprintf (dump, "// rs6000_sched_reorder :\n");
18082 /* Reorder the ready list if the next insn to be issued
18083 is a nonpipelined insn. */
18084 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
18086 if (is_nonpipeline_insn (ready[n_ready - 1])
18087 && (recog_memoized (ready[n_ready - 2]) > 0))
18088 /* Simply swap first two insns. */
18089 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
18092 if (rs6000_tune == PROCESSOR_POWER6)
18093 load_store_pendulum = 0;
18095 return rs6000_issue_rate ();
18098 /* Like rs6000_sched_reorder, but called after issuing each insn. */
18100 static int
18101 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
18102 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
18104 if (sched_verbose)
18105 fprintf (dump, "// rs6000_sched_reorder2 :\n");
18107 /* Do Power6 dependent reordering if necessary. */
18108 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
18109 return power6_sched_reorder2 (ready, *pn_ready - 1);
18111 /* Do Power9 dependent reordering if necessary. */
18112 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
18113 && recog_memoized (last_scheduled_insn) >= 0)
18114 return power9_sched_reorder2 (ready, *pn_ready - 1);
18116 return cached_can_issue_more;
18119 /* Return whether the presence of INSN causes a dispatch group termination
18120 of group WHICH_GROUP.
18122 If WHICH_GROUP == current_group, this function will return true if INSN
18123 causes the termination of the current group (i.e., the dispatch group to
18124 which INSN belongs). This means that INSN will be the last insn in the
18125 group it belongs to.
18127 If WHICH_GROUP == previous_group, this function will return true if INSN
18128 causes the termination of the previous group (i.e., the dispatch group
18129 that precedes the group to which INSN belongs). This means that INSN
18130 will be the first insn in the group it belongs to. */
18132 static bool
18133 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
18135 bool first, last;
18137 if (! insn)
18138 return false;
18140 first = insn_must_be_first_in_group (insn);
18141 last = insn_must_be_last_in_group (insn);
18143 if (first && last)
18144 return true;
18146 if (which_group == current_group)
18147 return last;
18148 else if (which_group == previous_group)
18149 return first;
18151 return false;
18155 static bool
18156 insn_must_be_first_in_group (rtx_insn *insn)
18158 enum attr_type type;
18160 if (!insn
18161 || NOTE_P (insn)
18162 || DEBUG_INSN_P (insn)
18163 || GET_CODE (PATTERN (insn)) == USE
18164 || GET_CODE (PATTERN (insn)) == CLOBBER)
18165 return false;
18167 switch (rs6000_tune)
18169 case PROCESSOR_POWER5:
18170 if (is_cracked_insn (insn))
18171 return true;
18172 /* FALLTHRU */
18173 case PROCESSOR_POWER4:
18174 if (is_microcoded_insn (insn))
18175 return true;
18177 if (!rs6000_sched_groups)
18178 return false;
18180 type = get_attr_type (insn);
18182 switch (type)
18184 case TYPE_MFCR:
18185 case TYPE_MFCRF:
18186 case TYPE_MTCR:
18187 case TYPE_CR_LOGICAL:
18188 case TYPE_MTJMPR:
18189 case TYPE_MFJMPR:
18190 case TYPE_DIV:
18191 case TYPE_LOAD_L:
18192 case TYPE_STORE_C:
18193 case TYPE_ISYNC:
18194 case TYPE_SYNC:
18195 return true;
18196 default:
18197 break;
18199 break;
18200 case PROCESSOR_POWER6:
18201 type = get_attr_type (insn);
18203 switch (type)
18205 case TYPE_EXTS:
18206 case TYPE_CNTLZ:
18207 case TYPE_TRAP:
18208 case TYPE_MUL:
18209 case TYPE_INSERT:
18210 case TYPE_FPCOMPARE:
18211 case TYPE_MFCR:
18212 case TYPE_MTCR:
18213 case TYPE_MFJMPR:
18214 case TYPE_MTJMPR:
18215 case TYPE_ISYNC:
18216 case TYPE_SYNC:
18217 case TYPE_LOAD_L:
18218 case TYPE_STORE_C:
18219 return true;
18220 case TYPE_SHIFT:
18221 if (get_attr_dot (insn) == DOT_NO
18222 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18223 return true;
18224 else
18225 break;
18226 case TYPE_DIV:
18227 if (get_attr_size (insn) == SIZE_32)
18228 return true;
18229 else
18230 break;
18231 case TYPE_LOAD:
18232 case TYPE_STORE:
18233 case TYPE_FPLOAD:
18234 case TYPE_FPSTORE:
18235 if (get_attr_update (insn) == UPDATE_YES)
18236 return true;
18237 else
18238 break;
18239 default:
18240 break;
18242 break;
18243 case PROCESSOR_POWER7:
18244 type = get_attr_type (insn);
18246 switch (type)
18248 case TYPE_CR_LOGICAL:
18249 case TYPE_MFCR:
18250 case TYPE_MFCRF:
18251 case TYPE_MTCR:
18252 case TYPE_DIV:
18253 case TYPE_ISYNC:
18254 case TYPE_LOAD_L:
18255 case TYPE_STORE_C:
18256 case TYPE_MFJMPR:
18257 case TYPE_MTJMPR:
18258 return true;
18259 case TYPE_MUL:
18260 case TYPE_SHIFT:
18261 case TYPE_EXTS:
18262 if (get_attr_dot (insn) == DOT_YES)
18263 return true;
18264 else
18265 break;
18266 case TYPE_LOAD:
18267 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18268 || get_attr_update (insn) == UPDATE_YES)
18269 return true;
18270 else
18271 break;
18272 case TYPE_STORE:
18273 case TYPE_FPLOAD:
18274 case TYPE_FPSTORE:
18275 if (get_attr_update (insn) == UPDATE_YES)
18276 return true;
18277 else
18278 break;
18279 default:
18280 break;
18282 break;
18283 case PROCESSOR_POWER8:
18284 type = get_attr_type (insn);
18286 switch (type)
18288 case TYPE_CR_LOGICAL:
18289 case TYPE_MFCR:
18290 case TYPE_MFCRF:
18291 case TYPE_MTCR:
18292 case TYPE_SYNC:
18293 case TYPE_ISYNC:
18294 case TYPE_LOAD_L:
18295 case TYPE_STORE_C:
18296 case TYPE_VECSTORE:
18297 case TYPE_MFJMPR:
18298 case TYPE_MTJMPR:
18299 return true;
18300 case TYPE_SHIFT:
18301 case TYPE_EXTS:
18302 case TYPE_MUL:
18303 if (get_attr_dot (insn) == DOT_YES)
18304 return true;
18305 else
18306 break;
18307 case TYPE_LOAD:
18308 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18309 || get_attr_update (insn) == UPDATE_YES)
18310 return true;
18311 else
18312 break;
18313 case TYPE_STORE:
18314 if (get_attr_update (insn) == UPDATE_YES
18315 && get_attr_indexed (insn) == INDEXED_YES)
18316 return true;
18317 else
18318 break;
18319 default:
18320 break;
18322 break;
18323 default:
18324 break;
18327 return false;
18330 static bool
18331 insn_must_be_last_in_group (rtx_insn *insn)
18333 enum attr_type type;
18335 if (!insn
18336 || NOTE_P (insn)
18337 || DEBUG_INSN_P (insn)
18338 || GET_CODE (PATTERN (insn)) == USE
18339 || GET_CODE (PATTERN (insn)) == CLOBBER)
18340 return false;
18342 switch (rs6000_tune) {
18343 case PROCESSOR_POWER4:
18344 case PROCESSOR_POWER5:
18345 if (is_microcoded_insn (insn))
18346 return true;
18348 if (is_branch_slot_insn (insn))
18349 return true;
18351 break;
18352 case PROCESSOR_POWER6:
18353 type = get_attr_type (insn);
18355 switch (type)
18357 case TYPE_EXTS:
18358 case TYPE_CNTLZ:
18359 case TYPE_TRAP:
18360 case TYPE_MUL:
18361 case TYPE_FPCOMPARE:
18362 case TYPE_MFCR:
18363 case TYPE_MTCR:
18364 case TYPE_MFJMPR:
18365 case TYPE_MTJMPR:
18366 case TYPE_ISYNC:
18367 case TYPE_SYNC:
18368 case TYPE_LOAD_L:
18369 case TYPE_STORE_C:
18370 return true;
18371 case TYPE_SHIFT:
18372 if (get_attr_dot (insn) == DOT_NO
18373 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
18374 return true;
18375 else
18376 break;
18377 case TYPE_DIV:
18378 if (get_attr_size (insn) == SIZE_32)
18379 return true;
18380 else
18381 break;
18382 default:
18383 break;
18385 break;
18386 case PROCESSOR_POWER7:
18387 type = get_attr_type (insn);
18389 switch (type)
18391 case TYPE_ISYNC:
18392 case TYPE_SYNC:
18393 case TYPE_LOAD_L:
18394 case TYPE_STORE_C:
18395 return true;
18396 case TYPE_LOAD:
18397 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18398 && get_attr_update (insn) == UPDATE_YES)
18399 return true;
18400 else
18401 break;
18402 case TYPE_STORE:
18403 if (get_attr_update (insn) == UPDATE_YES
18404 && get_attr_indexed (insn) == INDEXED_YES)
18405 return true;
18406 else
18407 break;
18408 default:
18409 break;
18411 break;
18412 case PROCESSOR_POWER8:
18413 type = get_attr_type (insn);
18415 switch (type)
18417 case TYPE_MFCR:
18418 case TYPE_MTCR:
18419 case TYPE_ISYNC:
18420 case TYPE_SYNC:
18421 case TYPE_LOAD_L:
18422 case TYPE_STORE_C:
18423 return true;
18424 case TYPE_LOAD:
18425 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18426 && get_attr_update (insn) == UPDATE_YES)
18427 return true;
18428 else
18429 break;
18430 case TYPE_STORE:
18431 if (get_attr_update (insn) == UPDATE_YES
18432 && get_attr_indexed (insn) == INDEXED_YES)
18433 return true;
18434 else
18435 break;
18436 default:
18437 break;
18439 break;
18440 default:
18441 break;
18444 return false;
18447 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
18448 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
18450 static bool
18451 is_costly_group (rtx *group_insns, rtx next_insn)
18453 int i;
18454 int issue_rate = rs6000_issue_rate ();
18456 for (i = 0; i < issue_rate; i++)
18458 sd_iterator_def sd_it;
18459 dep_t dep;
18460 rtx insn = group_insns[i];
18462 if (!insn)
18463 continue;
18465 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
18467 rtx next = DEP_CON (dep);
18469 if (next == next_insn
18470 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
18471 return true;
18475 return false;
18478 /* Utility of the function redefine_groups.
18479 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
18480 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
18481 to keep it "far" (in a separate group) from GROUP_INSNS, following
18482 one of the following schemes, depending on the value of the flag
18483 -minsert-sched-nops = X:
18484 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
18485 in order to force NEXT_INSN into a separate group.
18486 (2) X < sched_finish_regroup_exact: insert exactly X nops.
18487 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
18488 insertion (has a group just ended, how many vacant issue slots remain in the
18489 last group, and how many dispatch groups were encountered so far). */
18491 static int
18492 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
18493 rtx_insn *next_insn, bool *group_end, int can_issue_more,
18494 int *group_count)
18496 rtx nop;
18497 bool force;
18498 int issue_rate = rs6000_issue_rate ();
18499 bool end = *group_end;
18500 int i;
18502 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
18503 return can_issue_more;
18505 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
18506 return can_issue_more;
18508 force = is_costly_group (group_insns, next_insn);
18509 if (!force)
18510 return can_issue_more;
18512 if (sched_verbose > 6)
18513 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
18514 *group_count, can_issue_more);
18516 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
18518 if (*group_end)
18519 can_issue_more = 0;
18521 /* Since only a branch can be issued in the last issue_slot, it is
18522 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
18523 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
18524 in this case the last nop will start a new group and the branch
18525 will be forced to the new group. */
18526 if (can_issue_more && !is_branch_slot_insn (next_insn))
18527 can_issue_more--;
18529 /* Do we have a special group ending nop? */
18530 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
18531 || rs6000_tune == PROCESSOR_POWER8)
18533 nop = gen_group_ending_nop ();
18534 emit_insn_before (nop, next_insn);
18535 can_issue_more = 0;
18537 else
18538 while (can_issue_more > 0)
18540 nop = gen_nop ();
18541 emit_insn_before (nop, next_insn);
18542 can_issue_more--;
18545 *group_end = true;
18546 return 0;
18549 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
18551 int n_nops = rs6000_sched_insert_nops;
18553 /* Nops can't be issued from the branch slot, so the effective
18554 issue_rate for nops is 'issue_rate - 1'. */
18555 if (can_issue_more == 0)
18556 can_issue_more = issue_rate;
18557 can_issue_more--;
18558 if (can_issue_more == 0)
18560 can_issue_more = issue_rate - 1;
18561 (*group_count)++;
18562 end = true;
18563 for (i = 0; i < issue_rate; i++)
18565 group_insns[i] = 0;
18569 while (n_nops > 0)
18571 nop = gen_nop ();
18572 emit_insn_before (nop, next_insn);
18573 if (can_issue_more == issue_rate - 1) /* new group begins */
18574 end = false;
18575 can_issue_more--;
18576 if (can_issue_more == 0)
18578 can_issue_more = issue_rate - 1;
18579 (*group_count)++;
18580 end = true;
18581 for (i = 0; i < issue_rate; i++)
18583 group_insns[i] = 0;
18586 n_nops--;
18589 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
18590 can_issue_more++;
18592 /* Is next_insn going to start a new group? */
18593 *group_end
18594 = (end
18595 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18596 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18597 || (can_issue_more < issue_rate &&
18598 insn_terminates_group_p (next_insn, previous_group)));
18599 if (*group_end && end)
18600 (*group_count)--;
18602 if (sched_verbose > 6)
18603 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
18604 *group_count, can_issue_more);
18605 return can_issue_more;
18608 return can_issue_more;
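#if 0
/* Illustrative sketch, not part of the compiler: a hypothetical
   stand-alone model of the "exact" scheme above.  With CAN_ISSUE_MORE
   vacant slots left in the current group, forcing the next insn into a
   new group needs CAN_ISSUE_MORE nops when that insn is a branch (only
   a branch may occupy the final slot) and one fewer otherwise.  */
static int
nops_to_force_new_group (int can_issue_more, int next_is_branch)
{
  if (can_issue_more == 0)	/* the group has already ended */
    return 0;
  return next_is_branch ? can_issue_more : can_issue_more - 1;
}

/* E.g. with 3 vacant slots and a non-branch next insn, 2 nops suffice,
   since the non-branch insn could not use the branch slot anyway.  */
#endif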
18611 /* This function tries to synch the dispatch groups that the compiler "sees"
18612 with the dispatch groups that the processor dispatcher is expected to
18613 form in practice. It tries to achieve this synchronization by forcing the
18614 estimated processor grouping on the compiler (as opposed to the function
18615 'pad_groups' which tries to force the scheduler's grouping on the processor).
18617 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
18618 examines the (estimated) dispatch groups that will be formed by the processor
18619 dispatcher. It marks these group boundaries to reflect the estimated
18620 processor grouping, overriding the grouping that the scheduler had marked.
18621 Depending on the value of the flag '-minsert-sched-nops' this function can
18622 force certain insns into separate groups or force a certain distance between
18623 them by inserting nops, for example, if there exists a "costly dependence"
18624 between the insns.
18626 The function estimates the group boundaries that the processor will form as
18627 follows: It keeps track of how many vacant issue slots are available after
18628 each insn. A subsequent insn will start a new group if one of the following
18629 4 cases applies:
18630 - no more vacant issue slots remain in the current dispatch group.
18631 - only the last issue slot, which is the branch slot, is vacant, but the next
18632 insn is not a branch.
18633 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
18634 which means that a cracked insn (which occupies two issue slots) can't be
18635 issued in this group.
18636 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
18637 start a new group. */
18639 static int
18640 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18641 rtx_insn *tail)
18643 rtx_insn *insn, *next_insn;
18644 int issue_rate;
18645 int can_issue_more;
18646 int slot, i;
18647 bool group_end;
18648 int group_count = 0;
18649 rtx *group_insns;
18651 /* Initialize. */
18652 issue_rate = rs6000_issue_rate ();
18653 group_insns = XALLOCAVEC (rtx, issue_rate);
18654 for (i = 0; i < issue_rate; i++)
18656 group_insns[i] = 0;
18658 can_issue_more = issue_rate;
18659 slot = 0;
18660 insn = get_next_active_insn (prev_head_insn, tail);
18661 group_end = false;
18663 while (insn != NULL_RTX)
18665 slot = (issue_rate - can_issue_more);
18666 group_insns[slot] = insn;
18667 can_issue_more =
18668 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18669 if (insn_terminates_group_p (insn, current_group))
18670 can_issue_more = 0;
18672 next_insn = get_next_active_insn (insn, tail);
18673 if (next_insn == NULL_RTX)
18674 return group_count + 1;
18676 /* Is next_insn going to start a new group? */
18677 group_end
18678 = (can_issue_more == 0
18679 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
18680 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
18681 || (can_issue_more < issue_rate &&
18682 insn_terminates_group_p (next_insn, previous_group)));
18684 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
18685 next_insn, &group_end, can_issue_more,
18686 &group_count);
18688 if (group_end)
18690 group_count++;
18691 can_issue_more = 0;
18692 for (i = 0; i < issue_rate; i++)
18694 group_insns[i] = 0;
18698 if (GET_MODE (next_insn) == TImode && can_issue_more)
18699 PUT_MODE (next_insn, VOIDmode);
18700 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
18701 PUT_MODE (next_insn, TImode);
18703 insn = next_insn;
18704 if (can_issue_more == 0)
18705 can_issue_more = issue_rate;
18706 } /* while */
18708 return group_count;
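#if 0
/* Illustrative sketch, not part of the compiler: the four new-group
   conditions used above, collected into a hypothetical stand-alone
   predicate over the vacant-slot count.  */
#include <stdbool.h>

static bool
starts_new_group_p (int vacant, int issue_rate, bool next_is_branch,
		    bool next_is_cracked, bool next_must_be_first)
{
  return vacant == 0				/* no slots left */
	 || (vacant == 1 && !next_is_branch)	/* only the branch slot */
	 || (vacant <= 2 && next_is_cracked)	/* cracked insn needs 2 */
	 || (vacant < issue_rate && next_must_be_first);
}
#endif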
18711 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
18712 dispatch group boundaries that the scheduler had marked. Pad with nops
18713 any dispatch groups which have vacant issue slots, in order to force the
18714 scheduler's grouping on the processor dispatcher. The function
18715 returns the number of dispatch groups found. */
18717 static int
18718 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
18719 rtx_insn *tail)
18721 rtx_insn *insn, *next_insn;
18722 rtx nop;
18723 int issue_rate;
18724 int can_issue_more;
18725 int group_end;
18726 int group_count = 0;
18728 /* Initialize issue_rate. */
18729 issue_rate = rs6000_issue_rate ();
18730 can_issue_more = issue_rate;
18732 insn = get_next_active_insn (prev_head_insn, tail);
18733 next_insn = get_next_active_insn (insn, tail);
18735 while (insn != NULL_RTX)
18737 can_issue_more =
18738 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
18740 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
18742 if (next_insn == NULL_RTX)
18743 break;
18745 if (group_end)
18747 /* If the scheduler had marked group termination at this location
18748 (between insn and next_insn), and neither insn nor next_insn will
18749 force group termination, pad the group with nops to force group
18750 termination. */
18751 if (can_issue_more
18752 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
18753 && !insn_terminates_group_p (insn, current_group)
18754 && !insn_terminates_group_p (next_insn, previous_group))
18756 if (!is_branch_slot_insn (next_insn))
18757 can_issue_more--;
18759 while (can_issue_more)
18761 nop = gen_nop ();
18762 emit_insn_before (nop, next_insn);
18763 can_issue_more--;
18767 can_issue_more = issue_rate;
18768 group_count++;
18771 insn = next_insn;
18772 next_insn = get_next_active_insn (insn, tail);
18775 return group_count;
18778 /* We're beginning a new block. Initialize data structures as necessary. */
18780 static void
18781 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
18782 int sched_verbose ATTRIBUTE_UNUSED,
18783 int max_ready ATTRIBUTE_UNUSED)
18785 last_scheduled_insn = NULL;
18786 load_store_pendulum = 0;
18787 divide_cnt = 0;
18788 vec_pairing = 0;
18791 /* The following function is called at the end of scheduling BB.
18792 After reload, it inserts nops to enforce insn group bundling. */
18794 static void
18795 rs6000_sched_finish (FILE *dump, int sched_verbose)
18797 int n_groups;
18799 if (sched_verbose)
18800 fprintf (dump, "=== Finishing schedule.\n");
18802 if (reload_completed && rs6000_sched_groups)
18804 /* Do not run the sched_finish hook when selective scheduling is enabled. */
18805 if (sel_sched_p ())
18806 return;
18808 if (rs6000_sched_insert_nops == sched_finish_none)
18809 return;
18811 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
18812 n_groups = pad_groups (dump, sched_verbose,
18813 current_sched_info->prev_head,
18814 current_sched_info->next_tail);
18815 else
18816 n_groups = redefine_groups (dump, sched_verbose,
18817 current_sched_info->prev_head,
18818 current_sched_info->next_tail);
18820 if (sched_verbose >= 6)
18822 fprintf (dump, "ngroups = %d\n", n_groups);
18823 print_rtl (dump, current_sched_info->prev_head);
18824 fprintf (dump, "Done finish_sched\n");
18829 struct rs6000_sched_context
18831 short cached_can_issue_more;
18832 rtx_insn *last_scheduled_insn;
18833 int load_store_pendulum;
18834 int divide_cnt;
18835 int vec_pairing;
18838 typedef struct rs6000_sched_context rs6000_sched_context_def;
18839 typedef rs6000_sched_context_def *rs6000_sched_context_t;
18841 /* Allocate storage for a new scheduling context. */
18842 static void *
18843 rs6000_alloc_sched_context (void)
18845 return xmalloc (sizeof (rs6000_sched_context_def));
18848 /* If CLEAN_P is true, initialize _SC with clean data;
18849 otherwise, initialize it from the global context. */
18850 static void
18851 rs6000_init_sched_context (void *_sc, bool clean_p)
18853 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18855 if (clean_p)
18857 sc->cached_can_issue_more = 0;
18858 sc->last_scheduled_insn = NULL;
18859 sc->load_store_pendulum = 0;
18860 sc->divide_cnt = 0;
18861 sc->vec_pairing = 0;
18863 else
18865 sc->cached_can_issue_more = cached_can_issue_more;
18866 sc->last_scheduled_insn = last_scheduled_insn;
18867 sc->load_store_pendulum = load_store_pendulum;
18868 sc->divide_cnt = divide_cnt;
18869 sc->vec_pairing = vec_pairing;
18873 /* Sets the global scheduling context to the one pointed to by _SC. */
18874 static void
18875 rs6000_set_sched_context (void *_sc)
18877 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
18879 gcc_assert (sc != NULL);
18881 cached_can_issue_more = sc->cached_can_issue_more;
18882 last_scheduled_insn = sc->last_scheduled_insn;
18883 load_store_pendulum = sc->load_store_pendulum;
18884 divide_cnt = sc->divide_cnt;
18885 vec_pairing = sc->vec_pairing;
18888 /* Free _SC. */
18889 static void
18890 rs6000_free_sched_context (void *_sc)
18892 gcc_assert (_sc != NULL);
18894 free (_sc);
18897 static bool
18898 rs6000_sched_can_speculate_insn (rtx_insn *insn)
18900 switch (get_attr_type (insn))
18902 case TYPE_DIV:
18903 case TYPE_SDIV:
18904 case TYPE_DDIV:
18905 case TYPE_VECDIV:
18906 case TYPE_SSQRT:
18907 case TYPE_DSQRT:
18908 return false;
18910 default:
18911 return true;
18915 /* Length in units of the trampoline for entering a nested function. */
18917 int
18918 rs6000_trampoline_size (void)
18920 int ret = 0;
18922 switch (DEFAULT_ABI)
18924 default:
18925 gcc_unreachable ();
18927 case ABI_AIX:
18928 ret = (TARGET_32BIT) ? 12 : 24;
18929 break;
18931 case ABI_ELFv2:
18932 gcc_assert (!TARGET_32BIT);
18933 ret = 32;
18934 break;
18936 case ABI_DARWIN:
18937 case ABI_V4:
18938 ret = (TARGET_32BIT) ? 40 : 48;
18939 break;
18942 return ret;
18945 /* Emit RTL insns to initialize the variable parts of a trampoline.
18946 FNADDR is an RTX for the address of the function's pure code.
18947 CXT is an RTX for the static chain value for the function. */
18949 static void
18950 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
18952 int regsize = (TARGET_32BIT) ? 4 : 8;
18953 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
18954 rtx ctx_reg = force_reg (Pmode, cxt);
18955 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
18957 switch (DEFAULT_ABI)
18959 default:
18960 gcc_unreachable ();
18962 /* Under AIX, just build the 3-word function descriptor. */
18963 case ABI_AIX:
18965 rtx fnmem, fn_reg, toc_reg;
18967 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
18968 error ("you cannot take the address of a nested function if you use "
18969 "the %qs option", "-mno-pointers-to-nested-functions");
18971 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
18972 fn_reg = gen_reg_rtx (Pmode);
18973 toc_reg = gen_reg_rtx (Pmode);
18975 /* Macro to shorten the code expansions below. */
18976 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
18978 m_tramp = replace_equiv_address (m_tramp, addr);
18980 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
18981 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
18982 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
18983 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
18984 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
18986 # undef MEM_PLUS
18988 break;
18990 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
18991 case ABI_ELFv2:
18992 case ABI_DARWIN:
18993 case ABI_V4:
18994 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
18995 LCT_NORMAL, VOIDmode,
18996 addr, Pmode,
18997 GEN_INT (rs6000_trampoline_size ()), SImode,
18998 fnaddr, Pmode,
18999 ctx_reg, Pmode);
19000 break;
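#if 0
/* Illustrative sketch, not part of the compiler: a hypothetical model
   of the 3-word AIX function descriptor built above.  The trampoline
   descriptor copies the target's code address and TOC word and
   substitutes its own static-chain value.  */
struct aix_func_desc
{
  unsigned long code_addr;	/* word 0: entry point */
  unsigned long toc;		/* word 1: TOC (r2) value */
  unsigned long static_chain;	/* word 2: static chain value */
};

static void
init_trampoline_desc (struct aix_func_desc *tramp,
		      const struct aix_func_desc *target,
		      unsigned long chain_value)
{
  tramp->code_addr = target->code_addr;	/* MEM_PLUS (fnmem, 0) */
  tramp->toc = target->toc;		/* MEM_PLUS (fnmem, regsize) */
  tramp->static_chain = chain_value;	/* ctx_reg */
}
#endif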
19005 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
19006 identifier as an argument, so the front end shouldn't look it up. */
19008 static bool
19009 rs6000_attribute_takes_identifier_p (const_tree attr_id)
19011 return is_attribute_p ("altivec", attr_id);
19014 /* Handle the "altivec" attribute. The attribute may have
19015 arguments as follows:
19017 __attribute__((altivec(vector__)))
19018 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
19019 __attribute__((altivec(bool__))) (always followed by 'unsigned')
19021 and may appear more than once (e.g., 'vector bool char') in a
19022 given declaration. */
19024 static tree
19025 rs6000_handle_altivec_attribute (tree *node,
19026 tree name ATTRIBUTE_UNUSED,
19027 tree args,
19028 int flags ATTRIBUTE_UNUSED,
19029 bool *no_add_attrs)
19031 tree type = *node, result = NULL_TREE;
19032 machine_mode mode;
19033 int unsigned_p;
19034 char altivec_type
19035 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
19036 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
19037 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
19038 : '?');
19040 while (POINTER_TYPE_P (type)
19041 || TREE_CODE (type) == FUNCTION_TYPE
19042 || TREE_CODE (type) == METHOD_TYPE
19043 || TREE_CODE (type) == ARRAY_TYPE)
19044 type = TREE_TYPE (type);
19046 mode = TYPE_MODE (type);
19048 /* Check for invalid AltiVec type qualifiers. */
19049 if (type == long_double_type_node)
19050 error ("use of %<long double%> in AltiVec types is invalid");
19051 else if (type == boolean_type_node)
19052 error ("use of boolean types in AltiVec types is invalid");
19053 else if (TREE_CODE (type) == COMPLEX_TYPE)
19054 error ("use of %<complex%> in AltiVec types is invalid");
19055 else if (DECIMAL_FLOAT_MODE_P (mode))
19056 error ("use of decimal floating point types in AltiVec types is invalid");
19057 else if (!TARGET_VSX)
19059 if (type == long_unsigned_type_node || type == long_integer_type_node)
19061 if (TARGET_64BIT)
19062 error ("use of %<long%> in AltiVec types is invalid for "
19063 "64-bit code without %qs", "-mvsx");
19064 else if (rs6000_warn_altivec_long)
19065 warning (0, "use of %<long%> in AltiVec types is deprecated; "
19066 "use %<int%>");
19068 else if (type == long_long_unsigned_type_node
19069 || type == long_long_integer_type_node)
19070 error ("use of %<long long%> in AltiVec types is invalid without %qs",
19071 "-mvsx");
19072 else if (type == double_type_node)
19073 error ("use of %<double%> in AltiVec types is invalid without %qs",
19074 "-mvsx");
19077 switch (altivec_type)
19079 case 'v':
19080 unsigned_p = TYPE_UNSIGNED (type);
19081 switch (mode)
19083 case E_TImode:
19084 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
19085 break;
19086 case E_DImode:
19087 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
19088 break;
19089 case E_SImode:
19090 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
19091 break;
19092 case E_HImode:
19093 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
19094 break;
19095 case E_QImode:
19096 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
19097 break;
19098 case E_SFmode: result = V4SF_type_node; break;
19099 case E_DFmode: result = V2DF_type_node; break;
19100 /* If the user says 'vector int bool', we may be handed the 'bool'
19101 attribute _before_ the 'vector' attribute, and so select the
19102 proper type in the 'b' case below. */
19103 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
19104 case E_V2DImode: case E_V2DFmode:
19105 result = type;
19106 default: break;
19108 break;
19109 case 'b':
19110 switch (mode)
19112 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
19113 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
19114 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
19115 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
19116 default: break;
19118 break;
19119 case 'p':
19120 switch (mode)
19122 case E_V8HImode: result = pixel_V8HI_type_node;
19123 default: break;
19125 default: break;
19128 /* Propagate qualifiers attached to the element type
19129 onto the vector type. */
19130 if (result && result != type && TYPE_QUALS (type))
19131 result = build_qualified_type (result, TYPE_QUALS (type));
19133 *no_add_attrs = true; /* No need to hang on to the attribute. */
19135 if (result)
19136 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
19138 return NULL_TREE;
19141 /* AltiVec defines five built-in scalar types that serve as vector
19142 elements; we must teach the compiler how to mangle them. The 128-bit
19143 floating point mangling is target-specific as well. */
19145 static const char *
19146 rs6000_mangle_type (const_tree type)
19148 type = TYPE_MAIN_VARIANT (type);
19150 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
19151 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
19152 return NULL;
19154 if (type == bool_char_type_node) return "U6__boolc";
19155 if (type == bool_short_type_node) return "U6__bools";
19156 if (type == pixel_type_node) return "u7__pixel";
19157 if (type == bool_int_type_node) return "U6__booli";
19158 if (type == bool_long_long_type_node) return "U6__boolx";
19160 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
19161 return "g";
19162 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
19163 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
19165 /* For all other types, use the default mangling. */
19166 return NULL;
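/* For instance, a "vector bool int" parameter mangles its element type
   with the "U6__booli" string returned above, and a hypothetical

       void f (__ieee128);

   mangles as _Z1fu9__ieee128 (or with U10__float128 in the GCC 8.1
   compatibility mode tested above), whereas the IBM double-double
   format mangles with the single letter "g" (_Z1fg).  */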
19169 /* Handle a "longcall" or "shortcall" attribute; arguments as in
19170 struct attribute_spec.handler. */
19172 static tree
19173 rs6000_handle_longcall_attribute (tree *node, tree name,
19174 tree args ATTRIBUTE_UNUSED,
19175 int flags ATTRIBUTE_UNUSED,
19176 bool *no_add_attrs)
19178 if (TREE_CODE (*node) != FUNCTION_TYPE
19179 && TREE_CODE (*node) != FIELD_DECL
19180 && TREE_CODE (*node) != TYPE_DECL)
19182 warning (OPT_Wattributes, "%qE attribute only applies to functions",
19183 name);
19184 *no_add_attrs = true;
19187 return NULL_TREE;
19190 /* Set longcall attributes on all functions declared when
19191 rs6000_default_long_calls is true. */
19192 static void
19193 rs6000_set_default_type_attributes (tree type)
19195 if (rs6000_default_long_calls
19196 && (TREE_CODE (type) == FUNCTION_TYPE
19197 || TREE_CODE (type) == METHOD_TYPE))
19198 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
19199 NULL_TREE,
19200 TYPE_ATTRIBUTES (type));
19202 #if TARGET_MACHO
19203 darwin_set_default_type_attributes (type);
19204 #endif
19207 /* Return a reference suitable for calling a function with the
19208 longcall attribute. */
19210 static rtx
19211 rs6000_longcall_ref (rtx call_ref, rtx arg)
19213 /* System V adds '.' to the internal name, so skip them. */
19214 const char *call_name = XSTR (call_ref, 0);
19215 if (*call_name == '.')
19217 while (*call_name == '.')
19218 call_name++;
19220 tree node = get_identifier (call_name);
19221 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
19224 if (TARGET_PLTSEQ)
19226 rtx base = const0_rtx;
19227 int regno = 12;
19228 if (rs6000_pcrel_p (cfun))
19230 rtx reg = gen_rtx_REG (Pmode, regno);
19231 rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19232 UNSPEC_PLT_PCREL);
19233 emit_insn (gen_rtx_SET (reg, u));
19234 return reg;
19237 if (DEFAULT_ABI == ABI_ELFv2)
19238 base = gen_rtx_REG (Pmode, TOC_REGISTER);
19239 else
19241 if (flag_pic)
19242 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
19243 regno = 11;
19245 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
19246 may be used by a function global entry point. For SysV4, r11
19247 is used by __glink_PLTresolve lazy resolver entry. */
19248 rtx reg = gen_rtx_REG (Pmode, regno);
19249 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
19250 UNSPEC_PLT16_HA);
19251 rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
19252 UNSPEC_PLT16_LO);
19253 emit_insn (gen_rtx_SET (reg, hi));
19254 emit_insn (gen_rtx_SET (reg, lo));
19255 return reg;
19258 return force_reg (Pmode, call_ref);
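/* As a rough sketch of what the UNSPECs above become: with PLT sequences
   enabled on 64-bit ELFv2, the HA/LO pair is emitted as something like

       addis 12,2,func@plt@ha
       ld    12,func@plt@l(12)

   while the pcrel variant becomes a single prefixed load such as
   "pld 12,func@plt@pcrel", after which the caller branches through CTR.
   The exact relocations depend on the assembler and ABI in use.  */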
19261 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
19262 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
19263 #endif
19265 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19266 struct attribute_spec.handler. */
19267 static tree
19268 rs6000_handle_struct_attribute (tree *node, tree name,
19269 tree args ATTRIBUTE_UNUSED,
19270 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19272 tree *type = NULL;
19273 if (DECL_P (*node))
19275 if (TREE_CODE (*node) == TYPE_DECL)
19276 type = &TREE_TYPE (*node);
19278 else
19279 type = node;
19281 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19282 || TREE_CODE (*type) == UNION_TYPE)))
19284 warning (OPT_Wattributes, "%qE attribute ignored", name);
19285 *no_add_attrs = true;
19288 else if ((is_attribute_p ("ms_struct", name)
19289 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19290 || ((is_attribute_p ("gcc_struct", name)
19291 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19293 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
19294 name);
19295 *no_add_attrs = true;
19298 return NULL_TREE;
19301 static bool
19302 rs6000_ms_bitfield_layout_p (const_tree record_type)
19304 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
19305 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19306 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
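/* Usage example for the attributes handled above:

       struct __attribute__((ms_struct)) S { char c; int i : 8; };

   lays out its bit-field the way the Microsoft compilers do, whereas
   gcc_struct forces the default GCC layout even when
   TARGET_USE_MS_BITFIELD_LAYOUT is in effect.  */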
19309 #ifdef USING_ELFOS_H
19311 /* A get_unnamed_section callback, used for switching to toc_section. */
19313 static void
19314 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
19316 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19317 && TARGET_MINIMAL_TOC)
19319 if (!toc_initialized)
19321 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19322 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19323 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
19324 fprintf (asm_out_file, "\t.tc ");
19325 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
19326 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19327 fprintf (asm_out_file, "\n");
19329 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19330 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19331 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19332 fprintf (asm_out_file, " = .+32768\n");
19333 toc_initialized = 1;
19335 else
19336 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19338 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19340 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
19341 if (!toc_initialized)
19343 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19344 toc_initialized = 1;
19347 else
19349 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19350 if (!toc_initialized)
19352 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
19353 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
19354 fprintf (asm_out_file, " = .+32768\n");
19355 toc_initialized = 1;
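/* Note on the ".+32768" bias used above: TOC entries are reached with
   signed 16-bit displacements from the TOC base register, i.e. offsets
   in [-32768, 32767].  Defining LCTOC1 32768 bytes past the start of
   the minimal TOC lets the whole 64KB section be addressed from that
   single base.  */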
19360 /* Implement TARGET_ASM_INIT_SECTIONS. */
19362 static void
19363 rs6000_elf_asm_init_sections (void)
19365 toc_section
19366 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
19368 sdata2_section
19369 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
19370 SDATA2_SECTION_ASM_OP);
19373 /* Implement TARGET_SELECT_RTX_SECTION. */
19375 static section *
19376 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
19377 unsigned HOST_WIDE_INT align)
19379 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
19380 return toc_section;
19381 else
19382 return default_elf_select_rtx_section (mode, x, align);
19385 /* For a SYMBOL_REF, set generic flags and then perform some
19386 target-specific processing.
19388 When the AIX ABI is requested on a non-AIX system, replace the
19389 function name with the real name (with a leading .) rather than the
19390 function descriptor name. This saves a lot of overriding code to
19391 read the prefixes. */
19393 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
19394 static void
19395 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
19397 default_encode_section_info (decl, rtl, first);
19399 if (first
19400 && TREE_CODE (decl) == FUNCTION_DECL
19401 && !TARGET_AIX
19402 && DEFAULT_ABI == ABI_AIX)
19404 rtx sym_ref = XEXP (rtl, 0);
19405 size_t len = strlen (XSTR (sym_ref, 0));
19406 char *str = XALLOCAVEC (char, len + 2);
19407 str[0] = '.';
19408 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
19409 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
19413 static inline bool
19414 compare_section_name (const char *section, const char *templ)
19416 int len;
19418 len = strlen (templ);
19419 return (strncmp (section, templ, len) == 0
19420 && (section[len] == 0 || section[len] == '.'));
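/* E.g. with TEMPL ".sdata", SECTION ".sdata" and ".sdata.rel" both
   match, but ".sdata2" does not, since '2' is neither NUL nor '.'.  */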
19423 bool
19424 rs6000_elf_in_small_data_p (const_tree decl)
19426 if (rs6000_sdata == SDATA_NONE)
19427 return false;
19429 /* We want to merge strings, so we never consider them small data. */
19430 if (TREE_CODE (decl) == STRING_CST)
19431 return false;
19433 /* Functions are never in the small data area. */
19434 if (TREE_CODE (decl) == FUNCTION_DECL)
19435 return false;
19437 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
19439 const char *section = DECL_SECTION_NAME (decl);
19440 if (compare_section_name (section, ".sdata")
19441 || compare_section_name (section, ".sdata2")
19442 || compare_section_name (section, ".gnu.linkonce.s")
19443 || compare_section_name (section, ".sbss")
19444 || compare_section_name (section, ".sbss2")
19445 || compare_section_name (section, ".gnu.linkonce.sb")
19446 || strcmp (section, ".PPC.EMB.sdata0") == 0
19447 || strcmp (section, ".PPC.EMB.sbss0") == 0)
19448 return true;
19450 else
19452 /* If we are told not to put readonly data in sdata, then don't. */
19453 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
19454 && !rs6000_readonly_in_sdata)
19455 return false;
19457 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
19459 if (size > 0
19460 && size <= g_switch_value
19461 /* If it's not public, and we're not going to reference it there,
19462 there's no need to put it in the small data section. */
19463 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
19464 return true;
19467 return false;
19470 #endif /* USING_ELFOS_H */
19472 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
19474 static bool
19475 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
19477 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
19480 /* Do not place thread-local symbols refs in the object blocks. */
19482 static bool
19483 rs6000_use_blocks_for_decl_p (const_tree decl)
19485 return !DECL_THREAD_LOCAL_P (decl);
19488 /* Return a REG that occurs in ADDR with coefficient 1.
19489 ADDR can be effectively incremented by incrementing REG.
19491 r0 is special and we must not select it as an address
19492 register by this routine since our caller will try to
19493 increment the returned register via an "la" instruction. */
19495 rtx
19496 find_addr_reg (rtx addr)
19498 while (GET_CODE (addr) == PLUS)
19500 if (REG_P (XEXP (addr, 0))
19501 && REGNO (XEXP (addr, 0)) != 0)
19502 addr = XEXP (addr, 0);
19503 else if (REG_P (XEXP (addr, 1))
19504 && REGNO (XEXP (addr, 1)) != 0)
19505 addr = XEXP (addr, 1);
19506 else if (CONSTANT_P (XEXP (addr, 0)))
19507 addr = XEXP (addr, 1);
19508 else if (CONSTANT_P (XEXP (addr, 1)))
19509 addr = XEXP (addr, 0);
19510 else
19511 gcc_unreachable ();
19513 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
19514 return addr;
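/* Example: for ADDR (plus (reg 9) (const_int 16)) this returns (reg 9);
   for (plus (plus (reg 9) (reg 10)) (const_int 16)) it steps into the
   inner PLUS and again returns (reg 9).  r0 is never chosen because
   "la" (addi) interprets r0 as the constant 0 rather than a base.  */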
19517 void
19518 rs6000_fatal_bad_address (rtx op)
19520 fatal_insn ("bad address", op);
19523 #if TARGET_MACHO
19525 vec<branch_island, va_gc> *branch_islands;
19527 /* Remember to generate a branch island for far calls to the given
19528 function. */
19530 static void
19531 add_compiler_branch_island (tree label_name, tree function_name,
19532 int line_number)
19534 branch_island bi = {function_name, label_name, line_number};
19535 vec_safe_push (branch_islands, bi);
19538 /* NO_PREVIOUS_DEF checks in the linked list whether the function name is
19539 already there or not. */
19541 static int
19542 no_previous_def (tree function_name)
19544 branch_island *bi;
19545 unsigned ix;
19547 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19548 if (function_name == bi->function_name)
19549 return 0;
19550 return 1;
19553 /* GET_PREV_LABEL gets the label name from the previous definition of
19554 the function. */
19556 static tree
19557 get_prev_label (tree function_name)
19559 branch_island *bi;
19560 unsigned ix;
19562 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
19563 if (function_name == bi->function_name)
19564 return bi->label_name;
19565 return NULL_TREE;
19568 /* Generate external symbol indirection stubs (PIC and non-PIC). */
19570 void
19571 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19573 unsigned int length;
19574 char *symbol_name, *lazy_ptr_name;
19575 char *local_label_0;
19576 static unsigned label = 0;
19578 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19579 symb = (*targetm.strip_name_encoding) (symb);
19581 length = strlen (symb);
19582 symbol_name = XALLOCAVEC (char, length + 32);
19583 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19585 lazy_ptr_name = XALLOCAVEC (char, length + 32);
19586 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
19588 if (MACHOPIC_PURE)
19590 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
19591 fprintf (file, "\t.align 5\n");
19593 fprintf (file, "%s:\n", stub);
19594 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19596 label++;
19597 local_label_0 = XALLOCAVEC (char, 16);
19598 sprintf (local_label_0, "L%u$spb", label);
19600 fprintf (file, "\tmflr r0\n");
19601 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
19602 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
19603 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
19604 lazy_ptr_name, local_label_0);
19605 fprintf (file, "\tmtlr r0\n");
19606 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
19607 (TARGET_64BIT ? "ldu" : "lwzu"),
19608 lazy_ptr_name, local_label_0);
19609 fprintf (file, "\tmtctr r12\n");
19610 fprintf (file, "\tbctr\n");
19612 else /* mdynamic-no-pic or mkernel. */
19614 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
19615 fprintf (file, "\t.align 4\n");
19617 fprintf (file, "%s:\n", stub);
19618 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19620 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
19621 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
19622 (TARGET_64BIT ? "ldu" : "lwzu"),
19623 lazy_ptr_name);
19624 fprintf (file, "\tmtctr r12\n");
19625 fprintf (file, "\tbctr\n");
19628 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19629 fprintf (file, "%s:\n", lazy_ptr_name);
19630 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19631 fprintf (file, "%sdyld_stub_binding_helper\n",
19632 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
19635 /* Legitimize PIC addresses. If the address is already
19636 position-independent, we return ORIG. Newly generated
19637 position-independent addresses go into a reg. This is REG if non
19638 zero, otherwise we allocate register(s) as necessary. */
19640 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
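/* SMALL_INT is true exactly when INTVAL (X) fits in a signed 16-bit
   immediate, i.e. -32768 <= INTVAL (X) <= 32767: adding 0x8000 under
   UINTVAL's modular arithmetic maps that range onto [0, 0xffff].  */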
19642 rtx
19643 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
19644 rtx reg)
19646 rtx base, offset;
19648 if (reg == NULL && !reload_completed)
19649 reg = gen_reg_rtx (Pmode);
19651 if (GET_CODE (orig) == CONST)
19653 rtx reg_temp;
19655 if (GET_CODE (XEXP (orig, 0)) == PLUS
19656 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
19657 return orig;
19659 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
19661 /* Use a different reg for the intermediate value, as
19662 it will be marked UNCHANGING. */
19663 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
19664 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
19665 Pmode, reg_temp);
19666 offset =
19667 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
19668 Pmode, reg);
19670 if (CONST_INT_P (offset))
19672 if (SMALL_INT (offset))
19673 return plus_constant (Pmode, base, INTVAL (offset));
19674 else if (!reload_completed)
19675 offset = force_reg (Pmode, offset);
19676 else
19678 rtx mem = force_const_mem (Pmode, orig);
19679 return machopic_legitimize_pic_address (mem, Pmode, reg);
19682 return gen_rtx_PLUS (Pmode, base, offset);
19685 /* Fall back on generic machopic code. */
19686 return machopic_legitimize_pic_address (orig, mode, reg);
19689 /* Output a .machine directive for the Darwin assembler, and call
19690 the generic start_file routine. */
19692 static void
19693 rs6000_darwin_file_start (void)
19695 static const struct
19697 const char *arg;
19698 const char *name;
19699 HOST_WIDE_INT if_set;
19700 } mapping[] = {
19701 { "ppc64", "ppc64", MASK_64BIT },
19702 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
19703 { "power4", "ppc970", 0 },
19704 { "G5", "ppc970", 0 },
19705 { "7450", "ppc7450", 0 },
19706 { "7400", "ppc7400", MASK_ALTIVEC },
19707 { "G4", "ppc7400", 0 },
19708 { "750", "ppc750", 0 },
19709 { "740", "ppc750", 0 },
19710 { "G3", "ppc750", 0 },
19711 { "604e", "ppc604e", 0 },
19712 { "604", "ppc604", 0 },
19713 { "603e", "ppc603", 0 },
19714 { "603", "ppc603", 0 },
19715 { "601", "ppc601", 0 },
19716 { NULL, "ppc", 0 } };
19717 const char *cpu_id = "";
19718 size_t i;
19720 rs6000_file_start ();
19721 darwin_file_start ();
19723 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
19725 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
19726 cpu_id = rs6000_default_cpu;
19728 if (global_options_set.x_rs6000_cpu_index)
19729 cpu_id = processor_target_table[rs6000_cpu_index].name;
19731 /* Look through the mapping array. Pick the first name that either
19732 matches the argument, has a bit set in IF_SET that is also set
19733 in the target flags, or has a NULL name. */
19735 i = 0;
19736 while (mapping[i].arg != NULL
19737 && strcmp (mapping[i].arg, cpu_id) != 0
19738 && (mapping[i].if_set & rs6000_isa_flags) == 0)
19739 i++;
19741 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
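/* For example, a CPU id of "G5" or "970" matches the "970" row above and
   emits "\t.machine ppc970"; if no argument or flag bit matches, the
   terminating NULL row falls back to plain "ppc".  */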
19744 #endif /* TARGET_MACHO */
19746 #if TARGET_ELF
19747 static int
19748 rs6000_elf_reloc_rw_mask (void)
19750 if (flag_pic)
19751 return 3;
19752 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
19753 return 2;
19754 else
19755 return 0;
19758 /* Record an element in the table of global constructors. SYMBOL is
19759 a SYMBOL_REF of the function to be called; PRIORITY is a number
19760 between 0 and MAX_INIT_PRIORITY.
19762 This differs from default_named_section_asm_out_constructor in
19763 that we have special handling for -mrelocatable. */
19765 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
19766 static void
19767 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
19769 const char *section = ".ctors";
19770 char buf[18];
19772 if (priority != DEFAULT_INIT_PRIORITY)
19774 sprintf (buf, ".ctors.%.5u",
19775 /* Invert the numbering so the linker puts us in the proper
19776 order; constructors are run from right to left, and the
19777 linker sorts in increasing order. */
19778 MAX_INIT_PRIORITY - priority);
19779 section = buf;
19782 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19783 assemble_align (POINTER_SIZE);
19785 if (DEFAULT_ABI == ABI_V4
19786 && (TARGET_RELOCATABLE || flag_pic > 1))
19788 fputs ("\t.long (", asm_out_file);
19789 output_addr_const (asm_out_file, symbol);
19790 fputs (")@fixup\n", asm_out_file);
19792 else
19793 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
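/* Example of the inversion above: with MAX_INIT_PRIORITY of 65535, a
   constructor declared with __attribute__((constructor (101))) is placed
   in section ".ctors.65434", so the linker's increasing sort yields the
   right-to-left execution order the comment describes.  */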
19796 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
19797 static void
19798 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
19800 const char *section = ".dtors";
19801 char buf[18];
19803 if (priority != DEFAULT_INIT_PRIORITY)
19805 sprintf (buf, ".dtors.%.5u",
19806 /* Invert the numbering so the linker puts us in the proper
19807 order; constructors are run from right to left, and the
19808 linker sorts in increasing order. */
19809 MAX_INIT_PRIORITY - priority);
19810 section = buf;
19813 switch_to_section (get_section (section, SECTION_WRITE, NULL));
19814 assemble_align (POINTER_SIZE);
19816 if (DEFAULT_ABI == ABI_V4
19817 && (TARGET_RELOCATABLE || flag_pic > 1))
19819 fputs ("\t.long (", asm_out_file);
19820 output_addr_const (asm_out_file, symbol);
19821 fputs (")@fixup\n", asm_out_file);
19823 else
19824 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
19827 void
19828 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
19830 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
19832 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
19833 ASM_OUTPUT_LABEL (file, name);
19834 fputs (DOUBLE_INT_ASM_OP, file);
19835 rs6000_output_function_entry (file, name);
19836 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
19837 if (DOT_SYMBOLS)
19839 fputs ("\t.size\t", file);
19840 assemble_name (file, name);
19841 fputs (",24\n\t.type\t.", file);
19842 assemble_name (file, name);
19843 fputs (",@function\n", file);
19844 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
19846 fputs ("\t.globl\t.", file);
19847 assemble_name (file, name);
19848 putc ('\n', file);
19851 else
19852 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19853 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19854 rs6000_output_function_entry (file, name);
19855 fputs (":\n", file);
19856 return;
19859 int uses_toc;
19860 if (DEFAULT_ABI == ABI_V4
19861 && (TARGET_RELOCATABLE || flag_pic > 1)
19862 && !TARGET_SECURE_PLT
19863 && (!constant_pool_empty_p () || crtl->profile)
19864 && (uses_toc = uses_TOC ()))
19866 char buf[256];
19868 if (uses_toc == 2)
19869 switch_to_other_text_partition ();
19870 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19872 fprintf (file, "\t.long ");
19873 assemble_name (file, toc_label_name);
19874 need_toc_init = 1;
19875 putc ('-', file);
19876 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19877 assemble_name (file, buf);
19878 putc ('\n', file);
19879 if (uses_toc == 2)
19880 switch_to_other_text_partition ();
19883 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19884 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19886 if (TARGET_CMODEL == CMODEL_LARGE
19887 && rs6000_global_entry_point_prologue_needed_p ())
19889 char buf[256];
19891 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
19893 fprintf (file, "\t.quad .TOC.-");
19894 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
19895 assemble_name (file, buf);
19896 putc ('\n', file);
19899 if (DEFAULT_ABI == ABI_AIX)
19901 const char *desc_name, *orig_name;
19903 orig_name = (*targetm.strip_name_encoding) (name);
19904 desc_name = orig_name;
19905 while (*desc_name == '.')
19906 desc_name++;
19908 if (TREE_PUBLIC (decl))
19909 fprintf (file, "\t.globl %s\n", desc_name);
19911 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
19912 fprintf (file, "%s:\n", desc_name);
19913 fprintf (file, "\t.long %s\n", orig_name);
19914 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
19915 fputs ("\t.long 0\n", file);
19916 fprintf (file, "\t.previous\n");
19918 ASM_OUTPUT_LABEL (file, name);
19921 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
19922 static void
19923 rs6000_elf_file_end (void)
19925 #ifdef HAVE_AS_GNU_ATTRIBUTE
19926 /* ??? The value emitted depends on options active at file end.
19927 Assume anyone using #pragma or attributes that might change
19928 options knows what they are doing. */
19929 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
19930 && rs6000_passes_float)
19932 int fp;
19934 if (TARGET_HARD_FLOAT)
19935 fp = 1;
19936 else
19937 fp = 2;
19938 if (rs6000_passes_long_double)
19940 if (!TARGET_LONG_DOUBLE_128)
19941 fp |= 2 * 4;
19942 else if (TARGET_IEEEQUAD)
19943 fp |= 3 * 4;
19944 else
19945 fp |= 1 * 4;
19947 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
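/* The Tag_GNU_Power_ABI_FP value just emitted encodes hard/soft float in
   the low two bits (1 = hard, 2 = soft) and the long double format in
   the next two (1 = IBM 128-bit, 2 = 64-bit, 3 = IEEE 128-bit); e.g.
   hard float with IEEE long double gives 1 | 3 * 4 == 13.  */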
19949 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
19951 if (rs6000_passes_vector)
19952 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
19953 (TARGET_ALTIVEC_ABI ? 2 : 1));
19954 if (rs6000_returns_struct)
19955 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
19956 aix_struct_return ? 2 : 1);
19958 #endif
19959 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
19960 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
19961 file_end_indicate_exec_stack ();
19962 #endif
19964 if (flag_split_stack)
19965 file_end_indicate_split_stack ();
19967 if (cpu_builtin_p)
19969 /* We have expanded a CPU builtin, so we need to emit a reference to
19970 the special symbol that LIBC uses to declare it supports the
19971 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
19972 switch_to_section (data_section);
19973 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
19974 fprintf (asm_out_file, "\t%s %s\n",
19975 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
19978 #endif
19980 #if TARGET_XCOFF
19982 #ifndef HAVE_XCOFF_DWARF_EXTRAS
19983 #define HAVE_XCOFF_DWARF_EXTRAS 0
19984 #endif
19986 static enum unwind_info_type
19987 rs6000_xcoff_debug_unwind_info (void)
19989 return UI_NONE;
19992 static void
19993 rs6000_xcoff_asm_output_anchor (rtx symbol)
19995 char buffer[100];
19997 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
19998 SYMBOL_REF_BLOCK_OFFSET (symbol));
19999 fprintf (asm_out_file, "%s", SET_ASM_OP);
20000 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
20001 fprintf (asm_out_file, ",");
20002 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
20003 fprintf (asm_out_file, "\n");
20006 static void
20007 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
20009 fputs (GLOBAL_ASM_OP, stream);
20010 RS6000_OUTPUT_BASENAME (stream, name);
20011 putc ('\n', stream);
20014 /* A get_unnamed_decl callback, used for read-only sections. PTR
20015 points to the section string variable. */
20017 static void
20018 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
20020 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
20021 *(const char *const *) directive,
20022 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20025 /* Likewise for read-write sections. */
20027 static void
20028 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
20030 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
20031 *(const char *const *) directive,
20032 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20035 static void
20036 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
20038 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
20039 *(const char *const *) directive,
20040 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
20043 /* A get_unnamed_section callback, used for switching to toc_section. */
20045 static void
20046 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
20048 if (TARGET_MINIMAL_TOC)
20050 /* toc_section is always selected at least once from
20051 rs6000_xcoff_file_start, so this is guaranteed to
20052 be defined exactly once in each file. */
20053 if (!toc_initialized)
20055 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
20056 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
20057 toc_initialized = 1;
20059 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
20060 (TARGET_32BIT ? "" : ",3"));
20062 else
20063 fputs ("\t.toc\n", asm_out_file);
20066 /* Implement TARGET_ASM_INIT_SECTIONS. */
20068 static void
20069 rs6000_xcoff_asm_init_sections (void)
20071 read_only_data_section
20072 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20073 &xcoff_read_only_section_name);
20075 private_data_section
20076 = get_unnamed_section (SECTION_WRITE,
20077 rs6000_xcoff_output_readwrite_section_asm_op,
20078 &xcoff_private_data_section_name);
20080 read_only_private_data_section
20081 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
20082 &xcoff_private_rodata_section_name);
20084 tls_data_section
20085 = get_unnamed_section (SECTION_TLS,
20086 rs6000_xcoff_output_tls_section_asm_op,
20087 &xcoff_tls_data_section_name);
20089 tls_private_data_section
20090 = get_unnamed_section (SECTION_TLS,
20091 rs6000_xcoff_output_tls_section_asm_op,
20092 &xcoff_private_data_section_name);
20094 toc_section
20095 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
20097 readonly_data_section = read_only_data_section;
20100 static int
20101 rs6000_xcoff_reloc_rw_mask (void)
20103 return 3;
20106 static void
20107 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
20108 tree decl ATTRIBUTE_UNUSED)
20110 int smclass;
20111 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
20113 if (flags & SECTION_EXCLUDE)
20114 smclass = 4;
20115 else if (flags & SECTION_DEBUG)
20117 fprintf (asm_out_file, "\t.dwsect %s\n", name);
20118 return;
20120 else if (flags & SECTION_CODE)
20121 smclass = 0;
20122 else if (flags & SECTION_TLS)
20123 smclass = 3;
20124 else if (flags & SECTION_WRITE)
20125 smclass = 2;
20126 else
20127 smclass = 1;
20129 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
20130 (flags & SECTION_CODE) ? "." : "",
20131 name, suffix[smclass], flags & SECTION_ENTSIZE);
20134 #define IN_NAMED_SECTION(DECL) \
20135 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
20136 && DECL_SECTION_NAME (DECL) != NULL)
20138 static section *
20139 rs6000_xcoff_select_section (tree decl, int reloc,
20140 unsigned HOST_WIDE_INT align)
20142 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
20143 named section. */
20144 if (align > BIGGEST_ALIGNMENT)
20146 resolve_unique_section (decl, reloc, true);
20147 if (IN_NAMED_SECTION (decl))
20148 return get_named_section (decl, NULL, reloc);
20151 if (decl_readonly_section (decl, reloc))
20153 if (TREE_PUBLIC (decl))
20154 return read_only_data_section;
20155 else
20156 return read_only_private_data_section;
20158 else
20160 #if HAVE_AS_TLS
20161 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20163 if (TREE_PUBLIC (decl))
20164 return tls_data_section;
20165 else if (bss_initializer_p (decl))
20167 /* Convert to COMMON to emit in BSS. */
20168 DECL_COMMON (decl) = 1;
20169 return tls_comm_section;
20171 else
20172 return tls_private_data_section;
20174 else
20175 #endif
20176 if (TREE_PUBLIC (decl))
20177 return data_section;
20178 else
20179 return private_data_section;
20183 static void
20184 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
20186 const char *name;
20188 /* Use select_section for private data and uninitialized data with
20189 alignment <= BIGGEST_ALIGNMENT. */
20190 if (!TREE_PUBLIC (decl)
20191 || DECL_COMMON (decl)
20192 || (DECL_INITIAL (decl) == NULL_TREE
20193 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
20194 || DECL_INITIAL (decl) == error_mark_node
20195 || (flag_zero_initialized_in_bss
20196 && initializer_zerop (DECL_INITIAL (decl))))
20197 return;
20199 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
20200 name = (*targetm.strip_name_encoding) (name);
20201 set_decl_section_name (decl, name);
20204 /* Select section for constant in constant pool.
20206 On RS/6000, all constants are in the private read-only data area.
20207 However, if this is being placed in the TOC it must be output as a
20208 toc entry. */
20210 static section *
20211 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
20212 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
20214 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20215 return toc_section;
20216 else
20217 return read_only_private_data_section;
20220 /* Remove any trailing [DS] or the like from the symbol name. */
20222 static const char *
20223 rs6000_xcoff_strip_name_encoding (const char *name)
20225 size_t len;
20226 if (*name == '*')
20227 name++;
20228 len = strlen (name);
20229 if (name[len - 1] == ']')
20230 return ggc_alloc_string (name, len - 4);
20231 else
20232 return name;
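/* E.g. "foo[DS]" (and "*foo[DS]") strip to "foo".  Note the code assumes
   every bracketed suffix is exactly four characters long, as all of the
   XCOFF mapping-class suffixes used here are.  */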
20235 /* Section attributes. AIX is always PIC. */
20237 static unsigned int
20238 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
20240 unsigned int align;
20241 unsigned int flags = default_section_type_flags (decl, name, reloc);
20243 /* Align to at least UNIT size. */
20244 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
20245 align = MIN_UNITS_PER_WORD;
20246 else
20247 /* Increase alignment of large objects if not already stricter. */
20248 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
20249 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
20250 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
20252 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
20255 /* Output at beginning of assembler file.
20257 Initialize the section names for the RS/6000 at this point.
20259 Specify filename, including full path, to assembler.
20261 We want to go into the TOC section so at least one .toc will be emitted.
20262 Also, in order to output proper .bs/.es pairs, we need at least one static
20263 [RW] section emitted.
20265 Finally, declare mcount when profiling to make the assembler happy. */
20267 static void
20268 rs6000_xcoff_file_start (void)
20270 rs6000_gen_section_name (&xcoff_bss_section_name,
20271 main_input_filename, ".bss_");
20272 rs6000_gen_section_name (&xcoff_private_data_section_name,
20273 main_input_filename, ".rw_");
20274 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
20275 main_input_filename, ".rop_");
20276 rs6000_gen_section_name (&xcoff_read_only_section_name,
20277 main_input_filename, ".ro_");
20278 rs6000_gen_section_name (&xcoff_tls_data_section_name,
20279 main_input_filename, ".tls_");
20280 rs6000_gen_section_name (&xcoff_tbss_section_name,
20281 main_input_filename, ".tbss_[UL]");
20283 fputs ("\t.file\t", asm_out_file);
20284 output_quoted_string (asm_out_file, main_input_filename);
20285 fputc ('\n', asm_out_file);
20286 if (write_symbols != NO_DEBUG)
20287 switch_to_section (private_data_section);
20288 switch_to_section (toc_section);
20289 switch_to_section (text_section);
20290 if (profile_flag)
20291 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
20292 rs6000_file_start ();
20295 /* Output at end of assembler file.
20296 On the RS/6000, referencing data should automatically pull in text. */
20298 static void
20299 rs6000_xcoff_file_end (void)
20301 switch_to_section (text_section);
20302 fputs ("_section_.text:\n", asm_out_file);
20303 switch_to_section (data_section);
20304 fputs (TARGET_32BIT
20305 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
20306 asm_out_file);
20309 struct declare_alias_data
20311 FILE *file;
20312 bool function_descriptor;
20315 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
20317 static bool
20318 rs6000_declare_alias (struct symtab_node *n, void *d)
20320 struct declare_alias_data *data = (struct declare_alias_data *)d;
20321 /* Main symbol is output specially, because varasm machinery does part of
20322 the job for us - we do not need to declare .globl/lglobs and such. */
20323 if (!n->alias || n->weakref)
20324 return false;
20326 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
20327 return false;
20329 /* Prevent assemble_alias from trying to use .set pseudo operation
20330 that does not behave as expected by the middle-end. */
20331 TREE_ASM_WRITTEN (n->decl) = true;
20333 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
20334 char *buffer = (char *) alloca (strlen (name) + 2);
20335 char *p;
20336 int dollar_inside = 0;
20338 strcpy (buffer, name);
20339 p = strchr (buffer, '$');
20340 while (p) {
20341 *p = '_';
20342 dollar_inside++;
20343 p = strchr (p + 1, '$');
20345 if (TREE_PUBLIC (n->decl))
20347 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
20349 if (dollar_inside) {
20350 if (data->function_descriptor)
20351 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20352 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20354 if (data->function_descriptor)
20356 fputs ("\t.globl .", data->file);
20357 RS6000_OUTPUT_BASENAME (data->file, buffer);
20358 putc ('\n', data->file);
20360 fputs ("\t.globl ", data->file);
20361 RS6000_OUTPUT_BASENAME (data->file, buffer);
20362 putc ('\n', data->file);
20364 #ifdef ASM_WEAKEN_DECL
20365 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
20366 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
20367 #endif
20369 else
20371 if (dollar_inside)
20373 if (data->function_descriptor)
20374 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
20375 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
20377 if (data->function_descriptor)
20379 fputs ("\t.lglobl .", data->file);
20380 RS6000_OUTPUT_BASENAME (data->file, buffer);
20381 putc ('\n', data->file);
20383 fputs ("\t.lglobl ", data->file);
20384 RS6000_OUTPUT_BASENAME (data->file, buffer);
20385 putc ('\n', data->file);
20387 if (data->function_descriptor)
20388 fputs (".", data->file);
20389 RS6000_OUTPUT_BASENAME (data->file, buffer);
20390 fputs (":\n", data->file);
20391 return false;
20395 #ifdef HAVE_GAS_HIDDEN
20396 /* Helper function to calculate visibility of a DECL
20397 and return the value as a const string. */
20399 static const char *
20400 rs6000_xcoff_visibility (tree decl)
20402 static const char * const visibility_types[] = {
20403 "", ",protected", ",hidden", ",internal"
20406 enum symbol_visibility vis = DECL_VISIBILITY (decl);
20407 return visibility_types[vis];
20409 #endif
20412 /* This macro produces the initial definition of a function name.
20413 On the RS/6000, we need to place an extra '.' in the function name and
20414 output the function descriptor.
20415 Dollar signs are converted to underscores.
20417 The csect for the function will have already been created when
20418 text_section was selected. We do have to go back to that csect, however.
20420 The third and fourth parameters to the .function pseudo-op (16 and 044)
20421 are placeholders which no longer have any use.
20423 Because AIX assembler's .set command has unexpected semantics, we output
20424 all aliases as alternative labels in front of the definition. */
20426 void
20427 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
20429 char *buffer = (char *) alloca (strlen (name) + 1);
20430 char *p;
20431 int dollar_inside = 0;
20432 struct declare_alias_data data = {file, false};
20434 strcpy (buffer, name);
20435 p = strchr (buffer, '$');
20436 while (p) {
20437 *p = '_';
20438 dollar_inside++;
20439 p = strchr (p + 1, '$');
20441 if (TREE_PUBLIC (decl))
20443 if (!RS6000_WEAK || !DECL_WEAK (decl))
20445 if (dollar_inside) {
20446 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20447 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20449 fputs ("\t.globl .", file);
20450 RS6000_OUTPUT_BASENAME (file, buffer);
20451 #ifdef HAVE_GAS_HIDDEN
20452 fputs (rs6000_xcoff_visibility (decl), file);
20453 #endif
20454 putc ('\n', file);
20457 else
20459 if (dollar_inside) {
20460 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
20461 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
20463 fputs ("\t.lglobl .", file);
20464 RS6000_OUTPUT_BASENAME (file, buffer);
20465 putc ('\n', file);
20467 fputs ("\t.csect ", file);
20468 RS6000_OUTPUT_BASENAME (file, buffer);
20469 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
20470 RS6000_OUTPUT_BASENAME (file, buffer);
20471 fputs (":\n", file);
20472 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20473 &data, true);
20474 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
20475 RS6000_OUTPUT_BASENAME (file, buffer);
20476 fputs (", TOC[tc0], 0\n", file);
20477 in_section = NULL;
20478 switch_to_section (function_section (decl));
20479 putc ('.', file);
20480 RS6000_OUTPUT_BASENAME (file, buffer);
20481 fputs (":\n", file);
20482 data.function_descriptor = true;
20483 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20484 &data, true);
20485 if (!DECL_IGNORED_P (decl))
20487 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
20488 xcoffout_declare_function (file, decl, buffer);
20489 else if (write_symbols == DWARF2_DEBUG)
20491 name = (*targetm.strip_name_encoding) (name);
20492 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
20495 return;
20499 /* Output assembly language to globalize a symbol from a DECL,
20500 possibly with visibility. */
20502 void
20503 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
20505 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
20506 fputs (GLOBAL_ASM_OP, stream);
20507 RS6000_OUTPUT_BASENAME (stream, name);
20508 #ifdef HAVE_GAS_HIDDEN
20509 fputs (rs6000_xcoff_visibility (decl), stream);
20510 #endif
20511 putc ('\n', stream);
20514 /* Output assembly language to define a symbol as COMMON from a DECL,
20515 possibly with visibility. */
20517 void
20518 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
20519 tree decl ATTRIBUTE_UNUSED,
20520 const char *name,
20521 unsigned HOST_WIDE_INT size,
20522 unsigned HOST_WIDE_INT align)
20524 unsigned HOST_WIDE_INT align2 = 2;
20526 if (align > 32)
20527 align2 = floor_log2 (align / BITS_PER_UNIT);
20528 else if (size > 4)
20529 align2 = 3;
20531 fputs (COMMON_ASM_OP, stream);
20532 RS6000_OUTPUT_BASENAME (stream, name);
20534 fprintf (stream,
20535 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
20536 size, align2);
20538 #ifdef HAVE_GAS_HIDDEN
20539 if (decl != NULL)
20540 fputs (rs6000_xcoff_visibility (decl), stream);
20541 #endif
20542 putc ('\n', stream);
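/* Example output: a public common symbol of size 8 with 64-bit (8-byte)
   alignment is emitted as "\t.comm foo,8,3", the trailing operand being
   log2 of the alignment in bytes.  */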
20545 /* This macro produces the initial definition of an object (variable) name.
20546 Because AIX assembler's .set command has unexpected semantics, we output
20547 all aliases as alternative labels in front of the definition. */
20549 void
20550 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
20552 struct declare_alias_data data = {file, false};
20553 RS6000_OUTPUT_BASENAME (file, name);
20554 fputs (":\n", file);
20555 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
20556 &data, true);
20559 /* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'. */
20561 void
20562 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
20564 fputs (integer_asm_op (size, FALSE), file);
20565 assemble_name (file, label);
20566 fputs ("-$", file);
20569 /* Output a symbol offset relative to the dbase for the current object.
20570 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
20571 signed offsets.
20573 __gcc_unwind_dbase is embedded in all executables/libraries through
20574 libgcc/config/rs6000/crtdbase.S. */
20576 void
20577 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
20579 fputs (integer_asm_op (size, FALSE), file);
20580 assemble_name (file, label);
20581 fputs("-__gcc_unwind_dbase", file);
20584 #ifdef HAVE_AS_TLS
20585 static void
20586 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
20588 rtx symbol;
20589 int flags;
20590 const char *symname;
20592 default_encode_section_info (decl, rtl, first);
20594 /* Careful not to prod global register variables. */
20595 if (!MEM_P (rtl))
20596 return;
20597 symbol = XEXP (rtl, 0);
20598 if (!SYMBOL_REF_P (symbol))
20599 return;
20601 flags = SYMBOL_REF_FLAGS (symbol);
20603 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
20604 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
20606 SYMBOL_REF_FLAGS (symbol) = flags;
20608 /* Append mapping class to extern decls. */
20609 symname = XSTR (symbol, 0);
20610 if (decl /* sync condition with assemble_external () */
20611 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
20612 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
20613 || TREE_CODE (decl) == FUNCTION_DECL)
20614 && symname[strlen (symname) - 1] != ']')
20616 char *newname = (char *) alloca (strlen (symname) + 5);
20617 strcpy (newname, symname);
20618 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
20619 ? "[DS]" : "[UA]"));
20620 XSTR (symbol, 0) = ggc_strdup (newname);
20623 #endif /* HAVE_AS_TLS */
20624 #endif /* TARGET_XCOFF */
20626 void
20627 rs6000_asm_weaken_decl (FILE *stream, tree decl,
20628 const char *name, const char *val)
20630 fputs ("\t.weak\t", stream);
20631 RS6000_OUTPUT_BASENAME (stream, name);
20632 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20633 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20635 if (TARGET_XCOFF)
20636 fputs ("[DS]", stream);
20637 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20638 if (TARGET_XCOFF)
20639 fputs (rs6000_xcoff_visibility (decl), stream);
20640 #endif
20641 fputs ("\n\t.weak\t.", stream);
20642 RS6000_OUTPUT_BASENAME (stream, name);
20644 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
20645 if (TARGET_XCOFF)
20646 fputs (rs6000_xcoff_visibility (decl), stream);
20647 #endif
20648 fputc ('\n', stream);
20649 if (val)
20651 #ifdef ASM_OUTPUT_DEF
20652 ASM_OUTPUT_DEF (stream, name, val);
20653 #endif
20654 if (decl && TREE_CODE (decl) == FUNCTION_DECL
20655 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
20657 fputs ("\t.set\t.", stream);
20658 RS6000_OUTPUT_BASENAME (stream, name);
20659 fputs (",.", stream);
20660 RS6000_OUTPUT_BASENAME (stream, val);
20661 fputc ('\n', stream);
20667 /* Return true if INSN should not be copied. */
20669 static bool
20670 rs6000_cannot_copy_insn_p (rtx_insn *insn)
20672 return recog_memoized (insn) >= 0
20673 && get_attr_cannot_copy (insn);
20676 /* Compute a (partial) cost for rtx X. Return true if the complete
20677 cost has been computed, and false if subexpressions should be
20678 scanned. In either case, *TOTAL contains the cost result. */
20680 static bool
20681 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
20682 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
20684 int code = GET_CODE (x);
20686 switch (code)
20688 /* On the RS/6000, if it is valid in the insn, it is free. */
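/* Constraint letters tested below (from the rs6000 constraints.md): 'I'
   is a signed 16-bit constant, 'L' a signed 16-bit constant shifted left
   16 bits, 'K' a constant with only the low 16 bits set, 'J' one with
   only the high-order 16 bits set, and 'P' a constant whose negation is
   a signed 16-bit constant.  */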
20689 case CONST_INT:
20690 if (((outer_code == SET
20691 || outer_code == PLUS
20692 || outer_code == MINUS)
20693 && (satisfies_constraint_I (x)
20694 || satisfies_constraint_L (x)))
20695 || (outer_code == AND
20696 && (satisfies_constraint_K (x)
20697 || (mode == SImode
20698 ? satisfies_constraint_L (x)
20699 : satisfies_constraint_J (x))))
20700 || ((outer_code == IOR || outer_code == XOR)
20701 && (satisfies_constraint_K (x)
20702 || (mode == SImode
20703 ? satisfies_constraint_L (x)
20704 : satisfies_constraint_J (x))))
20705 || outer_code == ASHIFT
20706 || outer_code == ASHIFTRT
20707 || outer_code == LSHIFTRT
20708 || outer_code == ROTATE
20709 || outer_code == ROTATERT
20710 || outer_code == ZERO_EXTRACT
20711 || (outer_code == MULT
20712 && satisfies_constraint_I (x))
20713 || ((outer_code == DIV || outer_code == UDIV
20714 || outer_code == MOD || outer_code == UMOD)
20715 && exact_log2 (INTVAL (x)) >= 0)
20716 || (outer_code == COMPARE
20717 && (satisfies_constraint_I (x)
20718 || satisfies_constraint_K (x)))
20719 || ((outer_code == EQ || outer_code == NE)
20720 && (satisfies_constraint_I (x)
20721 || satisfies_constraint_K (x)
20722 || (mode == SImode
20723 ? satisfies_constraint_L (x)
20724 : satisfies_constraint_J (x))))
20725 || (outer_code == GTU
20726 && satisfies_constraint_I (x))
20727 || (outer_code == LTU
20728 && satisfies_constraint_P (x)))
20730 *total = 0;
20731 return true;
20733 else if ((outer_code == PLUS
20734 && reg_or_add_cint_operand (x, VOIDmode))
20735 || (outer_code == MINUS
20736 && reg_or_sub_cint_operand (x, VOIDmode))
20737 || ((outer_code == SET
20738 || outer_code == IOR
20739 || outer_code == XOR)
20740 && (INTVAL (x)
20741 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
20743 *total = COSTS_N_INSNS (1);
20744 return true;
20746 /* FALLTHRU */
20748 case CONST_DOUBLE:
20749 case CONST_WIDE_INT:
20750 case CONST:
20751 case HIGH:
20752 case SYMBOL_REF:
20753 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20754 return true;
20756 case MEM:
20757 /* When optimizing for size, MEM should be slightly more expensive
20758 than generating the address, e.g., (plus (reg) (const)).
20759 L1 cache latency is about two instructions. */
20760 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
20761 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
20762 *total += COSTS_N_INSNS (100);
20763 return true;
20765 case LABEL_REF:
20766 *total = 0;
20767 return true;
20769 case PLUS:
20770 case MINUS:
20771 if (FLOAT_MODE_P (mode))
20772 *total = rs6000_cost->fp;
20773 else
20774 *total = COSTS_N_INSNS (1);
20775 return false;
20777 case MULT:
20778 if (CONST_INT_P (XEXP (x, 1))
20779 && satisfies_constraint_I (XEXP (x, 1)))
20781 if (INTVAL (XEXP (x, 1)) >= -256
20782 && INTVAL (XEXP (x, 1)) <= 255)
20783 *total = rs6000_cost->mulsi_const9;
20784 else
20785 *total = rs6000_cost->mulsi_const;
20787 else if (mode == SFmode)
20788 *total = rs6000_cost->fp;
20789 else if (FLOAT_MODE_P (mode))
20790 *total = rs6000_cost->dmul;
20791 else if (mode == DImode)
20792 *total = rs6000_cost->muldi;
20793 else
20794 *total = rs6000_cost->mulsi;
20795 return false;
20797 case FMA:
20798 if (mode == SFmode)
20799 *total = rs6000_cost->fp;
20800 else
20801 *total = rs6000_cost->dmul;
20802 break;
20804 case DIV:
20805 case MOD:
20806 if (FLOAT_MODE_P (mode))
20808 *total = mode == DFmode ? rs6000_cost->ddiv
20809 : rs6000_cost->sdiv;
20810 return false;
20812 /* FALLTHRU */
20814 case UDIV:
20815 case UMOD:
20816 if (CONST_INT_P (XEXP (x, 1))
20817 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
20819 if (code == DIV || code == MOD)
20820 /* Shift, addze */
20821 *total = COSTS_N_INSNS (2);
20822 else
20823 /* Shift */
20824 *total = COSTS_N_INSNS (1);
20826 else
20828 if (GET_MODE (XEXP (x, 1)) == DImode)
20829 *total = rs6000_cost->divdi;
20830 else
20831 *total = rs6000_cost->divsi;
20833 /* Add in shift and subtract for MOD unless we have a mod instruction. */
20834 if (!TARGET_MODULO && (code == MOD || code == UMOD))
20835 *total += COSTS_N_INSNS (2);
20836 return false;
20838 case CTZ:
20839 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
20840 return false;
20842 case FFS:
20843 *total = COSTS_N_INSNS (4);
20844 return false;
20846 case POPCOUNT:
20847 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
20848 return false;
20850 case PARITY:
20851 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
20852 return false;
20854 case NOT:
20855 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
20856 *total = 0;
20857 else
20858 *total = COSTS_N_INSNS (1);
20859 return false;
20861 case AND:
20862 if (CONST_INT_P (XEXP (x, 1)))
20864 rtx left = XEXP (x, 0);
20865 rtx_code left_code = GET_CODE (left);
20867 /* rotate-and-mask: 1 insn. */
20868 if ((left_code == ROTATE
20869 || left_code == ASHIFT
20870 || left_code == LSHIFTRT)
20871 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
20873 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
20874 if (!CONST_INT_P (XEXP (left, 1)))
20875 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
20876 *total += COSTS_N_INSNS (1);
20877 return true;
20880 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
20881 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
20882 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
20883 || (val & 0xffff) == val
20884 || (val & 0xffff0000) == val
20885 || ((val & 0xffff) == 0 && mode == SImode))
20887 *total = rtx_cost (left, mode, AND, 0, speed);
20888 *total += COSTS_N_INSNS (1);
20889 return true;
20892 /* 2 insns. */
20893 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
20895 *total = rtx_cost (left, mode, AND, 0, speed);
20896 *total += COSTS_N_INSNS (2);
20897 return true;
20901 *total = COSTS_N_INSNS (1);
20902 return false;
20904 case IOR:
20905 /* FIXME */
20906 *total = COSTS_N_INSNS (1);
20907 return true;
20909 case CLZ:
20910 case XOR:
20911 case ZERO_EXTRACT:
20912 *total = COSTS_N_INSNS (1);
20913 return false;
20915 case ASHIFT:
20916 /* The EXTSWSLI instruction is a combined instruction.  Don't count
20917 the sign extend and shift separately within the insn. */
20918 if (TARGET_EXTSWSLI && mode == DImode
20919 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
20920 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
20922 *total = 0;
20923 return false;
20925 /* fall through */
20927 case ASHIFTRT:
20928 case LSHIFTRT:
20929 case ROTATE:
20930 case ROTATERT:
20931 /* Handle mul_highpart. */
20932 if (outer_code == TRUNCATE
20933 && GET_CODE (XEXP (x, 0)) == MULT)
20935 if (mode == DImode)
20936 *total = rs6000_cost->muldi;
20937 else
20938 *total = rs6000_cost->mulsi;
20939 return true;
20941 else if (outer_code == AND)
20942 *total = 0;
20943 else
20944 *total = COSTS_N_INSNS (1);
20945 return false;
20947 case SIGN_EXTEND:
20948 case ZERO_EXTEND:
20949 if (MEM_P (XEXP (x, 0)))
20950 *total = 0;
20951 else
20952 *total = COSTS_N_INSNS (1);
20953 return false;
20955 case COMPARE:
20956 case NEG:
20957 case ABS:
20958 if (!FLOAT_MODE_P (mode))
20960 *total = COSTS_N_INSNS (1);
20961 return false;
20963 /* FALLTHRU */
20965 case FLOAT:
20966 case UNSIGNED_FLOAT:
20967 case FIX:
20968 case UNSIGNED_FIX:
20969 case FLOAT_TRUNCATE:
20970 *total = rs6000_cost->fp;
20971 return false;
20973 case FLOAT_EXTEND:
20974 if (mode == DFmode)
20975 *total = rs6000_cost->sfdf_convert;
20976 else
20977 *total = rs6000_cost->fp;
20978 return false;
20980 case CALL:
20981 case IF_THEN_ELSE:
20982 if (!speed)
20984 *total = COSTS_N_INSNS (1);
20985 return true;
20987 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
20989 *total = rs6000_cost->fp;
20990 return false;
20992 break;
20994 case NE:
20995 case EQ:
20996 case GTU:
20997 case LTU:
20998 /* Carry bit requires mode == Pmode.
20999 NEG or PLUS already counted so only add one. */
21000 if (mode == Pmode
21001 && (outer_code == NEG || outer_code == PLUS))
21003 *total = COSTS_N_INSNS (1);
21004 return true;
21006 /* FALLTHRU */
21008 case GT:
21009 case LT:
21010 case UNORDERED:
21011 if (outer_code == SET)
21013 if (XEXP (x, 1) == const0_rtx)
21015 *total = COSTS_N_INSNS (2);
21016 return true;
21018 else
21020 *total = COSTS_N_INSNS (3);
21021 return false;
21024 /* CC COMPARE. */
21025 if (outer_code == COMPARE)
21027 *total = 0;
21028 return true;
21030 break;
21032 default:
21033 break;
21036 return false;
21039 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
21041 static bool
21042 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
21043 int opno, int *total, bool speed)
21045 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
21047 fprintf (stderr,
21048 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
21049 "opno = %d, total = %d, speed = %s, x:\n",
21050 ret ? "complete" : "scan inner",
21051 GET_MODE_NAME (mode),
21052 GET_RTX_NAME (outer_code),
21053 opno,
21054 *total,
21055 speed ? "true" : "false");
21057 debug_rtx (x);
21059 return ret;
21062 static int
21063 rs6000_insn_cost (rtx_insn *insn, bool speed)
21065 if (recog_memoized (insn) < 0)
21066 return 0;
21068 /* If we are optimizing for size, just use the length. */
21069 if (!speed)
21070 return get_attr_length (insn);
21072 /* Use the cost if provided. */
21073 int cost = get_attr_cost (insn);
21074 if (cost > 0)
21075 return cost;
21077 /* If the insn tells us how many insns there are, use that. Otherwise use
21078 the length/4. Adjust the insn length to remove the extra size that
21079 prefixed instructions take. */
21080 int n = get_attr_num_insns (insn);
21081 if (n == 0)
21083 int length = get_attr_length (insn);
21084 if (get_attr_prefixed (insn) == PREFIXED_YES)
21086 int adjust = 0;
21087 ADJUST_INSN_LENGTH (insn, adjust);
21088 length -= adjust;
21091 n = length / 4;
21094 enum attr_type type = get_attr_type (insn);
21096 switch (type)
21098 case TYPE_LOAD:
21099 case TYPE_FPLOAD:
21100 case TYPE_VECLOAD:
21101 cost = COSTS_N_INSNS (n + 1);
21102 break;
21104 case TYPE_MUL:
21105 switch (get_attr_size (insn))
21107 case SIZE_8:
21108 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
21109 break;
21110 case SIZE_16:
21111 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
21112 break;
21113 case SIZE_32:
21114 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
21115 break;
21116 case SIZE_64:
21117 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
21118 break;
21119 default:
21120 gcc_unreachable ();
21122 break;
21123 case TYPE_DIV:
21124 switch (get_attr_size (insn))
21126 case SIZE_32:
21127 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
21128 break;
21129 case SIZE_64:
21130 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
21131 break;
21132 default:
21133 gcc_unreachable ();
21135 break;
21137 case TYPE_FP:
21138 cost = n * rs6000_cost->fp;
21139 break;
21140 case TYPE_DMUL:
21141 cost = n * rs6000_cost->dmul;
21142 break;
21143 case TYPE_SDIV:
21144 cost = n * rs6000_cost->sdiv;
21145 break;
21146 case TYPE_DDIV:
21147 cost = n * rs6000_cost->ddiv;
21148 break;
21150 case TYPE_SYNC:
21151 case TYPE_LOAD_L:
21152 case TYPE_MFCR:
21153 case TYPE_MFCRF:
21154 cost = COSTS_N_INSNS (n + 2);
21155 break;
21157 default:
21158 cost = COSTS_N_INSNS (n);
21161 return cost;
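
/* A minimal scalar sketch of the length-based fallback above.  The size
   of the prefixed-instruction adjustment is target-defined (it comes
   from ADJUST_INSN_LENGTH), so it is left as a parameter here rather
   than hard-coded.  */

static int
example_insn_count (int length, bool prefixed, int prefix_adjust)
{
  if (prefixed)
    length -= prefix_adjust;	/* Drop the extra encoding bytes.  */
  return length / 4;		/* One count per 4-byte word.  */
}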
21164 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
21166 static int
21167 rs6000_debug_address_cost (rtx x, machine_mode mode,
21168 addr_space_t as, bool speed)
21170 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
21172 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
21173 ret, speed ? "true" : "false");
21174 debug_rtx (x);
21176 return ret;
21180 /* A C expression returning the cost of moving data from a register of class
21181 CLASS1 to one of CLASS2. */
21183 static int
21184 rs6000_register_move_cost (machine_mode mode,
21185 reg_class_t from, reg_class_t to)
21187 int ret;
21188 reg_class_t rclass;
21190 if (TARGET_DEBUG_COST)
21191 dbg_cost_ctrl++;
21193 /* If we have VSX, we can easily move between FPR and Altivec registers,
21194 otherwise we can only easily move within classes.
21195 Do this first so we give best-case answers for union classes
21196 containing both gprs and vsx regs. */
21197 HARD_REG_SET to_vsx, from_vsx;
21198 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
21199 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
21200 if (!hard_reg_set_empty_p (to_vsx)
21201 && !hard_reg_set_empty_p (from_vsx)
21202 && (TARGET_VSX
21203 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
21205 int reg = FIRST_FPR_REGNO;
21206 if (TARGET_VSX
21207 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
21208 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
21209 reg = FIRST_ALTIVEC_REGNO;
21210 ret = 2 * hard_regno_nregs (reg, mode);
21213 /* Moves from/to GENERAL_REGS. */
21214 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
21215 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
21217 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
21219 if (TARGET_DIRECT_MOVE)
21221 /* Keep the cost for direct moves above that for within
21222 a register class even if the actual processor cost is
21223 comparable. We do this because a direct move insn
21224 can't be a nop, whereas with ideal register
21225 allocation a move within the same class might turn
21226 out to be a nop. */
21227 if (rs6000_tune == PROCESSOR_POWER9
21228 || rs6000_tune == PROCESSOR_FUTURE)
21229 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21230 else
21231 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21232 /* SFmode requires a conversion when moving between gprs
21233 and vsx. */
21234 if (mode == SFmode)
21235 ret += 2;
21237 else
21238 ret = (rs6000_memory_move_cost (mode, rclass, false)
21239 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
21242 /* It's more expensive to move CR_REGS than CR0_REGS because of the
21243 shift. */
21244 else if (rclass == CR_REGS)
21245 ret = 4;
21247 /* For those processors that have slow LR/CTR moves, make them more
21248 expensive than memory in order to bias spills to memory. */
21249 else if ((rs6000_tune == PROCESSOR_POWER6
21250 || rs6000_tune == PROCESSOR_POWER7
21251 || rs6000_tune == PROCESSOR_POWER8
21252 || rs6000_tune == PROCESSOR_POWER9)
21253 && reg_class_subset_p (rclass, SPECIAL_REGS))
21254 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21256 else
21257 /* A move will cost one instruction per GPR moved. */
21258 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
21261 /* Everything else has to go through GENERAL_REGS. */
21262 else
21263 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
21264 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
21266 if (TARGET_DEBUG_COST)
21268 if (dbg_cost_ctrl == 1)
21269 fprintf (stderr,
21270 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
21271 ret, GET_MODE_NAME (mode), reg_class_names[from],
21272 reg_class_names[to]);
21273 dbg_cost_ctrl--;
21276 return ret;
21279 /* A C expression returning the cost of moving data of MODE from a register to
21280 or from memory. */
21282 static int
21283 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
21284 bool in ATTRIBUTE_UNUSED)
21286 int ret;
21288 if (TARGET_DEBUG_COST)
21289 dbg_cost_ctrl++;
21291 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
21292 ret = 4 * hard_regno_nregs (0, mode);
21293 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
21294 || reg_classes_intersect_p (rclass, VSX_REGS)))
21295 ret = 4 * hard_regno_nregs (32, mode);
21296 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
21297 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
21298 else
21299 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
21301 if (TARGET_DEBUG_COST)
21303 if (dbg_cost_ctrl == 1)
21304 fprintf (stderr,
21305 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
21306 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
21307 dbg_cost_ctrl--;
21310 return ret;
21313 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21315 The register allocator chooses GEN_OR_VSX_REGS for the allocno
21316 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
21317 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
21318 move cost between GENERAL_REGS and VSX_REGS low.
21320 It might seem reasonable to use a union class. After all, if usage
21321 of vsr is low and gpr high, it might make sense to spill gpr to vsr
21322 rather than memory. However, in cases where register pressure of
21323 both is high, like the cactus_adm spec test, allowing
21324 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
21325 the first scheduling pass. This is partly due to an allocno of
21326 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
21327 class, which gives too high a pressure for GENERAL_REGS and too low
21328 for VSX_REGS. So, force a choice of the subclass here.
21330 The best class is also the union if GENERAL_REGS and VSX_REGS have
21331 the same cost. In that case we do use GEN_OR_VSX_REGS as the
21332 allocno class, since trying to narrow down the class by regno mode
21333 is prone to error. For example, SImode is allowed in VSX regs and
21334 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
21335 it would be wrong to choose an allocno of GENERAL_REGS based on
21336 SImode. */
21338 static reg_class_t
21339 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
21340 reg_class_t allocno_class,
21341 reg_class_t best_class)
21343 switch (allocno_class)
21345 case GEN_OR_VSX_REGS:
21346 /* best_class must be a subset of allocno_class. */
21347 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
21348 || best_class == GEN_OR_FLOAT_REGS
21349 || best_class == VSX_REGS
21350 || best_class == ALTIVEC_REGS
21351 || best_class == FLOAT_REGS
21352 || best_class == GENERAL_REGS
21353 || best_class == BASE_REGS);
21354 /* Use best_class but choose wider classes when copying from the
21355 wider class to best_class is cheap. This mimics IRA choice
21356 of allocno class. */
21357 if (best_class == BASE_REGS)
21358 return GENERAL_REGS;
21359 if (TARGET_VSX
21360 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
21361 return VSX_REGS;
21362 return best_class;
21364 default:
21365 break;
21368 return allocno_class;
21371 /* Returns a code for a target-specific builtin that implements
21372 reciprocal of the function, or NULL_TREE if not available. */
21374 static tree
21375 rs6000_builtin_reciprocal (tree fndecl)
21377 switch (DECL_MD_FUNCTION_CODE (fndecl))
21379 case VSX_BUILTIN_XVSQRTDP:
21380 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
21381 return NULL_TREE;
21383 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
21385 case VSX_BUILTIN_XVSQRTSP:
21386 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
21387 return NULL_TREE;
21389 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
21391 default:
21392 return NULL_TREE;
21396 /* Load up a constant. If the mode is a vector mode, splat the value across
21397 all of the vector elements. */
21399 static rtx
21400 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
21402 rtx reg;
21404 if (mode == SFmode || mode == DFmode)
21406 rtx d = const_double_from_real_value (dconst, mode);
21407 reg = force_reg (mode, d);
21409 else if (mode == V4SFmode)
21411 rtx d = const_double_from_real_value (dconst, SFmode);
21412 rtvec v = gen_rtvec (4, d, d, d, d);
21413 reg = gen_reg_rtx (mode);
21414 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21416 else if (mode == V2DFmode)
21418 rtx d = const_double_from_real_value (dconst, DFmode);
21419 rtvec v = gen_rtvec (2, d, d);
21420 reg = gen_reg_rtx (mode);
21421 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
21423 else
21424 gcc_unreachable ();
21426 return reg;
21429 /* Generate an FMA instruction. */
21431 static void
21432 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
21434 machine_mode mode = GET_MODE (target);
21435 rtx dst;
21437 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
21438 gcc_assert (dst != NULL);
21440 if (dst != target)
21441 emit_move_insn (target, dst);
21444 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
21446 static void
21447 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
21449 machine_mode mode = GET_MODE (dst);
21450 rtx r;
21452 /* This is a tad more complicated, since the fnma_optab is for
21453 a different expression: fma(-m1, m2, a), which is the same
21454 thing except in the case of signed zeros.
21456 Fortunately we know that if FMA is supported, then FNMSUB is
21457 also supported in the ISA. Just expand it directly. */
21459 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
21461 r = gen_rtx_NEG (mode, a);
21462 r = gen_rtx_FMA (mode, m1, m2, r);
21463 r = gen_rtx_NEG (mode, r);
21464 emit_insn (gen_rtx_SET (dst, r));
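
/* A standalone sketch of the signed-zero difference described above:
   expanding through fma(-m1, m2, a) and through -fma(m1, m2, -a)
   disagree exactly when m1*m2 == a, where the first yields +0.0 and
   the second -0.0.  */

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double m1 = 1.0, m2 = 2.0, a = 2.0;
  double fnma = fma (-m1, m2, a);	/* -(m1*m2) + a  ->  +0.0  */
  double fnmsub = -fma (m1, m2, -a);	/* -(m1*m2 - a)  ->  -0.0  */
  printf ("fnma sign: %d, fnmsub sign: %d\n",
	  !!signbit (fnma), !!signbit (fnmsub));
  return 0;
}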
21467 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
21468 add a reg_note saying that this was a division. Support both scalar and
21469 vector divide. Assumes no trapping math and finite arguments. */
21471 void
21472 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
21474 machine_mode mode = GET_MODE (dst);
21475 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
21476 int i;
21478 /* Low precision estimates guarantee 5 bits of accuracy. High
21479 precision estimates guarantee 14 bits of accuracy. SFmode
21480 requires 23 bits of accuracy. DFmode requires 52 bits of
21481 accuracy. Each pass at least doubles the accuracy, leading
21482 to the following. */
21483 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21484 if (mode == DFmode || mode == V2DFmode)
21485 passes++;
21487 enum insn_code code = optab_handler (smul_optab, mode);
21488 insn_gen_fn gen_mul = GEN_FCN (code);
21490 gcc_assert (code != CODE_FOR_nothing);
21492 one = rs6000_load_constant_and_splat (mode, dconst1);
21494 /* x0 = 1./d estimate */
21495 x0 = gen_reg_rtx (mode);
21496 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
21497 UNSPEC_FRES)));
21499 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
21500 if (passes > 1) {
21502 /* e0 = 1. - d * x0 */
21503 e0 = gen_reg_rtx (mode);
21504 rs6000_emit_nmsub (e0, d, x0, one);
21506 /* x1 = x0 + e0 * x0 */
21507 x1 = gen_reg_rtx (mode);
21508 rs6000_emit_madd (x1, e0, x0, x0);
21510 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
21511 ++i, xprev = xnext, eprev = enext) {
21513 /* enext = eprev * eprev */
21514 enext = gen_reg_rtx (mode);
21515 emit_insn (gen_mul (enext, eprev, eprev));
21517 /* xnext = xprev + enext * xprev */
21518 xnext = gen_reg_rtx (mode);
21519 rs6000_emit_madd (xnext, enext, xprev, xprev);
21522 } else
21523 xprev = x0;
21525 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
21527 /* u = n * xprev */
21528 u = gen_reg_rtx (mode);
21529 emit_insn (gen_mul (u, n, xprev));
21531 /* v = n - (d * u) */
21532 v = gen_reg_rtx (mode);
21533 rs6000_emit_nmsub (v, d, u, n);
21535 /* dst = (v * xprev) + u */
21536 rs6000_emit_madd (dst, v, xprev, u);
21538 if (note_p)
21539 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
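
/* A scalar model of the expansion above, for reference.  The truncated
   reciprocal stands in for the hardware fres/xsredp estimate (the
   comment above assumes roughly 5 accurate bits).  With the DFmode
   setting of 4 passes, the result agrees with n/d to roughly within
   an ulp.  */

#include <math.h>

static double
swdiv_model (double n, double d, int passes)
{
  int ex;
  double m = frexp (1.0 / d, &ex);
  double x = ldexp (nearbyint (m * 32.0) / 32.0, ex);	/* ~5-bit x0 */

  if (passes > 1)
    {
      double e = fma (-d, x, 1.0);	/* e0 = 1. - d * x0 */
      x = fma (e, x, x);		/* x1 = x0 + e0 * x0 */
      for (int i = 0; i < passes - 2; i++)
	{
	  e = e * e;			/* enext = eprev * eprev */
	  x = fma (e, x, x);		/* xnext = xprev + enext * xprev */
	}
    }
  double u = n * x;			/* u = n * xprev */
  double v = fma (-d, u, n);		/* v = n - d * u */
  return fma (v, x, u);			/* dst = u + v * xprev */
}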
21542 /* Goldschmidt's Algorithm for single/double-precision floating point
21543 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
21545 void
21546 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
21548 machine_mode mode = GET_MODE (src);
21549 rtx e = gen_reg_rtx (mode);
21550 rtx g = gen_reg_rtx (mode);
21551 rtx h = gen_reg_rtx (mode);
21553 /* Low precision estimates guarantee 5 bits of accuracy. High
21554 precision estimates guarantee 14 bits of accuracy. SFmode
21555 requires 23 bits of accuracy. DFmode requires 52 bits of
21556 accuracy. Each pass at least doubles the accuracy, leading
21557 to the following. */
21558 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
21559 if (mode == DFmode || mode == V2DFmode)
21560 passes++;
21562 int i;
21563 rtx mhalf;
21564 enum insn_code code = optab_handler (smul_optab, mode);
21565 insn_gen_fn gen_mul = GEN_FCN (code);
21567 gcc_assert (code != CODE_FOR_nothing);
21569 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
21571 /* e = rsqrt estimate */
21572 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
21573 UNSPEC_RSQRT)));
21575 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
21576 if (!recip)
21578 rtx zero = force_reg (mode, CONST0_RTX (mode));
21580 if (mode == SFmode)
21582 rtx target = emit_conditional_move (e, GT, src, zero, mode,
21583 e, zero, mode, 0);
21584 if (target != e)
21585 emit_move_insn (e, target);
21587 else
21589 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
21590 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
21594 /* g = sqrt estimate. */
21595 emit_insn (gen_mul (g, e, src));
21596 /* h = 1/(2*sqrt) estimate. */
21597 emit_insn (gen_mul (h, e, mhalf));
21599 if (recip)
21601 if (passes == 1)
21603 rtx t = gen_reg_rtx (mode);
21604 rs6000_emit_nmsub (t, g, h, mhalf);
21605 /* Apply correction directly to 1/rsqrt estimate. */
21606 rs6000_emit_madd (dst, e, t, e);
21608 else
21610 for (i = 0; i < passes; i++)
21612 rtx t1 = gen_reg_rtx (mode);
21613 rtx g1 = gen_reg_rtx (mode);
21614 rtx h1 = gen_reg_rtx (mode);
21616 rs6000_emit_nmsub (t1, g, h, mhalf);
21617 rs6000_emit_madd (g1, g, t1, g);
21618 rs6000_emit_madd (h1, h, t1, h);
21620 g = g1;
21621 h = h1;
21623 /* Multiply by 2 for 1/rsqrt. */
21624 emit_insn (gen_add3_insn (dst, h, h));
21627 else
21629 rtx t = gen_reg_rtx (mode);
21630 rs6000_emit_nmsub (t, g, h, mhalf);
21631 rs6000_emit_madd (dst, g, t, g);
21634 return;
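
/* A scalar model of the multi-pass Goldschmidt recurrence above.  The
   truncated estimate stands in for the hardware frsqrte; each pass
   computes t = 1/2 - g*h and refines both g (the sqrt estimate) and
   h (the 1/(2*sqrt) estimate), so g converges to sqrt and 2*h to
   rsqrt.  */

#include <math.h>

static double
swsqrt_model (double src, int passes, bool recip)
{
  int ex;
  double m = frexp (1.0 / sqrt (src), &ex);
  double e = ldexp (nearbyint (m * 32.0) / 32.0, ex);	/* ~5-bit rsqrt */

  double g = e * src;		/* g = sqrt estimate */
  double h = e * 0.5;		/* h = 1/(2*sqrt) estimate */

  for (int i = 0; i < passes; i++)
    {
      double t = fma (-g, h, 0.5);	/* t = 1/2 - g*h */
      g = fma (g, t, g);		/* g1 = g + g*t */
      h = fma (h, t, h);		/* h1 = h + h*t */
    }
  return recip ? h + h : g;
}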
21637 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
21638 (Power7) targets. DST is the target, and SRC is the argument operand. */
21640 void
21641 rs6000_emit_popcount (rtx dst, rtx src)
21643 machine_mode mode = GET_MODE (dst);
21644 rtx tmp1, tmp2;
21646 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
21647 if (TARGET_POPCNTD)
21649 if (mode == SImode)
21650 emit_insn (gen_popcntdsi2 (dst, src));
21651 else
21652 emit_insn (gen_popcntddi2 (dst, src));
21653 return;
21656 tmp1 = gen_reg_rtx (mode);
21658 if (mode == SImode)
21660 emit_insn (gen_popcntbsi2 (tmp1, src));
21661 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
21662 NULL_RTX, 0);
21663 tmp2 = force_reg (SImode, tmp2);
21664 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
21666 else
21668 emit_insn (gen_popcntbdi2 (tmp1, src));
21669 tmp2 = expand_mult (DImode, tmp1,
21670 GEN_INT ((HOST_WIDE_INT)
21671 0x01010101 << 32 | 0x01010101),
21672 NULL_RTX, 0);
21673 tmp2 = force_reg (DImode, tmp2);
21674 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
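
/* A scalar model of the SImode fallback above.  The shift/mask steps
   stand in for popcntb (population count within each byte); the single
   multiply then sums the four byte counts into the top byte, which is
   what the expand_mult by 0x01010101 accomplishes.  */

#include <stdint.h>

static uint32_t
popcount_model (uint32_t x)
{
  x = x - ((x >> 1) & 0x55555555u);			/* 2-bit counts */
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);	/* 4-bit counts */
  x = (x + (x >> 4)) & 0x0f0f0f0fu;			/* per-byte counts */
  return (x * 0x01010101u) >> 24;	/* sum the bytes, keep the top */
}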
21679 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
21680 target, and SRC is the argument operand. */
21682 void
21683 rs6000_emit_parity (rtx dst, rtx src)
21685 machine_mode mode = GET_MODE (dst);
21686 rtx tmp;
21688 tmp = gen_reg_rtx (mode);
21690 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
21691 if (TARGET_CMPB)
21693 if (mode == SImode)
21695 emit_insn (gen_popcntbsi2 (tmp, src));
21696 emit_insn (gen_paritysi2_cmpb (dst, tmp));
21698 else
21700 emit_insn (gen_popcntbdi2 (tmp, src));
21701 emit_insn (gen_paritydi2_cmpb (dst, tmp));
21703 return;
21706 if (mode == SImode)
21708 /* Is mult+shift >= shift+xor+shift+xor? */
21709 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
21711 rtx tmp1, tmp2, tmp3, tmp4;
21713 tmp1 = gen_reg_rtx (SImode);
21714 emit_insn (gen_popcntbsi2 (tmp1, src));
21716 tmp2 = gen_reg_rtx (SImode);
21717 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
21718 tmp3 = gen_reg_rtx (SImode);
21719 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
21721 tmp4 = gen_reg_rtx (SImode);
21722 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
21723 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
21725 else
21726 rs6000_emit_popcount (tmp, src);
21727 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
21729 else
21731 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
21732 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
21734 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
21736 tmp1 = gen_reg_rtx (DImode);
21737 emit_insn (gen_popcntbdi2 (tmp1, src));
21739 tmp2 = gen_reg_rtx (DImode);
21740 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
21741 tmp3 = gen_reg_rtx (DImode);
21742 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
21744 tmp4 = gen_reg_rtx (DImode);
21745 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
21746 tmp5 = gen_reg_rtx (DImode);
21747 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
21749 tmp6 = gen_reg_rtx (DImode);
21750 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
21751 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
21753 else
21754 rs6000_emit_popcount (tmp, src);
21755 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
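
/* A scalar model of the SImode shift/xor fallback above: compute the
   per-byte popcounts (standing in for popcntb), fold the halves
   together with xor, and keep bit 0, which is the word parity.  */

#include <stdint.h>

static uint32_t
parity_model (uint32_t x)
{
  x = x - ((x >> 1) & 0x55555555u);
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  x = (x + (x >> 4)) & 0x0f0f0f0fu;	/* per-byte counts */

  x ^= x >> 16;		/* the lshrsi3/xorsi3 pair at shift 16 */
  x ^= x >> 8;		/* the lshrsi3/xorsi3 pair at shift 8 */
  return x & 1;		/* the final andsi3 with const1_rtx */
}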
21759 /* Expand an Altivec constant permutation for little endian mode.
21760 OP0 and OP1 are the input vectors and TARGET is the output vector.
21761 SEL specifies the constant permutation vector.
21763 There are two issues: First, the two input operands must be
21764 swapped so that together they form a double-wide array in LE
21765 order. Second, the vperm instruction has surprising behavior
21766 in LE mode: it interprets the elements of the source vectors
21767 in BE mode ("left to right") and interprets the elements of
21768 the destination vector in LE mode ("right to left"). To
21769 correct for this, we must subtract each element of the permute
21770 control vector from 31.
21772 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
21773 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
21774 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
21775 serve as the permute control vector. Then, in BE mode,
21777 vperm 9,10,11,12
21779 places the desired result in vr9. However, in LE mode the
21780 vector contents will be
21782 vr10 = 00000003 00000002 00000001 00000000
21783 vr11 = 00000007 00000006 00000005 00000004
21785 The result of the vperm using the same permute control vector is
21787 vr9 = 05000000 07000000 01000000 03000000
21789 That is, the leftmost 4 bytes of vr10 are interpreted as the
21790 source for the rightmost 4 bytes of vr9, and so on.
21792 If we change the permute control vector to
21794 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
21796 and issue
21798 vperm 9,11,10,12
21800 we get the desired
21802 vr9 = 00000006 00000004 00000002 00000000. */
21804 static void
21805 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
21806 const vec_perm_indices &sel)
21808 unsigned int i;
21809 rtx perm[16];
21810 rtx constv, unspec;
21812 /* Unpack and adjust the constant selector. */
21813 for (i = 0; i < 16; ++i)
21815 unsigned int elt = 31 - (sel[i] & 31);
21816 perm[i] = GEN_INT (elt);
21819 /* Expand to a permute, swapping the inputs and using the
21820 adjusted selector. */
21821 if (!REG_P (op0))
21822 op0 = force_reg (V16QImode, op0);
21823 if (!REG_P (op1))
21824 op1 = force_reg (V16QImode, op1);
21826 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
21827 constv = force_reg (V16QImode, constv);
21828 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
21829 UNSPEC_VPERM);
21830 if (!REG_P (target))
21832 rtx tmp = gen_reg_rtx (V16QImode);
21833 emit_move_insn (tmp, unspec);
21834 unspec = tmp;
21837 emit_move_insn (target, unspec);
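
/* A byte-level check of the adjustment above.  be_vperm models the
   hardware's big-endian reading of vperm; the loop verifies that
   swapping the operands and replacing each selector element with
   31 - sel produces the LE-desired result for an arbitrary sel.  */

#include <assert.h>

static void
be_vperm (const unsigned char *x, const unsigned char *y,
	  const unsigned char *c, unsigned char *out)
{
  for (int i = 0; i < 16; i++)
    {
      unsigned char s = c[i] & 31;
      out[i] = s < 16 ? x[s] : y[s - 16];
    }
}

static void
check_le_adjustment (void)
{
  unsigned char a_le[16], b_le[16], sel[16];
  unsigned char a_be[16], b_be[16], c_be[16], out_be[16];

  for (int i = 0; i < 16; i++)
    {
      a_le[i] = i;			/* LE element i of op0 */
      b_le[i] = 16 + i;			/* LE element i of op1 */
      sel[i] = (7 * i + 3) & 31;	/* arbitrary selector */
    }

  /* The BE view of a register is the LE view reversed; the adjusted
     selector is 31 - sel, as built by the unpacking loop above.  */
  for (int i = 0; i < 16; i++)
    {
      a_be[i] = a_le[15 - i];
      b_be[i] = b_le[15 - i];
      c_be[i] = 31 - sel[15 - i];
    }

  /* Operands swapped: op1 first, op0 second, as in the UNSPEC_VPERM.  */
  be_vperm (b_be, a_be, c_be, out_be);

  for (int i = 0; i < 16; i++)
    {
      unsigned char want = sel[i] < 16 ? a_le[sel[i]] : b_le[sel[i] - 16];
      assert (out_be[15 - i] == want);	/* LE element i of the result */
    }
}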
21840 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
21841 permute control vector. But here it's not a constant, so we must
21842 generate a vector NAND or NOR to do the adjustment. */
21844 void
21845 altivec_expand_vec_perm_le (rtx operands[4])
21847 rtx notx, iorx, unspec;
21848 rtx target = operands[0];
21849 rtx op0 = operands[1];
21850 rtx op1 = operands[2];
21851 rtx sel = operands[3];
21852 rtx tmp = target;
21853 rtx norreg = gen_reg_rtx (V16QImode);
21854 machine_mode mode = GET_MODE (target);
21856 /* Get everything in regs so the pattern matches. */
21857 if (!REG_P (op0))
21858 op0 = force_reg (mode, op0);
21859 if (!REG_P (op1))
21860 op1 = force_reg (mode, op1);
21861 if (!REG_P (sel))
21862 sel = force_reg (V16QImode, sel);
21863 if (!REG_P (target))
21864 tmp = gen_reg_rtx (mode);
21866 if (TARGET_P9_VECTOR)
21868 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
21869 UNSPEC_VPERMR);
21871 else
21873 /* Invert the selector with a VNAND if available, else a VNOR.
21874 The VNAND is preferred for future fusion opportunities. */
21875 notx = gen_rtx_NOT (V16QImode, sel);
21876 iorx = (TARGET_P8_VECTOR
21877 ? gen_rtx_IOR (V16QImode, notx, notx)
21878 : gen_rtx_AND (V16QImode, notx, notx));
21879 emit_insn (gen_rtx_SET (norreg, iorx));
21881 /* Permute with operands reversed and adjusted selector. */
21882 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
21883 UNSPEC_VPERM);
21886 /* Copy into target, possibly by way of a register. */
21887 if (!REG_P (target))
21889 emit_move_insn (tmp, unspec);
21890 unspec = tmp;
21893 emit_move_insn (target, unspec);
21896 /* Expand an Altivec constant permutation. Return true if we match
21897 an efficient implementation; false to fall back to VPERM.
21899 OP0 and OP1 are the input vectors and TARGET is the output vector.
21900 SEL specifies the constant permutation vector. */
21902 static bool
21903 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
21904 const vec_perm_indices &sel)
21906 struct altivec_perm_insn {
21907 HOST_WIDE_INT mask;
21908 enum insn_code impl;
21909 unsigned char perm[16];
21911 static const struct altivec_perm_insn patterns[] = {
21912 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
21913 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
21914 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
21915 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
21916 { OPTION_MASK_ALTIVEC,
21917 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
21918 : CODE_FOR_altivec_vmrglb_direct),
21919 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
21920 { OPTION_MASK_ALTIVEC,
21921 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
21922 : CODE_FOR_altivec_vmrglh_direct),
21923 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
21924 { OPTION_MASK_ALTIVEC,
21925 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
21926 : CODE_FOR_altivec_vmrglw_direct),
21927 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
21928 { OPTION_MASK_ALTIVEC,
21929 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
21930 : CODE_FOR_altivec_vmrghb_direct),
21931 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
21932 { OPTION_MASK_ALTIVEC,
21933 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
21934 : CODE_FOR_altivec_vmrghh_direct),
21935 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
21936 { OPTION_MASK_ALTIVEC,
21937 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
21938 : CODE_FOR_altivec_vmrghw_direct),
21939 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
21940 { OPTION_MASK_P8_VECTOR,
21941 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
21942 : CODE_FOR_p8_vmrgow_v4sf_direct),
21943 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
21944 { OPTION_MASK_P8_VECTOR,
21945 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
21946 : CODE_FOR_p8_vmrgew_v4sf_direct),
21947 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
21950 unsigned int i, j, elt, which;
21951 unsigned char perm[16];
21952 rtx x;
21953 bool one_vec;
21955 /* Unpack the constant selector. */
21956 for (i = which = 0; i < 16; ++i)
21958 elt = sel[i] & 31;
21959 which |= (elt < 16 ? 1 : 2);
21960 perm[i] = elt;
21963 /* Simplify the constant selector based on operands. */
21964 switch (which)
21966 default:
21967 gcc_unreachable ();
21969 case 3:
21970 one_vec = false;
21971 if (!rtx_equal_p (op0, op1))
21972 break;
21973 /* FALLTHRU */
21975 case 2:
21976 for (i = 0; i < 16; ++i)
21977 perm[i] &= 15;
21978 op0 = op1;
21979 one_vec = true;
21980 break;
21982 case 1:
21983 op1 = op0;
21984 one_vec = true;
21985 break;
21988 /* Look for splat patterns. */
21989 if (one_vec)
21991 elt = perm[0];
21993 for (i = 0; i < 16; ++i)
21994 if (perm[i] != elt)
21995 break;
21996 if (i == 16)
21998 if (!BYTES_BIG_ENDIAN)
21999 elt = 15 - elt;
22000 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
22001 return true;
22004 if (elt % 2 == 0)
22006 for (i = 0; i < 16; i += 2)
22007 if (perm[i] != elt || perm[i + 1] != elt + 1)
22008 break;
22009 if (i == 16)
22011 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
22012 x = gen_reg_rtx (V8HImode);
22013 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
22014 GEN_INT (field)));
22015 emit_move_insn (target, gen_lowpart (V16QImode, x));
22016 return true;
22020 if (elt % 4 == 0)
22022 for (i = 0; i < 16; i += 4)
22023 if (perm[i] != elt
22024 || perm[i + 1] != elt + 1
22025 || perm[i + 2] != elt + 2
22026 || perm[i + 3] != elt + 3)
22027 break;
22028 if (i == 16)
22030 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
22031 x = gen_reg_rtx (V4SImode);
22032 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
22033 GEN_INT (field)));
22034 emit_move_insn (target, gen_lowpart (V16QImode, x));
22035 return true;
22040 /* Look for merge and pack patterns. */
22041 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
22043 bool swapped;
22045 if ((patterns[j].mask & rs6000_isa_flags) == 0)
22046 continue;
22048 elt = patterns[j].perm[0];
22049 if (perm[0] == elt)
22050 swapped = false;
22051 else if (perm[0] == elt + 16)
22052 swapped = true;
22053 else
22054 continue;
22055 for (i = 1; i < 16; ++i)
22057 elt = patterns[j].perm[i];
22058 if (swapped)
22059 elt = (elt >= 16 ? elt - 16 : elt + 16);
22060 else if (one_vec && elt >= 16)
22061 elt -= 16;
22062 if (perm[i] != elt)
22063 break;
22065 if (i == 16)
22067 enum insn_code icode = patterns[j].impl;
22068 machine_mode omode = insn_data[icode].operand[0].mode;
22069 machine_mode imode = insn_data[icode].operand[1].mode;
22071 /* For little-endian, don't use vpkuwum and vpkuhum if the
22072 underlying vector type is not V4SI and V8HI, respectively.
22073 For example, using vpkuwum with a V8HI picks up the even
22074 halfwords (BE numbering) when the even halfwords (LE
22075 numbering) are what we need. */
22076 if (!BYTES_BIG_ENDIAN
22077 && icode == CODE_FOR_altivec_vpkuwum_direct
22078 && ((REG_P (op0)
22079 && GET_MODE (op0) != V4SImode)
22080 || (SUBREG_P (op0)
22081 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
22082 continue;
22083 if (!BYTES_BIG_ENDIAN
22084 && icode == CODE_FOR_altivec_vpkuhum_direct
22085 && ((REG_P (op0)
22086 && GET_MODE (op0) != V8HImode)
22087 || (SUBREG_P (op0)
22088 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
22089 continue;
22091 /* For little-endian, the two input operands must be swapped
22092 (or swapped back) to ensure proper right-to-left numbering
22093 from 0 to 2N-1. */
22094 if (swapped ^ !BYTES_BIG_ENDIAN)
22095 std::swap (op0, op1);
22096 if (imode != V16QImode)
22098 op0 = gen_lowpart (imode, op0);
22099 op1 = gen_lowpart (imode, op1);
22101 if (omode == V16QImode)
22102 x = target;
22103 else
22104 x = gen_reg_rtx (omode);
22105 emit_insn (GEN_FCN (icode) (x, op0, op1));
22106 if (omode != V16QImode)
22107 emit_move_insn (target, gen_lowpart (V16QImode, x));
22108 return true;
22112 if (!BYTES_BIG_ENDIAN)
22114 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
22115 return true;
22118 return false;
22121 /* Expand a VSX Permute Doubleword constant permutation.
22122 Return true if we match an efficient implementation. */
22124 static bool
22125 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
22126 unsigned char perm0, unsigned char perm1)
22128 rtx x;
22130 /* If both selectors come from the same operand, fold to single op. */
22131 if ((perm0 & 2) == (perm1 & 2))
22133 if (perm0 & 2)
22134 op0 = op1;
22135 else
22136 op1 = op0;
22138 /* If both operands are equal, fold to simpler permutation. */
22139 if (rtx_equal_p (op0, op1))
22141 perm0 = perm0 & 1;
22142 perm1 = (perm1 & 1) + 2;
22144 /* If the first selector comes from the second operand, swap. */
22145 else if (perm0 & 2)
22147 if (perm1 & 2)
22148 return false;
22149 perm0 -= 2;
22150 perm1 += 2;
22151 std::swap (op0, op1);
22153 /* If the second selector does not come from the second operand, fail. */
22154 else if ((perm1 & 2) == 0)
22155 return false;
22157 /* Success! */
22158 if (target != NULL)
22160 machine_mode vmode, dmode;
22161 rtvec v;
22163 vmode = GET_MODE (target);
22164 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
22165 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
22166 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
22167 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
22168 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
22169 emit_insn (gen_rtx_SET (target, x));
22171 return true;
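
/* A quick enumeration of the folding rules above for the 16 possible
   (perm0, perm1) pairs.  The same-operand case always folds because
   the code first collapses op0/op1 to a single rtx; the swap case
   moves a second-operand selector into canonical position.  */

#include <stdio.h>

int
main (void)
{
  for (int perm0 = 0; perm0 < 4; perm0++)
    for (int perm1 = 0; perm1 < 4; perm1++)
      {
	int p0 = perm0, p1 = perm1, swapped = 0;

	if ((p0 & 2) == (p1 & 2))
	  {
	    /* Both selectors read one operand; fold to the canonical
	       { p0 & 1, (p1 & 1) + 2 } single-operand form.  */
	    p0 &= 1;
	    p1 = (p1 & 1) + 2;
	  }
	else if (p0 & 2)
	  {
	    /* The first selector reads op1: swap the operands.  */
	    p0 -= 2;
	    p1 += 2;
	    swapped = 1;
	  }

	printf ("{%d,%d} -> %sVEC_SELECT {%d,%d}\n", perm0, perm1,
		swapped ? "swap + " : "", p0, p1);
      }
  return 0;
}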
22174 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
22176 static bool
22177 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
22178 rtx op1, const vec_perm_indices &sel)
22180 bool testing_p = !target;
22182 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
22183 if (TARGET_ALTIVEC && testing_p)
22184 return true;
22186 /* Check for ps_merge* or xxpermdi insns. */
22187 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
22189 if (testing_p)
22191 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
22192 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
22194 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
22195 return true;
22198 if (TARGET_ALTIVEC)
22200 /* Force the target-independent code to lower to V16QImode. */
22201 if (vmode != V16QImode)
22202 return false;
22203 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
22204 return true;
22207 return false;
22210 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
22211 OP0 and OP1 are the input vectors and TARGET is the output vector.
22212 PERM specifies the constant permutation vector. */
22214 static void
22215 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
22216 machine_mode vmode, const vec_perm_builder &perm)
22218 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
22219 if (x != target)
22220 emit_move_insn (target, x);
22223 /* Expand an extract even operation. */
22225 void
22226 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
22228 machine_mode vmode = GET_MODE (target);
22229 unsigned i, nelt = GET_MODE_NUNITS (vmode);
22230 vec_perm_builder perm (nelt, nelt, 1);
22232 for (i = 0; i < nelt; i++)
22233 perm.quick_push (i * 2);
22235 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
22238 /* Expand a vector interleave operation. */
22240 void
22241 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
22243 machine_mode vmode = GET_MODE (target);
22244 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
22245 vec_perm_builder perm (nelt, nelt, 1);
22247 high = (highp ? 0 : nelt / 2);
22248 for (i = 0; i < nelt / 2; i++)
22250 perm.quick_push (i + high);
22251 perm.quick_push (i + nelt + high);
22254 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
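
/* The selectors the two expanders above build, shown for a 4-element
   vector: extract-even picks {0, 2, 4, 6}; interleave-high picks
   {0, 4, 1, 5} and interleave-low {2, 6, 3, 7}, where indices 4..7
   name elements of op1.  A minimal sketch:  */

#include <stdio.h>

static void
print_selectors (unsigned nelt)
{
  printf ("extract_even:");
  for (unsigned i = 0; i < nelt; i++)
    printf (" %u", i * 2);
  printf ("\n");

  for (int highp = 1; highp >= 0; highp--)
    {
      unsigned high = highp ? 0 : nelt / 2;
      printf ("interleave (highp=%d):", highp);
      for (unsigned i = 0; i < nelt / 2; i++)
	printf (" %u %u", i + high, i + nelt + high);
      printf ("\n");
    }
}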
22257 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
22258 void
22259 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
22261 HOST_WIDE_INT hwi_scale (scale);
22262 REAL_VALUE_TYPE r_pow;
22263 rtvec v = rtvec_alloc (2);
22264 rtx elt;
22265 rtx scale_vec = gen_reg_rtx (V2DFmode);
22266 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
22267 elt = const_double_from_real_value (r_pow, DFmode);
22268 RTVEC_ELT (v, 0) = elt;
22269 RTVEC_ELT (v, 1) = elt;
22270 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
22271 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
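
/* A scalar model of the expansion above: scaling by 2**SCALE is a
   single multiply by a splatted power of two, which is exact for
   binary floating point; ldexp stands in for the real_powi call.  */

#include <math.h>

static void
scale_v2df_model (double tgt[2], const double src[2], int scale)
{
  double p = ldexp (1.0, scale);	/* 2**scale */
  tgt[0] = src[0] * p;
  tgt[1] = src[1] * p;
}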
22274 /* Return an RTX representing where to find the function value of a
22275 function returning MODE. */
22276 static rtx
22277 rs6000_complex_function_value (machine_mode mode)
22279 unsigned int regno;
22280 rtx r1, r2;
22281 machine_mode inner = GET_MODE_INNER (mode);
22282 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
22284 if (TARGET_FLOAT128_TYPE
22285 && (mode == KCmode
22286 || (mode == TCmode && TARGET_IEEEQUAD)))
22287 regno = ALTIVEC_ARG_RETURN;
22289 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22290 regno = FP_ARG_RETURN;
22292 else
22294 regno = GP_ARG_RETURN;
22296 /* 32-bit is OK since it'll go in r3/r4. */
22297 if (TARGET_32BIT && inner_bytes >= 4)
22298 return gen_rtx_REG (mode, regno);
22301 if (inner_bytes >= 8)
22302 return gen_rtx_REG (mode, regno);
22304 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
22305 const0_rtx);
22306 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
22307 GEN_INT (inner_bytes));
22308 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
22311 /* Return an rtx describing a return value of MODE as a PARALLEL
22312 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
22313 stride REG_STRIDE. */
22315 static rtx
22316 rs6000_parallel_return (machine_mode mode,
22317 int n_elts, machine_mode elt_mode,
22318 unsigned int regno, unsigned int reg_stride)
22320 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
22322 int i;
22323 for (i = 0; i < n_elts; i++)
22325 rtx r = gen_rtx_REG (elt_mode, regno);
22326 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
22327 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
22328 regno += reg_stride;
22331 return par;
22334 /* Target hook for TARGET_FUNCTION_VALUE.
22336 An integer value is in r3 and a floating-point value is in fp1,
22337 unless -msoft-float. */
22339 static rtx
22340 rs6000_function_value (const_tree valtype,
22341 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
22342 bool outgoing ATTRIBUTE_UNUSED)
22344 machine_mode mode;
22345 unsigned int regno;
22346 machine_mode elt_mode;
22347 int n_elts;
22349 /* Special handling for structs in darwin64. */
22350 if (TARGET_MACHO
22351 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
22353 CUMULATIVE_ARGS valcum;
22354 rtx valret;
22356 valcum.words = 0;
22357 valcum.fregno = FP_ARG_MIN_REG;
22358 valcum.vregno = ALTIVEC_ARG_MIN_REG;
22359 /* Do a trial code generation as if this were going to be passed as
22360 an argument; if any part goes in memory, we return NULL. */
22361 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
22362 if (valret)
22363 return valret;
22364 /* Otherwise fall through to standard ABI rules. */
22367 mode = TYPE_MODE (valtype);
22369 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
22370 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
22372 int first_reg, n_regs;
22374 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
22376 /* _Decimal128 must use even/odd register pairs. */
22377 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22378 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
22380 else
22382 first_reg = ALTIVEC_ARG_RETURN;
22383 n_regs = 1;
22386 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
22389 /* Some return value types need to be split in the -mpowerpc64 32-bit ABI. */
22390 if (TARGET_32BIT && TARGET_POWERPC64)
22391 switch (mode)
22393 default:
22394 break;
22395 case E_DImode:
22396 case E_SCmode:
22397 case E_DCmode:
22398 case E_TCmode:
22399 int count = GET_MODE_SIZE (mode) / 4;
22400 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
22403 if ((INTEGRAL_TYPE_P (valtype)
22404 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
22405 || POINTER_TYPE_P (valtype))
22406 mode = TARGET_32BIT ? SImode : DImode;
22408 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22409 /* _Decimal128 must use an even/odd register pair. */
22410 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22411 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
22412 && !FLOAT128_VECTOR_P (mode))
22413 regno = FP_ARG_RETURN;
22414 else if (TREE_CODE (valtype) == COMPLEX_TYPE
22415 && targetm.calls.split_complex_arg)
22416 return rs6000_complex_function_value (mode);
22417 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22418 return register is used in both cases, and we won't see V2DImode/V2DFmode
22419 for pure altivec, combine the two cases. */
22420 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
22421 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
22422 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22423 regno = ALTIVEC_ARG_RETURN;
22424 else
22425 regno = GP_ARG_RETURN;
22427 return gen_rtx_REG (mode, regno);
22430 /* Define how to find the value returned by a library function
22431 assuming the value has mode MODE. */
22432 rtx
22433 rs6000_libcall_value (machine_mode mode)
22435 unsigned int regno;
22437 /* A long long return value needs to be split in the -mpowerpc64 32-bit ABI. */
22438 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
22439 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
22441 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
22442 /* _Decimal128 must use an even/odd register pair. */
22443 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
22444 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
22445 regno = FP_ARG_RETURN;
22446 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
22447 return register is used in both cases, and we won't see V2DImode/V2DFmode
22448 for pure altivec, combine the two cases. */
22449 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22450 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22451 regno = ALTIVEC_ARG_RETURN;
22452 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22453 return rs6000_complex_function_value (mode);
22454 else
22455 regno = GP_ARG_RETURN;
22457 return gen_rtx_REG (mode, regno);
22460 /* Compute register pressure classes. We implement the target hook to avoid
22461 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22462 lead to incorrect estimates of the number of available registers and
22463 therefore increased register pressure/spill. */
22464 static int
22465 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22467 int n;
22469 n = 0;
22470 pressure_classes[n++] = GENERAL_REGS;
22471 if (TARGET_VSX)
22472 pressure_classes[n++] = VSX_REGS;
22473 else
22475 if (TARGET_ALTIVEC)
22476 pressure_classes[n++] = ALTIVEC_REGS;
22477 if (TARGET_HARD_FLOAT)
22478 pressure_classes[n++] = FLOAT_REGS;
22480 pressure_classes[n++] = CR_REGS;
22481 pressure_classes[n++] = SPECIAL_REGS;
22483 return n;
22486 /* Given FROM and TO register numbers, say whether this elimination is allowed.
22487 Frame pointer elimination is automatically handled.
22489 For the RS/6000, if frame pointer elimination is being done, we would like
22490 to convert ap into fp, not sp.
22492 We need r30 if -mminimal-toc was specified, and there are constant pool
22493 references. */
22495 static bool
22496 rs6000_can_eliminate (const int from, const int to)
22498 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22499 ? ! frame_pointer_needed
22500 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22501 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22502 || constant_pool_empty_p ()
22503 : true);
22506 /* Define the offset between two registers, FROM to be eliminated and its
22507 replacement TO, at the start of a routine. */
22508 HOST_WIDE_INT
22509 rs6000_initial_elimination_offset (int from, int to)
22511 rs6000_stack_t *info = rs6000_stack_info ();
22512 HOST_WIDE_INT offset;
22514 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22515 offset = info->push_p ? 0 : -info->total_size;
22516 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22518 offset = info->push_p ? 0 : -info->total_size;
22519 if (FRAME_GROWS_DOWNWARD)
22520 offset += info->fixed_size + info->vars_size + info->parm_size;
22522 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22523 offset = FRAME_GROWS_DOWNWARD
22524 ? info->fixed_size + info->vars_size + info->parm_size
22525 : 0;
22526 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22527 offset = info->total_size;
22528 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22529 offset = info->push_p ? info->total_size : 0;
22530 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22531 offset = 0;
22532 else
22533 gcc_unreachable ();
22535 return offset;
22538 /* Fill in sizes of registers used by unwinder. */
22540 static void
22541 rs6000_init_dwarf_reg_sizes_extra (tree address)
22543 if (TARGET_MACHO && ! TARGET_ALTIVEC)
22545 int i;
22546 machine_mode mode = TYPE_MODE (char_type_node);
22547 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
22548 rtx mem = gen_rtx_MEM (BLKmode, addr);
22549 rtx value = gen_int_mode (16, mode);
22551 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
22552 The unwinder still needs to know the size of Altivec registers. */
22554 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
22556 int column = DWARF_REG_TO_UNWIND_COLUMN
22557 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
22558 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
22560 emit_move_insn (adjust_address (mem, mode, offset), value);
22565 /* Map internal gcc register numbers to debug format register numbers.
22566 FORMAT specifies the type of debug register number to use:
22567 0 -- debug information, except for frame-related sections
22568 1 -- DWARF .debug_frame section
22569 2 -- DWARF .eh_frame section */
22571 unsigned int
22572 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
22574 /* On some platforms, we use the standard DWARF register
22575 numbering for .debug_info and .debug_frame. */
22576 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1)
22578 #ifdef RS6000_USE_DWARF_NUMBERING
22579 if (regno <= 31)
22580 return regno;
22581 if (FP_REGNO_P (regno))
22582 return regno - FIRST_FPR_REGNO + 32;
22583 if (ALTIVEC_REGNO_P (regno))
22584 return regno - FIRST_ALTIVEC_REGNO + 1124;
22585 if (regno == LR_REGNO)
22586 return 108;
22587 if (regno == CTR_REGNO)
22588 return 109;
22589 if (regno == CA_REGNO)
22590 return 101; /* XER */
22591 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
22592 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
22593 The actual code emitted saves the whole of CR, so we map CR2_REGNO
22594 to the DWARF reg for CR. */
22595 if (format == 1 && regno == CR2_REGNO)
22596 return 64;
22597 if (CR_REGNO_P (regno))
22598 return regno - CR0_REGNO + 86;
22599 if (regno == VRSAVE_REGNO)
22600 return 356;
22601 if (regno == VSCR_REGNO)
22602 return 67;
22604 /* These do not make much sense. */
22605 if (regno == FRAME_POINTER_REGNUM)
22606 return 111;
22607 if (regno == ARG_POINTER_REGNUM)
22608 return 67;
22609 if (regno == 64)
22610 return 100;
22612 gcc_unreachable ();
22613 #endif
22616 /* We use the GCC 7 (and before) internal number for non-DWARF debug
22617 information, and also for .eh_frame. */
22618 /* Translate the regnos to their numbers in GCC 7 (and before). */
22619 if (regno <= 31)
22620 return regno;
22621 if (FP_REGNO_P (regno))
22622 return regno - FIRST_FPR_REGNO + 32;
22623 if (ALTIVEC_REGNO_P (regno))
22624 return regno - FIRST_ALTIVEC_REGNO + 77;
22625 if (regno == LR_REGNO)
22626 return 65;
22627 if (regno == CTR_REGNO)
22628 return 66;
22629 if (regno == CA_REGNO)
22630 return 76; /* XER */
22631 if (CR_REGNO_P (regno))
22632 return regno - CR0_REGNO + 68;
22633 if (regno == VRSAVE_REGNO)
22634 return 109;
22635 if (regno == VSCR_REGNO)
22636 return 110;
22638 if (regno == FRAME_POINTER_REGNUM)
22639 return 111;
22640 if (regno == ARG_POINTER_REGNUM)
22641 return 67;
22642 if (regno == 64)
22643 return 64;
22645 gcc_unreachable ();
22648 /* Target hook for eh_return_filter_mode. */
22649 static scalar_int_mode
22650 rs6000_eh_return_filter_mode (void)
22652 return TARGET_32BIT ? SImode : word_mode;
22655 /* Target hook for translate_mode_attribute. */
22656 static machine_mode
22657 rs6000_translate_mode_attribute (machine_mode mode)
22659 if ((FLOAT128_IEEE_P (mode)
22660 && ieee128_float_type_node == long_double_type_node)
22661 || (FLOAT128_IBM_P (mode)
22662 && ibm128_float_type_node == long_double_type_node))
22663 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
22664 return mode;
22667 /* Target hook for scalar_mode_supported_p. */
22668 static bool
22669 rs6000_scalar_mode_supported_p (scalar_mode mode)
22671 /* -m32 does not support TImode. This is the default, from
22672 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
22673 same ABI as for -m32. But default_scalar_mode_supported_p allows
22674 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
22675 for -mpowerpc64. */
22676 if (TARGET_32BIT && mode == TImode)
22677 return false;
22679 if (DECIMAL_FLOAT_MODE_P (mode))
22680 return default_decimal_float_supported_p ();
22681 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
22682 return true;
22683 else
22684 return default_scalar_mode_supported_p (mode);
22687 /* Target hook for vector_mode_supported_p. */
22688 static bool
22689 rs6000_vector_mode_supported_p (machine_mode mode)
22691 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
22692 128-bit, the compiler might try to widen IEEE 128-bit to IBM
22693 double-double. */
22694 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
22695 return true;
22697 else
22698 return false;
22701 /* Target hook for floatn_mode. */
22702 static opt_scalar_float_mode
22703 rs6000_floatn_mode (int n, bool extended)
22705 if (extended)
22707 switch (n)
22709 case 32:
22710 return DFmode;
22712 case 64:
22713 if (TARGET_FLOAT128_TYPE)
22714 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22715 else
22716 return opt_scalar_float_mode ();
22718 case 128:
22719 return opt_scalar_float_mode ();
22721 default:
22722 /* Those are the only valid _FloatNx types. */
22723 gcc_unreachable ();
22726 else
22728 switch (n)
22730 case 32:
22731 return SFmode;
22733 case 64:
22734 return DFmode;
22736 case 128:
22737 if (TARGET_FLOAT128_TYPE)
22738 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22739 else
22740 return opt_scalar_float_mode ();
22742 default:
22743 return opt_scalar_float_mode ();
22749 /* Target hook for c_mode_for_suffix. */
22750 static machine_mode
22751 rs6000_c_mode_for_suffix (char suffix)
22753 if (TARGET_FLOAT128_TYPE)
22755 if (suffix == 'q' || suffix == 'Q')
22756 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
22758 /* At the moment, we are not defining a suffix for IBM extended double.
22759 If/when the default for -mabi=ieeelongdouble is changed, and we want
22760 to support __ibm128 constants in legacy library code, we may need to
22761 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
22762 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
22763 __float80 constants. */
22766 return VOIDmode;
22769 /* Target hook for invalid_arg_for_unprototyped_fn. */
22770 static const char *
22771 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
22773 return (!rs6000_darwin64_abi
22774 && typelist == 0
22775 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
22776 && (funcdecl == NULL_TREE
22777 || (TREE_CODE (funcdecl) == FUNCTION_DECL
22778 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
22779 ? N_("AltiVec argument passed to unprototyped function")
22780 : NULL;
22783 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
22784 setup by using __stack_chk_fail_local hidden function instead of
22785 calling __stack_chk_fail directly. Otherwise it is better to call
22786 __stack_chk_fail directly. */
22788 static tree ATTRIBUTE_UNUSED
22789 rs6000_stack_protect_fail (void)
22791 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
22792 ? default_hidden_stack_protect_fail ()
22793 : default_external_stack_protect_fail ();
22796 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
22798 #if TARGET_ELF
22799 static unsigned HOST_WIDE_INT
22800 rs6000_asan_shadow_offset (void)
22802 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
22804 #endif
22806 /* Mask options that we want to support inside of attribute((target)) and
22807 #pragma GCC target operations. Note, we do not include things like
22808 64/32-bit, endianness, hard/soft floating point, etc. that would have
22809 different calling sequences. */
22811 struct rs6000_opt_mask {
22812 const char *name; /* option name */
22813 HOST_WIDE_INT mask; /* mask to set */
22814 bool invert; /* invert sense of mask */
22815 bool valid_target; /* option is a target option */
22818 static struct rs6000_opt_mask const rs6000_opt_masks[] =
22820 { "altivec", OPTION_MASK_ALTIVEC, false, true },
22821 { "cmpb", OPTION_MASK_CMPB, false, true },
22822 { "crypto", OPTION_MASK_CRYPTO, false, true },
22823 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
22824 { "dlmzb", OPTION_MASK_DLMZB, false, true },
22825 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
22826 false, true },
22827 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
22828 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
22829 { "fprnd", OPTION_MASK_FPRND, false, true },
22830 { "future", OPTION_MASK_FUTURE, false, true },
22831 { "hard-dfp", OPTION_MASK_DFP, false, true },
22832 { "htm", OPTION_MASK_HTM, false, true },
22833 { "isel", OPTION_MASK_ISEL, false, true },
22834 { "mfcrf", OPTION_MASK_MFCRF, false, true },
22835 { "mfpgpr", 0, false, true },
22836 { "modulo", OPTION_MASK_MODULO, false, true },
22837 { "mulhw", OPTION_MASK_MULHW, false, true },
22838 { "multiple", OPTION_MASK_MULTIPLE, false, true },
22839 { "pcrel", OPTION_MASK_PCREL, false, true },
22840 { "popcntb", OPTION_MASK_POPCNTB, false, true },
22841 { "popcntd", OPTION_MASK_POPCNTD, false, true },
22842 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
22843 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
22844 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
22845 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
22846 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
22847 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
22848 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
22849 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
22850 { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
22851 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
22852 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
22853 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
22854 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
22855 { "string", 0, false, true },
22856 { "update", OPTION_MASK_NO_UPDATE, true , true },
22857 { "vsx", OPTION_MASK_VSX, false, true },
22858 #ifdef OPTION_MASK_64BIT
22859 #if TARGET_AIX_OS
22860 { "aix64", OPTION_MASK_64BIT, false, false },
22861 { "aix32", OPTION_MASK_64BIT, true, false },
22862 #else
22863 { "64", OPTION_MASK_64BIT, false, false },
22864 { "32", OPTION_MASK_64BIT, true, false },
22865 #endif
22866 #endif
22867 #ifdef OPTION_MASK_EABI
22868 { "eabi", OPTION_MASK_EABI, false, false },
22869 #endif
22870 #ifdef OPTION_MASK_LITTLE_ENDIAN
22871 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
22872 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
22873 #endif
22874 #ifdef OPTION_MASK_RELOCATABLE
22875 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
22876 #endif
22877 #ifdef OPTION_MASK_STRICT_ALIGN
22878 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
22879 #endif
22880 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
22881 { "string", 0, false, false },
22884 /* Builtin mask mapping for printing the flags. */
22885 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
22887 { "altivec", RS6000_BTM_ALTIVEC, false, false },
22888 { "vsx", RS6000_BTM_VSX, false, false },
22889 { "fre", RS6000_BTM_FRE, false, false },
22890 { "fres", RS6000_BTM_FRES, false, false },
22891 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
22892 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
22893 { "popcntd", RS6000_BTM_POPCNTD, false, false },
22894 { "cell", RS6000_BTM_CELL, false, false },
22895 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
22896 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
22897 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
22898 { "crypto", RS6000_BTM_CRYPTO, false, false },
22899 { "htm", RS6000_BTM_HTM, false, false },
22900 { "hard-dfp", RS6000_BTM_DFP, false, false },
22901 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
22902 { "long-double-128", RS6000_BTM_LDBL128, false, false },
22903 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
22904 { "float128", RS6000_BTM_FLOAT128, false, false },
22905 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
22908 /* Option variables that we want to support inside attribute((target)) and
22909 #pragma GCC target operations. */
22911 struct rs6000_opt_var {
22912 const char *name; /* option name */
22913 size_t global_offset; /* offset of the option in global_options. */
22914 size_t target_offset; /* offset of the option in target options. */
22917 static struct rs6000_opt_var const rs6000_opt_vars[] =
22919 { "friz",
22920 offsetof (struct gcc_options, x_TARGET_FRIZ),
22921 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
22922 { "avoid-indexed-addresses",
22923 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
22924 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
22925 { "longcall",
22926 offsetof (struct gcc_options, x_rs6000_default_long_calls),
22927 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
22928 { "optimize-swaps",
22929 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
22930 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
22931 { "allow-movmisalign",
22932 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
22933 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
22934 { "sched-groups",
22935 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
22936 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
22937 { "always-hint",
22938 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
22939 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
22940 { "align-branch-targets",
22941 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
22942 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
22943 { "sched-prolog",
22944 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22945 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22946 { "sched-epilog",
22947 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
22948 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
22949 { "speculate-indirect-jumps",
22950 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
22951 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
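/* Editorial sketch (not in the original source): the two tables above
   define every option name accepted inside attribute((target("...")))
   and #pragma GCC target.  Assuming a compiler built from this file,
   hypothetical user code such as

     __attribute__((__target__("vsx,no-multiple")))   // rs6000_opt_masks
     void f (void);

     __attribute__((__target__("friz,longcall")))     // rs6000_opt_vars
     void g (void);

   toggles bits in rs6000_isa_flags for the mask entries, and stores
   directly through the recorded global_options/cl_target_option offsets
   for the variable entries.  */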
22954 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
22955 parsing. Return true if there were no errors. */
22957 static bool
22958 rs6000_inner_target_options (tree args, bool attr_p)
22960 bool ret = true;
22962 if (args == NULL_TREE)
22965 else if (TREE_CODE (args) == STRING_CST)
22967 char *p = ASTRDUP (TREE_STRING_POINTER (args));
22968 char *q;
22970 while ((q = strtok (p, ",")) != NULL)
22972 bool error_p = false;
22973 bool not_valid_p = false;
22974 const char *cpu_opt = NULL;
22976 p = NULL;
22977 if (strncmp (q, "cpu=", 4) == 0)
22979 int cpu_index = rs6000_cpu_name_lookup (q+4);
22980 if (cpu_index >= 0)
22981 rs6000_cpu_index = cpu_index;
22982 else
22984 error_p = true;
22985 cpu_opt = q+4;
22988 else if (strncmp (q, "tune=", 5) == 0)
22990 int tune_index = rs6000_cpu_name_lookup (q+5);
22991 if (tune_index >= 0)
22992 rs6000_tune_index = tune_index;
22993 else
22995 error_p = true;
22996 cpu_opt = q+5;
22999 else
23001 size_t i;
23002 bool invert = false;
23003 char *r = q;
23005 error_p = true;
23006 if (strncmp (r, "no-", 3) == 0)
23008 invert = true;
23009 r += 3;
23012 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
23013 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
23015 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
23017 if (!rs6000_opt_masks[i].valid_target)
23018 not_valid_p = true;
23019 else
23021 error_p = false;
23022 rs6000_isa_flags_explicit |= mask;
23024 /* VSX needs altivec, so -mvsx automagically sets
23025 altivec and disables -mavoid-indexed-addresses. */
23026 if (!invert)
23028 if (mask == OPTION_MASK_VSX)
23030 mask |= OPTION_MASK_ALTIVEC;
23031 TARGET_AVOID_XFORM = 0;
23035 if (rs6000_opt_masks[i].invert)
23036 invert = !invert;
23038 if (invert)
23039 rs6000_isa_flags &= ~mask;
23040 else
23041 rs6000_isa_flags |= mask;
23043 break;
23046 if (error_p && !not_valid_p)
23048 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
23049 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
23051 size_t j = rs6000_opt_vars[i].global_offset;
23052 *((int *) ((char *)&global_options + j)) = !invert;
23053 error_p = false;
23054 not_valid_p = false;
23055 break;
23060 if (error_p)
23062 const char *eprefix, *esuffix;
23064 ret = false;
23065 if (attr_p)
23067 eprefix = "__attribute__((__target__(";
23068 esuffix = ")))";
23070 else
23072 eprefix = "#pragma GCC target ";
23073 esuffix = "";
23076 if (cpu_opt)
23077 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
23078 q, esuffix);
23079 else if (not_valid_p)
23080 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
23081 else
23082 error ("%s%qs%s is invalid", eprefix, q, esuffix);
23087 else if (TREE_CODE (args) == TREE_LIST)
23091 tree value = TREE_VALUE (args);
23092 if (value)
23094 bool ret2 = rs6000_inner_target_options (value, attr_p);
23095 if (!ret2)
23096 ret = false;
23098 args = TREE_CHAIN (args);
23100 while (args != NULL_TREE);
23103 else
23105 error ("attribute %<target%> argument not a string");
23106 return false;
23109 return ret;
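/* Editorial sketch of the parser above in action; the attribute string
   is hypothetical user code.  "cpu=power9,htm,no-vsx" is tokenized on
   commas and handled one token at a time:

     cpu=power9 -> rs6000_cpu_index = rs6000_cpu_name_lookup ("power9")
     htm        -> rs6000_isa_flags |= OPTION_MASK_HTM
     no-vsx     -> rs6000_isa_flags &= ~OPTION_MASK_VSX

   An unrecognized token such as "frobnicate" takes the error_p path and
   produces, roughly:

     error: __attribute__((__target__("frobnicate"))) is invalid  */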
23112 /* Print out the target options as a list for -mdebug=target. */
23114 static void
23115 rs6000_debug_target_options (tree args, const char *prefix)
23117 if (args == NULL_TREE)
23118 fprintf (stderr, "%s<NULL>", prefix);
23120 else if (TREE_CODE (args) == STRING_CST)
23122 char *p = ASTRDUP (TREE_STRING_POINTER (args));
23123 char *q;
23125 while ((q = strtok (p, ",")) != NULL)
23127 p = NULL;
23128 fprintf (stderr, "%s\"%s\"", prefix, q);
23129 prefix = ", ";
23133 else if (TREE_CODE (args) == TREE_LIST)
23137 tree value = TREE_VALUE (args);
23138 if (value)
23140 rs6000_debug_target_options (value, prefix);
23141 prefix = ", ";
23143 args = TREE_CHAIN (args);
23145 while (args != NULL_TREE);
23148 else
23149 gcc_unreachable ();
23151 return;
23155 /* Hook to validate attribute((target("..."))). */
23157 static bool
23158 rs6000_valid_attribute_p (tree fndecl,
23159 tree ARG_UNUSED (name),
23160 tree args,
23161 int flags)
23163 struct cl_target_option cur_target;
23164 bool ret;
23165 tree old_optimize;
23166 tree new_target, new_optimize;
23167 tree func_optimize;
23169 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
23171 if (TARGET_DEBUG_TARGET)
23173 tree tname = DECL_NAME (fndecl);
23174 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
23175 if (tname)
23176 fprintf (stderr, "function: %.*s\n",
23177 (int) IDENTIFIER_LENGTH (tname),
23178 IDENTIFIER_POINTER (tname));
23179 else
23180 fprintf (stderr, "function: unknown\n");
23182 fprintf (stderr, "args:");
23183 rs6000_debug_target_options (args, " ");
23184 fprintf (stderr, "\n");
23186 if (flags)
23187 fprintf (stderr, "flags: 0x%x\n", flags);
23189 fprintf (stderr, "--------------------\n");
23192 /* attribute((target("default"))) does nothing, beyond
23193 affecting multi-versioning. */
23194 if (TREE_VALUE (args)
23195 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
23196 && TREE_CHAIN (args) == NULL_TREE
23197 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
23198 return true;
23200 old_optimize = build_optimization_node (&global_options);
23201 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
23203 /* If the function changed the optimization levels as well as setting target
23204 options, start with the optimizations specified. */
23205 if (func_optimize && func_optimize != old_optimize)
23206 cl_optimization_restore (&global_options,
23207 TREE_OPTIMIZATION (func_optimize));
23209 /* The target attributes may also change some optimization flags, so update
23210 the optimization options if necessary. */
23211 cl_target_option_save (&cur_target, &global_options);
23212 rs6000_cpu_index = rs6000_tune_index = -1;
23213 ret = rs6000_inner_target_options (args, true);
23215 /* Set up any additional state. */
23216 if (ret)
23218 ret = rs6000_option_override_internal (false);
23219 new_target = build_target_option_node (&global_options);
23221 else
23222 new_target = NULL;
23224 new_optimize = build_optimization_node (&global_options);
23226 if (!new_target)
23227 ret = false;
23229 else if (fndecl)
23231 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
23233 if (old_optimize != new_optimize)
23234 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
23237 cl_target_option_restore (&global_options, &cur_target);
23239 if (old_optimize != new_optimize)
23240 cl_optimization_restore (&global_options,
23241 TREE_OPTIMIZATION (old_optimize));
23243 return ret;
23247 /* Hook to validate the current #pragma GCC target and set the state, and
23248 update the macros based on what was changed. If ARGS is NULL, then
23249 POP_TARGET is used to reset the options. */
23251 bool
23252 rs6000_pragma_target_parse (tree args, tree pop_target)
23254 tree prev_tree = build_target_option_node (&global_options);
23255 tree cur_tree;
23256 struct cl_target_option *prev_opt, *cur_opt;
23257 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
23258 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
23260 if (TARGET_DEBUG_TARGET)
23262 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
23263 fprintf (stderr, "args:");
23264 rs6000_debug_target_options (args, " ");
23265 fprintf (stderr, "\n");
23267 if (pop_target)
23269 fprintf (stderr, "pop_target:\n");
23270 debug_tree (pop_target);
23272 else
23273 fprintf (stderr, "pop_target: <NULL>\n");
23275 fprintf (stderr, "--------------------\n");
23278 if (! args)
23280 cur_tree = ((pop_target)
23281 ? pop_target
23282 : target_option_default_node);
23283 cl_target_option_restore (&global_options,
23284 TREE_TARGET_OPTION (cur_tree));
23286 else
23288 rs6000_cpu_index = rs6000_tune_index = -1;
23289 if (!rs6000_inner_target_options (args, false)
23290 || !rs6000_option_override_internal (false)
23291 || (cur_tree = build_target_option_node (&global_options))
23292 == NULL_TREE)
23294 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
23295 fprintf (stderr, "invalid pragma\n");
23297 return false;
23301 target_option_current_node = cur_tree;
23302 rs6000_activate_target_options (target_option_current_node);
23304 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
23305 change the macros that are defined. */
23306 if (rs6000_target_modify_macros_ptr)
23308 prev_opt = TREE_TARGET_OPTION (prev_tree);
23309 prev_bumask = prev_opt->x_rs6000_builtin_mask;
23310 prev_flags = prev_opt->x_rs6000_isa_flags;
23312 cur_opt = TREE_TARGET_OPTION (cur_tree);
23313 cur_flags = cur_opt->x_rs6000_isa_flags;
23314 cur_bumask = cur_opt->x_rs6000_builtin_mask;
23316 diff_bumask = (prev_bumask ^ cur_bumask);
23317 diff_flags = (prev_flags ^ cur_flags);
23319 if ((diff_flags != 0) || (diff_bumask != 0))
23321 /* Delete old macros. */
23322 rs6000_target_modify_macros_ptr (false,
23323 prev_flags & diff_flags,
23324 prev_bumask & diff_bumask);
23326 /* Define new macros. */
23327 rs6000_target_modify_macros_ptr (true,
23328 cur_flags & diff_flags,
23329 cur_bumask & diff_bumask);
23333 return true;
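/* Editorial usage sketch for the hook above; the pragmas below are
   hypothetical user code:

     #pragma GCC push_options
     #pragma GCC target ("vsx")
     vector double vadd (vector double a, vector double b)
       { return a + b; }                // __VSX__ now defined
     #pragma GCC pop_options            // macros reverted via diff masks

   On the pop, ARGS is NULL and POP_TARGET restores the saved options;
   rs6000_target_modify_macros_ptr then undefines and redefines only the
   macros whose isa or builtin mask bits actually changed.  */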
23337 /* Remember the last target of rs6000_set_current_function. */
23338 static GTY(()) tree rs6000_previous_fndecl;
23340 /* Restore target's globals from NEW_TREE and invalidate the
23341 rs6000_previous_fndecl cache. */
23343 void
23344 rs6000_activate_target_options (tree new_tree)
23346 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
23347 if (TREE_TARGET_GLOBALS (new_tree))
23348 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
23349 else if (new_tree == target_option_default_node)
23350 restore_target_globals (&default_target_globals);
23351 else
23352 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
23353 rs6000_previous_fndecl = NULL_TREE;
23356 /* Establish appropriate back-end context for processing the function
23357 FNDECL. The argument might be NULL to indicate processing at top
23358 level, outside of any function scope. */
23359 static void
23360 rs6000_set_current_function (tree fndecl)
23362 if (TARGET_DEBUG_TARGET)
23364 fprintf (stderr, "\n==================== rs6000_set_current_function");
23366 if (fndecl)
23367 fprintf (stderr, ", fndecl %s (%p)",
23368 (DECL_NAME (fndecl)
23369 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
23370 : "<unknown>"), (void *)fndecl);
23372 if (rs6000_previous_fndecl)
23373 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
23375 fprintf (stderr, "\n");
23378 /* Only change the context if the function changes. This hook is called
23379 several times in the course of compiling a function, and we don't want to
23380 slow things down too much or call target_reinit when it isn't safe. */
23381 if (fndecl == rs6000_previous_fndecl)
23382 return;
23384 tree old_tree;
23385 if (rs6000_previous_fndecl == NULL_TREE)
23386 old_tree = target_option_current_node;
23387 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
23388 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
23389 else
23390 old_tree = target_option_default_node;
23392 tree new_tree;
23393 if (fndecl == NULL_TREE)
23395 if (old_tree != target_option_current_node)
23396 new_tree = target_option_current_node;
23397 else
23398 new_tree = NULL_TREE;
23400 else
23402 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23403 if (new_tree == NULL_TREE)
23404 new_tree = target_option_default_node;
23407 if (TARGET_DEBUG_TARGET)
23409 if (new_tree)
23411 fprintf (stderr, "\nnew fndecl target specific options:\n");
23412 debug_tree (new_tree);
23415 if (old_tree)
23417 fprintf (stderr, "\nold fndecl target specific options:\n");
23418 debug_tree (old_tree);
23421 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
23422 fprintf (stderr, "--------------------\n");
23425 if (new_tree && old_tree != new_tree)
23426 rs6000_activate_target_options (new_tree);
23428 if (fndecl)
23429 rs6000_previous_fndecl = fndecl;
23433 /* Save the current options.  */
23435 static void
23436 rs6000_function_specific_save (struct cl_target_option *ptr,
23437 struct gcc_options *opts)
23439 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
23440 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
23443 /* Restore the current options.  */
23445 static void
23446 rs6000_function_specific_restore (struct gcc_options *opts,
23447 struct cl_target_option *ptr)
23450 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
23451 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
23452 (void) rs6000_option_override_internal (false);
23455 /* Print the current options.  */
23457 static void
23458 rs6000_function_specific_print (FILE *file, int indent,
23459 struct cl_target_option *ptr)
23461 rs6000_print_isa_options (file, indent, "Isa options set",
23462 ptr->x_rs6000_isa_flags);
23464 rs6000_print_isa_options (file, indent, "Isa options explicit",
23465 ptr->x_rs6000_isa_flags_explicit);
23468 /* Helper function to print the current isa or misc options on a line. */
23470 static void
23471 rs6000_print_options_internal (FILE *file,
23472 int indent,
23473 const char *string,
23474 HOST_WIDE_INT flags,
23475 const char *prefix,
23476 const struct rs6000_opt_mask *opts,
23477 size_t num_elements)
23479 size_t i;
23480 size_t start_column = 0;
23481 size_t cur_column;
23482 size_t max_column = 120;
23483 size_t prefix_len = strlen (prefix);
23484 size_t comma_len = 0;
23485 const char *comma = "";
23487 if (indent)
23488 start_column += fprintf (file, "%*s", indent, "");
23490 if (!flags)
23492 fprintf (file, DEBUG_FMT_S, string, "<none>");
23493 return;
23496 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
23498 /* Print the various mask options. */
23499 cur_column = start_column;
23500 for (i = 0; i < num_elements; i++)
23502 bool invert = opts[i].invert;
23503 const char *name = opts[i].name;
23504 const char *no_str = "";
23505 HOST_WIDE_INT mask = opts[i].mask;
23506 size_t len = comma_len + prefix_len + strlen (name);
23508 if (!invert)
23510 if ((flags & mask) == 0)
23512 no_str = "no-";
23513 len += sizeof ("no-") - 1;
23516 flags &= ~mask;
23519 else
23521 if ((flags & mask) != 0)
23523 no_str = "no-";
23524 len += sizeof ("no-") - 1;
23527 flags |= mask;
23530 cur_column += len;
23531 if (cur_column > max_column)
23533 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
23534 cur_column = start_column + len;
23535 comma = "";
23538 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
23539 comma = ", ";
23540 comma_len = sizeof (", ") - 1;
23543 fputs ("\n", file);
23546 /* Helper function to print the current isa options on a line. */
23548 static void
23549 rs6000_print_isa_options (FILE *file, int indent, const char *string,
23550 HOST_WIDE_INT flags)
23552 rs6000_print_options_internal (file, indent, string, flags, "-m",
23553 &rs6000_opt_masks[0],
23554 ARRAY_SIZE (rs6000_opt_masks));
23557 static void
23558 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
23559 HOST_WIDE_INT flags)
23561 rs6000_print_options_internal (file, indent, string, flags, "",
23562 &rs6000_builtin_mask_names[0],
23563 ARRAY_SIZE (rs6000_builtin_mask_names));
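/* Editorial note: under -mdebug=target the two wrappers above produce
   output of (approximately) the form

     Isa options set: 0x... -maltivec, -mvsx, -mno-multiple, ...

   wrapping near column 120 with a trailing backslash, as implemented by
   rs6000_print_options_internal.  */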
23566 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
23567 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
23568 -mupper-regs-df, etc.).
23570 If the user used -mno-power8-vector, we need to turn off all of the implicit
23571 ISA 2.07 and 3.0 options that relate to the vector unit.
23573 If the user used -mno-power9-vector, we need to turn off all of the implicit
23574 ISA 3.0 options that relate to the vector unit.
23576 This function does not handle explicit options such as the user specifying
23577 -mdirect-move. These are handled in rs6000_option_override_internal, and
23578 the appropriate error is given if needed.
23580 We return a mask of all of the implicit options that should not be enabled
23581 by default. */
23583 static HOST_WIDE_INT
23584 rs6000_disable_incompatible_switches (void)
23586 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
23587 size_t i, j;
23589 static const struct {
23590 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
23591 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
23592 const char *const name; /* name of the switch. */
23593 } flags[] = {
23594 { OPTION_MASK_FUTURE, OTHER_FUTURE_MASKS, "future" },
23595 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
23596 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
23597 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
23600 for (i = 0; i < ARRAY_SIZE (flags); i++)
23602 HOST_WIDE_INT no_flag = flags[i].no_flag;
23604 if ((rs6000_isa_flags & no_flag) == 0
23605 && (rs6000_isa_flags_explicit & no_flag) != 0)
23607 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
23608 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
23609 & rs6000_isa_flags
23610 & dep_flags);
23612 if (set_flags)
23614 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
23615 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
23617 set_flags &= ~rs6000_opt_masks[j].mask;
23618 error ("%<-mno-%s%> turns off %<-m%s%>",
23619 flags[i].name,
23620 rs6000_opt_masks[j].name);
23623 gcc_assert (!set_flags);
23626 rs6000_isa_flags &= ~dep_flags;
23627 ignore_masks |= no_flag | dep_flags;
23631 return ignore_masks;
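/* Editorial example of the rule above, assuming OPTION_MASK_P9_VECTOR
   is one of the OTHER_VSX_VECTOR_MASKS bits: compiling with
   "-mpower9-vector -mno-vsx" makes both flags explicit and conflicting,
   so the loop reports

     error: '-mno-vsx' turns off '-mpower9-vector'

   whereas a plain "-mno-vsx" silently clears the dependent bits and
   folds them into the returned ignore_masks.  */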
23635 /* Helper function for printing the function name when debugging. */
23637 static const char *
23638 get_decl_name (tree fn)
23640 tree name;
23642 if (!fn)
23643 return "<null>";
23645 name = DECL_NAME (fn);
23646 if (!name)
23647 return "<no-name>";
23649 return IDENTIFIER_POINTER (name);
23652 /* Return the clone id of the target we are compiling code for in a target
23653 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
23654 the priority list for the target clones (ordered from lowest to
23655 highest). */
23657 static int
23658 rs6000_clone_priority (tree fndecl)
23660 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
23661 HOST_WIDE_INT isa_masks;
23662 int ret = CLONE_DEFAULT;
23663 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
23664 const char *attrs_str = NULL;
23666 attrs = TREE_VALUE (TREE_VALUE (attrs));
23667 attrs_str = TREE_STRING_POINTER (attrs);
23669 /* Return priority zero for default function. Return the ISA needed for the
23670 function if it is not the default. */
23671 if (strcmp (attrs_str, "default") != 0)
23673 if (fn_opts == NULL_TREE)
23674 fn_opts = target_option_default_node;
23676 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
23677 isa_masks = rs6000_isa_flags;
23678 else
23679 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
23681 for (ret = CLONE_MAX - 1; ret != 0; ret--)
23682 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
23683 break;
23686 if (TARGET_DEBUG_TARGET)
23687 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
23688 get_decl_name (fndecl), ret);
23690 return ret;
23693 /* This compares the priority of target features in function DECL1 and DECL2.
23694 It returns positive value if DECL1 is higher priority, negative value if
23695 DECL2 is higher priority and 0 if they are the same. Note, priorities are
23696 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
23698 static int
23699 rs6000_compare_version_priority (tree decl1, tree decl2)
23701 int priority1 = rs6000_clone_priority (decl1);
23702 int priority2 = rs6000_clone_priority (decl2);
23703 int ret = priority1 - priority2;
23705 if (TARGET_DEBUG_TARGET)
23706 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
23707 get_decl_name (decl1), get_decl_name (decl2), ret);
23709 return ret;
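/* Editorial usage sketch for the clone-priority machinery above; the
   function below is hypothetical user code:

     __attribute__((target_clones("cpu=power9","default")))
     long mod_func (long a, long b) { return a % b; }

   rs6000_clone_priority gives the "default" version CLONE_DEFAULT (0)
   and the power9 version the highest CLONE_* index whose isa_mask
   overlaps its target flags, so the power9 body wins the comparison in
   rs6000_compare_version_priority.  */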
23712 /* Make a dispatcher declaration for the multi-versioned function DECL.
23713 Calls to DECL function will be replaced with calls to the dispatcher
23714 by the front-end. Returns the decl of the dispatcher function. */
23716 static tree
23717 rs6000_get_function_versions_dispatcher (void *decl)
23719 tree fn = (tree) decl;
23720 struct cgraph_node *node = NULL;
23721 struct cgraph_node *default_node = NULL;
23722 struct cgraph_function_version_info *node_v = NULL;
23723 struct cgraph_function_version_info *first_v = NULL;
23725 tree dispatch_decl = NULL;
23727 struct cgraph_function_version_info *default_version_info = NULL;
23728 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
23730 if (TARGET_DEBUG_TARGET)
23731 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
23732 get_decl_name (fn));
23734 node = cgraph_node::get (fn);
23735 gcc_assert (node != NULL);
23737 node_v = node->function_version ();
23738 gcc_assert (node_v != NULL);
23740 if (node_v->dispatcher_resolver != NULL)
23741 return node_v->dispatcher_resolver;
23743 /* Find the default version and make it the first node. */
23744 first_v = node_v;
23745 /* Go to the beginning of the chain. */
23746 while (first_v->prev != NULL)
23747 first_v = first_v->prev;
23749 default_version_info = first_v;
23750 while (default_version_info != NULL)
23752 const tree decl2 = default_version_info->this_node->decl;
23753 if (is_function_default_version (decl2))
23754 break;
23755 default_version_info = default_version_info->next;
23758 /* If there is no default node, just return NULL. */
23759 if (default_version_info == NULL)
23760 return NULL;
23762 /* Make default info the first node. */
23763 if (first_v != default_version_info)
23765 default_version_info->prev->next = default_version_info->next;
23766 if (default_version_info->next)
23767 default_version_info->next->prev = default_version_info->prev;
23768 first_v->prev = default_version_info;
23769 default_version_info->next = first_v;
23770 default_version_info->prev = NULL;
23773 default_node = default_version_info->this_node;
23775 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
23776 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23777 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
23778 "exports hardware capability bits");
23779 #else
23781 if (targetm.has_ifunc_p ())
23783 struct cgraph_function_version_info *it_v = NULL;
23784 struct cgraph_node *dispatcher_node = NULL;
23785 struct cgraph_function_version_info *dispatcher_version_info = NULL;
23787 /* Right now, the dispatching is done via ifunc. */
23788 dispatch_decl = make_dispatcher_decl (default_node->decl);
23790 dispatcher_node = cgraph_node::get_create (dispatch_decl);
23791 gcc_assert (dispatcher_node != NULL);
23792 dispatcher_node->dispatcher_function = 1;
23793 dispatcher_version_info
23794 = dispatcher_node->insert_new_function_version ();
23795 dispatcher_version_info->next = default_version_info;
23796 dispatcher_node->definition = 1;
23798 /* Set the dispatcher for all the versions. */
23799 it_v = default_version_info;
23800 while (it_v != NULL)
23802 it_v->dispatcher_resolver = dispatch_decl;
23803 it_v = it_v->next;
23806 else
23808 error_at (DECL_SOURCE_LOCATION (default_node->decl),
23809 "multiversioning needs ifunc which is not supported "
23810 "on this target");
23812 #endif
23814 return dispatch_decl;
23817 /* Make the resolver function decl to dispatch the versions of a multi-
23818 versioned function, DEFAULT_DECL. Create an empty basic block in the
23819 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
23820 function. */
23822 static tree
23823 make_resolver_func (const tree default_decl,
23824 const tree dispatch_decl,
23825 basic_block *empty_bb)
23827 /* Make the resolver function static. The resolver function returns
23828 void *. */
23829 tree decl_name = clone_function_name (default_decl, "resolver");
23830 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
23831 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
23832 tree decl = build_fn_decl (resolver_name, type);
23833 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
23835 DECL_NAME (decl) = decl_name;
23836 TREE_USED (decl) = 1;
23837 DECL_ARTIFICIAL (decl) = 1;
23838 DECL_IGNORED_P (decl) = 0;
23839 TREE_PUBLIC (decl) = 0;
23840 DECL_UNINLINABLE (decl) = 1;
23842 /* Resolver is not external, body is generated. */
23843 DECL_EXTERNAL (decl) = 0;
23844 DECL_EXTERNAL (dispatch_decl) = 0;
23846 DECL_CONTEXT (decl) = NULL_TREE;
23847 DECL_INITIAL (decl) = make_node (BLOCK);
23848 DECL_STATIC_CONSTRUCTOR (decl) = 0;
23850 /* Build result decl and add to function_decl. */
23851 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
23852 DECL_CONTEXT (t) = decl;
23853 DECL_ARTIFICIAL (t) = 1;
23854 DECL_IGNORED_P (t) = 1;
23855 DECL_RESULT (decl) = t;
23857 gimplify_function_tree (decl);
23858 push_cfun (DECL_STRUCT_FUNCTION (decl));
23859 *empty_bb = init_lowered_empty_function (decl, false,
23860 profile_count::uninitialized ());
23862 cgraph_node::add_new_function (decl, true);
23863 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
23865 pop_cfun ();
23867 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
23868 DECL_ATTRIBUTES (dispatch_decl)
23869 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
23871 cgraph_node::create_same_body_alias (dispatch_decl, decl);
23873 return decl;
23876 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
23877 return a pointer to VERSION_DECL if we are running on a machine that
23878 supports the index CLONE_ISA hardware architecture bits. This function will
23879 be called during version dispatch to decide which function version to
23880 execute. It returns the basic block at the end, to which more conditions
23881 can be added. */
23883 static basic_block
23884 add_condition_to_bb (tree function_decl, tree version_decl,
23885 int clone_isa, basic_block new_bb)
23887 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
23889 gcc_assert (new_bb != NULL);
23890 gimple_seq gseq = bb_seq (new_bb);
23893 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
23894 build_fold_addr_expr (version_decl));
23895 tree result_var = create_tmp_var (ptr_type_node);
23896 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
23897 gimple *return_stmt = gimple_build_return (result_var);
23899 if (clone_isa == CLONE_DEFAULT)
23901 gimple_seq_add_stmt (&gseq, convert_stmt);
23902 gimple_seq_add_stmt (&gseq, return_stmt);
23903 set_bb_seq (new_bb, gseq);
23904 gimple_set_bb (convert_stmt, new_bb);
23905 gimple_set_bb (return_stmt, new_bb);
23906 pop_cfun ();
23907 return new_bb;
23910 tree bool_zero = build_int_cst (bool_int_type_node, 0);
23911 tree cond_var = create_tmp_var (bool_int_type_node);
23912 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
23913 const char *arg_str = rs6000_clone_map[clone_isa].name;
23914 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
23915 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
23916 gimple_call_set_lhs (call_cond_stmt, cond_var);
23918 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
23919 gimple_set_bb (call_cond_stmt, new_bb);
23920 gimple_seq_add_stmt (&gseq, call_cond_stmt);
23922 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
23923 NULL_TREE, NULL_TREE);
23924 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
23925 gimple_set_bb (if_else_stmt, new_bb);
23926 gimple_seq_add_stmt (&gseq, if_else_stmt);
23928 gimple_seq_add_stmt (&gseq, convert_stmt);
23929 gimple_seq_add_stmt (&gseq, return_stmt);
23930 set_bb_seq (new_bb, gseq);
23932 basic_block bb1 = new_bb;
23933 edge e12 = split_block (bb1, if_else_stmt);
23934 basic_block bb2 = e12->dest;
23935 e12->flags &= ~EDGE_FALLTHRU;
23936 e12->flags |= EDGE_TRUE_VALUE;
23938 edge e23 = split_block (bb2, return_stmt);
23939 gimple_set_bb (convert_stmt, bb2);
23940 gimple_set_bb (return_stmt, bb2);
23942 basic_block bb3 = e23->dest;
23943 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
23945 remove_edge (e23);
23946 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
23948 pop_cfun ();
23949 return bb3;
23952 /* This function generates the dispatch function for multi-versioned functions.
23953 DISPATCH_DECL is the function which will contain the dispatch logic.
23954 FNDECLS are the function choices for dispatch, and is a tree chain.
23955 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
23956 code is generated. */
23958 static int
23959 dispatch_function_versions (tree dispatch_decl,
23960 void *fndecls_p,
23961 basic_block *empty_bb)
23963 int ix;
23964 tree ele;
23965 vec<tree> *fndecls;
23966 tree clones[CLONE_MAX];
23968 if (TARGET_DEBUG_TARGET)
23969 fputs ("dispatch_function_versions, top\n", stderr);
23971 gcc_assert (dispatch_decl != NULL
23972 && fndecls_p != NULL
23973 && empty_bb != NULL);
23975 /* fndecls_p is actually a vector. */
23976 fndecls = static_cast<vec<tree> *> (fndecls_p);
23978 /* At least one more version other than the default. */
23979 gcc_assert (fndecls->length () >= 2);
23981 /* The first version in the vector is the default decl. */
23982 memset ((void *) clones, '\0', sizeof (clones));
23983 clones[CLONE_DEFAULT] = (*fndecls)[0];
23985 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
23986 here (on the x86_64, it is not a NOP). The builtin function
23987 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
23988 recent glibc. If we ever need to call __builtin_cpu_init, we would need
23989 to insert the code here to do the call. */
23991 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
23993 int priority = rs6000_clone_priority (ele);
23994 if (!clones[priority])
23995 clones[priority] = ele;
23998 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
23999 if (clones[ix])
24001 if (TARGET_DEBUG_TARGET)
24002 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
24003 ix, get_decl_name (clones[ix]));
24005 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
24006 *empty_bb);
24009 return 0;
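/* Editorial sketch of what the three functions above generate.  The C
   below approximates the emitted GIMPLE for a two-version clone of f;
   the names are hypothetical:

     static void *f_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))  // highest clone first
         return (void *) f_arch_3_00;
       return (void *) f_default;                 // CLONE_DEFAULT
     }

   and the dispatcher symbol f is emitted as an ifunc whose resolver is
   f_resolver (see make_resolver_func above).  */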
24012 /* Generate the dispatching code body to dispatch multi-versioned function
24013 DECL. The target hook is called to process the "target" attributes and
24014 provide the code to dispatch the right function at run-time. NODE points
24015 to the dispatcher decl whose body will be created. */
24017 static tree
24018 rs6000_generate_version_dispatcher_body (void *node_p)
24020 tree resolver;
24021 basic_block empty_bb;
24022 struct cgraph_node *node = (cgraph_node *) node_p;
24023 struct cgraph_function_version_info *ninfo = node->function_version ();
24025 if (ninfo->dispatcher_resolver)
24026 return ninfo->dispatcher_resolver;
24028 /* node is going to be an alias, so remove the finalized bit. */
24029 node->definition = false;
24031 /* The first version in the chain corresponds to the default version. */
24032 ninfo->dispatcher_resolver = resolver
24033 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
24035 if (TARGET_DEBUG_TARGET)
24036 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
24037 get_decl_name (resolver));
24039 push_cfun (DECL_STRUCT_FUNCTION (resolver));
24040 auto_vec<tree, 2> fn_ver_vec;
24042 for (struct cgraph_function_version_info *vinfo = ninfo->next;
24043 vinfo;
24044 vinfo = vinfo->next)
24046 struct cgraph_node *version = vinfo->this_node;
24047 /* Check for virtual functions here again, as by this time it should
24048 have been determined if this function needs a vtable index or
24049 not. This happens for methods in derived classes that override
24050 virtual methods in base classes but are not explicitly marked as
24051 virtual. */
24052 if (DECL_VINDEX (version->decl))
24053 sorry ("Virtual function multiversioning not supported");
24055 fn_ver_vec.safe_push (version->decl);
24058 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
24059 cgraph_edge::rebuild_edges ();
24060 pop_cfun ();
24061 return resolver;
24065 /* Hook to determine if one function can safely inline another. */
24067 static bool
24068 rs6000_can_inline_p (tree caller, tree callee)
24070 bool ret = false;
24071 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
24072 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
24074 /* If the callee has no option attributes, then it is ok to inline. */
24075 if (!callee_tree)
24076 ret = true;
24078 else
24080 HOST_WIDE_INT caller_isa;
24081 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24082 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
24083 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
24085 /* If the caller has option attributes, then use them.
24086 Otherwise, use the command line options. */
24087 if (caller_tree)
24088 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
24089 else
24090 caller_isa = rs6000_isa_flags;
24092 /* The callee's options must be a subset of the caller's options, i.e.
24093 a vsx function may inline an altivec function, but a no-vsx function
24094 must not inline a vsx function. However, for those options that the
24095 callee has explicitly enabled or disabled, then we must enforce that
24096 the callee's and caller's options match exactly; see PR70010. */
24097 if (((caller_isa & callee_isa) == callee_isa)
24098 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
24099 ret = true;
24102 if (TARGET_DEBUG_TARGET)
24103 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
24104 get_decl_name (caller), get_decl_name (callee),
24105 (ret ? "can" : "cannot"));
24107 return ret;
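/* Editorial example of the subset rule above: a caller built with
   -mcpu=power9 (VSX and Altivec on) may inline a callee marked
   __attribute__((target("altivec"))), since the callee's flags are a
   subset of the caller's.  A callee marked
   __attribute__((target("no-vsx"))), however, has OPTION_MASK_VSX in
   its explicit mask, so it inlines only into callers whose VSX setting
   matches exactly (see PR70010).  */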
24110 /* Allocate a stack temp and fix up the address so it meets the particular
24111 memory requirements (either offsettable or REG+REG addressing). */
24113 rtx
24114 rs6000_allocate_stack_temp (machine_mode mode,
24115 bool offsettable_p,
24116 bool reg_reg_p)
24118 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
24119 rtx addr = XEXP (stack, 0);
24120 int strict_p = reload_completed;
24122 if (!legitimate_indirect_address_p (addr, strict_p))
24124 if (offsettable_p
24125 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
24126 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24128 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
24129 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
24132 return stack;
24135 /* Given a memory reference, if it does not use reg or reg+reg addressing,
24136 convert it to such a form, to deal with memory reference instructions
24137 like STFIWX and LDBRX that only take reg+reg addressing. */
24139 rtx
24140 rs6000_force_indexed_or_indirect_mem (rtx x)
24142 machine_mode mode = GET_MODE (x);
24144 gcc_assert (MEM_P (x));
24145 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
24147 rtx addr = XEXP (x, 0);
24148 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
24150 rtx reg = XEXP (addr, 0);
24151 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
24152 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
24153 gcc_assert (REG_P (reg));
24154 emit_insn (gen_add3_insn (reg, reg, size_rtx));
24155 addr = reg;
24157 else if (GET_CODE (addr) == PRE_MODIFY)
24159 rtx reg = XEXP (addr, 0);
24160 rtx expr = XEXP (addr, 1);
24161 gcc_assert (REG_P (reg));
24162 gcc_assert (GET_CODE (expr) == PLUS);
24163 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
24164 addr = reg;
24167 if (GET_CODE (addr) == PLUS)
24169 rtx op0 = XEXP (addr, 0);
24170 rtx op1 = XEXP (addr, 1);
24171 op0 = force_reg (Pmode, op0);
24172 op1 = force_reg (Pmode, op1);
24173 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
24175 else
24176 x = replace_equiv_address (x, force_reg (Pmode, addr));
24179 return x;
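/* Editorial sketch: given a MEM like (mem:DI (plus (reg r9)
   (const_int 16))), the routine above forces both operands into
   registers (the constant 16 gets a new pseudo), roughly

     r10 = 16
     (mem:DI (plus (reg r9) (reg r10)))

   so that instructions such as STFIWX and LDBRX, which accept only reg
   or reg+reg addresses, can consume it.  */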
24182 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
24184 On the RS/6000, all integer constants are acceptable, most won't be valid
24185 for particular insns, though. Only easy FP constants are acceptable. */
24187 static bool
24188 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
24190 if (TARGET_ELF && tls_referenced_p (x))
24191 return false;
24193 if (CONST_DOUBLE_P (x))
24194 return easy_fp_constant (x, mode);
24196 if (GET_CODE (x) == CONST_VECTOR)
24197 return easy_vector_constant (x, mode);
24199 return true;
24203 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
24205 static bool
24206 chain_already_loaded (rtx_insn *last)
24208 for (; last != NULL; last = PREV_INSN (last))
24210 if (NONJUMP_INSN_P (last))
24212 rtx patt = PATTERN (last);
24214 if (GET_CODE (patt) == SET)
24216 rtx lhs = XEXP (patt, 0);
24218 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
24219 return true;
24223 return false;
24226 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
24228 void
24229 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24231 rtx func = func_desc;
24232 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
24233 rtx toc_load = NULL_RTX;
24234 rtx toc_restore = NULL_RTX;
24235 rtx func_addr;
24236 rtx abi_reg = NULL_RTX;
24237 rtx call[4];
24238 int n_call;
24239 rtx insn;
24240 bool is_pltseq_longcall;
24242 if (global_tlsarg)
24243 tlsarg = global_tlsarg;
24245 /* Handle longcall attributes. */
24246 is_pltseq_longcall = false;
24247 if ((INTVAL (cookie) & CALL_LONG) != 0
24248 && GET_CODE (func_desc) == SYMBOL_REF)
24250 func = rs6000_longcall_ref (func_desc, tlsarg);
24251 if (TARGET_PLTSEQ)
24252 is_pltseq_longcall = true;
24255 /* Handle indirect calls. */
24256 if (!SYMBOL_REF_P (func)
24257 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
24259 if (!rs6000_pcrel_p (cfun))
24261 /* Save the TOC into its reserved slot before the call,
24262 and prepare to restore it after the call. */
24263 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
24264 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
24265 gen_rtvec (1, stack_toc_offset),
24266 UNSPEC_TOCSLOT);
24267 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
24269 /* Can we optimize saving the TOC in the prologue or
24270 do we need to do it at every call? */
24271 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
24272 cfun->machine->save_toc_in_prologue = true;
24273 else
24275 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24276 rtx stack_toc_mem = gen_frame_mem (Pmode,
24277 gen_rtx_PLUS (Pmode, stack_ptr,
24278 stack_toc_offset));
24279 MEM_VOLATILE_P (stack_toc_mem) = 1;
24280 if (is_pltseq_longcall)
24282 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
24283 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24284 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
24286 else
24287 emit_move_insn (stack_toc_mem, toc_reg);
24291 if (DEFAULT_ABI == ABI_ELFv2)
24293 /* A function pointer in the ELFv2 ABI is just a plain address, but
24294 the ABI requires it to be loaded into r12 before the call. */
24295 func_addr = gen_rtx_REG (Pmode, 12);
24296 if (!rtx_equal_p (func_addr, func))
24297 emit_move_insn (func_addr, func);
24298 abi_reg = func_addr;
24299 /* Indirect calls via CTR are strongly preferred over indirect
24300 calls via LR, so move the address there. Needed to mark
24301 this insn for linker plt sequence editing too. */
24302 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24303 if (is_pltseq_longcall)
24305 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
24306 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24307 emit_insn (gen_rtx_SET (func_addr, mark_func));
24308 v = gen_rtvec (2, func_addr, func_desc);
24309 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24311 else
24312 emit_move_insn (func_addr, abi_reg);
24314 else
24316 /* A function pointer under AIX is a pointer to a data area whose
24317 first word contains the actual address of the function, whose
24318 second word contains a pointer to its TOC, and whose third word
24319 contains a value to place in the static chain register (r11).
24320 Note that if we load the static chain, our "trampoline" need
24321 not have any executable code. */
24323 /* Load up address of the actual function. */
24324 func = force_reg (Pmode, func);
24325 func_addr = gen_reg_rtx (Pmode);
24326 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
24328 /* Indirect calls via CTR are strongly preferred over indirect
24329 calls via LR, so move the address there. */
24330 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
24331 emit_move_insn (ctr_reg, func_addr);
24332 func_addr = ctr_reg;
24334 /* Prepare to load the TOC of the called function. Note that the
24335 TOC load must happen immediately before the actual call so
24336 that unwinding the TOC registers works correctly. See the
24337 comment in frob_update_context. */
24338 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
24339 rtx func_toc_mem = gen_rtx_MEM (Pmode,
24340 gen_rtx_PLUS (Pmode, func,
24341 func_toc_offset));
24342 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
24344 /* If we have a static chain, load it up. But, if the call was
24345 originally direct, the 3rd word has not been written since no
24346 trampoline has been built, so we ought not to load it, lest we
24347 override a static chain value. */
24348 if (!(GET_CODE (func_desc) == SYMBOL_REF
24349 && SYMBOL_REF_FUNCTION_P (func_desc))
24350 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
24351 && !chain_already_loaded (get_current_sequence ()->next->last))
24353 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
24354 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
24355 rtx func_sc_mem = gen_rtx_MEM (Pmode,
24356 gen_rtx_PLUS (Pmode, func,
24357 func_sc_offset));
24358 emit_move_insn (sc_reg, func_sc_mem);
24359 abi_reg = sc_reg;
24363 else
24365 /* No TOC register needed for calls from PC-relative callers. */
24366 if (!rs6000_pcrel_p (cfun))
24367 /* Direct calls use the TOC: for local calls, the callee will
24368 assume the TOC register is set; for non-local calls, the
24369 PLT stub needs the TOC register. */
24370 abi_reg = toc_reg;
24371 func_addr = func;
24374 /* Create the call. */
24375 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24376 if (value != NULL_RTX)
24377 call[0] = gen_rtx_SET (value, call[0]);
24378 n_call = 1;
24380 if (toc_load)
24381 call[n_call++] = toc_load;
24382 if (toc_restore)
24383 call[n_call++] = toc_restore;
24385 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24387 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
24388 insn = emit_call_insn (insn);
24390 /* Mention all registers defined by the ABI to hold information
24391 as uses in CALL_INSN_FUNCTION_USAGE. */
24392 if (abi_reg)
24393 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24396 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
24398 void
24399 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24401 rtx call[2];
24402 rtx insn;
24404 gcc_assert (INTVAL (cookie) == 0);
24406 if (global_tlsarg)
24407 tlsarg = global_tlsarg;
24409 /* Create the call. */
24410 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg);
24411 if (value != NULL_RTX)
24412 call[0] = gen_rtx_SET (value, call[0]);
24414 call[1] = simple_return_rtx;
24416 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
24417 insn = emit_call_insn (insn);
24419 /* Note use of the TOC register. */
24420 if (!rs6000_pcrel_p (cfun))
24421 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
24422 gen_rtx_REG (Pmode, TOC_REGNUM));
24427 /* Expand code to perform a call under the SysV4 ABI. */
24427 void
24428 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24430 rtx func = func_desc;
24431 rtx func_addr;
24432 rtx call[4];
24433 rtx insn;
24434 rtx abi_reg = NULL_RTX;
24435 int n;
24437 if (global_tlsarg)
24438 tlsarg = global_tlsarg;
24440 /* Handle longcall attributes. */
24441 if ((INTVAL (cookie) & CALL_LONG) != 0
24442 && GET_CODE (func_desc) == SYMBOL_REF)
24444 func = rs6000_longcall_ref (func_desc, tlsarg);
24445 /* If the longcall was implemented as an inline PLT call using
24446 PLT unspecs then func will be REG:r11. If not, func will be
24447 a pseudo reg. The inline PLT call sequence supports lazy
24448 linking (and longcalls to functions in dlopen'd libraries).
24450 The other style of longcall doesn't. The lazy linking entry
24450 to the dynamic symbol resolver requires r11 be the function
24451 address (as it is for linker generated PLT stubs). Ensure
24452 r11 stays valid to the bctrl by marking r11 used by the call. */
24453 if (TARGET_PLTSEQ)
24454 abi_reg = func;
24457 /* Handle indirect calls. */
24458 if (GET_CODE (func) != SYMBOL_REF)
24460 func = force_reg (Pmode, func);
24462 /* Indirect calls via CTR are strongly preferred over indirect
24463 calls via LR, so move the address there. That can't be left
24464 to reload because we want to mark every instruction in an
24465 inline PLT call sequence with a reloc, enabling the linker to
24466 edit the sequence back to a direct call when that makes sense. */
24467 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24468 if (abi_reg)
24470 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24471 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24472 emit_insn (gen_rtx_SET (func_addr, mark_func));
24473 v = gen_rtvec (2, func_addr, func_desc);
24474 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24476 else
24477 emit_move_insn (func_addr, func);
24479 else
24480 func_addr = func;
24482 /* Create the call. */
24483 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24484 if (value != NULL_RTX)
24485 call[0] = gen_rtx_SET (value, call[0]);
24487 call[1] = gen_rtx_USE (VOIDmode, cookie);
24488 n = 2;
24489 if (TARGET_SECURE_PLT
24490 && flag_pic
24491 && GET_CODE (func_addr) == SYMBOL_REF
24492 && !SYMBOL_REF_LOCAL_P (func_addr))
24493 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
24495 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24497 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
24498 insn = emit_call_insn (insn);
24499 if (abi_reg)
24500 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24503 /* Expand code to perform a sibling call under the SysV4 ABI. */
24505 void
24506 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
24508 rtx func = func_desc;
24509 rtx func_addr;
24510 rtx call[3];
24511 rtx insn;
24512 rtx abi_reg = NULL_RTX;
24514 if (global_tlsarg)
24515 tlsarg = global_tlsarg;
24517 /* Handle longcall attributes. */
24518 if ((INTVAL (cookie) & CALL_LONG) != 0
24519 && GET_CODE (func_desc) == SYMBOL_REF)
24521 func = rs6000_longcall_ref (func_desc, tlsarg);
24522 /* If the longcall was implemented as an inline PLT call using
24523 PLT unspecs then func will be REG:r11. If not, func will be
24524 a pseudo reg. The inline PLT call sequence supports lazy
24525 linking (and longcalls to functions in dlopen'd libraries).
24526 The other style of longcall doesn't. The lazy linking entry
24527 to the dynamic symbol resolver requires r11 be the function
24528 address (as it is for linker generated PLT stubs). Ensure
24529 r11 stays valid to the bctr by marking r11 used by the call. */
24530 if (TARGET_PLTSEQ)
24531 abi_reg = func;
24534 /* Handle indirect calls. */
24535 if (GET_CODE (func) != SYMBOL_REF)
24537 func = force_reg (Pmode, func);
24539 /* Indirect sibcalls must go via CTR. That can't be left to
24540 reload because we want to mark every instruction in an inline
24541 PLT call sequence with a reloc, enabling the linker to edit
24542 the sequence back to a direct call when that makes sense. */
24543 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24544 if (abi_reg)
24546 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
24547 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24548 emit_insn (gen_rtx_SET (func_addr, mark_func));
24549 v = gen_rtvec (2, func_addr, func_desc);
24550 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
24552 else
24553 emit_move_insn (func_addr, func);
24555 else
24556 func_addr = func;
24558 /* Create the call. */
24559 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24560 if (value != NULL_RTX)
24561 call[0] = gen_rtx_SET (value, call[0]);
24563 call[1] = gen_rtx_USE (VOIDmode, cookie);
24564 call[2] = simple_return_rtx;
24566 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24567 insn = emit_call_insn (insn);
24568 if (abi_reg)
24569 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
24572 #if TARGET_MACHO
24574 /* Expand code to perform a call under the Darwin ABI.
24575 Modulo handling of mlongcall, this is much the same as sysv.
24576 If/when the longcall optimisation is removed, we could drop this
24577 code and use the sysv case (taking care to avoid the tls stuff).
24579 We can use this for sibcalls too, if needed. */
24581 void
24582 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
24583 rtx cookie, bool sibcall)
24585 rtx func = func_desc;
24586 rtx func_addr;
24587 rtx call[3];
24588 rtx insn;
24589 int cookie_val = INTVAL (cookie);
24590 bool make_island = false;
24592 /* Handle longcall attributes; there are two cases for Darwin:
24593 1) Newer linkers are capable of synthesising any branch islands needed.
24594 2) We need a helper branch island synthesised by the compiler.
24595 The second case has mostly been retired and we don't use it for m64.
24596 In fact, it is an optimisation; we could just indirect as sysv does,
24597 but we keep it for backwards compatibility for now.
24598 If we're going to use this, then we need to keep the CALL_LONG bit set,
24599 so that we can pick up the special insn form later. */
24600 if ((cookie_val & CALL_LONG) != 0
24601 && GET_CODE (func_desc) == SYMBOL_REF)
24603 /* FIXME: the longcall opt should not hang off this flag, it is most
24604 likely incorrect for kernel-mode code-generation. */
24605 if (darwin_symbol_stubs && TARGET_32BIT)
24606 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
24607 else
24609 /* The linker is capable of doing this, but the user explicitly
24610 asked for -mlongcall, so we'll do the 'normal' version. */
24611 func = rs6000_longcall_ref (func_desc, NULL_RTX);
24612 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
24616 /* Handle indirect calls. */
24617 if (GET_CODE (func) != SYMBOL_REF)
24619 func = force_reg (Pmode, func);
24621 /* Indirect calls via CTR are strongly preferred over indirect
24622 calls via LR, and are required for indirect sibcalls, so move
24623 the address there. */
24624 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
24625 emit_move_insn (func_addr, func);
24627 else
24628 func_addr = func;
24630 /* Create the call. */
24631 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
24632 if (value != NULL_RTX)
24633 call[0] = gen_rtx_SET (value, call[0]);
24635 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
24637 if (sibcall)
24638 call[2] = simple_return_rtx;
24639 else
24640 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
24642 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
24643 insn = emit_call_insn (insn);
24644 /* Now that we have the debug info in the insn, we can set up the branch
24645 island if we're using one. */
24646 if (make_island)
24648 tree funname = get_identifier (XSTR (func_desc, 0));
24650 if (no_previous_def (funname))
24652 rtx label_rtx = gen_label_rtx ();
24653 char *label_buf, temp_buf[256];
24654 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
24655 CODE_LABEL_NUMBER (label_rtx));
24656 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
24657 tree labelname = get_identifier (label_buf);
24658 add_compiler_branch_island (labelname, funname,
24659 insn_line ((const rtx_insn*)insn));
24663 #endif
24665 void
24666 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24667 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24669 #if TARGET_MACHO
24670 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
24671 #else
24672 gcc_unreachable();
24673 #endif
24677 void
24678 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
24679 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
24681 #if TARGET_MACHO
24682 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
24683 #else
24684 gcc_unreachable();
24685 #endif
24688 /* Return whether we should generate PC-relative code for FNDECL. */
24689 bool
24690 rs6000_fndecl_pcrel_p (const_tree fndecl)
24692 if (DEFAULT_ABI != ABI_ELFv2)
24693 return false;
24695 struct cl_target_option *opts = target_opts_for_fn (fndecl);
24697 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24698 && TARGET_CMODEL == CMODEL_MEDIUM);
24701 /* Return whether we should generate PC-relative code for *FN. */
24702 bool
24703 rs6000_pcrel_p (struct function *fn)
24705 if (DEFAULT_ABI != ABI_ELFv2)
24706 return false;
24708 /* Optimize usual case. */
24709 if (fn == cfun)
24710 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
24711 && TARGET_CMODEL == CMODEL_MEDIUM);
24713 return rs6000_fndecl_pcrel_p (fn->decl);
24717 /* Given an address (ADDR), a mode (MODE), and what the format of the
24718 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
24719 for the address. */
24721 enum insn_form
24722 address_to_insn_form (rtx addr,
24723 machine_mode mode,
24724 enum non_prefixed_form non_prefixed_format)
24726 /* Single register is easy. */
24727 if (REG_P (addr) || SUBREG_P (addr))
24728 return INSN_FORM_BASE_REG;
24730 /* If the non-prefixed instruction format doesn't support offset addressing,
24731 make sure only indexed addressing is allowed.
24733 We special case SDmode so that the register allocator does not try to move
24734 SDmode through GPR registers, but instead uses the 32-bit integer load and
24735 store instructions for the floating point registers. */
24736 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
24738 if (GET_CODE (addr) != PLUS)
24739 return INSN_FORM_BAD;
24741 rtx op0 = XEXP (addr, 0);
24742 rtx op1 = XEXP (addr, 1);
24743 if (!REG_P (op0) && !SUBREG_P (op0))
24744 return INSN_FORM_BAD;
24746 if (!REG_P (op1) && !SUBREG_P (op1))
24747 return INSN_FORM_BAD;
24749 return INSN_FORM_X;
24752 /* Deal with update forms. */
24753 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
24754 return INSN_FORM_UPDATE;
24756 /* Handle PC-relative symbols and labels. Check for both local and external
24757 symbols. Assume labels are always local. */
24758 if (TARGET_PCREL)
24760 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
24761 return INSN_FORM_PCREL_EXTERNAL;
24763 if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
24764 return INSN_FORM_PCREL_LOCAL;
24767 if (GET_CODE (addr) == CONST)
24768 addr = XEXP (addr, 0);
24770 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
24771 if (GET_CODE (addr) == LO_SUM)
24772 return INSN_FORM_LO_SUM;
24774 /* Everything below must be an offset address of some form. */
24775 if (GET_CODE (addr) != PLUS)
24776 return INSN_FORM_BAD;
24778 rtx op0 = XEXP (addr, 0);
24779 rtx op1 = XEXP (addr, 1);
24781 /* Check for indexed addresses. */
24782 if (REG_P (op1) || SUBREG_P (op1))
24784 if (REG_P (op0) || SUBREG_P (op0))
24785 return INSN_FORM_X;
24787 return INSN_FORM_BAD;
24790 if (!CONST_INT_P (op1))
24791 return INSN_FORM_BAD;
24793 HOST_WIDE_INT offset = INTVAL (op1);
24794 if (!SIGNED_INTEGER_34BIT_P (offset))
24795 return INSN_FORM_BAD;
24797 /* Check for local and external PC-relative addresses. Labels are always
24798 local. */
24799 if (TARGET_PCREL)
24801 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
24802 return INSN_FORM_PCREL_EXTERNAL;
24804 if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
24805 return INSN_FORM_PCREL_LOCAL;
24808 /* If it isn't PC-relative, the address must use a base register. */
24809 if (!REG_P (op0) && !SUBREG_P (op0))
24810 return INSN_FORM_BAD;
24812 /* Large offsets must be prefixed. */
24813 if (!SIGNED_INTEGER_16BIT_P (offset))
24815 if (TARGET_PREFIXED_ADDR)
24816 return INSN_FORM_PREFIXED_NUMERIC;
24818 return INSN_FORM_BAD;
24821 /* We have a 16-bit offset; see what default instruction format to use. */
24822 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
24824 unsigned size = GET_MODE_SIZE (mode);
24826 /* On 64-bit systems, assume 64-bit integers need to use DS form
24827 addresses (for LD/STD). VSX vectors need to use DQ form addresses
24828 (for LXV and STXV). TImode is problematic in that its normal usage
24829 is expected to be in GPRs, where it wants a DS instruction format, but if
24830 it goes into the vector registers, it wants a DQ instruction
24831 format. */
24832 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
24833 non_prefixed_format = NON_PREFIXED_DS;
24835 else if (TARGET_VSX && size >= 16
24836 && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
24837 non_prefixed_format = NON_PREFIXED_DQ;
24839 else
24840 non_prefixed_format = NON_PREFIXED_D;
24843 /* Classify the D/DS/DQ-form addresses. */
24844 switch (non_prefixed_format)
24846 /* Instruction format D, all 16 bits are valid. */
24847 case NON_PREFIXED_D:
24848 return INSN_FORM_D;
24850 /* Instruction format DS, bottom 2 bits must be 0. */
24851 case NON_PREFIXED_DS:
24852 if ((offset & 3) == 0)
24853 return INSN_FORM_DS;
24855 else if (TARGET_PREFIXED_ADDR)
24856 return INSN_FORM_PREFIXED_NUMERIC;
24858 else
24859 return INSN_FORM_BAD;
24861 /* Instruction format DQ, bottom 4 bits must be 0. */
24862 case NON_PREFIXED_DQ:
24863 if ((offset & 15) == 0)
24864 return INSN_FORM_DQ;
24866 else if (TARGET_PREFIXED_ADDR)
24867 return INSN_FORM_PREFIXED_NUMERIC;
24869 else
24870 return INSN_FORM_BAD;
24872 default:
24873 break;
24876 return INSN_FORM_BAD;
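
/* Illustrative examples (added commentary, not from the original sources):
   assuming a 64-bit target with prefixed addressing enabled, DImode, and
   NON_PREFIXED_DEFAULT, typical addresses classify as:

       (reg r3)                               -> INSN_FORM_BASE_REG
       (plus (reg r3) (reg r4))               -> INSN_FORM_X
       (plus (reg r3) (const_int 16))         -> INSN_FORM_DS (LD/STD offset)
       (plus (reg r3) (const_int 18))         -> INSN_FORM_PREFIXED_NUMERIC,
                                                 since DS needs (offset & 3) == 0
       (plus (reg r3) (const_int 0x12345678)) -> INSN_FORM_PREFIXED_NUMERIC,
                                                 since the offset needs 34 bits
       (symbol_ref "x") with TARGET_PCREL     -> INSN_FORM_PCREL_LOCAL or
                                                 INSN_FORM_PCREL_EXTERNAL.  */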
24879 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
24880 instruction format (D/DS/DQ) used for offset memory. */
24882 static enum non_prefixed_form
24883 reg_to_non_prefixed (rtx reg, machine_mode mode)
24885 /* If it isn't a register, use the defaults. */
24886 if (!REG_P (reg) && !SUBREG_P (reg))
24887 return NON_PREFIXED_DEFAULT;
24889 unsigned int r = reg_or_subregno (reg);
24891 /* If we have a pseudo, use the default instruction format. */
24892 if (!HARD_REGISTER_NUM_P (r))
24893 return NON_PREFIXED_DEFAULT;
24895 unsigned size = GET_MODE_SIZE (mode);
24897 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
24898 128-bit floating point, and 128-bit integers. */
24899 if (FP_REGNO_P (r))
24901 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24902 return NON_PREFIXED_D;
24904 else if (size < 8)
24905 return NON_PREFIXED_X;
24907 else if (TARGET_VSX && size >= 16
24908 && (VECTOR_MODE_P (mode)
24909 || FLOAT128_VECTOR_P (mode)
24910 || mode == TImode || mode == CTImode))
24911 return NON_PREFIXED_DQ;
24913 else
24914 return NON_PREFIXED_DEFAULT;
24917 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
24918 128-bit floating point, and 128-bit integers. */
24919 else if (ALTIVEC_REGNO_P (r))
24921 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
24922 return NON_PREFIXED_DS;
24924 else if (size < 8)
24925 return NON_PREFIXED_X;
24927 else if (TARGET_VSX && size >= 16
24928 && (VECTOR_MODE_P (mode)
24929 || FLOAT128_VECTOR_P (mode)
24930 || mode == TImode || mode == CTImode))
24931 return NON_PREFIXED_DQ;
24933 else
24934 return NON_PREFIXED_DEFAULT;
24937 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
24938 otherwise. Assume that any other register, such as LR or the CRs, will go
24939 through the GPR registers for memory operations. */
24940 else if (TARGET_POWERPC64 && size >= 8)
24941 return NON_PREFIXED_DS;
24943 return NON_PREFIXED_D;
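
/* Worked summary (added commentary, not from the original sources) of the
   mapping above for some common register/mode pairs:

       GPR, SImode                 -> NON_PREFIXED_D   (lwz/stw)
       GPR, DImode on 64-bit       -> NON_PREFIXED_DS  (ld/std)
       FPR, DFmode                 -> NON_PREFIXED_D   (lfd/stfd)
       FPR, HImode                 -> NON_PREFIXED_X   (indexed forms only)
       Altivec, V2DImode with VSX  -> NON_PREFIXED_DQ  (lxv/stxv)
       pseudo register             -> NON_PREFIXED_DEFAULT.  */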
24947 /* Whether a load instruction is a prefixed instruction. This is called from
24948 the prefixed attribute processing. */
24950 bool
24951 prefixed_load_p (rtx_insn *insn)
24953 /* Validate the insn to make sure it is a normal load insn. */
24954 extract_insn_cached (insn);
24955 if (recog_data.n_operands < 2)
24956 return false;
24958 rtx reg = recog_data.operand[0];
24959 rtx mem = recog_data.operand[1];
24961 if (!REG_P (reg) && !SUBREG_P (reg))
24962 return false;
24964 if (!MEM_P (mem))
24965 return false;
24967 /* Prefixed load instructions do not support update or indexed forms. */
24968 if (get_attr_indexed (insn) == INDEXED_YES
24969 || get_attr_update (insn) == UPDATE_YES)
24970 return false;
24972 /* LWA uses the DS format instead of the D format that LWZ uses. */
24973 enum non_prefixed_form non_prefixed;
24974 machine_mode reg_mode = GET_MODE (reg);
24975 machine_mode mem_mode = GET_MODE (mem);
24977 if (mem_mode == SImode && reg_mode == DImode
24978 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
24979 non_prefixed = NON_PREFIXED_DS;
24981 else
24982 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
24984 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
24987 /* Whether a store instruction is a prefixed instruction. This is called from
24988 the prefixed attribute processing. */
24990 bool
24991 prefixed_store_p (rtx_insn *insn)
24993 /* Validate the insn to make sure it is a normal store insn. */
24994 extract_insn_cached (insn);
24995 if (recog_data.n_operands < 2)
24996 return false;
24998 rtx mem = recog_data.operand[0];
24999 rtx reg = recog_data.operand[1];
25001 if (!REG_P (reg) && !SUBREG_P (reg))
25002 return false;
25004 if (!MEM_P (mem))
25005 return false;
25007 /* Prefixed store instructions do not support update or indexed forms. */
25008 if (get_attr_indexed (insn) == INDEXED_YES
25009 || get_attr_update (insn) == UPDATE_YES)
25010 return false;
25012 machine_mode mem_mode = GET_MODE (mem);
25013 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
25014 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
25017 /* Whether a load immediate or add instruction is a prefixed instruction. This
25018 is called from the prefixed attribute processing. */
25020 bool
25021 prefixed_paddi_p (rtx_insn *insn)
25023 rtx set = single_set (insn);
25024 if (!set)
25025 return false;
25027 rtx dest = SET_DEST (set);
25028 rtx src = SET_SRC (set);
25030 if (!REG_P (dest) && !SUBREG_P (dest))
25031 return false;
25033 /* Is this a load immediate that can't be done with a simple ADDI or
25034 ADDIS? */
25035 if (CONST_INT_P (src))
25036 return (satisfies_constraint_eI (src)
25037 && !satisfies_constraint_I (src)
25038 && !satisfies_constraint_L (src));
25040 /* Is this a PADDI instruction that can't be done with a simple ADDI or
25041 ADDIS? */
25042 if (GET_CODE (src) == PLUS)
25044 rtx op1 = XEXP (src, 1);
25046 return (CONST_INT_P (op1)
25047 && satisfies_constraint_eI (op1)
25048 && !satisfies_constraint_I (op1)
25049 && !satisfies_constraint_L (op1));
25052 /* If not, is it a load of a PC-relative address? */
25053 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
25054 return false;
25056 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
25057 return false;
25059 enum insn_form iform = address_to_insn_form (src, Pmode,
25060 NON_PREFIXED_DEFAULT);
25062 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
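
/* Example immediates (added commentary, not from the original sources),
   showing why the constraint tests above imply a prefixed PADDI:

       addi  r3,r3,12345         I matches: plain 16-bit ADDI
       addis r3,r3,0x1234        L matches: shifted 16-bit ADDIS
       paddi r3,r3,0x12345678    only eI (34-bit) matches: prefixed PADDI.  */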
25065 /* Whether the next instruction needs a 'p' prefix issued before the
25066 instruction is printed out. */
25067 static bool next_insn_prefixed_p;
25069 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
25070 outputting the assembler code. On the PowerPC, we remember if the current
25071 insn is a prefixed insn where we need to emit a 'p' before the insn.
25073 In addition, if the insn is part of a PC-relative reference to an external
25074 label optimization, this is recorded as well. */
25075 void
25076 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
25078 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
25079 return;
25082 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
25083 We use it to emit a 'p' for prefixed insns, based on the flag set in
25084 FINAL_PRESCAN_INSN. */
25085 void
25086 rs6000_asm_output_opcode (FILE *stream)
25088 if (next_insn_prefixed_p)
25089 fprintf (stream, "p");
25091 return;
25094 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
25095 should be adjusted to reflect any required changes. This macro is used when
25096 there is some systematic length adjustment required that would be difficult
25097 to express in the length attribute.
25099 On the PowerPC, we use this to adjust the length of an instruction if one or
25100 more prefixed instructions are generated, using the attribute
25101 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
25102 hardware requires that a prefixed instruction does not cross a 64-byte
25103 boundary. This means the compiler has to assume the length of the first
25104 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
25105 already set for the non-prefixed instruction, we just need to update for the
25106 difference. */
25108 int
25109 rs6000_adjust_insn_length (rtx_insn *insn, int length)
25111 if (TARGET_PREFIXED_ADDR && NONJUMP_INSN_P (insn))
25113 rtx pattern = PATTERN (insn);
25114 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
25115 && get_attr_prefixed (insn) == PREFIXED_YES)
25117 int num_prefixed = get_attr_max_prefixed_insns (insn);
25118 length += 4 * (num_prefixed + 1);
25122 return length;
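
/* Worked example (added commentary, not from the original sources): a move
   that generates one prefixed load starts with a computed length of 4.  With
   max_prefixed_insns == 1, the adjustment adds 4 * (1 + 1) = 8 bytes, giving
   the 12 bytes described above: 8 for the prefixed instruction itself plus 4
   for the alignment NOP that may be inserted so that the instruction does
   not cross a 64-byte boundary.  */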
25126 #ifdef HAVE_GAS_HIDDEN
25127 # define USE_HIDDEN_LINKONCE 1
25128 #else
25129 # define USE_HIDDEN_LINKONCE 0
25130 #endif
25132 /* Fills in the label name that should be used for a 476 link stack thunk. */
25134 void
25135 get_ppc476_thunk_name (char name[32])
25137 gcc_assert (TARGET_LINK_STACK);
25139 if (USE_HIDDEN_LINKONCE)
25140 sprintf (name, "__ppc476.get_thunk");
25141 else
25142 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
25145 /* This function emits the simple thunk routine that is used to preserve
25146 the link stack on the 476 cpu. */
25148 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
25149 static void
25150 rs6000_code_end (void)
25152 char name[32];
25153 tree decl;
25155 if (!TARGET_LINK_STACK)
25156 return;
25158 get_ppc476_thunk_name (name);
25160 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
25161 build_function_type_list (void_type_node, NULL_TREE));
25162 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
25163 NULL_TREE, void_type_node);
25164 TREE_PUBLIC (decl) = 1;
25165 TREE_STATIC (decl) = 1;
25167 #if RS6000_WEAK
25168 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
25170 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
25171 targetm.asm_out.unique_section (decl, 0);
25172 switch_to_section (get_named_section (decl, NULL, 0));
25173 DECL_WEAK (decl) = 1;
25174 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
25175 targetm.asm_out.globalize_label (asm_out_file, name);
25176 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
25177 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
25179 else
25180 #endif
25182 switch_to_section (text_section);
25183 ASM_OUTPUT_LABEL (asm_out_file, name);
25186 DECL_INITIAL (decl) = make_node (BLOCK);
25187 current_function_decl = decl;
25188 allocate_struct_function (decl, false);
25189 init_function_start (decl);
25190 first_function_block_is_cold = false;
25191 /* Make sure unwind info is emitted for the thunk if needed. */
25192 final_start_function (emit_barrier (), asm_out_file, 1);
25194 fputs ("\tblr\n", asm_out_file);
25196 final_end_function ();
25197 init_insn_lengths ();
25198 free_after_compilation (cfun);
25199 set_cfun (NULL);
25200 current_function_decl = NULL;
25203 /* Add r30 to hard reg set if the prologue sets it up and it is not
25204 pic_offset_table_rtx. */
25206 static void
25207 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
25209 if (!TARGET_SINGLE_PIC_BASE
25210 && TARGET_TOC
25211 && TARGET_MINIMAL_TOC
25212 && !constant_pool_empty_p ())
25213 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25214 if (cfun->machine->split_stack_argp_used)
25215 add_to_hard_reg_set (&set->set, Pmode, 12);
25217 /* Make sure the hard reg set doesn't include r2, which was possibly added
25218 via PIC_OFFSET_TABLE_REGNUM. */
25219 if (TARGET_TOC)
25220 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
25224 /* Helper function for rs6000_split_logical to emit a logical instruction after
25225 splitting the operation into single GPR registers.
25227 DEST is the destination register.
25228 OP1 and OP2 are the input source registers.
25229 CODE is the base operation (AND, IOR, XOR, NOT).
25230 MODE is the machine mode.
25231 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25232 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25233 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25235 static void
25236 rs6000_split_logical_inner (rtx dest,
25237 rtx op1,
25238 rtx op2,
25239 enum rtx_code code,
25240 machine_mode mode,
25241 bool complement_final_p,
25242 bool complement_op1_p,
25243 bool complement_op2_p)
25245 rtx bool_rtx;
25247 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
25248 if (op2 && CONST_INT_P (op2)
25249 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
25250 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25252 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
25253 HOST_WIDE_INT value = INTVAL (op2) & mask;
25255 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
25256 if (code == AND)
25258 if (value == 0)
25260 emit_insn (gen_rtx_SET (dest, const0_rtx));
25261 return;
25264 else if (value == mask)
25266 if (!rtx_equal_p (dest, op1))
25267 emit_insn (gen_rtx_SET (dest, op1));
25268 return;
25272 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
25273 into separate ORI/ORIS or XORI/XORIS instructions. */
25274 else if (code == IOR || code == XOR)
25276 if (value == 0)
25278 if (!rtx_equal_p (dest, op1))
25279 emit_insn (gen_rtx_SET (dest, op1));
25280 return;
25285 if (code == AND && mode == SImode
25286 && !complement_final_p && !complement_op1_p && !complement_op2_p)
25288 emit_insn (gen_andsi3 (dest, op1, op2));
25289 return;
25292 if (complement_op1_p)
25293 op1 = gen_rtx_NOT (mode, op1);
25295 if (complement_op2_p)
25296 op2 = gen_rtx_NOT (mode, op2);
25298 /* For canonical RTL, if only one arm is inverted it is the first. */
25299 if (!complement_op1_p && complement_op2_p)
25300 std::swap (op1, op2);
25302 bool_rtx = ((code == NOT)
25303 ? gen_rtx_NOT (mode, op1)
25304 : gen_rtx_fmt_ee (code, mode, op1, op2));
25306 if (complement_final_p)
25307 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
25309 emit_insn (gen_rtx_SET (dest, bool_rtx));
25312 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
25313 operations are split immediately during RTL generation to allow for more
25314 optimizations of the AND/IOR/XOR.
25316 OPERANDS is an array containing the destination and two input operands.
25317 CODE is the base operation (AND, IOR, XOR, NOT).
25318 MODE is the machine mode.
25319 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25320 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25321 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
25322 CLOBBER_REG is either NULL or a scratch register of type CC to allow
25323 formation of the AND instructions. */
25325 static void
25326 rs6000_split_logical_di (rtx operands[3],
25327 enum rtx_code code,
25328 bool complement_final_p,
25329 bool complement_op1_p,
25330 bool complement_op2_p)
25332 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
25333 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
25334 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
25335 enum hi_lo { hi = 0, lo = 1 };
25336 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
25337 size_t i;
25339 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
25340 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
25341 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
25342 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
25344 if (code == NOT)
25345 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
25346 else
25348 if (!CONST_INT_P (operands[2]))
25350 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
25351 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
25353 else
25355 HOST_WIDE_INT value = INTVAL (operands[2]);
25356 HOST_WIDE_INT value_hi_lo[2];
25358 gcc_assert (!complement_final_p);
25359 gcc_assert (!complement_op1_p);
25360 gcc_assert (!complement_op2_p);
25362 value_hi_lo[hi] = value >> 32;
25363 value_hi_lo[lo] = value & lower_32bits;
25365 for (i = 0; i < 2; i++)
25367 HOST_WIDE_INT sub_value = value_hi_lo[i];
25369 if (sub_value & sign_bit)
25370 sub_value |= upper_32bits;
25372 op2_hi_lo[i] = GEN_INT (sub_value);
25374 /* If this is an AND instruction, check to see if we need to load
25375 the value in a register. */
25376 if (code == AND && sub_value != -1 && sub_value != 0
25377 && !and_operand (op2_hi_lo[i], SImode))
25378 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
25383 for (i = 0; i < 2; i++)
25385 /* Split large IOR/XOR operations. */
25386 if ((code == IOR || code == XOR)
25387 && CONST_INT_P (op2_hi_lo[i])
25388 && !complement_final_p
25389 && !complement_op1_p
25390 && !complement_op2_p
25391 && !logical_const_operand (op2_hi_lo[i], SImode))
25393 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
25394 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
25395 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
25396 rtx tmp = gen_reg_rtx (SImode);
25398 /* Make sure the constant is sign extended. */
25399 if ((hi_16bits & sign_bit) != 0)
25400 hi_16bits |= upper_32bits;
25402 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
25403 code, SImode, false, false, false);
25405 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
25406 code, SImode, false, false, false);
25408 else
25409 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
25410 code, SImode, complement_final_p,
25411 complement_op1_p, complement_op2_p);
25414 return;
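
/* Example (added commentary, not from the original sources): on a 32-bit
   target, splitting

       (set (reg:DI 3) (xor:DI (reg:DI 3) (const_int 0x12345678)))

   XORs the high SImode half with 0 (a no-op move), while the low half
   constant is not a single logical constant, so it is emitted as the pair

       xoris 4,4,0x1234
       xori  4,4,0x5678

   assuming big-endian register pairing with the low word in GPR 4.  */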
25417 /* Split the insns that make up boolean operations operating on multiple GPR
25418 registers. The boolean MD patterns ensure that the inputs either are
25419 exactly the same as the output registers, or there is no overlap.
25421 OPERANDS is an array containing the destination and two input operands.
25422 CODE is the base operation (AND, IOR, XOR, NOT).
25423 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
25424 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
25425 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
25427 void
25428 rs6000_split_logical (rtx operands[3],
25429 enum rtx_code code,
25430 bool complement_final_p,
25431 bool complement_op1_p,
25432 bool complement_op2_p)
25434 machine_mode mode = GET_MODE (operands[0]);
25435 machine_mode sub_mode;
25436 rtx op0, op1, op2;
25437 int sub_size, regno0, regno1, nregs, i;
25439 /* If this is DImode, use the specialized version that can run before
25440 register allocation. */
25441 if (mode == DImode && !TARGET_POWERPC64)
25443 rs6000_split_logical_di (operands, code, complement_final_p,
25444 complement_op1_p, complement_op2_p);
25445 return;
25448 op0 = operands[0];
25449 op1 = operands[1];
25450 op2 = (code == NOT) ? NULL_RTX : operands[2];
25451 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
25452 sub_size = GET_MODE_SIZE (sub_mode);
25453 regno0 = REGNO (op0);
25454 regno1 = REGNO (op1);
25456 gcc_assert (reload_completed);
25457 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25458 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
25460 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
25461 gcc_assert (nregs > 1);
25463 if (op2 && REG_P (op2))
25464 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
25466 for (i = 0; i < nregs; i++)
25468 int offset = i * sub_size;
25469 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
25470 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
25471 rtx sub_op2 = ((code == NOT)
25472 ? NULL_RTX
25473 : simplify_subreg (sub_mode, op2, mode, offset));
25475 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
25476 complement_final_p, complement_op1_p,
25477 complement_op2_p);
25480 return;
25484 /* Return true if the peephole2 can combine an addis instruction and a
25485 load with an offset into a single sequence that can be fused together
25486 on a power8. */
25488 bool
25489 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
25490 rtx addis_value, /* addis value. */
25491 rtx target, /* target register that is loaded. */
25492 rtx mem) /* bottom part of the memory addr. */
25494 rtx addr;
25495 rtx base_reg;
25497 /* Validate arguments. */
25498 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
25499 return false;
25501 if (!base_reg_operand (target, GET_MODE (target)))
25502 return false;
25504 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
25505 return false;
25507 /* Allow sign/zero extension. */
25508 if (GET_CODE (mem) == ZERO_EXTEND
25509 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
25510 mem = XEXP (mem, 0);
25512 if (!MEM_P (mem))
25513 return false;
25515 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
25516 return false;
25518 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
25519 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
25520 return false;
25522 /* Validate that the register used to load the high value is either the
25523 register being loaded, or we can safely replace its use.
25525 This function is only called from the peephole2 pass and we assume that
25526 there are 2 instructions in the peephole (addis and load), so we want to
25527 check if the target register was not used in the memory address and the
25528 register to hold the addis result is dead after the peephole. */
25529 if (REGNO (addis_reg) != REGNO (target))
25531 if (reg_mentioned_p (target, mem))
25532 return false;
25534 if (!peep2_reg_dead_p (2, addis_reg))
25535 return false;
25537 /* If the target register being loaded is the stack pointer, we must
25538 avoid loading any other value into it, even temporarily. */
25539 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
25540 return false;
25543 base_reg = XEXP (addr, 0);
25544 return REGNO (addis_reg) == REGNO (base_reg);
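
/* Sketch of the two-insn sequence the peephole2 matches (added commentary,
   not from the original sources):

       addis r9,r2,sym@toc@ha      <- addis_reg = r9
       lwz   r10,sym@toc@l(r9)     <- target = r10, base_reg = r9

   The pair can fuse when r9 and r10 are the same register, or when r9 is
   dead after the load, r10 does not appear in the address, and r10 is not
   the stack pointer.  In all cases r9 must be the base of the address.  */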
25547 /* During the peephole2 pass, adjust and expand the insns for a load fusion
25548 sequence. We adjust the addis register to use the target register. If the
25549 load sign extends, we adjust the code to do a zero-extending load followed
25550 by an explicit sign extension, since the fusion only covers zero-extending
25551 loads.
25553 The operands are:
25554 operands[0] register set with addis (to be replaced with target)
25555 operands[1] value set via addis
25556 operands[2] target register being loaded
25557 operands[3] D-form memory reference using operands[0]. */
25559 void
25560 expand_fusion_gpr_load (rtx *operands)
25562 rtx addis_value = operands[1];
25563 rtx target = operands[2];
25564 rtx orig_mem = operands[3];
25565 rtx new_addr, new_mem, orig_addr, offset;
25566 enum rtx_code plus_or_lo_sum;
25567 machine_mode target_mode = GET_MODE (target);
25568 machine_mode extend_mode = target_mode;
25569 machine_mode ptr_mode = Pmode;
25570 enum rtx_code extend = UNKNOWN;
25572 if (GET_CODE (orig_mem) == ZERO_EXTEND
25573 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
25575 extend = GET_CODE (orig_mem);
25576 orig_mem = XEXP (orig_mem, 0);
25577 target_mode = GET_MODE (orig_mem);
25580 gcc_assert (MEM_P (orig_mem));
25582 orig_addr = XEXP (orig_mem, 0);
25583 plus_or_lo_sum = GET_CODE (orig_addr);
25584 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
25586 offset = XEXP (orig_addr, 1);
25587 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
25588 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
25590 if (extend != UNKNOWN)
25591 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
25593 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
25594 UNSPEC_FUSION_GPR);
25595 emit_insn (gen_rtx_SET (target, new_mem));
25597 if (extend == SIGN_EXTEND)
25599 int sub_off = ((BYTES_BIG_ENDIAN)
25600 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
25601 : 0);
25602 rtx sign_reg
25603 = simplify_subreg (target_mode, target, extend_mode, sub_off);
25605 emit_insn (gen_rtx_SET (target,
25606 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
25609 return;
25612 /* Emit the addis instruction that will be part of a fused instruction
25613 sequence. */
25615 void
25616 emit_fusion_addis (rtx target, rtx addis_value)
25618 rtx fuse_ops[10];
25619 const char *addis_str = NULL;
25621 /* Emit the addis instruction. */
25622 fuse_ops[0] = target;
25623 if (satisfies_constraint_L (addis_value))
25625 fuse_ops[1] = addis_value;
25626 addis_str = "lis %0,%v1";
25629 else if (GET_CODE (addis_value) == PLUS)
25631 rtx op0 = XEXP (addis_value, 0);
25632 rtx op1 = XEXP (addis_value, 1);
25634 if (REG_P (op0) && CONST_INT_P (op1)
25635 && satisfies_constraint_L (op1))
25637 fuse_ops[1] = op0;
25638 fuse_ops[2] = op1;
25639 addis_str = "addis %0,%1,%v2";
25643 else if (GET_CODE (addis_value) == HIGH)
25645 rtx value = XEXP (addis_value, 0);
25646 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
25648 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
25649 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
25650 if (TARGET_ELF)
25651 addis_str = "addis %0,%2,%1@toc@ha";
25653 else if (TARGET_XCOFF)
25654 addis_str = "addis %0,%1@u(%2)";
25656 else
25657 gcc_unreachable ();
25660 else if (GET_CODE (value) == PLUS)
25662 rtx op0 = XEXP (value, 0);
25663 rtx op1 = XEXP (value, 1);
25665 if (GET_CODE (op0) == UNSPEC
25666 && XINT (op0, 1) == UNSPEC_TOCREL
25667 && CONST_INT_P (op1))
25669 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
25670 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
25671 fuse_ops[3] = op1;
25672 if (TARGET_ELF)
25673 addis_str = "addis %0,%2,%1+%3@toc@ha";
25675 else if (TARGET_XCOFF)
25676 addis_str = "addis %0,%1+%3@u(%2)";
25678 else
25679 gcc_unreachable ();
25683 else if (satisfies_constraint_L (value))
25685 fuse_ops[1] = value;
25686 addis_str = "lis %0,%v1";
25689 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
25691 fuse_ops[1] = value;
25692 addis_str = "lis %0,%1@ha";
25696 if (!addis_str)
25697 fatal_insn ("Could not generate addis value for fusion", addis_value);
25699 output_asm_insn (addis_str, fuse_ops);
25702 /* Emit a D-form load or store instruction that is the second instruction
25703 of a fusion sequence. */
25705 static void
25706 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
25708 rtx fuse_ops[10];
25709 char insn_template[80];
25711 fuse_ops[0] = load_reg;
25712 fuse_ops[1] = addis_reg;
25714 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
25716 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
25717 fuse_ops[2] = offset;
25718 output_asm_insn (insn_template, fuse_ops);
25721 else if (GET_CODE (offset) == UNSPEC
25722 && XINT (offset, 1) == UNSPEC_TOCREL)
25724 if (TARGET_ELF)
25725 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
25727 else if (TARGET_XCOFF)
25728 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25730 else
25731 gcc_unreachable ();
25733 fuse_ops[2] = XVECEXP (offset, 0, 0);
25734 output_asm_insn (insn_template, fuse_ops);
25737 else if (GET_CODE (offset) == PLUS
25738 && GET_CODE (XEXP (offset, 0)) == UNSPEC
25739 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
25740 && CONST_INT_P (XEXP (offset, 1)))
25742 rtx tocrel_unspec = XEXP (offset, 0);
25743 if (TARGET_ELF)
25744 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
25746 else if (TARGET_XCOFF)
25747 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
25749 else
25750 gcc_unreachable ();
25752 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
25753 fuse_ops[3] = XEXP (offset, 1);
25754 output_asm_insn (insn_template, fuse_ops);
25757 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
25759 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
25761 fuse_ops[2] = offset;
25762 output_asm_insn (insn_template, fuse_ops);
25765 else
25766 fatal_insn ("Unable to generate load/store offset for fusion", offset);
25768 return;
25771 /* Given an address, convert it into the addis and load offset parts. Addresses
25772 created during the peephole2 process look like:
25773 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
25774 (unspec [(...)] UNSPEC_TOCREL)) */
25776 static void
25777 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
25779 rtx hi, lo;
25781 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
25783 hi = XEXP (addr, 0);
25784 lo = XEXP (addr, 1);
25786 else
25787 gcc_unreachable ();
25789 *p_hi = hi;
25790 *p_lo = lo;
25793 /* Return a string to fuse an addis instruction with a GPR load into the same
25794 register that the addis instruction set. The address that is used
25795 is the logical address that was formed during peephole2:
25796 (lo_sum (high) (low-part))
25798 The code is complicated, so we call output_asm_insn directly, and just
25799 return "". */
25801 const char *
25802 emit_fusion_gpr_load (rtx target, rtx mem)
25804 rtx addis_value;
25805 rtx addr;
25806 rtx load_offset;
25807 const char *load_str = NULL;
25808 machine_mode mode;
25810 if (GET_CODE (mem) == ZERO_EXTEND)
25811 mem = XEXP (mem, 0);
25813 gcc_assert (REG_P (target) && MEM_P (mem));
25815 addr = XEXP (mem, 0);
25816 fusion_split_address (addr, &addis_value, &load_offset);
25818 /* Now emit the load instruction to the same register. */
25819 mode = GET_MODE (mem);
25820 switch (mode)
25822 case E_QImode:
25823 load_str = "lbz";
25824 break;
25826 case E_HImode:
25827 load_str = "lhz";
25828 break;
25830 case E_SImode:
25831 case E_SFmode:
25832 load_str = "lwz";
25833 break;
25835 case E_DImode:
25836 case E_DFmode:
25837 gcc_assert (TARGET_POWERPC64);
25838 load_str = "ld";
25839 break;
25841 default:
25842 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
25845 /* Emit the addis instruction. */
25846 emit_fusion_addis (target, addis_value);
25848 /* Emit the D-form load instruction. */
25849 emit_fusion_load (target, target, load_offset, load_str);
25851 return "";
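
/* For example (added commentary, not from the original sources), a
   TOC-relative SImode load on ELF produces the fused pair

       addis 10,2,x@toc@ha
       lwz   10,x@toc@l(10)

   where both the addis result and the loaded value use the same GPR, which
   is the pattern power8 can fuse.  */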
25855 #ifdef RS6000_GLIBC_ATOMIC_FENV
25856 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
25857 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
25858 #endif
25860 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
25862 static void
25863 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
25865 if (!TARGET_HARD_FLOAT)
25867 #ifdef RS6000_GLIBC_ATOMIC_FENV
25868 if (atomic_hold_decl == NULL_TREE)
25870 atomic_hold_decl
25871 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25872 get_identifier ("__atomic_feholdexcept"),
25873 build_function_type_list (void_type_node,
25874 double_ptr_type_node,
25875 NULL_TREE));
25876 TREE_PUBLIC (atomic_hold_decl) = 1;
25877 DECL_EXTERNAL (atomic_hold_decl) = 1;
25880 if (atomic_clear_decl == NULL_TREE)
25882 atomic_clear_decl
25883 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25884 get_identifier ("__atomic_feclearexcept"),
25885 build_function_type_list (void_type_node,
25886 NULL_TREE));
25887 TREE_PUBLIC (atomic_clear_decl) = 1;
25888 DECL_EXTERNAL (atomic_clear_decl) = 1;
25891 tree const_double = build_qualified_type (double_type_node,
25892 TYPE_QUAL_CONST);
25893 tree const_double_ptr = build_pointer_type (const_double);
25894 if (atomic_update_decl == NULL_TREE)
25896 atomic_update_decl
25897 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
25898 get_identifier ("__atomic_feupdateenv"),
25899 build_function_type_list (void_type_node,
25900 const_double_ptr,
25901 NULL_TREE));
25902 TREE_PUBLIC (atomic_update_decl) = 1;
25903 DECL_EXTERNAL (atomic_update_decl) = 1;
25906 tree fenv_var = create_tmp_var_raw (double_type_node);
25907 TREE_ADDRESSABLE (fenv_var) = 1;
25908 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
25910 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
25911 *clear = build_call_expr (atomic_clear_decl, 0);
25912 *update = build_call_expr (atomic_update_decl, 1,
25913 fold_convert (const_double_ptr, fenv_addr));
25914 #endif
25915 return;
25918 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
25919 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
25920 tree call_mffs = build_call_expr (mffs, 0);
25922 /* Generates the equivalent of feholdexcept (&fenv_var)
25924 *fenv_var = __builtin_mffs ();
25925 double fenv_hold;
25926 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
25927 __builtin_mtfsf (0xff, fenv_hold); */
25929 /* Mask to clear everything except for the rounding modes and non-IEEE
25930 arithmetic flag. */
25931 const unsigned HOST_WIDE_INT hold_exception_mask =
25932 HOST_WIDE_INT_C (0xffffffff00000007);
25934 tree fenv_var = create_tmp_var_raw (double_type_node);
25936 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
25938 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
25939 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
25940 build_int_cst (uint64_type_node,
25941 hold_exception_mask));
25943 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25944 fenv_llu_and);
25946 tree hold_mtfsf = build_call_expr (mtfsf, 2,
25947 build_int_cst (unsigned_type_node, 0xff),
25948 fenv_hold_mtfsf);
25950 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
25952 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
25954 double fenv_clear = __builtin_mffs ();
25955 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
25956 __builtin_mtfsf (0xff, fenv_clear); */
25958 /* Mask to clear the entire lower 32 bits (the FPSCR), including the
25959 exception status and enable bits. */
25960 const unsigned HOST_WIDE_INT clear_exception_mask =
25961 HOST_WIDE_INT_C (0xffffffff00000000);
25963 tree fenv_clear = create_tmp_var_raw (double_type_node);
25965 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
25967 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
25968 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
25969 fenv_clean_llu,
25970 build_int_cst (uint64_type_node,
25971 clear_exception_mask));
25973 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
25974 fenv_clear_llu_and);
25976 tree clear_mtfsf = build_call_expr (mtfsf, 2,
25977 build_int_cst (unsigned_type_node, 0xff),
25978 fenv_clear_mtfsf);
25980 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
25982 /* Generates the equivalent of feupdateenv (&fenv_var)
25984 double old_fenv = __builtin_mffs ();
25985 double fenv_update;
25986 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
25987 (*(uint64_t*)fenv_var & 0x1ff80fff);
25988 __builtin_mtfsf (0xff, fenv_update); */
25990 const unsigned HOST_WIDE_INT update_exception_mask =
25991 HOST_WIDE_INT_C (0xffffffff1fffff00);
25992 const unsigned HOST_WIDE_INT new_exception_mask =
25993 HOST_WIDE_INT_C (0x1ff80fff);
25995 tree old_fenv = create_tmp_var_raw (double_type_node);
25996 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
25998 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
25999 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
26000 build_int_cst (uint64_type_node,
26001 update_exception_mask));
26003 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
26004 build_int_cst (uint64_type_node,
26005 new_exception_mask));
26007 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
26008 old_llu_and, new_llu_and);
26010 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
26011 new_llu_mask);
26013 tree update_mtfsf = build_call_expr (mtfsf, 2,
26014 build_int_cst (unsigned_type_node, 0xff),
26015 fenv_update_mtfsf);
26017 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
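
/* Note on the masks (added commentary, not from the original sources): mffs
   returns the FPSCR in the low 32 bits of a double.  RN (the rounding mode)
   occupies the two least significant bits with the NI bit just above them,
   which is why hold_exception_mask ends in 0x7: it preserves the modes while
   clearing the exception enables so that traps cannot fire inside the atomic
   region.  The update masks then merge the enables and status saved in
   fenv_var back into the current environment before the final mtfsf.  */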
26020 void
26021 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
26023 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26025 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26026 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26028 /* The destination of the vmrgew instruction layout is:
26029 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26030 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26031 vmrgew instruction will be correct. */
26032 if (BYTES_BIG_ENDIAN)
26034 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
26035 GEN_INT (0)));
26036 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
26037 GEN_INT (3)));
26039 else
26041 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
26042 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
26045 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26046 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26048 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
26049 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
26051 if (BYTES_BIG_ENDIAN)
26052 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26053 else
26054 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
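
/* Worked example for big endian (added commentary, not from the original
   sources): xxpermdi with selector 0 forms {src1[0], src2[0]} and selector 3
   forms {src1[1], src2[1]}.  xvcvdpsp leaves the converted singles in words
   0 and 2 of each V4SF, and vmrgew then merges those even words, so

       dst = { src1[0], src2[0], src1[1], src2[1] }

   as single precision values.  */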
26057 void
26058 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
26060 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26062 rtx_tmp0 = gen_reg_rtx (V2DImode);
26063 rtx_tmp1 = gen_reg_rtx (V2DImode);
26065 /* The destination of the vmrgew instruction layout is:
26066 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
26067 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
26068 vmrgew instruction will be correct. */
26069 if (BYTES_BIG_ENDIAN)
26071 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
26072 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
26074 else
26076 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
26077 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
26080 rtx_tmp2 = gen_reg_rtx (V4SFmode);
26081 rtx_tmp3 = gen_reg_rtx (V4SFmode);
26083 if (signed_convert)
26085 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
26086 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
26088 else
26090 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
26091 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
26094 if (BYTES_BIG_ENDIAN)
26095 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
26096 else
26097 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
26100 void
26101 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
26102 rtx src2)
26104 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
26106 rtx_tmp0 = gen_reg_rtx (V2DFmode);
26107 rtx_tmp1 = gen_reg_rtx (V2DFmode);
26109 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
26110 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
26112 rtx_tmp2 = gen_reg_rtx (V4SImode);
26113 rtx_tmp3 = gen_reg_rtx (V4SImode);
26115 if (signed_convert)
26117 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
26118 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
26120 else
26122 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
26123 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
26126 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
26129 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
26131 static bool
26132 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
26133 optimization_type opt_type)
26135 switch (op)
26137 case rsqrt_optab:
26138 return (opt_type == OPTIMIZE_FOR_SPEED
26139 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
26141 default:
26142 return true;
26146 /* Implement TARGET_CONSTANT_ALIGNMENT. */
26148 static HOST_WIDE_INT
26149 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
26151 if (TREE_CODE (exp) == STRING_CST
26152 && (STRICT_ALIGNMENT || !optimize_size))
26153 return MAX (align, BITS_PER_WORD);
26154 return align;
26157 /* Implement TARGET_STARTING_FRAME_OFFSET. */
26159 static HOST_WIDE_INT
26160 rs6000_starting_frame_offset (void)
26162 if (FRAME_GROWS_DOWNWARD)
26163 return 0;
26164 return RS6000_STARTING_FRAME_OFFSET;
26168 /* Create an alias for a mangled name where we have changed the mangling (in
26169 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called
26170 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */
26172 #if TARGET_ELF && RS6000_WEAK
26173 static void
26174 rs6000_globalize_decl_name (FILE * stream, tree decl)
26176 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
26178 targetm.asm_out.globalize_label (stream, name);
26180 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
26182 tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
26183 const char *old_name;
26185 ieee128_mangling_gcc_8_1 = true;
26186 lang_hooks.set_decl_assembler_name (decl);
26187 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
26188 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
26189 ieee128_mangling_gcc_8_1 = false;
26191 if (strcmp (name, old_name) != 0)
26193 fprintf (stream, "\t.weak %s\n", old_name);
26194 fprintf (stream, "\t.set %s,%s\n", old_name, name);
26198 #endif
26201 /* On 64-bit Linux and FreeBSD systems, possibly switch the long double library
26202 function names from <foo>l to <foo>f128 if the default long double type is
26203 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
26204 include file switches the names on systems that support long double as IEEE
26205 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
26206 In the future, glibc will export names like __ieee128_sinf128 and we can
26207 switch to using those instead of using sinf128, which pollutes the user's
26208 namespace.
26210 This will switch the names for Fortran math functions as well (Fortran does
26211 not use math.h). However, Fortran needs other changes to the compiler and
26212 library before you can switch the real*16 type at compile time.
26214 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
26215 only do this if the default is that long double is IBM extended double, and
26216 the user asked for IEEE 128-bit. */
26218 static tree
26219 rs6000_mangle_decl_assembler_name (tree decl, tree id)
26221 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
26222 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl))
26224 size_t len = IDENTIFIER_LENGTH (id);
26225 const char *name = IDENTIFIER_POINTER (id);
26227 if (name[len - 1] == 'l')
26229 bool uses_ieee128_p = false;
26230 tree type = TREE_TYPE (decl);
26231 machine_mode ret_mode = TYPE_MODE (type);
26233 /* See if the function returns an IEEE 128-bit floating point type or
26234 complex type. */
26235 if (ret_mode == TFmode || ret_mode == TCmode)
26236 uses_ieee128_p = true;
26237 else
26239 function_args_iterator args_iter;
26240 tree arg;
26242 /* See if the function passes an IEEE 128-bit floating point type
26243 or complex type. */
26244 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
26246 machine_mode arg_mode = TYPE_MODE (arg);
26247 if (arg_mode == TFmode || arg_mode == TCmode)
26249 uses_ieee128_p = true;
26250 break;
26255 /* If we passed or returned an IEEE 128-bit floating point type,
26256 change the name. */
26257 if (uses_ieee128_p)
26259 char *name2 = (char *) alloca (len + 4);
26260 memcpy (name2, name, len - 1);
26261 strcpy (name2 + len - 1, "f128");
26262 id = get_identifier (name2);
26267 return id;
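
/* Example (added commentary, not from the original sources): with the default
   long double being IBM extended double and -mabi=ieeelongdouble given, a
   call to __builtin_sinl returns a TFmode value and its name ends in 'l', so
   the assembler name is rewritten from "sinl" to "sinf128".  */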
26270 /* Predict whether the given loop in gimple will be transformed in the RTL
26271 doloop_optimize pass. */
26273 static bool
26274 rs6000_predict_doloop_p (struct loop *loop)
26276 gcc_assert (loop);
26278 /* On rs6000, targetm.can_use_doloop_p is actually
26279 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
26280 if (loop->inner != NULL)
26282 if (dump_file && (dump_flags & TDF_DETAILS))
26283 fprintf (dump_file, "Predict doloop failure due to"
26284 " loop nesting.\n");
26285 return false;
26288 return true;
26291 struct gcc_target targetm = TARGET_INITIALIZER;
26293 #include "gt-rs6000.h"